//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines some vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_VECTORUTILS_H
#define LLVM_ANALYSIS_VECTORUTILS_H

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/VFABIDemangler.h"
#include "llvm/Support/CheckedArithmetic.h"

namespace llvm {
class TargetLibraryInfo;

/// The Vector Function Database.
///
/// Helper class used to find the vector functions associated to a
/// scalar CallInst.
class VFDatabase {
  /// The Module of the CallInst CI.
  const Module *M;
  /// The CallInst instance being queried for scalar to vector mappings.
  const CallInst &CI;
  /// List of vector function descriptors associated to the call
  /// instruction.
  const SmallVector<VFInfo, 8> ScalarToVectorMappings;

  /// Retrieve the scalar-to-vector mappings associated with the rules of
  /// a Vector Function ABI.
  static void getVFABIMappings(const CallInst &CI,
                               SmallVectorImpl<VFInfo> &Mappings) {
    if (!CI.getCalledFunction())
      return;

    const StringRef ScalarName = CI.getCalledFunction()->getName();

    SmallVector<std::string, 8> ListOfStrings;
    // The check for the vector-function-abi-variant attribute is done when
    // retrieving the vector variant names here.
    VFABI::getVectorVariantNames(CI, ListOfStrings);
    if (ListOfStrings.empty())
      return;
    for (const auto &MangledName : ListOfStrings) {
      const std::optional<VFInfo> Shape =
          VFABI::tryDemangleForVFABI(MangledName, CI.getFunctionType());
      // A match is found via scalar and vector names, and also by
      // ensuring that the variant described in the attribute has a
      // corresponding definition or declaration of the vector
      // function in the Module M.
      if (Shape && (Shape->ScalarName == ScalarName)) {
        assert(CI.getModule()->getFunction(Shape->VectorName) &&
               "Vector function is missing.");
        Mappings.push_back(*Shape);
      }
    }
  }

public:
  /// Retrieve all the VFInfo instances associated to the CallInst CI.
  static SmallVector<VFInfo, 8> getMappings(const CallInst &CI) {
    SmallVector<VFInfo, 8> Ret;

    // Get mappings from the Vector Function ABI variants.
    getVFABIMappings(CI, Ret);

    // Other non-VFABI variants should be retrieved here.

    return Ret;
  }

  static bool hasMaskedVariant(const CallInst &CI,
                               std::optional<ElementCount> VF = std::nullopt) {
    // Check whether we have at least one masked vector version of a scalar
    // function. If no VF is specified then we check for any masked variant,
    // otherwise we look for one that matches the supplied VF.
    auto Mappings = VFDatabase::getMappings(CI);
    for (VFInfo Info : Mappings)
      if (!VF || Info.Shape.VF == *VF)
        if (Info.isMasked())
          return true;

    return false;
  }

  /// Constructor, requires a CallInst instance.
  VFDatabase(CallInst &CI)
      : M(CI.getModule()), CI(CI),
        ScalarToVectorMappings(VFDatabase::getMappings(CI)) {}

  /// \defgroup VFDatabase query interface.
  ///
  /// @{
  /// Retrieve the Function with VFShape \p Shape.
  Function *getVectorizedFunction(const VFShape &Shape) const {
    if (Shape == VFShape::getScalarShape(CI.getFunctionType()))
      return CI.getCalledFunction();

    for (const auto &Info : ScalarToVectorMappings)
      if (Info.Shape == Shape)
        return M->getFunction(Info.VectorName);

    return nullptr;
  }
  /// @}
};
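
// Illustrative usage sketch (not part of the original header): querying the
// database for a vector variant of a scalar call. `CI` is assumed to be a
// CallInst carrying "vector-function-abi-variant" attributes.
//
//   VFDatabase DB(CI);
//   for (const VFInfo &Info : VFDatabase::getMappings(CI))
//     if (Function *VecF = DB.getVectorizedFunction(Info.Shape))
//       ; // VecF is a vector variant of CI's callee with shape Info.Shape.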

template <typename T> class ArrayRef;
class DemandedBits;
template <typename InstTy> class InterleaveGroup;
class IRBuilderBase;
class Loop;
class ScalarEvolution;
class TargetTransformInfo;
class Type;
class Value;

namespace Intrinsic {
typedef unsigned ID;
}

/// A helper function for converting Scalar types to vector types. If
/// the incoming type is void or metadata, or if \p EC represents a scalar,
/// the scalar type itself is returned.
inline Type *ToVectorTy(Type *Scalar, ElementCount EC) {
  if (Scalar->isVoidTy() || Scalar->isMetadataTy() || EC.isScalar())
    return Scalar;
  return VectorType::get(Scalar, EC);
}

inline Type *ToVectorTy(Type *Scalar, unsigned VF) {
  return ToVectorTy(Scalar, ElementCount::getFixed(VF));
}
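
// Illustrative sketch (not part of the original header): with a fixed VF of 4,
// an i32 scalar widens to <4 x i32>, while void is returned unchanged. `Ctx`
// is an assumed LLVMContext.
//
//   Type *I32 = Type::getInt32Ty(Ctx);
//   Type *V4I32 = ToVectorTy(I32, 4);                  // <4 x i32>
//   Type *Void  = ToVectorTy(Type::getVoidTy(Ctx), 4); // unchanged: still void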

/// Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all scalars
/// for the scalar form of the intrinsic and all vectors (or scalars handled by
/// isVectorIntrinsicWithScalarOpAtArg) for the vector form of the intrinsic.
bool isTriviallyVectorizable(Intrinsic::ID ID);

/// Identifies if the vector form of the intrinsic has a scalar operand.
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
                                        unsigned ScalarOpdIdx);

/// Identifies if the vector form of the intrinsic is overloaded on the type of
/// the operand at index \p OpdIdx, or on the return type if \p OpdIdx is -1.
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx);

/// Returns the intrinsic ID for a call.
/// For the given call instruction it finds the mapping intrinsic and returns
/// its intrinsic ID; if no mapping is found, it returns not_intrinsic.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
                                          const TargetLibraryInfo *TLI);

/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it was inserted and then
/// extracted from the vector.
Value *findScalarElement(Value *V, unsigned EltNo);

/// If all non-negative \p Mask elements are the same value, return that value.
/// If all elements are negative (undefined) or \p Mask contains different
/// non-negative values, return -1.
int getSplatIndex(ArrayRef<int> Mask);

/// Get splat value if the input is a splat vector or return nullptr.
/// The value may be extracted from a splat constants vector or from
/// a sequence of instructions that broadcast a single value into a vector.
Value *getSplatValue(const Value *V);

/// Return true if each element of the vector value \p V is poisoned or equal to
/// every other non-poisoned element. If an index element is specified, either
/// every element of the vector is poisoned or the element at that index is not
/// poisoned and equal to every other non-poisoned element.
/// This may be more powerful than the related getSplatValue() because it is
/// not limited by finding a scalar source value to a splatted vector.
bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0);
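
// Illustrative sketch (not part of the original header), assuming `V` is a
// Value* produced by the usual insertelement + shufflevector splat idiom:
//
//   if (Value *Scalar = getSplatValue(V))
//     ; // Scalar is the single broadcast source value.
//   bool Splat = isSplatValue(V);     // true for the splat idiom
//   SmallVector<int, 4> Mask(4, 0);
//   int Lane = getSplatIndex(Mask);   // 0: every element selects lane 0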

/// Transform a shuffle mask's output demanded element mask into demanded
/// element masks for the 2 operands, returns false if the mask isn't valid.
/// Both \p DemandedLHS and \p DemandedRHS are initialised to [SrcWidth].
/// \p AllowUndefElts permits "-1" indices to be treated as undef.
bool getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,
                            const APInt &DemandedElts, APInt &DemandedLHS,
                            APInt &DemandedRHS, bool AllowUndefElts = false);

/// Replace each shuffle mask index with the scaled sequential indices for an
/// equivalent mask of narrowed elements. Mask elements that are less than 0
/// (sentinel values) are repeated in the output mask.
///
/// Example with Scale = 4:
///   <4 x i32> <3, 2, 0, -1> -->
///   <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1>
///
/// This is the reverse process of widening shuffle mask elements, but it always
/// succeeds because the indexes can always be multiplied (scaled up) to map to
/// narrower vector elements.
void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                           SmallVectorImpl<int> &ScaledMask);

/// Try to transform a shuffle mask by replacing elements with the scaled index
/// for an equivalent mask of widened elements. If all mask elements that would
/// map to a wider element of the new mask are the same negative number
/// (sentinel value), that element of the new mask is the same value. If any
/// element in a given slice is negative and some other element in that slice is
/// not the same value, return false (partial matches with sentinel values are
/// not allowed).
///
/// Example with Scale = 4:
///   <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1> -->
///   <4 x i32> <3, 2, 0, -1>
///
/// This is the reverse process of narrowing shuffle mask elements if it
/// succeeds. This transform is not always possible because indexes may not
/// divide evenly (scale down) to map to wider vector elements.
bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                          SmallVectorImpl<int> &ScaledMask);

/// Repetitively apply `widenShuffleMaskElts()` for as long as it succeeds,
/// to get the shuffle mask with widest possible elements.
void getShuffleMaskWithWidestElts(ArrayRef<int> Mask,
                                  SmallVectorImpl<int> &ScaledMask);
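
// Illustrative sketch (not part of the original header): scaling a mask down
// to byte granularity and back, mirroring the examples above.
//
//   SmallVector<int, 16> Narrow;
//   narrowShuffleMaskElts(4, {3, 2, 0, -1}, Narrow);
//   // Narrow == {12,13,14,15, 8,9,10,11, 0,1,2,3, -1,-1,-1,-1}
//   SmallVector<int, 4> Wide;
//   bool Ok = widenShuffleMaskElts(4, Narrow, Wide); // Ok; Wide == {3,2,0,-1}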

/// Splits and processes a shuffle mask depending on the number of input and
/// output registers. The function does two main things: 1) splits the
/// source/destination vectors into real registers; 2) does the mask analysis
/// to identify which real registers are permuted. Then the function processes
/// the resulting registers mask using the provided action items. If no input
/// register is defined, \p NoInputAction is used. If only 1 input register is
/// used, \p SingleInputAction is used, otherwise \p ManyInputsAction is used
/// to process 2 or more input registers and masks.
/// \param Mask Original shuffle mask.
/// \param NumOfSrcRegs Number of source registers.
/// \param NumOfDestRegs Number of destination registers.
/// \param NumOfUsedRegs Number of actually used destination registers.
void processShuffleMasks(
    ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
    unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction);

/// Compute a map of integer instructions to their minimum legal type
/// size.
///
/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
/// type (e.g. i32) whenever arithmetic is performed on them.
///
/// For targets with native i8 or i16 operations, usually InstCombine can shrink
/// the arithmetic type down again. However InstCombine refuses to create
/// illegal types, so for targets without i8 or i16 registers, the lengthening
/// and shrinking remains.
///
/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
/// their scalar equivalents do not, so during vectorization it is important to
/// remove these lengthens and truncates when deciding the profitability of
/// vectorization.
///
/// This function analyzes the given range of instructions and determines the
/// minimum type size each can be converted to. It attempts to remove or
/// minimize type size changes across each def-use chain, so for example in the
/// following code:
///
///   %1 = load i8, i8*
///   %2 = add i8 %1, 2
///   %3 = load i16, i16*
///   %4 = zext i8 %2 to i32
///   %5 = zext i16 %3 to i32
///   %6 = add i32 %4, %5
///   %7 = trunc i32 %6 to i16
///
/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
///
/// If the optional TargetTransformInfo is provided, this function tries harder
/// to do less work by only looking at illegal types.
MapVector<Instruction *, uint64_t>
computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                         const TargetTransformInfo *TTI = nullptr);
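
// Illustrative sketch (not part of the original header): `Blocks`, `DB` and
// `TTI` are assumed to come from the surrounding vectorizer (the loop's basic
// blocks, a DemandedBits analysis and the target's TTI).
//
//   MapVector<Instruction *, uint64_t> MinBWs =
//       computeMinimumValueSizes(Blocks, DB, &TTI);
//   for (const auto &[Inst, Bits] : MinBWs)
//     ; // Inst can be evaluated in an integer type of width Bits.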

/// Compute the union of two access-group lists.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);

/// Compute the access-group list of access groups that @p Inst1 and @p Inst2
/// are both in. If either instruction does not access memory at all, it is
/// considered to be in every list.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *intersectAccessGroups(const Instruction *Inst1,
                              const Instruction *Inst2);

/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
/// MD_nontemporal, MD_access_group, MD_mmra].
/// For each K in Kinds, we get the MDNode for K from each of the
/// elements of VL, compute their "intersection" (i.e., the most generic
/// metadata value that covers all of the individual values), and set I's
/// metadata for K equal to the intersection value.
///
/// This function always sets a (possibly null) value for each K in Kinds.
Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);

/// Create a mask that filters the members of an interleave group where there
/// are gaps.
///
/// For example, the mask for \p Group with interleave-factor 3
/// and \p VF 4, that has only its first member present is:
///
///   <1,0,0,1,0,0,1,0,0,1,0,0>
///
/// Note: The result is a mask of 0's and 1's, as opposed to the other
/// create[*]Mask() utilities which create a shuffle mask (mask that
/// consists of indices).
Constant *createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
                               const InterleaveGroup<Instruction> &Group);

/// Create a mask with replicated elements.
///
/// This function creates a shuffle mask for replicating each of the \p VF
/// elements in a vector \p ReplicationFactor times. It can be used to
/// transform a mask of \p VF elements into a mask of
/// \p VF * \p ReplicationFactor elements used by a predicated
/// interleaved-group of loads/stores whose Interleaved-factor ==
/// \p ReplicationFactor.
///
/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
///
///   <0,0,0,1,1,1,2,2,2,3,3,3>
llvm::SmallVector<int, 16> createReplicatedMask(unsigned ReplicationFactor,
                                                unsigned VF);

/// Create an interleave shuffle mask.
///
/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
/// vectorization factor \p VF into a single wide vector. The mask is of the
/// form:
///
///   <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
///
/// For example, the mask for VF = 4 and NumVecs = 2 is:
///
///   <0, 4, 1, 5, 2, 6, 3, 7>.
llvm::SmallVector<int, 16> createInterleaveMask(unsigned VF, unsigned NumVecs);
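
// Illustrative sketch (not part of the original header): interleaving two
// <4 x i32> values `A` and `B` into one <8 x i32> through an assumed
// IRBuilder `Builder`.
//
//   SmallVector<int, 16> Mask = createInterleaveMask(/*VF=*/4, /*NumVecs=*/2);
//   // Mask == {0, 4, 1, 5, 2, 6, 3, 7}
//   Value *Interleaved = Builder.CreateShuffleVector(A, B, Mask);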

/// Create a stride shuffle mask.
///
/// This function creates a shuffle mask whose elements begin at \p Start and
/// are incremented by \p Stride. The mask can be used to deinterleave an
/// interleaved vector into separate vectors of vectorization factor \p VF. The
/// mask is of the form:
///
///   <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
///
/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
///
///   <0, 2, 4, 6>
llvm::SmallVector<int, 16> createStrideMask(unsigned Start, unsigned Stride,
                                            unsigned VF);

/// Create a sequential shuffle mask.
///
/// This function creates a shuffle mask whose elements are sequential and
/// begin at \p Start. The mask contains \p NumInts integers and is padded with
/// \p NumUndefs undef values. The mask is of the form:
///
///   <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
///
/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
///
///   <0, 1, 2, 3, undef, undef, undef, undef>
llvm::SmallVector<int, 16>
createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs);

/// Given a shuffle mask for a binary shuffle, create the equivalent shuffle
/// mask assuming both operands are identical. This assumes that the unary
/// shuffle will use elements from operand 0 (operand 1 will be unused).
llvm::SmallVector<int, 16> createUnaryMask(ArrayRef<int> Mask,
                                           unsigned NumElts);

/// Concatenate a list of vectors.
///
/// This function generates code that concatenates the vectors in \p Vecs into
/// a single large vector. The number of vectors should be greater than one,
/// and their element types should be the same. The number of elements in the
/// vectors should also be the same; however, if the last vector has fewer
/// elements, it will be padded with undefs.
Value *concatenateVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vecs);

/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be false or undef. That is, return true if all
/// lanes can be assumed inactive.
bool maskIsAllZeroOrUndef(Value *Mask);

/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be true or undef. That is, return true if all
/// lanes can be assumed active.
bool maskIsAllOneOrUndef(Value *Mask);

/// Given a mask vector of i1, return true if any of the elements of this
/// predicate mask are known to be true or undef. That is, return true if at
/// least one lane can be assumed active.
bool maskContainsAllOneOrUndef(Value *Mask);

/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
/// with a bit set for each lane that may be active.
APInt possiblyDemandedEltsInMask(Value *Mask);
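
// Illustrative sketch (not part of the original header): `Mask` is assumed to
// be the i1 vector operand of a masked load/store intrinsic.
//
//   if (maskIsAllZeroOrUndef(Mask))
//     ; // No lane is active; the masked operation can be dropped.
//   else if (maskIsAllOneOrUndef(Mask))
//     ; // Every lane is active; it can become an unmasked operation.
//   APInt Demanded = possiblyDemandedEltsInMask(Mask); // one bit per lane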

/// The group of interleaved loads/stores sharing the same stride and
/// close to each other.
///
/// Each member in this group has an index starting from 0, and the largest
/// index should be less than the interleave factor, which is equal to the
/// absolute value of the access's stride.
///
/// E.g. An interleaved load group of factor 4:
///   for (unsigned i = 0; i < 1024; i+=4) {
///     a = A[i];     // Member of index 0
///     b = A[i+1];   // Member of index 1
///     d = A[i+3];   // Member of index 3
///     ...
///   }
///
/// An interleaved store group of factor 4:
///   for (unsigned i = 0; i < 1024; i+=4) {
///     ...
///     A[i]   = a;   // Member of index 0
///     A[i+1] = b;   // Member of index 1
///     A[i+2] = c;   // Member of index 2
///     A[i+3] = d;   // Member of index 3
///   }
///
/// Note: the interleaved load group could have gaps (missing members), but
/// the interleaved store group doesn't allow gaps.
template <typename InstTy> class InterleaveGroup {
public:
  InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
      : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
        InsertPos(nullptr) {}

  InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
      : Alignment(Alignment), InsertPos(Instr) {
    Factor = std::abs(Stride);
    assert(Factor > 1 && "Invalid interleave factor");

    Reverse = Stride < 0;
    Members[0] = Instr;
  }

  bool isReverse() const { return Reverse; }
  uint32_t getFactor() const { return Factor; }
  Align getAlign() const { return Alignment; }
  uint32_t getNumMembers() const { return Members.size(); }

  /// Try to insert a new member \p Instr with index \p Index and
  /// alignment \p NewAlign. The index is relative to the leader and it could
  /// be negative if it is the new leader.
  ///
  /// \returns false if the instruction doesn't belong to the group.
  bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) {
    // Make sure the key fits in an int32_t.
    std::optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
    if (!MaybeKey)
      return false;
    int32_t Key = *MaybeKey;

    // Skip if the key is used for either the tombstone or empty special values.
    if (DenseMapInfo<int32_t>::getTombstoneKey() == Key ||
        DenseMapInfo<int32_t>::getEmptyKey() == Key)
      return false;

    // Skip if there is already a member with the same index.
    if (Members.contains(Key))
      return false;

    if (Key > LargestKey) {
      // The largest index is always less than the interleave factor.
      if (Index >= static_cast<int32_t>(Factor))
        return false;

      LargestKey = Key;
    } else if (Key < SmallestKey) {

      // Make sure the largest index fits in an int32_t.
      std::optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
      if (!MaybeLargestIndex)
        return false;

      // The largest index is always less than the interleave factor.
      if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
        return false;

      SmallestKey = Key;
    }

    // It's always safe to select the minimum alignment.
    Alignment = std::min(Alignment, NewAlign);
    Members[Key] = Instr;
    return true;
  }

  /// Get the member with the given index \p Index
  ///
  /// \returns nullptr if the group contains no such member.
  InstTy *getMember(uint32_t Index) const {
    int32_t Key = SmallestKey + Index;
    return Members.lookup(Key);
  }

  /// Get the index for the given member. Unlike the key in the member
  /// map, the index starts from 0.
  uint32_t getIndex(const InstTy *Instr) const {
    for (auto I : Members) {
      if (I.second == Instr)
        return I.first - SmallestKey;
    }

    llvm_unreachable("InterleaveGroup contains no such member");
  }

  InstTy *getInsertPos() const { return InsertPos; }
  void setInsertPos(InstTy *Inst) { InsertPos = Inst; }

  /// Add metadata (e.g. alias info) from the instructions in this group to \p
  /// NewInst.
  ///
  /// FIXME: this function currently does not add noalias metadata a'la
  /// addNewMetadata. To do that we need to compute the intersection of the
  /// noalias info from all members.
  void addMetadata(InstTy *NewInst) const;

  /// Returns true if this Group requires a scalar iteration to handle gaps.
  bool requiresScalarEpilogue() const {
    // If the last member of the Group exists, then a scalar epilog is not
    // needed for this group.
    if (getMember(getFactor() - 1))
      return false;

    // We have a group with gaps. It therefore can't be a reversed access,
    // because such groups get invalidated (TODO).
    assert(!isReverse() && "Group should have been invalidated");

    // This is a group of loads, with gaps, and without a last member.
    return true;
  }

private:
  uint32_t Factor; // Interleave Factor.
  bool Reverse;
  Align Alignment;
  DenseMap<int32_t, InstTy *> Members;
  int32_t SmallestKey = 0;
  int32_t LargestKey = 0;

  // To avoid breaking dependences, vectorized instructions of an interleave
  // group should be inserted at either the first load or the last store in
  // program order.
  //
  // E.g. %even = load i32           // Insert Position
  //      %add = add i32 %even       // Use of %even
  //      %odd = load i32
  //
  //      store i32 %even
  //      %odd = add i32             // Def of %odd
  //      store i32 %odd             // Insert Position
  InstTy *InsertPos;
};
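
// Illustrative sketch (not part of the original header): building a group for
// two loads `LoadA` (the leader) and `LoadB` at offset +1, both with an
// assumed alignment of 4, for a stride-2 access pattern.
//
//   InterleaveGroup<Instruction> Group(LoadA, /*Stride=*/2, Align(4));
//   if (Group.insertMember(LoadB, /*Index=*/1, Align(4)))
//     assert(Group.getMember(1) == LoadB && Group.getFactor() == 2);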

/// Drive the analysis of interleaved memory accesses in the loop.
///
/// Use this class to analyze interleaved accesses only when we can vectorize
/// a loop. Otherwise it's meaningless to do analysis as the vectorization
/// on interleaved accesses is unsafe.
///
/// The analysis collects interleave groups and records the relationships
/// between the member and the group in a map.
class InterleavedAccessInfo {
public:
  InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
                        DominatorTree *DT, LoopInfo *LI,
                        const LoopAccessInfo *LAI)
      : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}

  ~InterleavedAccessInfo() { invalidateGroups(); }

  /// Analyze the interleaved accesses and collect them in interleave
  /// groups. Substitute symbolic strides using \p Strides.
  /// Consider also predicated loads/stores in the analysis if
  /// \p EnableMaskedInterleavedGroup is true.
  void analyzeInterleaving(bool EnableMaskedInterleavedGroup);

  /// Invalidate groups, e.g., in case all blocks in the loop will be
  /// predicated contrary to the original assumption. Although we currently
  /// prevent group formation for predicated accesses, we may be able to relax
  /// this limitation in the future once we handle more complicated blocks.
  /// Returns true if any groups were invalidated.
  bool invalidateGroups() {
    if (InterleaveGroups.empty()) {
      assert(
          !RequiresScalarEpilogue &&
          "RequiresScalarEpilog should not be set without interleave groups");
      return false;
    }

    InterleaveGroupMap.clear();
    for (auto *Ptr : InterleaveGroups)
      delete Ptr;
    InterleaveGroups.clear();
    RequiresScalarEpilogue = false;
    return true;
  }

  /// Check if \p Instr belongs to any interleave group.
  bool isInterleaved(Instruction *Instr) const {
    return InterleaveGroupMap.contains(Instr);
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if \p Instr does not belong to any group.
  InterleaveGroup<Instruction> *
  getInterleaveGroup(const Instruction *Instr) const {
    return InterleaveGroupMap.lookup(Instr);
  }

  iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
  getInterleaveGroups() {
    return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
  }

  /// Returns true if an interleaved group that may access memory
  /// out-of-bounds requires a scalar epilogue iteration for correctness.
  bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }

  /// Invalidate groups that require a scalar epilogue (due to gaps). This can
  /// happen when optimizing for size forbids a scalar epilogue, and the gap
  /// cannot be filtered by masking the load/store.
  void invalidateGroupsRequiringScalarEpilogue();

  /// Returns true if we have any interleave groups.
  bool hasGroups() const { return !InterleaveGroups.empty(); }

private:
  /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
  /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
  /// The interleaved access analysis can also add new predicates (for example
  /// by versioning strides of pointers).
  PredicatedScalarEvolution &PSE;

  Loop *TheLoop;
  DominatorTree *DT;
  LoopInfo *LI;
  const LoopAccessInfo *LAI;

  /// True if the loop may contain non-reversed interleaved groups with
  /// out-of-bounds accesses. We ensure we don't speculatively access memory
  /// out-of-bounds by executing at least one scalar epilogue iteration.
  bool RequiresScalarEpilogue = false;

  /// Holds the relationships between the members and the interleave group.
  DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;

  SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;

  /// Holds dependences among the memory accesses in the loop. It maps a source
  /// access to a set of dependent sink accesses.
  DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;

  /// The descriptor for a strided memory access.
  struct StrideDescriptor {
    StrideDescriptor() = default;
    StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
                     Align Alignment)
        : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {}

    // The access's stride. It is negative for a reverse access.
    int64_t Stride = 0;

    // The scalar expression of this access.
    const SCEV *Scev = nullptr;

    // The size of the memory object.
    uint64_t Size = 0;

    // The alignment of this access.
    Align Alignment;
  };

  /// A type for holding instructions and their stride descriptors.
  using StrideEntry = std::pair<Instruction *, StrideDescriptor>;

  /// Create a new interleave group with the given instruction \p Instr,
  /// stride \p Stride and alignment \p Align.
  ///
  /// \returns the newly created interleave group.
  InterleaveGroup<Instruction> *
  createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
    assert(!InterleaveGroupMap.count(Instr) &&
           "Already in an interleaved access group");
    InterleaveGroupMap[Instr] =
        new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
    InterleaveGroups.insert(InterleaveGroupMap[Instr]);
    return InterleaveGroupMap[Instr];
  }

  /// Release the group and remove all the relationships.
  void releaseGroup(InterleaveGroup<Instruction> *Group) {
    for (unsigned i = 0; i < Group->getFactor(); i++)
      if (Instruction *Member = Group->getMember(i))
        InterleaveGroupMap.erase(Member);

    InterleaveGroups.erase(Group);
    delete Group;
  }

  /// Collect all the accesses with a constant stride in program order.
  void collectConstStrideAccesses(
      MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
      const DenseMap<Value *, const SCEV *> &Strides);

  /// Returns true if \p Stride is allowed in an interleaved group.
  static bool isStrided(int Stride);

  /// Returns true if \p BB is a predicated block.
  bool isPredicated(BasicBlock *BB) const {
    return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
  }

  /// Returns true if LoopAccessInfo can be used for dependence queries.
  bool areDependencesValid() const {
    return LAI && LAI->getDepChecker().getDependences();
  }

  /// Returns true if memory accesses \p A and \p B can be reordered, if
  /// necessary, when constructing interleaved groups.
  ///
  /// \p A must precede \p B in program order. We return false if reordering is
  /// not necessary or is prevented because \p A and \p B may be dependent.
  bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
                                                 StrideEntry *B) const {
    // Code motion for interleaved accesses can potentially hoist strided loads
    // and sink strided stores. The code below checks the legality of the
    // following two conditions:
    //
    // 1. Potentially moving a strided load (B) before any store (A) that
    //    precedes B, or
    //
    // 2. Potentially moving a strided store (A) after any load or store (B)
    //    that A precedes.
    //
    // It's legal to reorder A and B if we know there isn't a dependence from A
    // to B. Note that this determination is conservative since some
    // dependences could potentially be reordered safely.

    // A is potentially the source of a dependence.
    auto *Src = A->first;
    auto SrcDes = A->second;

    // B is potentially the sink of a dependence.
    auto *Sink = B->first;
    auto SinkDes = B->second;

    // Code motion for interleaved accesses can't violate WAR dependences.
    // Thus, reordering is legal if the source isn't a write.
    if (!Src->mayWriteToMemory())
      return true;

    // At least one of the accesses must be strided.
    if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
      return true;

    // If dependence information is not available from LoopAccessInfo,
    // conservatively assume the instructions can't be reordered.
    if (!areDependencesValid())
      return false;

    // If we know there is a dependence from source to sink, assume the
    // instructions can't be reordered. Otherwise, reordering is legal.
    return !Dependences.contains(Src) || !Dependences.lookup(Src).count(Sink);
  }

  /// Collect the dependences from LoopAccessInfo.
  ///
  /// We process the dependences once during the interleaved access analysis to
  /// enable constant-time dependence queries.
  void collectDependences() {
    if (!areDependencesValid())
      return;
    const auto &DepChecker = LAI->getDepChecker();
    auto *Deps = DepChecker.getDependences();
    for (auto Dep : *Deps)
      Dependences[Dep.getSource(DepChecker)].insert(
          Dep.getDestination(DepChecker));
  }
};
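
// Illustrative sketch (not part of the original header): driving the analysis
// from a vectorizer. `PSE`, `L`, `DT`, `LI` and `LAI` are assumed to be the
// usual per-loop analyses.
//
//   InterleavedAccessInfo IAI(PSE, L, DT, LI, LAI);
//   IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/false);
//   for (auto *Group : IAI.getInterleaveGroups())
//     if (Group->requiresScalarEpilogue())
//       ; // Gaps in this group force a scalar epilogue iteration.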

} // llvm namespace

#endif