LLVM 23.0.0git
VectorUtils.h
Go to the documentation of this file.
1//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines some vectorizer utilities.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_ANALYSIS_VECTORUTILS_H
14#define LLVM_ANALYSIS_VECTORUTILS_H
15
16#include "llvm/ADT/MapVector.h"
19#include "llvm/IR/Module.h"
24
25namespace llvm {
27class IntrinsicInst;
28
29/// The Vector Function Database.
30///
31/// Helper class used to find the vector functions associated to a
32/// scalar CallInst.
34 /// The Module of the CallInst CI.
35 const Module *M;
36 /// The CallInst instance being queried for scalar to vector mappings.
37 const CallInst &CI;
38 /// List of vector functions descriptors associated to the call
39 /// instruction.
40 const SmallVector<VFInfo, 8> ScalarToVectorMappings;
41
42 /// Retrieve the scalar-to-vector mappings associated to the rule of
43 /// a vector Function ABI.
44 static void getVFABIMappings(const CallInst &CI,
45 SmallVectorImpl<VFInfo> &Mappings) {
46 if (!CI.getCalledFunction())
47 return;
48
49 const StringRef ScalarName = CI.getCalledFunction()->getName();
50
51 SmallVector<std::string, 8> ListOfStrings;
52 // The check for the vector-function-abi-variant attribute is done when
53 // retrieving the vector variant names here.
54 VFABI::getVectorVariantNames(CI, ListOfStrings);
55 if (ListOfStrings.empty())
56 return;
57 for (const auto &MangledName : ListOfStrings) {
58 const std::optional<VFInfo> Shape =
59 VFABI::tryDemangleForVFABI(MangledName, CI.getFunctionType());
60 // A match is found via scalar and vector names, and also by
61 // ensuring that the variant described in the attribute has a
62 // corresponding definition or declaration of the vector
63 // function in the Module M.
64 if (Shape && (Shape->ScalarName == ScalarName)) {
65 assert(CI.getModule()->getFunction(Shape->VectorName) &&
66 "Vector function is missing.");
67 Mappings.push_back(*Shape);
68 }
69 }
70 }
71
72public:
73 /// Retrieve all the VFInfo instances associated to the CallInst CI.
76
77 // Get mappings from the Vector Function ABI variants.
78 getVFABIMappings(CI, Ret);
79
80 // Other non-VFABI variants should be retrieved here.
81
82 return Ret;
83 }
84
85 static bool hasMaskedVariant(const CallInst &CI,
86 std::optional<ElementCount> VF = std::nullopt) {
87 // Check whether we have at least one masked vector version of a scalar
88 // function. If no VF is specified then we check for any masked variant,
89 // otherwise we look for one that matches the supplied VF.
90 auto Mappings = VFDatabase::getMappings(CI);
91 for (VFInfo Info : Mappings)
92 if (!VF || Info.Shape.VF == *VF)
93 if (Info.isMasked())
94 return true;
95
96 return false;
97 }
98
99 /// Constructor, requires a CallInst instance.
101 : M(CI.getModule()), CI(CI),
102 ScalarToVectorMappings(VFDatabase::getMappings(CI)) {}
103
104 /// \defgroup VFDatabase query interface.
105 ///
106 /// @{
107 /// Retrieve the Function with VFShape \p Shape.
109 if (Shape == VFShape::getScalarShape(CI.getFunctionType()))
110 return CI.getCalledFunction();
111
112 for (const auto &Info : ScalarToVectorMappings)
113 if (Info.Shape == Shape)
114 return M->getFunction(Info.VectorName);
115
116 return nullptr;
117 }
118 /// @}
119};
120
121template <typename T> class ArrayRef;
122class DemandedBits;
123template <typename InstTy> class InterleaveGroup;
124class IRBuilderBase;
125class Loop;
126class TargetTransformInfo;
127class Value;
128
129namespace Intrinsic {
130typedef unsigned ID;
131}
132
133/// Identify if the intrinsic is trivially vectorizable.
134/// This method returns true if the intrinsic's argument types are all scalars
135/// for the scalar form of the intrinsic and all vectors (or scalars handled by
136/// isVectorIntrinsicWithScalarOpAtArg) for the vector form of the intrinsic.
137///
138/// Note: isTriviallyVectorizable implies isTriviallyScalarizable.
140
141/// Identify if the intrinsic is trivially scalarizable.
142/// This method returns true following the same predicates of
143/// isTriviallyVectorizable.
144
145/// Note: There are intrinsics where implementing vectorization for the
146/// intrinsic is redundant, but we want to implement scalarization of the
147/// vector. To prevent the requirement that an intrinsic also implements
148/// vectorization we provide this separate function.
150
151/// Identifies if the vector form of the intrinsic has a scalar operand.
152/// \p TTI is used to consider target specific intrinsics, if no target specific
153/// intrinsics will be considered then it is appropriate to pass in nullptr.
154LLVM_ABI bool
156 const TargetTransformInfo *TTI);
157
158/// Identifies if the vector form of the intrinsic is overloaded on the type of
159/// the operand at index \p OpdIdx, or on the return type if \p OpdIdx is -1.
160/// \p TTI is used to consider target specific intrinsics, if no target specific
161/// intrinsics will be considered then it is appropriate to pass in nullptr.
162LLVM_ABI bool
164 const TargetTransformInfo *TTI);
165
166/// Identifies if the vector form of the intrinsic that returns a struct is
/// overloaded at the struct element index \p RetIdx. \p TTI is used to
168/// consider target specific intrinsics, if no target specific intrinsics
169/// will be considered then it is appropriate to pass in nullptr.
171 Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI);
172
173/// Returns intrinsic ID for call.
174/// For the input call instruction it finds mapping intrinsic and returns
/// its intrinsic ID; if no mapping is found, it returns not_intrinsic.
178
179/// Returns the corresponding factor of llvm.vector.interleaveN intrinsics.
181
182/// Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
184
185/// Given a deinterleaveN intrinsic, return the (narrow) vector type of each
186/// factor.
188
189/// Given a vector and an element number, see if the scalar value is
190/// already around as a register, for example if it were inserted then extracted
191/// from the vector.
192LLVM_ABI Value *findScalarElement(Value *V, unsigned EltNo);
193
194/// If all non-negative \p Mask elements are the same value, return that value.
195/// If all elements are negative (undefined) or \p Mask contains different
196/// non-negative values, return -1.
198
199/// Get splat value if the input is a splat vector or return nullptr.
200/// The value may be extracted from a splat constants vector or from
201/// a sequence of instructions that broadcast a single value into a vector.
203
204/// Return true if each element of the vector value \p V is poisoned or equal to
205/// every other non-poisoned element. If an index element is specified, either
206/// every element of the vector is poisoned or the element at that index is not
207/// poisoned and equal to every other non-poisoned element.
208/// This may be more powerful than the related getSplatValue() because it is
209/// not limited by finding a scalar source value to a splatted vector.
210LLVM_ABI bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0);
211
212/// Transform a shuffle mask's output demanded element mask into demanded
213/// element masks for the 2 operands, returns false if the mask isn't valid.
214/// Both \p DemandedLHS and \p DemandedRHS are initialised to [SrcWidth].
215/// \p AllowUndefElts permits "-1" indices to be treated as undef.
216LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,
217 const APInt &DemandedElts,
218 APInt &DemandedLHS, APInt &DemandedRHS,
219 bool AllowUndefElts = false);
220
221/// Does this shuffle mask represent either one slide shuffle or a pair of
222/// two slide shuffles, combined with a select on some constant vector mask?
223/// A slide is a shuffle mask which shifts some set of elements up or down
224/// the vector, with all other elements being undefined. An identity shuffle
/// will be matched as a slide by 0. The output parameter provides the source
226/// (-1 means no source), and slide direction for each slide.
227LLVM_ABI bool isMaskedSlidePair(ArrayRef<int> Mask, int NumElts,
228 std::array<std::pair<int, int>, 2> &SrcInfo);
229
230/// Replace each shuffle mask index with the scaled sequential indices for an
231/// equivalent mask of narrowed elements. Mask elements that are less than 0
232/// (sentinel values) are repeated in the output mask.
233///
234/// Example with Scale = 4:
235/// <4 x i32> <3, 2, 0, -1> -->
236/// <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1>
237///
238/// This is the reverse process of widening shuffle mask elements, but it always
239/// succeeds because the indexes can always be multiplied (scaled up) to map to
240/// narrower vector elements.
241LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
242 SmallVectorImpl<int> &ScaledMask);
243
244/// Try to transform a shuffle mask by replacing elements with the scaled index
245/// for an equivalent mask of widened elements. If all mask elements that would
246/// map to a wider element of the new mask are the same negative number
247/// (sentinel value), that element of the new mask is the same value. If any
248/// element in a given slice is negative and some other element in that slice is
249/// not the same value, return false (partial matches with sentinel values are
250/// not allowed).
251///
252/// Example with Scale = 4:
253/// <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1> -->
254/// <4 x i32> <3, 2, 0, -1>
255///
256/// This is the reverse process of narrowing shuffle mask elements if it
257/// succeeds. This transform is not always possible because indexes may not
258/// divide evenly (scale down) to map to wider vector elements.
259LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
260 SmallVectorImpl<int> &ScaledMask);
261
262/// A variant of the previous method which is specialized for Scale=2, and
263/// treats -1 as undef and allows widening when a wider element is partially
264/// undef in the narrow form of the mask. This transformation discards
265/// information about which bytes in the original shuffle were undef.
267 SmallVectorImpl<int> &NewMask);
268
269/// Attempt to narrow/widen the \p Mask shuffle mask to the \p NumDstElts target
270/// width. Internally this will call narrowShuffleMaskElts/widenShuffleMaskElts.
271/// This will assert unless NumDstElts is a multiple of Mask.size (or
272/// vice-versa). Returns false on failure, and ScaledMask will be in an
273/// undefined state.
274LLVM_ABI bool scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef<int> Mask,
275 SmallVectorImpl<int> &ScaledMask);
276
277/// Repetitively apply `widenShuffleMaskElts()` for as long as it succeeds,
278/// to get the shuffle mask with widest possible elements.
280 SmallVectorImpl<int> &ScaledMask);
281
282/// Splits and processes shuffle mask depending on the number of input and
283/// output registers. The function does 2 main things: 1) splits the
284/// source/destination vectors into real registers; 2) do the mask analysis to
285/// identify which real registers are permuted. Then the function processes
286/// resulting registers mask using provided action items. If no input register
287/// is defined, \p NoInputAction action is used. If only 1 input register is
288/// used, \p SingleInputAction is used, otherwise \p ManyInputsAction is used to
289/// process > 2 input registers and masks.
290/// \param Mask Original shuffle mask.
291/// \param NumOfSrcRegs Number of source registers.
292/// \param NumOfDestRegs Number of destination registers.
293/// \param NumOfUsedRegs Number of actually used destination registers.
295 ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
296 unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
297 function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
298 function_ref<void(ArrayRef<int>, unsigned, unsigned, bool)>
299 ManyInputsAction);
300
301/// Compute the demanded elements mask of horizontal binary operations. A
302/// horizontal operation combines two adjacent elements in a vector operand.
303/// This function returns a mask for the elements that correspond to the first
304/// operand of this horizontal combination. For example, for two vectors
305/// [X1, X2, X3, X4] and [Y1, Y2, Y3, Y4], the resulting mask can include the
306/// elements X1, X3, Y1, and Y3. To get the other operands, simply shift the
307/// result of this function to the left by 1.
308///
309/// \param VectorBitWidth the total bit width of the vector
310/// \param DemandedElts the demanded elements mask for the operation
311/// \param DemandedLHS the demanded elements mask for the left operand
312/// \param DemandedRHS the demanded elements mask for the right operand
313LLVM_ABI void getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth,
314 const APInt &DemandedElts,
315 APInt &DemandedLHS,
316 APInt &DemandedRHS);
317
318/// Compute a map of integer instructions to their minimum legal type
319/// size.
320///
321/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
322/// type (e.g. i32) whenever arithmetic is performed on them.
323///
324/// For targets with native i8 or i16 operations, usually InstCombine can shrink
325/// the arithmetic type down again. However InstCombine refuses to create
326/// illegal types, so for targets without i8 or i16 registers, the lengthening
327/// and shrinking remains.
328///
329/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
330/// their scalar equivalents do not, so during vectorization it is important to
331/// remove these lengthens and truncates when deciding the profitability of
332/// vectorization.
333///
334/// This function analyzes the given range of instructions and determines the
335/// minimum type size each can be converted to. It attempts to remove or
336/// minimize type size changes across each def-use chain, so for example in the
337/// following code:
338///
339/// %1 = load i8, i8*
340/// %2 = add i8 %1, 2
341/// %3 = load i16, i16*
342/// %4 = zext i8 %2 to i32
343/// %5 = zext i16 %3 to i32
344/// %6 = add i32 %4, %5
345/// %7 = trunc i32 %6 to i16
346///
347/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
348/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
349///
350/// If the optional TargetTransformInfo is provided, this function tries harder
351/// to do less work by only looking at illegal types.
354 const TargetTransformInfo *TTI = nullptr);
355
356/// Compute the union of two access-group lists.
357///
358/// If the list contains just one access group, it is returned directly. If the
359/// list is empty, returns nullptr.
360LLVM_ABI MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);
361
362/// Compute the access-group list of access groups that @p Inst1 and @p Inst2
363/// are both in. If either instruction does not access memory at all, it is
364/// considered to be in every list.
365///
366/// If the list contains just one access group, it is returned directly. If the
367/// list is empty, returns nullptr.
369 const Instruction *Inst2);
370
371/// Add metadata from \p Inst to \p Metadata, if it can be preserved after
372/// vectorization. It can be preserved after vectorization if the kind is one of
373/// [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,
374/// MD_access_group, MD_mmra].
376 Instruction *Inst,
377 SmallVectorImpl<std::pair<unsigned, MDNode *>> &Metadata);
378
379/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
380/// MD_nontemporal, MD_access_group, MD_mmra].
381/// For K in Kinds, we get the MDNode for K from each of the
382/// elements of VL, compute their "intersection" (i.e., the most generic
383/// metadata value that covers all of the individual values), and set I's
384/// metadata for M equal to the intersection value.
385///
386/// This function always sets a (possibly null) value for each K in Kinds.
388
389/// Create a mask that filters the members of an interleave group where there
390/// are gaps.
391///
392/// For example, the mask for \p Group with interleave-factor 3
393/// and \p VF 4, that has only its first member present is:
394///
395/// <1,0,0,1,0,0,1,0,0,1,0,0>
396///
397/// Note: The result is a mask of 0's and 1's, as opposed to the other
398/// create[*]Mask() utilities which create a shuffle mask (mask that
399/// consists of indices).
401createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
402 const InterleaveGroup<Instruction> &Group);
403
404/// Create a mask with replicated elements.
405///
406/// This function creates a shuffle mask for replicating each of the \p VF
407/// elements in a vector \p ReplicationFactor times. It can be used to
408/// transform a mask of \p VF elements into a mask of
409/// \p VF * \p ReplicationFactor elements used by a predicated
410/// interleaved-group of loads/stores whose Interleaved-factor ==
411/// \p ReplicationFactor.
412///
413/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
414///
415/// <0,0,0,1,1,1,2,2,2,3,3,3>
417createReplicatedMask(unsigned ReplicationFactor, unsigned VF);
418
419/// Create an interleave shuffle mask.
420///
421/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
422/// vectorization factor \p VF into a single wide vector. The mask is of the
423/// form:
424///
425/// <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
426///
427/// For example, the mask for VF = 4 and NumVecs = 2 is:
428///
429/// <0, 4, 1, 5, 2, 6, 3, 7>.
431 unsigned NumVecs);
432
433/// Create a stride shuffle mask.
434///
435/// This function creates a shuffle mask whose elements begin at \p Start and
436/// are incremented by \p Stride. The mask can be used to deinterleave an
437/// interleaved vector into separate vectors of vectorization factor \p VF. The
438/// mask is of the form:
439///
440/// <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
441///
442/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
443///
444/// <0, 2, 4, 6>
446createStrideMask(unsigned Start, unsigned Stride, unsigned VF);
447
448/// Create a sequential shuffle mask.
449///
450/// This function creates shuffle mask whose elements are sequential and begin
451/// at \p Start. The mask contains \p NumInts integers and is padded with \p
452/// NumUndefs undef values. The mask is of the form:
453///
454/// <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
455///
/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
457///
458/// <0, 1, 2, 3, undef, undef, undef, undef>
460createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs);
461
462/// Given a shuffle mask for a binary shuffle, create the equivalent shuffle
463/// mask assuming both operands are identical. This assumes that the unary
464/// shuffle will use elements from operand 0 (operand 1 will be unused).
466 unsigned NumElts);
467
468/// Concatenate a list of vectors.
469///
470/// This function generates code that concatenate the vectors in \p Vecs into a
471/// single large vector. The number of vectors should be greater than one, and
472/// their element types should be the same. The number of elements in the
473/// vectors should also be the same; however, if the last vector has fewer
474/// elements, it will be padded with undefs.
476 ArrayRef<Value *> Vecs);
477
478/// Given a mask vector of i1, Return true if all of the elements of this
479/// predicate mask are known to be false or undef. That is, return true if all
480/// lanes can be assumed inactive.
482
483/// Given a mask vector of i1, Return true if all of the elements of this
484/// predicate mask are known to be true or undef. That is, return true if all
485/// lanes can be assumed active.
487
488/// Given a mask vector of i1, Return true if any of the elements of this
489/// predicate mask are known to be true or undef. That is, return true if at
490/// least one lane can be assumed active.
492
493/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
494/// for each lane which may be active.
496
497/// The group of interleaved loads/stores sharing the same stride and
498/// close to each other.
499///
500/// Each member in this group has an index starting from 0, and the largest
501/// index should be less than interleaved factor, which is equal to the absolute
502/// value of the access's stride.
503///
504/// E.g. An interleaved load group of factor 4:
505/// for (unsigned i = 0; i < 1024; i+=4) {
506/// a = A[i]; // Member of index 0
507/// b = A[i+1]; // Member of index 1
508/// d = A[i+3]; // Member of index 3
509/// ...
510/// }
511///
512/// An interleaved store group of factor 4:
513/// for (unsigned i = 0; i < 1024; i+=4) {
514/// ...
515/// A[i] = a; // Member of index 0
516/// A[i+1] = b; // Member of index 1
517/// A[i+2] = c; // Member of index 2
518/// A[i+3] = d; // Member of index 3
519/// }
520///
521/// Note: the interleaved load group could have gaps (missing members), but
522/// the interleaved store group doesn't allow gaps.
523template <typename InstTy> class InterleaveGroup {
524public:
  /// Construct an empty group with the given interleave \p Factor, access
  /// direction (\p Reverse) and \p Alignment; the insert position is not yet
  /// known, so it starts out null.
  InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
      : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
        InsertPos(nullptr) {}
528
  /// Construct a group seeded with the leader \p Instr. The interleave
  /// factor is the magnitude of \p Stride (must be > 1), and a negative
  /// stride marks the group as reversed.
  InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
      : Alignment(Alignment), InsertPos(Instr) {
    // The factor is the absolute value of the access stride.
    Factor = std::abs(Stride);
    assert(Factor > 1 && "Invalid interleave factor");

    Reverse = Stride < 0;
    // The seeding instruction becomes the member at index 0 (the leader).
    Members[0] = Instr;
  }
537
  /// True if the group was built from a negative-stride access pattern.
  bool isReverse() const { return Reverse; }
  /// The interleave factor (absolute value of the access stride).
  uint32_t getFactor() const { return Factor; }
  /// The common alignment: the minimum over all inserted members.
  Align getAlign() const { return Alignment; }
  /// Number of members currently present; can be less than the factor when
  /// the group has gaps.
  uint32_t getNumMembers() const { return Members.size(); }
542
  /// Try to insert a new member \p Instr with index \p Index and
  /// alignment \p NewAlign. The index is related to the leader and it could be
  /// negative if it is the new leader.
  ///
  /// \returns false if the instruction doesn't belong to the group.
  bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) {
    // Make sure the key fits in an int32_t.
    std::optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
    if (!MaybeKey)
      return false;
    int32_t Key = *MaybeKey;

    // Skip if the key is used for either the tombstone or empty special values.
    // NOTE(review): the guard condition that should precede this return (a
    // DenseMapInfo tombstone/empty-key check on Key) appears to have been
    // lost in extraction -- as written the return is unconditional; verify
    // against the upstream header.
      return false;

    // Skip if there is already a member with the same index.
    if (Members.contains(Key))
      return false;

    if (Key > LargestKey) {
      // The largest index is always less than the interleave factor.
      if (Index >= static_cast<int32_t>(Factor))
        return false;

      LargestKey = Key;
    } else if (Key < SmallestKey) {

      // Make sure the largest index fits in an int32_t.
      std::optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
      if (!MaybeLargestIndex)
        return false;

      // The largest index is always less than the interleave factor.
      if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
        return false;

      SmallestKey = Key;
    }

    // It's always safe to select the minimum alignment.
    Alignment = std::min(Alignment, NewAlign);
    Members[Key] = Instr;
    return true;
  }
589
590 /// Get the member with the given index \p Index
591 ///
592 /// \returns nullptr if contains no such member.
593 InstTy *getMember(uint32_t Index) const {
594 int32_t Key = SmallestKey + Index;
595 return Members.lookup(Key);
596 }
597
598 /// Get the index for the given member. Unlike the key in the member
599 /// map, the index starts from 0.
600 uint32_t getIndex(const InstTy *Instr) const {
601 for (auto I : Members) {
602 if (I.second == Instr)
603 return I.first - SmallestKey;
604 }
605
606 llvm_unreachable("InterleaveGroup contains no such member");
607 }
608
  /// Return the instruction at which the vectorized form of the group should
  /// be inserted (per the InsertPos comment: first load / last store in
  /// program order).
  InstTy *getInsertPos() const { return InsertPos; }
  /// Override the insertion point for the vectorized group.
  void setInsertPos(InstTy *Inst) { InsertPos = Inst; }
611
612 /// Add metadata (e.g. alias info) from the instructions in this group to \p
613 /// NewInst.
614 ///
615 /// FIXME: this function currently does not add noalias metadata a'la
/// addNewMetadata. To do that we need to compute the intersection of the
617 /// noalias info from all members.
618 void addMetadata(InstTy *NewInst) const;
619
620 /// Returns true if this Group requires a scalar iteration to handle gaps.
622 // If the last member of the Group exists, then a scalar epilog is not
623 // needed for this group.
624 if (getMember(getFactor() - 1))
625 return false;
626
627 // We have a group with gaps. It therefore can't be a reversed access,
628 // because such groups get invalidated (TODO).
629 assert(!isReverse() && "Group should have been invalidated");
630
631 // This is a group of loads, with gaps, and without a last-member
632 return true;
633 }
634
  /// Return true if this group is full, i.e. it has no gaps.
  /// A full group has exactly getFactor() members, one per interleave slot.
  bool isFull() const { return getNumMembers() == getFactor(); }
637
638private:
639 uint32_t Factor; // Interleave Factor.
640 bool Reverse;
641 Align Alignment;
643 int32_t SmallestKey = 0;
644 int32_t LargestKey = 0;
645
646 // To avoid breaking dependences, vectorized instructions of an interleave
647 // group should be inserted at either the first load or the last store in
648 // program order.
649 //
650 // E.g. %even = load i32 // Insert Position
651 // %add = add i32 %even // Use of %even
652 // %odd = load i32
653 //
654 // store i32 %even
655 // %odd = add i32 // Def of %odd
656 // store i32 %odd // Insert Position
657 InstTy *InsertPos;
658};
659
660/// Drive the analysis of interleaved memory accesses in the loop.
661///
662/// Use this class to analyze interleaved accesses only when we can vectorize
663/// a loop. Otherwise it's meaningless to do analysis as the vectorization
664/// on interleaved accesses is unsafe.
665///
666/// The analysis collects interleave groups and records the relationships
667/// between the member and the group in a map.
669public:
671 DominatorTree *DT, LoopInfo *LI,
672 const LoopAccessInfo *LAI)
673 : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
674
676
677 /// Analyze the interleaved accesses and collect them in interleave
678 /// groups. Substitute symbolic strides using \p Strides.
679 /// Consider also predicated loads/stores in the analysis if
680 /// \p EnableMaskedInterleavedGroup is true.
681 LLVM_ABI void analyzeInterleaving(bool EnableMaskedInterleavedGroup);
682
683 /// Invalidate groups, e.g., in case all blocks in loop will be predicated
684 /// contrary to original assumption. Although we currently prevent group
685 /// formation for predicated accesses, we may be able to relax this limitation
686 /// in the future once we handle more complicated blocks. Returns true if any
687 /// groups were invalidated.
689 if (InterleaveGroups.empty()) {
690 assert(
691 !RequiresScalarEpilogue &&
692 "RequiresScalarEpilog should not be set without interleave groups");
693 return false;
694 }
695
696 InterleaveGroupMap.clear();
697 for (auto *Ptr : InterleaveGroups)
698 delete Ptr;
699 InterleaveGroups.clear();
700 RequiresScalarEpilogue = false;
701 return true;
702 }
703
  /// Check if \p Instr belongs to any interleave group.
  /// \returns true iff \p Instr is a key in the member-to-group map.
  bool isInterleaved(Instruction *Instr) const {
    return InterleaveGroupMap.contains(Instr);
  }
708
709 /// Get the interleave group that \p Instr belongs to.
710 ///
711 /// \returns nullptr if doesn't have such group.
713 getInterleaveGroup(const Instruction *Instr) const {
714 return InterleaveGroupMap.lookup(Instr);
715 }
716
719 return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
720 }
721
722 /// Returns true if an interleaved group that may access memory
723 /// out-of-bounds requires a scalar epilogue iteration for correctness.
724 bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }
725
726 /// Invalidate groups that require a scalar epilogue (due to gaps). This can
727 /// happen when optimizing for size forbids a scalar epilogue, and the gap
728 /// cannot be filtered by masking the load/store.
730
731 /// Returns true if we have any interleave groups.
732 bool hasGroups() const { return !InterleaveGroups.empty(); }
733
734private:
735 /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
736 /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
737 /// The interleaved access analysis can also add new predicates (for example
738 /// by versioning strides of pointers).
740
741 Loop *TheLoop;
742 DominatorTree *DT;
743 LoopInfo *LI;
744 const LoopAccessInfo *LAI;
745
746 /// True if the loop may contain non-reversed interleaved groups with
747 /// out-of-bounds accesses. We ensure we don't speculatively access memory
748 /// out-of-bounds by executing at least one scalar epilogue iteration.
749 bool RequiresScalarEpilogue = false;
750
751 /// Holds the relationships between the members and the interleave group.
753
754 SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;
755
756 /// Holds dependences among the memory accesses in the loop. It maps a source
757 /// access to a set of dependent sink accesses.
759
  /// The descriptor for a strided memory access.
  struct StrideDescriptor {
    StrideDescriptor() = default;
    /// \param Stride the access's stride (negative for a reverse access).
    /// \param Scev the scalar evolution expression of this access.
    /// \param Size the size of the accessed memory object.
    /// \param Alignment the alignment of this access.
    StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
                     Align Alignment)
        : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {}

    // The access's stride. It is negative for a reverse access.
    int64_t Stride = 0;

    // The scalar expression of this access.
    const SCEV *Scev = nullptr;

    // The size of the memory object.
    uint64_t Size = 0;

    // The alignment of this access.
    Align Alignment;
  };
779
780 /// A type for holding instructions and their stride descriptors.
781 using StrideEntry = std::pair<Instruction *, StrideDescriptor>;
782
783 /// Create a new interleave group with the given instruction \p Instr,
784 /// stride \p Stride and alignment \p Align.
785 ///
786 /// \returns the newly created interleave group.
787 InterleaveGroup<Instruction> *
788 createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
789 auto [It, Inserted] = InterleaveGroupMap.try_emplace(Instr);
790 assert(Inserted && "Already in an interleaved access group");
791 It->second = new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
792 InterleaveGroups.insert(It->second);
793 return It->second;
794 }
795
  /// Release the group and remove all the relationships.
  void releaseGroup(InterleaveGroup<Instruction> *Group) {
    // Drop the group from the owning set, then erase member links and free it.
    InterleaveGroups.erase(Group);
    releaseGroupWithoutRemovingFromSet(Group);
  }
801
802 /// Do everything necessary to release the group, apart from removing it from
803 /// the InterleaveGroups set.
804 void releaseGroupWithoutRemovingFromSet(InterleaveGroup<Instruction> *Group) {
805 for (unsigned i = 0; i < Group->getFactor(); i++)
806 if (Instruction *Member = Group->getMember(i))
807 InterleaveGroupMap.erase(Member);
808
809 delete Group;
810 }
811
812 /// Collect all the accesses with a constant stride in program order.
813 void collectConstStrideAccesses(
814 MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
815 const DenseMap<Value *, const SCEV *> &Strides);
816
817 /// Returns true if \p Stride is allowed in an interleaved group.
818 LLVM_ABI static bool isStrided(int Stride);
819
  /// Returns true if \p BB is a predicated block.
  bool isPredicated(BasicBlock *BB) const {
    // Delegates to LoopAccessInfo's notion of block predication in TheLoop.
    return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
  }
824
  /// Returns true if LoopAccessInfo can be used for dependence queries.
  bool areDependencesValid() const {
    // Both the analysis object and its recorded dependences must exist.
    return LAI && LAI->getDepChecker().getDependences();
  }
829
830 /// Returns true if memory accesses \p A and \p B can be reordered, if
831 /// necessary, when constructing interleaved groups.
832 ///
833 /// \p A must precede \p B in program order. We return false if reordering is
834 /// not necessary or is prevented because \p A and \p B may be dependent.
835 bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
836 StrideEntry *B) const {
837 // Code motion for interleaved accesses can potentially hoist strided loads
838 // and sink strided stores. The code below checks the legality of the
839 // following two conditions:
840 //
841 // 1. Potentially moving a strided load (B) before any store (A) that
842 // precedes B, or
843 //
844 // 2. Potentially moving a strided store (A) after any load or store (B)
845 // that A precedes.
846 //
847 // It's legal to reorder A and B if we know there isn't a dependence from A
848 // to B. Note that this determination is conservative since some
849 // dependences could potentially be reordered safely.
850
851 // A is potentially the source of a dependence.
852 auto *Src = A->first;
853 auto SrcDes = A->second;
854
855 // B is potentially the sink of a dependence.
856 auto *Sink = B->first;
857 auto SinkDes = B->second;
858
859 // Code motion for interleaved accesses can't violate WAR dependences.
860 // Thus, reordering is legal if the source isn't a write.
861 if (!Src->mayWriteToMemory())
862 return true;
863
864 // At least one of the accesses must be strided.
865 if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
866 return true;
867
868 // If dependence information is not available from LoopAccessInfo,
869 // conservatively assume the instructions can't be reordered.
870 if (!areDependencesValid())
871 return false;
872
873 // If we know there is a dependence from source to sink, assume the
874 // instructions can't be reordered. Otherwise, reordering is legal.
875 return !Dependences.contains(Src) || !Dependences.lookup(Src).count(Sink);
876 }
877
878 /// Collect the dependences from LoopAccessInfo.
879 ///
880 /// We process the dependences once during the interleaved access analysis to
881 /// enable constant-time dependence queries.
882 void collectDependences() {
883 if (!areDependencesValid())
884 return;
885 const auto &DepChecker = LAI->getDepChecker();
886 auto *Deps = DepChecker.getDependences();
887 for (auto Dep : *Deps)
888 Dependences[Dep.getSource(DepChecker)].insert(
889 Dep.getDestination(DepChecker));
890 }
891};
892
893} // llvm namespace
894
895#endif
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
Module.h This file contains the declarations for the Module class.
#define I(x, y, z)
Definition MD5.cpp:57
This file implements a map that provides insertion order iteration.
This file defines the SmallVector class.
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
This class represents a function call, abstracting a target machine's calling convention.
This is an important base class in LLVM.
Definition Constant.h:43
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
const Function & getFunction() const
Definition Function.h:166
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
The group of interleaved loads/stores sharing the same stride and close to each other.
bool requiresScalarEpilogue() const
Returns true if this Group requires a scalar iteration to handle gaps.
uint32_t getFactor() const
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
bool isFull() const
Return true if this group is full, i.e. it has no gaps.
uint32_t getIndex(const InstTy *Instr) const
Get the index for the given member.
void setInsertPos(InstTy *Inst)
bool isReverse() const
InstTy * getInsertPos() const
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
Align getAlign() const
InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign)
Try to insert a new member Instr with index Index and alignment NewAlign.
uint32_t getNumMembers() const
InterleaveGroup< Instruction > * getInterleaveGroup(const Instruction *Instr) const
Get the interleave group that Instr belongs to.
bool requiresScalarEpilogue() const
Returns true if an interleaved group that may access memory out-of-bounds requires a scalar epilogue ...
bool hasGroups() const
Returns true if we have any interleave groups.
bool isInterleaved(Instruction *Instr) const
Check if Instr belongs to any interleave group.
bool invalidateGroups()
Invalidate groups, e.g., in case all blocks in loop will be predicated contrary to original assumptio...
iterator_range< SmallPtrSetIterator< llvm::InterleaveGroup< Instruction > * > > getInterleaveGroups()
LLVM_ABI void analyzeInterleaving(bool EnableMaskedInterleavedGroup)
Analyze the interleaved accesses and collect them in interleave groups.
LLVM_ABI void invalidateGroupsRequiringScalarEpilogue()
Invalidate groups that require a scalar epilogue (due to gaps).
InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L, DominatorTree *DT, LoopInfo *LI, const LoopAccessInfo *LAI)
A wrapper class for inspecting calls to intrinsic functions.
Drive the analysis of memory accesses in the loop.
static LLVM_ABI bool blockNeedsPredication(const BasicBlock *BB, const Loop *TheLoop, const DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1080
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
Root of the metadata hierarchy.
Definition Metadata.h:64
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
This class represents an analyzed expression in the program.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
VFDatabase(CallInst &CI)
Constructor, requires a CallInst instance.
static bool hasMaskedVariant(const CallInst &CI, std::optional< ElementCount > VF=std::nullopt)
Definition VectorUtils.h:85
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition VectorUtils.h:74
LLVM Value Representation.
Definition Value.h:75
Base class of all SIMD vector types.
An efficient, type-erasing, non-owning reference to a callable.
A range adaptor for a pair of iterators.
Function * getVectorizedFunction(const VFShape &Shape) const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
LLVM_ABI std::optional< VFInfo > tryDemangleForVFABI(StringRef MangledName, const FunctionType *FTy)
Function to construct a VFInfo out of a mangled names in the following format:
LLVM_ABI void getVectorVariantNames(const CallInst &CI, SmallVectorImpl< std::string > &VariantMappings)
Populates a set of strings representing the Vector Function ABI variants associated to the CallInst C...
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
LLVM_ABI llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI MDNode * intersectAccessGroups(const Instruction *Inst1, const Instruction *Inst2)
Compute the access-group list of access groups that Inst1 and Inst2 are both in.
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_ABI Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
LLVM_ABI void getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS)
Compute the demanded elements mask of horizontal binary operations.
LLVM_ABI llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.interleaveN intrinsics.
LLVM_ABI bool maskIsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI bool isTriviallyScalarizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially scalarizable.
LLVM_ABI bool isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic that returns a struct is overloaded at the struct elem...
TargetTransformInfo TTI
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
LLVM_ABI VectorType * getDeinterleavedVectorType(IntrinsicInst *DI)
Given a deinterleaveN intrinsic, return the (narrow) vector type of each factor.
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
LLVM_ABI MDNode * uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2)
Compute the union of two access-group lists.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI bool maskIsAllZeroOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
std::enable_if_t< std::is_signed_v< T >, std::optional< T > > checkedSub(T LHS, T RHS)
Subtract two signed integers LHS and RHS.
LLVM_ABI void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
std::enable_if_t< std::is_signed_v< T >, std::optional< T > > checkedAdd(T LHS, T RHS)
Add two signed integers LHS and RHS.
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, Return true if any of the elements of this predicate mask are known to be ...
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
LLVM_ABI bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
LLVM_ABI MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
LLVM_ABI bool scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Attempt to narrow/widen the Mask shuffle mask to the NumDstElts target width.
LLVM_ABI int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
An information struct used to provide DenseMap with the various necessary components for a given valu...
Holds the VFShape for a specific scalar to vector function mapping.
Contains the information about the kind of vectorization available.
static VFShape getScalarShape(const FunctionType *FTy)
Retrieve the VFShape that can be used to map a scalar function to itself, with VF = 1.