LLVM 17.0.0git
VectorUtils.h
1//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines some vectorizer utilities.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_ANALYSIS_VECTORUTILS_H
14#define LLVM_ANALYSIS_VECTORUTILS_H
15
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/Analysis/LoopAccessAnalysis.h"
19#include "llvm/Support/CheckedArithmetic.h"
20
21namespace llvm {
22class TargetLibraryInfo;
23
24/// Describes the type of Parameters
25enum class VFParamKind {
26 Vector, // No semantic information.
27 OMP_Linear, // declare simd linear(i)
28 OMP_LinearRef, // declare simd linear(ref(i))
29 OMP_LinearVal, // declare simd linear(val(i))
30 OMP_LinearUVal, // declare simd linear(uval(i))
31 OMP_LinearPos, // declare simd linear(i:c) uniform(c)
32 OMP_LinearValPos, // declare simd linear(val(i:c)) uniform(c)
33 OMP_LinearRefPos, // declare simd linear(ref(i:c)) uniform(c)
34 OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c)
35 OMP_Uniform, // declare simd uniform(i)
36 GlobalPredicate, // Global logical predicate that acts on all lanes
37 // of the input and output mask concurrently. For
38 // example, it is implied by the `M` token in the
39 // Vector Function ABI mangled name.
40 Unknown
41};
42
43/// Describes the type of Instruction Set Architecture
44enum class VFISAKind {
45 AdvancedSIMD, // AArch64 Advanced SIMD (NEON)
46 SVE, // AArch64 Scalable Vector Extension
47 SSE, // x86 SSE
48 AVX, // x86 AVX
49 AVX2, // x86 AVX2
50 AVX512, // x86 AVX512
51 LLVM, // LLVM internal ISA for functions that are not
52 // attached to an existing ABI via name mangling.
53 Unknown // Unknown ISA
54};
55
56/// Encapsulates information needed to describe a parameter.
57///
58/// The description of the parameter is not linked directly to
59/// OpenMP or any other vector function description. This structure
60/// is extendible to handle other paradigms that describe vector
61/// functions and their parameters.
62struct VFParameter {
63 unsigned ParamPos; // Parameter Position in Scalar Function.
64 VFParamKind ParamKind; // Kind of Parameter.
65 int LinearStepOrPos = 0; // Step or Position of the Parameter.
66 Align Alignment = Align(); // Optional alignment in bytes, defaulted to 1.
67
68 // Comparison operator.
69 bool operator==(const VFParameter &Other) const {
70 return std::tie(ParamPos, ParamKind, LinearStepOrPos, Alignment) ==
71 std::tie(Other.ParamPos, Other.ParamKind, Other.LinearStepOrPos,
72 Other.Alignment);
73 }
74};
75
76/// Contains the information about the kind of vectorization
77/// available.
78///
79/// This object is independent of the paradigm used to
80/// represent vector functions. In particular, it is not attached to
81/// any target-specific ABI.
82struct VFShape {
83 ElementCount VF; // Vectorization factor.
84 SmallVector<VFParameter, 8> Parameters; // List of parameter information.
85 // Comparison operator.
86 bool operator==(const VFShape &Other) const {
87 return std::tie(VF, Parameters) == std::tie(Other.VF, Other.Parameters);
88 }
89
90 /// Update the parameter in position P.ParamPos to P.
91 void updateParam(VFParameter P) {
92 assert(P.ParamPos < Parameters.size() && "Invalid parameter position.");
93 Parameters[P.ParamPos] = P;
94 assert(hasValidParameterList() && "Invalid parameter list");
95 }
96
97 // Retrieve the VFShape that can be used to map a (scalar) function to itself,
98 // with VF = 1.
99 static VFShape getScalarShape(const CallInst &CI) {
100 return VFShape::get(CI, ElementCount::getFixed(1),
101 /*HasGlobalPredicate*/ false);
102 }
103
104 // Retrieve the basic vectorization shape of the function, where all
105 // parameters are mapped to VFParamKind::Vector with \p EC
106 // lanes. Specifies whether the function has a Global Predicate
107 // argument via \p HasGlobalPred.
108 static VFShape get(const CallInst &CI, ElementCount EC, bool HasGlobalPred) {
109 SmallVector<VFParameter, 8> Parameters;
110 for (unsigned I = 0; I < CI.arg_size(); ++I)
111 Parameters.push_back(VFParameter({I, VFParamKind::Vector}));
112 if (HasGlobalPred)
113 Parameters.push_back(
114 VFParameter({CI.arg_size(), VFParamKind::GlobalPredicate}));
115
116 return {EC, Parameters};
117 }
118 /// Validation check on the Parameters in the VFShape.
119 bool hasValidParameterList() const;
120};
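// A minimal usage sketch: given an existing CallInst `CI`, build a 4-lane,
// unmasked shape for it. The helper name and the chosen factor are
// illustrative assumptions, not part of this header.
static VFShape exampleWidenedShape(const CallInst &CI) {
  VFShape Shape = VFShape::get(CI, ElementCount::getFixed(4),
                               /*HasGlobalPred=*/false);
  assert(Shape.hasValidParameterList() && "all-vector mapping should be valid");
  return Shape;
}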
121
122/// Holds the VFShape for a specific scalar to vector function mapping.
123struct VFInfo {
124 VFShape Shape; /// Classification of the vector function.
125 std::string ScalarName; /// Scalar Function Name.
126 std::string VectorName; /// Vector Function Name associated to this VFInfo.
127 VFISAKind ISA; /// Instruction Set Architecture.
128
129 /// Returns the index of the first parameter with the kind 'GlobalPredicate',
130 /// if any exist.
131 std::optional<unsigned> getParamIndexForOptionalMask() const {
132 unsigned ParamCount = Shape.Parameters.size();
133 for (unsigned i = 0; i < ParamCount; ++i)
134 if (Shape.Parameters[i].ParamKind == VFParamKind::GlobalPredicate)
135 return i;
136
137 return std::nullopt;
138 }
139
140 /// Returns true if at least one of the operands to the vectorized function
141 /// has the kind 'GlobalPredicate'.
142 bool isMasked() const { return getParamIndexForOptionalMask().has_value(); }
143};
144
145namespace VFABI {
146/// LLVM Internal VFABI ISA token for vector functions.
147static constexpr char const *_LLVM_ = "_LLVM_";
148/// Prefix for internal name redirection for vector function that
149/// tells the compiler to scalarize the call using the scalar name
150/// of the function. For example, a mangled name like
151/// `_ZGV_LLVM_N2v_foo(_LLVM_Scalarize_foo)` would tell the
152/// vectorizer to vectorize the scalar call `foo`, and to scalarize
153/// it once vectorization is done.
154static constexpr char const *_LLVM_Scalarize_ = "_LLVM_Scalarize_";
155
156/// Function to construct a VFInfo out of a mangled name in the
157/// following format:
158///
159/// <VFABI_name>{(<redirection>)}
160///
161/// where <VFABI_name> is the name of the vector function, mangled according
162/// to the rules described in the Vector Function ABI of the target vector
163/// extension (or <isa> from now on). The <VFABI_name> is in the following
164/// format:
165///
166/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]
167///
168/// This method supports demangling rules for the following <isa>:
169///
170/// * AArch64: https://developer.arm.com/docs/101129/latest
171///
172/// * x86 (libmvec): https://sourceware.org/glibc/wiki/libmvec and
173/// https://sourceware.org/glibc/wiki/libmvec?action=AttachFile&do=view&target=VectorABI.txt
174///
175/// \param MangledName -> input string in the format
176/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)].
177/// \param M -> Module used to retrieve information about the vector
178/// function that are not possible to retrieve from the mangled
179/// name. At the moment, this parameter is needed only to retrieve the
180/// Vectorization Factor of scalable vector functions from their
181/// respective IR declarations.
182std::optional<VFInfo> tryDemangleForVFABI(StringRef MangledName,
183 const Module &M);
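// A minimal demangling sketch: parse a fixed-width, 2-lane mapping of a scalar
// `foo` to a vector `vector_foo`. The mangled string, both names, and the
// Module `M` are illustrative assumptions following the format above.
static void exampleDemangle(const Module &M) {
  if (std::optional<VFInfo> Info =
          VFABI::tryDemangleForVFABI("_ZGV_LLVM_N2v_foo(vector_foo)", M)) {
    assert(Info->ScalarName == "foo" && Info->VectorName == "vector_foo");
    assert(Info->Shape.VF == ElementCount::getFixed(2));
  }
}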
184
185/// This routine mangles the given VectorName according to the LangRef
186/// specification for vector-function-abi-variant attribute and is specific to
187/// the TLI mappings. It is the responsibility of the caller to make sure that
188/// this is only used if all parameters in the vector function are vector type.
189/// This returned string holds scalar-to-vector mapping:
190/// _ZGV<isa><mask><vlen><vparams>_<scalarname>(<vectorname>)
191///
192/// where:
193///
194/// <isa> = "_LLVM_"
195/// <mask> = "N". Note: TLI does not support masked interfaces.
196/// <vlen> = Number of concurrent lanes, stored in the `VectorizationFactor`
197/// field of the `VecDesc` struct. If the number of lanes is scalable
198/// then 'x' is printed instead.
199/// <vparams> = "v", as many as are the numArgs.
200/// <scalarname> = the name of the scalar function.
201/// <vectorname> = the name of the vector function.
202std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName,
203 unsigned numArgs, ElementCount VF);
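// A minimal mangling sketch: for a hypothetical one-argument scalar "sinf"
// mapped to a vector "my_simd_sinf" at 4 fixed lanes, the rules above yield
// the string "_ZGV_LLVM_N4v_sinf(my_simd_sinf)".
static std::string exampleTLIMangling() {
  return VFABI::mangleTLIVectorName("my_simd_sinf", "sinf", /*numArgs=*/1,
                                    ElementCount::getFixed(4));
}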
204
205/// Retrieve the `VFParamKind` from a string token.
206VFParamKind getVFParamKindFromString(const StringRef Token);
207
208// Name of the attribute where the variant mappings are stored.
209static constexpr char const *MappingsAttrName = "vector-function-abi-variant";
210
211/// Populates a set of strings representing the Vector Function ABI variants
212/// associated to the CallInst CI. If the CI does not contain the
213/// vector-function-abi-variant attribute, we return without populating
214/// VariantMappings, i.e. callers of getVectorVariantNames need not check for
215/// the presence of the attribute (see InjectTLIMappings).
216void getVectorVariantNames(const CallInst &CI,
217 SmallVectorImpl<std::string> &VariantMappings);
218} // end namespace VFABI
219
220/// The Vector Function Database.
221///
222/// Helper class used to find the vector functions associated to a
223/// scalar CallInst.
224class VFDatabase {
225 /// The Module of the CallInst CI.
226 const Module *M;
227 /// The CallInst instance being queried for scalar to vector mappings.
228 const CallInst &CI;
229 /// List of vector function descriptors associated to the call
230 /// instruction.
231 const SmallVector<VFInfo, 8> ScalarToVectorMappings;
232
233 /// Retrieve the scalar-to-vector mappings associated to the rule of
234 /// a vector Function ABI.
235 static void getVFABIMappings(const CallInst &CI,
236 SmallVectorImpl<VFInfo> &Mappings) {
237 if (!CI.getCalledFunction())
238 return;
239
240 const StringRef ScalarName = CI.getCalledFunction()->getName();
241
242 SmallVector<std::string, 8> ListOfStrings;
243 // The check for the vector-function-abi-variant attribute is done when
244 // retrieving the vector variant names here.
245 VFABI::getVectorVariantNames(CI, ListOfStrings);
246 if (ListOfStrings.empty())
247 return;
248 for (const auto &MangledName : ListOfStrings) {
249 const std::optional<VFInfo> Shape =
250 VFABI::tryDemangleForVFABI(MangledName, *(CI.getModule()));
251 // A match is found via scalar and vector names, and also by
252 // ensuring that the variant described in the attribute has a
253 // corresponding definition or declaration of the vector
254 // function in the Module M.
255 if (Shape && (Shape->ScalarName == ScalarName)) {
256 assert(CI.getModule()->getFunction(Shape->VectorName) &&
257 "Vector function is missing.");
258 Mappings.push_back(*Shape);
259 }
260 }
261 }
262
263public:
264 /// Retrieve all the VFInfo instances associated to the CallInst CI.
265 static SmallVector<VFInfo, 8> getMappings(const CallInst &CI) {
266 SmallVector<VFInfo, 8> Ret;
267
268 // Get mappings from the Vector Function ABI variants.
269 getVFABIMappings(CI, Ret);
270
271 // Other non-VFABI variants should be retrieved here.
272
273 return Ret;
274 }
275
276 /// Constructor, requires a CallInst instance.
277 VFDatabase(CallInst &CI)
278 : M(CI.getModule()), CI(CI),
279 ScalarToVectorMappings(VFDatabase::getMappings(CI)) {}
280 /// \defgroup VFDatabase query interface.
281 ///
282 /// @{
283 /// Retrieve the Function with VFShape \p Shape.
284 Function *getVectorizedFunction(const VFShape &Shape) const {
285 if (Shape == VFShape::getScalarShape(CI))
286 return CI.getCalledFunction();
287
288 for (const auto &Info : ScalarToVectorMappings)
289 if (Info.Shape == Shape)
290 return M->getFunction(Info.VectorName);
291
292 return nullptr;
293 }
294 /// @}
295};
296
297template <typename T> class ArrayRef;
298class DemandedBits;
299class GetElementPtrInst;
300template <typename InstTy> class InterleaveGroup;
301class IRBuilderBase;
302class Loop;
303class ScalarEvolution;
304class TargetTransformInfo;
305class Type;
306class Value;
307
308namespace Intrinsic {
309typedef unsigned ID;
310}
311
312/// A helper function for converting Scalar types to vector types. If
313/// the incoming type is void, we return void. If the EC represents a
314/// scalar, we return the scalar type.
315inline Type *ToVectorTy(Type *Scalar, ElementCount EC) {
316 if (Scalar->isVoidTy() || Scalar->isMetadataTy() || EC.isScalar())
317 return Scalar;
318 return VectorType::get(Scalar, EC);
319}
320
321inline Type *ToVectorTy(Type *Scalar, unsigned VF) {
322 return ToVectorTy(Scalar, ElementCount::getFixed(VF));
323}
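// A minimal widening sketch: an i32 widened by a scalable count of 4 becomes
// <vscale x 4 x i32>, while a scalar element count returns the input type
// unchanged. The LLVMContext `Ctx` is an assumed input.
static Type *exampleWiden(LLVMContext &Ctx) {
  Type *I32 = Type::getInt32Ty(Ctx);
  assert(ToVectorTy(I32, 1) == I32 && "VF = 1 keeps the scalar type");
  return ToVectorTy(I32, ElementCount::getScalable(4));
}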
324
325/// Identify if the intrinsic is trivially vectorizable.
326/// This method returns true if the intrinsic's argument types are all scalars
327/// for the scalar form of the intrinsic and all vectors (or scalars handled by
328/// isVectorIntrinsicWithScalarOpAtArg) for the vector form of the intrinsic.
329bool isTriviallyVectorizable(Intrinsic::ID ID);
330
331/// Identifies if the vector form of the intrinsic has a scalar operand.
332bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
333 unsigned ScalarOpdIdx);
334
335/// Identifies if the vector form of the intrinsic has an operand that has
336/// an overloaded type.
337bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, unsigned OpdIdx);
338
339/// Returns intrinsic ID for call.
340/// For the input call instruction it finds the mapping intrinsic and returns
341/// its intrinsic ID; if it does not find one, it returns Intrinsic::not_intrinsic.
342Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
343 const TargetLibraryInfo *TLI);
344
345/// Find the operand of the GEP that should be checked for consecutive
346/// stores. This ignores trailing indices that have no effect on the final
347/// pointer.
348unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);
349
350/// If the argument is a GEP, then returns the operand identified by
351/// getGEPInductionOperand. However, if there is some other non-loop-invariant
352/// operand, it returns that instead.
353Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);
354
355/// If a value has only one user that is a CastInst, return it.
356Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);
357
358/// Get the stride of a pointer access in a loop. Looks for symbolic
359/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
360Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);
361
362/// Given a vector and an element number, see if the scalar value is
363/// already around as a register, for example if it were inserted then extracted
364/// from the vector.
365Value *findScalarElement(Value *V, unsigned EltNo);
366
367/// If all non-negative \p Mask elements are the same value, return that value.
368/// If all elements are negative (undefined) or \p Mask contains different
369/// non-negative values, return -1.
370int getSplatIndex(ArrayRef<int> Mask);
371
372/// Get splat value if the input is a splat vector or return nullptr.
373/// The value may be extracted from a splat constants vector or from
374/// a sequence of instructions that broadcast a single value into a vector.
375Value *getSplatValue(const Value *V);
376
377/// Return true if each element of the vector value \p V is poisoned or equal to
378/// every other non-poisoned element. If an index element is specified, either
379/// every element of the vector is poisoned or the element at that index is not
380/// poisoned and equal to every other non-poisoned element.
381/// This may be more powerful than the related getSplatValue() because it is
382/// not limited by finding a scalar source value to a splatted vector.
383bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0);
384
385/// Transform a shuffle mask's output demanded element mask into demanded
386/// element masks for the 2 operands, returns false if the mask isn't valid.
387/// Both \p DemandedLHS and \p DemandedRHS are initialised to [SrcWidth].
388/// \p AllowUndefElts permits "-1" indices to be treated as undef.
389bool getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,
390 const APInt &DemandedElts, APInt &DemandedLHS,
391 APInt &DemandedRHS, bool AllowUndefElts = false);
392
393/// Replace each shuffle mask index with the scaled sequential indices for an
394/// equivalent mask of narrowed elements. Mask elements that are less than 0
395/// (sentinel values) are repeated in the output mask.
396///
397/// Example with Scale = 4:
398/// <4 x i32> <3, 2, 0, -1> -->
399/// <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1>
400///
401/// This is the reverse process of widening shuffle mask elements, but it always
402/// succeeds because the indexes can always be multiplied (scaled up) to map to
403/// narrower vector elements.
404void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
405 SmallVectorImpl<int> &ScaledMask);
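// A minimal sketch mirroring the Scale = 4 example above: the i32-element mask
// <3, 2, 0, -1> is rewritten as a 16-entry i8-element mask.
static void exampleNarrowMask() {
  SmallVector<int, 16> Scaled;
  narrowShuffleMaskElts(/*Scale=*/4, {3, 2, 0, -1}, Scaled);
  assert(Scaled.size() == 16 && Scaled[0] == 12 && Scaled[15] == -1);
}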
406
407/// Try to transform a shuffle mask by replacing elements with the scaled index
408/// for an equivalent mask of widened elements. If all mask elements that would
409/// map to a wider element of the new mask are the same negative number
410/// (sentinel value), that element of the new mask is the same value. If any
411/// element in a given slice is negative and some other element in that slice is
412/// not the same value, return false (partial matches with sentinel values are
413/// not allowed).
414///
415/// Example with Scale = 4:
416/// <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1> -->
417/// <4 x i32> <3, 2, 0, -1>
418///
419/// This is the reverse process of narrowing shuffle mask elements if it
420/// succeeds. This transform is not always possible because indexes may not
421/// divide evenly (scale down) to map to wider vector elements.
422bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
423 SmallVectorImpl<int> &ScaledMask);
424
425/// Repetitively apply `widenShuffleMaskElts()` for as long as it succeeds,
426/// to get the shuffle mask with widest possible elements.
427void getShuffleMaskWithWidestElts(ArrayRef<int> Mask,
428 SmallVectorImpl<int> &ScaledMask);
429
430/// Splits and processes shuffle mask depending on the number of input and
431/// output registers. The function does 2 main things: 1) splits the
432 source/destination vectors into real registers; 2) does the mask analysis to
433/// identify which real registers are permuted. Then the function processes
434/// resulting registers mask using provided action items. If no input register
435/// is defined, \p NoInputAction action is used. If only 1 input register is
436/// used, \p SingleInputAction is used, otherwise \p ManyInputsAction is used to
437/// process > 2 input registers and masks.
438/// \param Mask Original shuffle mask.
439/// \param NumOfSrcRegs Number of source registers.
440/// \param NumOfDestRegs Number of destination registers.
441/// \param NumOfUsedRegs Number of actually used destination registers.
442void processShuffleMasks(
443 ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
444 unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
445 function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
446 function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction);
447
448/// Compute a map of integer instructions to their minimum legal type
449/// size.
450///
451/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
452/// type (e.g. i32) whenever arithmetic is performed on them.
453///
454/// For targets with native i8 or i16 operations, usually InstCombine can shrink
455/// the arithmetic type down again. However InstCombine refuses to create
456/// illegal types, so for targets without i8 or i16 registers, the lengthening
457/// and shrinking remains.
458///
459/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
460/// their scalar equivalents do not, so during vectorization it is important to
461/// remove these lengthens and truncates when deciding the profitability of
462/// vectorization.
463///
464/// This function analyzes the given range of instructions and determines the
465/// minimum type size each can be converted to. It attempts to remove or
466/// minimize type size changes across each def-use chain, so for example in the
467/// following code:
468///
469/// %1 = load i8, i8*
470/// %2 = add i8 %1, 2
471/// %3 = load i16, i16*
472/// %4 = zext i8 %2 to i32
473/// %5 = zext i16 %3 to i32
474/// %6 = add i32 %4, %5
475/// %7 = trunc i32 %6 to i16
476///
477/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
478/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
479///
480/// If the optional TargetTransformInfo is provided, this function tries harder
481/// to do less work by only looking at illegal types.
482MapVector<Instruction*, uint64_t>
483computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
484 DemandedBits &DB,
485 const TargetTransformInfo *TTI=nullptr);
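// A minimal query sketch: compute the minimum widths for every block of a loop
// `L`, given a DemandedBits analysis `DB` and an optional TTI. All three
// objects (and the availability of the full Loop definition) are assumed inputs.
static MapVector<Instruction *, uint64_t>
exampleMinSizes(Loop *L, DemandedBits &DB, const TargetTransformInfo *TTI) {
  return computeMinimumValueSizes(L->getBlocks(), DB, TTI);
}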
486
487/// Compute the union of two access-group lists.
488///
489/// If the list contains just one access group, it is returned directly. If the
490/// list is empty, returns nullptr.
491MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);
492
493/// Compute the access-group list of access groups that @p Inst1 and @p Inst2
494/// are both in. If either instruction does not access memory at all, it is
495/// considered to be in every list.
496///
497/// If the list contains just one access group, it is returned directly. If the
498/// list is empty, returns nullptr.
499MDNode *intersectAccessGroups(const Instruction *Inst1,
500 const Instruction *Inst2);
501
502/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
503/// MD_nontemporal, MD_access_group].
504/// For K in Kinds, we get the MDNode for K from each of the
505/// elements of VL, compute their "intersection" (i.e., the most generic
506/// metadata value that covers all of the individual values), and set I's
507/// metadata for K equal to the intersection value.
508///
509/// This function always sets a (possibly null) value for each K in Kinds.
510Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);
511
512/// Create a mask that filters the members of an interleave group where there
513/// are gaps.
514///
515/// For example, the mask for \p Group with interleave-factor 3
516/// and \p VF 4, that has only its first member present is:
517///
518/// <1,0,0,1,0,0,1,0,0,1,0,0>
519///
520/// Note: The result is a mask of 0's and 1's, as opposed to the other
521/// create[*]Mask() utilities which create a shuffle mask (mask that
522/// consists of indices).
523Constant *createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
524 const InterleaveGroup<Instruction> &Group);
525
526/// Create a mask with replicated elements.
527///
528/// This function creates a shuffle mask for replicating each of the \p VF
529/// elements in a vector \p ReplicationFactor times. It can be used to
530/// transform a mask of \p VF elements into a mask of
531/// \p VF * \p ReplicationFactor elements used by a predicated
532/// interleaved-group of loads/stores whose Interleaved-factor ==
533/// \p ReplicationFactor.
534///
535/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
536///
537/// <0,0,0,1,1,1,2,2,2,3,3,3>
538llvm::SmallVector<int, 16> createReplicatedMask(unsigned ReplicationFactor,
539 unsigned VF);
540
541/// Create an interleave shuffle mask.
542///
543/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
544/// vectorization factor \p VF into a single wide vector. The mask is of the
545/// form:
546///
547/// <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
548///
549/// For example, the mask for VF = 4 and NumVecs = 2 is:
550///
551/// <0, 4, 1, 5, 2, 6, 3, 7>.
552llvm::SmallVector<int, 16> createInterleaveMask(unsigned VF, unsigned NumVecs);
553
554/// Create a stride shuffle mask.
555///
556/// This function creates a shuffle mask whose elements begin at \p Start and
557/// are incremented by \p Stride. The mask can be used to deinterleave an
558/// interleaved vector into separate vectors of vectorization factor \p VF. The
559/// mask is of the form:
560///
561/// <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
562///
563/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
564///
565/// <0, 2, 4, 6>
566llvm::SmallVector<int, 16> createStrideMask(unsigned Start, unsigned Stride,
567 unsigned VF);
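// A minimal de-interleaving sketch: two VF = 4 vectors interleaved with
// createInterleaveMask(4, 2) can be split apart again with stride-2 masks
// starting at lanes 0 and 1.
static void exampleDeinterleaveMasks() {
  SmallVector<int, 16> Even = createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4); // <0,2,4,6>
  SmallVector<int, 16> Odd = createStrideMask(/*Start=*/1, /*Stride=*/2, /*VF=*/4);  // <1,3,5,7>
  (void)Even;
  (void)Odd;
}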
568
569/// Create a sequential shuffle mask.
570///
571/// This function creates shuffle mask whose elements are sequential and begin
572/// at \p Start. The mask contains \p NumInts integers and is padded with \p
573/// NumUndefs undef values. The mask is of the form:
574///
575/// <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
576///
577/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
578///
579/// <0, 1, 2, 3, undef, undef, undef, undef>
580llvm::SmallVector<int, 16>
581createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs);
582
583/// Given a shuffle mask for a binary shuffle, create the equivalent shuffle
584/// mask assuming both operands are identical. This assumes that the unary
585/// shuffle will use elements from operand 0 (operand 1 will be unused).
586llvm::SmallVector<int, 16> createUnaryMask(ArrayRef<int> Mask,
587 unsigned NumElts);
588
589/// Concatenate a list of vectors.
590///
591/// This function generates code that concatenates the vectors in \p Vecs into a
592/// single large vector. The number of vectors should be greater than one, and
593/// their element types should be the same. The number of elements in the
594/// vectors should also be the same; however, if the last vector has fewer
595/// elements, it will be padded with undefs.
596Value *concatenateVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vecs);
597
598/// Given a mask vector of i1, return true if all of the elements of this
599/// predicate mask are known to be false or undef. That is, return true if all
600/// lanes can be assumed inactive.
601bool maskIsAllZeroOrUndef(Value *Mask);
602
603/// Given a mask vector of i1, return true if all of the elements of this
604/// predicate mask are known to be true or undef. That is, return true if all
605/// lanes can be assumed active.
606bool maskIsAllOneOrUndef(Value *Mask);
607
608/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
609/// for each lane which may be active.
610APInt possiblyDemandedEltsInMask(Value *Mask);
611
612/// The group of interleaved loads/stores sharing the same stride and
613/// close to each other.
614///
615/// Each member in this group has an index starting from 0, and the largest
616/// index should be less than the interleave factor, which is equal to the absolute
617/// value of the access's stride.
618///
619/// E.g. An interleaved load group of factor 4:
620/// for (unsigned i = 0; i < 1024; i+=4) {
621/// a = A[i]; // Member of index 0
622/// b = A[i+1]; // Member of index 1
623/// d = A[i+3]; // Member of index 3
624/// ...
625/// }
626///
627/// An interleaved store group of factor 4:
628/// for (unsigned i = 0; i < 1024; i+=4) {
629/// ...
630/// A[i] = a; // Member of index 0
631/// A[i+1] = b; // Member of index 1
632/// A[i+2] = c; // Member of index 2
633/// A[i+3] = d; // Member of index 3
634/// }
635///
636/// Note: the interleaved load group could have gaps (missing members), but
637/// the interleaved store group doesn't allow gaps.
638template <typename InstTy> class InterleaveGroup {
639public:
640 InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
641 : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
642 InsertPos(nullptr) {}
643
644 InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
645 : Alignment(Alignment), InsertPos(Instr) {
646 Factor = std::abs(Stride);
647 assert(Factor > 1 && "Invalid interleave factor");
648
649 Reverse = Stride < 0;
650 Members[0] = Instr;
651 }
652
653 bool isReverse() const { return Reverse; }
654 uint32_t getFactor() const { return Factor; }
655 Align getAlign() const { return Alignment; }
656 uint32_t getNumMembers() const { return Members.size(); }
657
658 /// Try to insert a new member \p Instr with index \p Index and
659 /// alignment \p NewAlign. The index is related to the leader and it could be
660 /// negative if it is the new leader.
661 ///
662 /// \returns false if the instruction doesn't belong to the group.
663 bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) {
664 // Make sure the key fits in an int32_t.
665 std::optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
666 if (!MaybeKey)
667 return false;
668 int32_t Key = *MaybeKey;
669
670 // Skip if the key is used for either the tombstone or empty special values.
671 if (DenseMapInfo<int32_t>::getTombstoneKey() == Key ||
672 DenseMapInfo<int32_t>::getEmptyKey() == Key)
673 return false;
674
675 // Skip if there is already a member with the same index.
676 if (Members.find(Key) != Members.end())
677 return false;
678
679 if (Key > LargestKey) {
680 // The largest index is always less than the interleave factor.
681 if (Index >= static_cast<int32_t>(Factor))
682 return false;
683
684 LargestKey = Key;
685 } else if (Key < SmallestKey) {
686
687 // Make sure the largest index fits in an int32_t.
688 std::optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
689 if (!MaybeLargestIndex)
690 return false;
691
692 // The largest index is always less than the interleave factor.
693 if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
694 return false;
695
696 SmallestKey = Key;
697 }
698
699 // It's always safe to select the minimum alignment.
700 Alignment = std::min(Alignment, NewAlign);
701 Members[Key] = Instr;
702 return true;
703 }
704
705 /// Get the member with the given index \p Index
706 ///
707 /// \returns nullptr if the group contains no such member.
708 InstTy *getMember(uint32_t Index) const {
709 int32_t Key = SmallestKey + Index;
710 return Members.lookup(Key);
711 }
712
713 /// Get the index for the given member. Unlike the key in the member
714 /// map, the index starts from 0.
715 uint32_t getIndex(const InstTy *Instr) const {
716 for (auto I : Members) {
717 if (I.second == Instr)
718 return I.first - SmallestKey;
719 }
720
721 llvm_unreachable("InterleaveGroup contains no such member");
722 }
723
724 InstTy *getInsertPos() const { return InsertPos; }
725 void setInsertPos(InstTy *Inst) { InsertPos = Inst; }
726
727 /// Add metadata (e.g. alias info) from the instructions in this group to \p
728 /// NewInst.
729 ///
730 /// FIXME: this function currently does not add noalias metadata a la
731 /// addNewMetadata. To do that we need to compute the intersection of the
732 /// noalias info from all members.
733 void addMetadata(InstTy *NewInst) const;
734
735 /// Returns true if this Group requires a scalar iteration to handle gaps.
736 bool requiresScalarEpilogue() const {
737 // If the last member of the Group exists, then a scalar epilog is not
738 // needed for this group.
739 if (getMember(getFactor() - 1))
740 return false;
741
742 // We have a group with gaps. It therefore can't be a reversed access,
743 // because such groups get invalidated (TODO).
744 assert(!isReverse() && "Group should have been invalidated");
745
746 // This is a group of loads, with gaps, and without a last-member
747 return true;
748 }
749
750private:
751 uint32_t Factor; // Interleave Factor.
752 bool Reverse;
753 Align Alignment;
754 DenseMap<int32_t, InstTy *> Members;
755 int32_t SmallestKey = 0;
756 int32_t LargestKey = 0;
757
758 // To avoid breaking dependences, vectorized instructions of an interleave
759 // group should be inserted at either the first load or the last store in
760 // program order.
761 //
762 // E.g. %even = load i32 // Insert Position
763 // %add = add i32 %even // Use of %even
764 // %odd = load i32
765 //
766 // store i32 %even
767 // %odd = add i32 // Def of %odd
768 // store i32 %odd // Insert Position
769 InstTy *InsertPos;
770};
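// A minimal construction sketch: build a factor-3 load group around a leader
// `Lead` with one trailing member. `Lead` and `Next` are assumed to be
// adjacent strided loads found by the caller.
static void exampleBuildGroup(Instruction *Lead, Instruction *Next) {
  InterleaveGroup<Instruction> Group(Lead, /*Stride=*/3, Align(4));
  if (Group.insertMember(Next, /*Index=*/1, Align(4)))
    assert(Group.getMember(1) == Next && Group.getFactor() == 3);
}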
771
772/// Drive the analysis of interleaved memory accesses in the loop.
773///
774/// Use this class to analyze interleaved accesses only when we can vectorize
775/// a loop. Otherwise it's meaningless to do analysis as the vectorization
776/// on interleaved accesses is unsafe.
777///
778/// The analysis collects interleave groups and records the relationships
779/// between the member and the group in a map.
780class InterleavedAccessInfo {
781public:
782 InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
783 DominatorTree *DT, LoopInfo *LI,
784 const LoopAccessInfo *LAI)
785 : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
786
787 ~InterleavedAccessInfo() { invalidateGroups(); }
788
789 /// Analyze the interleaved accesses and collect them in interleave
790 /// groups. Substitute symbolic strides using \p Strides.
791 /// Consider also predicated loads/stores in the analysis if
792 /// \p EnableMaskedInterleavedGroup is true.
793 void analyzeInterleaving(bool EnableMaskedInterleavedGroup);
794
795 /// Invalidate groups, e.g., in case all blocks in loop will be predicated
796 /// contrary to original assumption. Although we currently prevent group
797 /// formation for predicated accesses, we may be able to relax this limitation
798 /// in the future once we handle more complicated blocks. Returns true if any
799 /// groups were invalidated.
800 bool invalidateGroups() {
801 if (InterleaveGroups.empty()) {
802 assert(
803 !RequiresScalarEpilogue &&
804 "RequiresScalarEpilog should not be set without interleave groups");
805 return false;
806 }
807
808 InterleaveGroupMap.clear();
809 for (auto *Ptr : InterleaveGroups)
810 delete Ptr;
811 InterleaveGroups.clear();
812 RequiresScalarEpilogue = false;
813 return true;
814 }
815
816 /// Check if \p Instr belongs to any interleave group.
817 bool isInterleaved(Instruction *Instr) const {
818 return InterleaveGroupMap.contains(Instr);
819 }
820
821 /// Get the interleave group that \p Instr belongs to.
822 ///
823 /// \returns nullptr if it doesn't have such a group.
824 InterleaveGroup<Instruction> *
825 getInterleaveGroup(const Instruction *Instr) const {
826 return InterleaveGroupMap.lookup(Instr);
827 }
828
829 iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
830 getInterleaveGroups() {
831 return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
832 }
833
834 /// Returns true if an interleaved group that may access memory
835 /// out-of-bounds requires a scalar epilogue iteration for correctness.
836 bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }
837
838 /// Invalidate groups that require a scalar epilogue (due to gaps). This can
839 /// happen when optimizing for size forbids a scalar epilogue, and the gap
840 /// cannot be filtered by masking the load/store.
841 void invalidateGroupsRequiringScalarEpilogue();
842
843 /// Returns true if we have any interleave groups.
844 bool hasGroups() const { return !InterleaveGroups.empty(); }
845
846private:
847 /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
848 /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
849 /// The interleaved access analysis can also add new predicates (for example
850 /// by versioning strides of pointers).
851 PredicatedScalarEvolution &PSE;
852
853 Loop *TheLoop;
854 DominatorTree *DT;
855 LoopInfo *LI;
856 const LoopAccessInfo *LAI;
857
858 /// True if the loop may contain non-reversed interleaved groups with
859 /// out-of-bounds accesses. We ensure we don't speculatively access memory
860 /// out-of-bounds by executing at least one scalar epilogue iteration.
861 bool RequiresScalarEpilogue = false;
862
863 /// Holds the relationships between the members and the interleave group.
864 DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;
865
866 SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;
867
868 /// Holds dependences among the memory accesses in the loop. It maps a source
869 /// access to a set of dependent sink accesses.
870 DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;
871
872 /// The descriptor for a strided memory access.
873 struct StrideDescriptor {
874 StrideDescriptor() = default;
875 StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
876 Align Alignment)
877 : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {}
878
879 // The access's stride. It is negative for a reverse access.
880 int64_t Stride = 0;
881
882 // The scalar expression of this access.
883 const SCEV *Scev = nullptr;
884
885 // The size of the memory object.
886 uint64_t Size = 0;
887
888 // The alignment of this access.
889 Align Alignment;
890 };
891
892 /// A type for holding instructions and their stride descriptors.
893 using StrideEntry = std::pair<Instruction *, StrideDescriptor>;
894
895 /// Create a new interleave group with the given instruction \p Instr,
896 /// stride \p Stride and alignment \p Align.
897 ///
898 /// \returns the newly created interleave group.
899 InterleaveGroup<Instruction> *
900 createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
901 assert(!InterleaveGroupMap.count(Instr) &&
902 "Already in an interleaved access group");
903 InterleaveGroupMap[Instr] =
904 new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
905 InterleaveGroups.insert(InterleaveGroupMap[Instr]);
906 return InterleaveGroupMap[Instr];
907 }
908
909 /// Release the group and remove all the relationships.
910 void releaseGroup(InterleaveGroup<Instruction> *Group) {
911 for (unsigned i = 0; i < Group->getFactor(); i++)
912 if (Instruction *Member = Group->getMember(i))
913 InterleaveGroupMap.erase(Member);
914
915 InterleaveGroups.erase(Group);
916 delete Group;
917 }
918
919 /// Collect all the accesses with a constant stride in program order.
920 void collectConstStrideAccesses(
921 MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
922 const ValueToValueMap &Strides);
923
924 /// Returns true if \p Stride is allowed in an interleaved group.
925 static bool isStrided(int Stride);
926
927 /// Returns true if \p BB is a predicated block.
928 bool isPredicated(BasicBlock *BB) const {
929 return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
930 }
931
932 /// Returns true if LoopAccessInfo can be used for dependence queries.
933 bool areDependencesValid() const {
934 return LAI && LAI->getDepChecker().getDependences();
935 }
936
937 /// Returns true if memory accesses \p A and \p B can be reordered, if
938 /// necessary, when constructing interleaved groups.
939 ///
940 /// \p A must precede \p B in program order. We return false if reordering is
941 /// not necessary or is prevented because \p A and \p B may be dependent.
942 bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
943 StrideEntry *B) const {
944 // Code motion for interleaved accesses can potentially hoist strided loads
945 // and sink strided stores. The code below checks the legality of the
946 // following two conditions:
947 //
948 // 1. Potentially moving a strided load (B) before any store (A) that
949 // precedes B, or
950 //
951 // 2. Potentially moving a strided store (A) after any load or store (B)
952 // that A precedes.
953 //
954 // It's legal to reorder A and B if we know there isn't a dependence from A
955 // to B. Note that this determination is conservative since some
956 // dependences could potentially be reordered safely.
957
958 // A is potentially the source of a dependence.
959 auto *Src = A->first;
960 auto SrcDes = A->second;
961
962 // B is potentially the sink of a dependence.
963 auto *Sink = B->first;
964 auto SinkDes = B->second;
965
966 // Code motion for interleaved accesses can't violate WAR dependences.
967 // Thus, reordering is legal if the source isn't a write.
968 if (!Src->mayWriteToMemory())
969 return true;
970
971 // At least one of the accesses must be strided.
972 if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
973 return true;
974
975 // If dependence information is not available from LoopAccessInfo,
976 // conservatively assume the instructions can't be reordered.
977 if (!areDependencesValid())
978 return false;
979
980 // If we know there is a dependence from source to sink, assume the
981 // instructions can't be reordered. Otherwise, reordering is legal.
982 return !Dependences.contains(Src) || !Dependences.lookup(Src).count(Sink);
983 }
984
985 /// Collect the dependences from LoopAccessInfo.
986 ///
987 /// We process the dependences once during the interleaved access analysis to
988 /// enable constant-time dependence queries.
989 void collectDependences() {
990 if (!areDependencesValid())
991 return;
992 auto *Deps = LAI->getDepChecker().getDependences();
993 for (auto Dep : *Deps)
994 Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
995 }
996};
997
998} // llvm namespace
999
1000#endif