//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines some vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_VECTORUTILS_H
#define LLVM_ANALYSIS_VECTORUTILS_H

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Support/CheckedArithmetic.h"

namespace llvm {
class TargetLibraryInfo;

/// Describes the type of Parameters.
enum class VFParamKind {
  Vector,            // No semantic information.
  OMP_Linear,        // declare simd linear(i)
  OMP_LinearRef,     // declare simd linear(ref(i))
  OMP_LinearVal,     // declare simd linear(val(i))
  OMP_LinearUVal,    // declare simd linear(uval(i))
  OMP_LinearPos,     // declare simd linear(i:c) uniform(c)
  OMP_LinearValPos,  // declare simd linear(val(i:c)) uniform(c)
  OMP_LinearRefPos,  // declare simd linear(ref(i:c)) uniform(c)
  OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c)
  OMP_Uniform,       // declare simd uniform(i)
  GlobalPredicate,   // Global logical predicate that acts on all lanes
                     // of the input and output mask concurrently. For
                     // example, it is implied by the `M` token in the
                     // Vector Function ABI mangled name.
  Unknown
};

/// Describes the type of Instruction Set Architecture.
enum class VFISAKind {
  AdvancedSIMD, // AArch64 Advanced SIMD (NEON)
  SVE,          // AArch64 Scalable Vector Extension
  SSE,          // x86 SSE
  AVX,          // x86 AVX
  AVX2,         // x86 AVX2
  AVX512,       // x86 AVX512
  LLVM,         // LLVM internal ISA for functions that are not
                // attached to an existing ABI via name mangling.
  Unknown       // Unknown ISA
};

/// Encapsulates information needed to describe a parameter.
///
/// The description of the parameter is not linked directly to
/// OpenMP or any other vector function description. This structure
/// is extendible to handle other paradigms that describe vector
/// functions and their parameters.
struct VFParameter {
  unsigned ParamPos;         // Parameter Position in Scalar Function.
  VFParamKind ParamKind;     // Kind of Parameter.
  int LinearStepOrPos = 0;   // Step or Position of the Parameter.
  Align Alignment = Align(); // Optional alignment in bytes, defaulted to 1.

  // Comparison operator.
  bool operator==(const VFParameter &Other) const {
    return std::tie(ParamPos, ParamKind, LinearStepOrPos, Alignment) ==
           std::tie(Other.ParamPos, Other.ParamKind, Other.LinearStepOrPos,
                    Other.Alignment);
  }
};

/// Contains the information about the kind of vectorization
/// available.
///
/// This object is independent of the paradigm used to
/// represent vector functions. In particular, it is not attached to
/// any target-specific ABI.
struct VFShape {
  ElementCount VF;                        // Vectorization factor.
  SmallVector<VFParameter, 8> Parameters; // List of parameter information.
  // Comparison operator.
  bool operator==(const VFShape &Other) const {
    return std::tie(VF, Parameters) == std::tie(Other.VF, Other.Parameters);
  }

  /// Update the parameter in position P.ParamPos to P.
  void updateParam(VFParameter P) {
    assert(P.ParamPos < Parameters.size() && "Invalid parameter position.");
    Parameters[P.ParamPos] = P;
    assert(hasValidParameterList() && "Invalid parameter list");
  }

  // Retrieve the VFShape that can be used to map a (scalar) function to itself,
  // with VF = 1.
  static VFShape getScalarShape(const CallInst &CI) {
    return VFShape::get(CI, ElementCount::getFixed(1),
                        /*HasGlobalPredicate*/ false);
  }

  // Retrieve the basic vectorization shape of the function, where all
  // parameters are mapped to VFParamKind::Vector with \p EC
  // lanes. Specifies whether the function has a Global Predicate
  // argument via \p HasGlobalPred.
  static VFShape get(const CallInst &CI, ElementCount EC, bool HasGlobalPred) {
    SmallVector<VFParameter, 8> Parameters;
    for (unsigned I = 0; I < CI.arg_size(); ++I)
      Parameters.push_back(VFParameter({I, VFParamKind::Vector}));
    if (HasGlobalPred)
      Parameters.push_back(
          VFParameter({CI.arg_size(), VFParamKind::GlobalPredicate}));

    return {EC, Parameters};
  }
  /// Validation check on the Parameters in the VFShape.
  bool hasValidParameterList() const;
};
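
// Example (illustrative sketch, not part of the upstream header): building the
// shape of a 4-lane variant of an in-scope `CallInst CI` and marking its
// second parameter as uniform.
//
// \code
//   VFShape Shape =
//       VFShape::get(CI, ElementCount::getFixed(4), /*HasGlobalPred=*/false);
//   Shape.updateParam({/*ParamPos=*/1, VFParamKind::OMP_Uniform});
//   assert(Shape.hasValidParameterList());
// \endcode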

/// Holds the VFShape for a specific scalar to vector function mapping.
struct VFInfo {
  VFShape Shape;          /// Classification of the vector function.
  std::string ScalarName; /// Scalar Function Name.
  std::string VectorName; /// Vector Function Name associated to this VFInfo.
  VFISAKind ISA;          /// Instruction Set Architecture.
};

namespace VFABI {
/// LLVM Internal VFABI ISA token for vector functions.
static constexpr char const *_LLVM_ = "_LLVM_";
/// Prefix for internal name redirection for vector function that
/// tells the compiler to scalarize the call using the scalar name
/// of the function. For example, a mangled name like
/// `_ZGV_LLVM_N2v_foo(_LLVM_Scalarize_foo)` would tell the
/// vectorizer to vectorize the scalar call `foo`, and to scalarize
/// it once vectorization is done.
static constexpr char const *_LLVM_Scalarize_ = "_LLVM_Scalarize_";

/// Function to construct a VFInfo out of a mangled name in the
/// following format:
///
///   <VFABI_name>{(<redirection>)}
///
/// where <VFABI_name> is the name of the vector function, mangled according
/// to the rules described in the Vector Function ABI of the target vector
/// extension (or <isa> from now on). The <VFABI_name> is in the following
/// format:
///
///   _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]
///
/// This method supports demangling rules for the following <isa>:
///
/// * AArch64: https://developer.arm.com/docs/101129/latest
///
/// * x86 (libmvec): https://sourceware.org/glibc/wiki/libmvec and
///   https://sourceware.org/glibc/wiki/libmvec?action=AttachFile&do=view&target=VectorABI.txt
///
/// \param MangledName -> input string in the format
/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)].
/// \param M -> Module used to retrieve information about the vector
/// function that is not possible to retrieve from the mangled
/// name. At the moment, this parameter is needed only to retrieve the
/// Vectorization Factor of scalable vector functions from their
/// respective IR declarations.
Optional<VFInfo> tryDemangleForVFABI(StringRef MangledName, const Module &M);
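
// Example (illustrative sketch, not part of the upstream header): demangling
// the AArch64 AdvancedSIMD name `_ZGVnN2v_foo`, i.e. an unmasked 2-lane
// variant of `foo` with one vector parameter. `M` is an assumed in-scope
// Module containing the relevant declarations.
//
// \code
//   if (Optional<VFInfo> Info = VFABI::tryDemangleForVFABI("_ZGVnN2v_foo", M)) {
//     // Info->ScalarName == "foo", Info->VectorName == "_ZGVnN2v_foo",
//     // Info->Shape.VF == ElementCount::getFixed(2).
//   }
// \endcode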

/// This routine mangles the given VectorName according to the LangRef
/// specification for vector-function-abi-variant attribute and is specific to
/// the TLI mappings. It is the responsibility of the caller to make sure that
/// this is only used if all parameters in the vector function are vector type.
/// The returned string holds a scalar-to-vector mapping:
///
///   _ZGV<isa><mask><vlen><vparams>_<scalarname>(<vectorname>)
///
/// where:
///
/// <isa> = "_LLVM_"
/// <mask> = "N". Note: TLI does not support masked interfaces.
/// <vlen> = Number of concurrent lanes, stored in the `VectorizationFactor`
///          field of the `VecDesc` struct. If the number of lanes is scalable
///          then 'x' is printed instead.
/// <vparams> = one "v" token per argument (\p numArgs in total).
/// <scalarname> = the name of the scalar function.
/// <vectorname> = the name of the vector function.
std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName,
                                unsigned numArgs, ElementCount VF);
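
// Example (illustrative sketch, not part of the upstream header): mangling a
// hypothetical 4-lane, single-argument mapping from `sinf` to `vsinf`.
//
// \code
//   std::string S = VFABI::mangleTLIVectorName(
//       "vsinf", "sinf", /*numArgs=*/1, ElementCount::getFixed(4));
//   // S == "_ZGV_LLVM_N4v_sinf(vsinf)"
// \endcode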

/// Retrieve the `VFParamKind` from a string token.
VFParamKind getVFParamKindFromString(const StringRef Token);

// Name of the attribute where the variant mappings are stored.
static constexpr char const *MappingsAttrName = "vector-function-abi-variant";

/// Populates a set of strings representing the Vector Function ABI variants
/// associated to the CallInst CI. If the CI does not contain the
/// vector-function-abi-variant attribute, we return without populating
/// VariantMappings, i.e. callers of getVectorVariantNames need not check for
/// the presence of the attribute (see InjectTLIMappings).
void getVectorVariantNames(const CallInst &CI,
                           SmallVectorImpl<std::string> &VariantMappings);
} // end namespace VFABI

/// The Vector Function Database.
///
/// Helper class used to find the vector functions associated to a
/// scalar CallInst.
class VFDatabase {
  /// The Module of the CallInst CI.
  const Module *M;
  /// The CallInst instance being queried for scalar to vector mappings.
  const CallInst &CI;
  /// List of vector function descriptors associated to the call
  /// instruction.
  const SmallVector<VFInfo, 8> ScalarToVectorMappings;

  /// Retrieve the scalar-to-vector mappings associated to the rule of
  /// a vector Function ABI.
  static void getVFABIMappings(const CallInst &CI,
                               SmallVectorImpl<VFInfo> &Mappings) {
    if (!CI.getCalledFunction())
      return;

    const StringRef ScalarName = CI.getCalledFunction()->getName();

    SmallVector<std::string, 8> ListOfStrings;
    // The check for the vector-function-abi-variant attribute is done when
    // retrieving the vector variant names here.
    VFABI::getVectorVariantNames(CI, ListOfStrings);
    if (ListOfStrings.empty())
      return;
    for (const auto &MangledName : ListOfStrings) {
      const Optional<VFInfo> Shape =
          VFABI::tryDemangleForVFABI(MangledName, *(CI.getModule()));
      // A match is found via scalar and vector names, and also by
      // ensuring that the variant described in the attribute has a
      // corresponding definition or declaration of the vector
      // function in the Module M.
      if (Shape && (Shape.value().ScalarName == ScalarName)) {
        assert(CI.getModule()->getFunction(Shape.value().VectorName) &&
               "Vector function is missing.");
        Mappings.push_back(Shape.value());
      }
    }
  }

public:
  /// Retrieve all the VFInfo instances associated to the CallInst CI.
  static SmallVector<VFInfo, 8> getMappings(const CallInst &CI) {
    SmallVector<VFInfo, 8> Ret;

    // Get mappings from the Vector Function ABI variants.
    getVFABIMappings(CI, Ret);

    // Other non-VFABI variants should be retrieved here.

    return Ret;
  }

  /// Constructor, requires a CallInst instance.
  explicit VFDatabase(CallInst &CI)
      : M(CI.getModule()), CI(CI),
        ScalarToVectorMappings(VFDatabase::getMappings(CI)) {}
  /// \defgroup VFDatabase query interface.
  ///
  /// @{
  /// Retrieve the Function with VFShape \p Shape.
  Function *getVectorizedFunction(const VFShape &Shape) const {
    if (Shape == VFShape::getScalarShape(CI))
      return CI.getCalledFunction();

    for (const auto &Info : ScalarToVectorMappings)
      if (Info.Shape == Shape)
        return M->getFunction(Info.VectorName);

    return nullptr;
  }
  /// @}
};
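
// Example (illustrative sketch, not part of the upstream header): querying the
// database for a 2-lane variant of the callee of an in-scope `CallInst CI`.
//
// \code
//   VFDatabase DB(CI);
//   VFShape Shape =
//       VFShape::get(CI, ElementCount::getFixed(2), /*HasGlobalPred=*/false);
//   if (Function *VecF = DB.getVectorizedFunction(Shape)) {
//     // VecF is the 2-lane vector counterpart of the scalar callee.
//   }
// \endcode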

template <typename T> class ArrayRef;
class DemandedBits;
class GetElementPtrInst;
template <typename InstTy> class InterleaveGroup;
class IRBuilderBase;
class Loop;
class ScalarEvolution;
class TargetTransformInfo;
class Type;
class Value;

namespace Intrinsic {
typedef unsigned ID;
}

/// A helper function for converting Scalar types to vector types. If
/// the incoming type is void, we return void. If the EC represents a
/// scalar, we return the scalar type.
inline Type *ToVectorTy(Type *Scalar, ElementCount EC) {
  if (Scalar->isVoidTy() || Scalar->isMetadataTy() || EC.isScalar())
    return Scalar;
  return VectorType::get(Scalar, EC);
}

inline Type *ToVectorTy(Type *Scalar, unsigned VF) {
  return ToVectorTy(Scalar, ElementCount::getFixed(VF));
}
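
// Example (illustrative sketch, not part of the upstream header): with an
// in-scope `LLVMContext Ctx`,
//
// \code
//   Type *FloatTy = Type::getFloatTy(Ctx);
//   Type *V4F32 = ToVectorTy(FloatTy, 4);  // <4 x float>
//   Type *SameTy = ToVectorTy(FloatTy, 1); // float: a scalar EC is a no-op.
// \endcode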

/// Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all scalars
/// for the scalar form of the intrinsic and all vectors (or scalars handled by
/// isVectorIntrinsicWithScalarOpAtArg) for the vector form of the intrinsic.
bool isTriviallyVectorizable(Intrinsic::ID ID);

/// Identifies if the vector form of the intrinsic has a scalar operand.
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
                                        unsigned ScalarOpdIdx);

/// Identifies if the vector form of the intrinsic has an operand that has
/// an overloaded type.
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, unsigned OpdIdx);

/// Returns the intrinsic ID for a call.
/// For the input call instruction it finds the mapping intrinsic and returns
/// its intrinsic ID; if it does not find one, it returns not_intrinsic.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
                                          const TargetLibraryInfo *TLI);

/// Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);

/// If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// If a value has only one user that is a CastInst, return it.
Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);

/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
Value *findScalarElement(Value *V, unsigned EltNo);

/// If all non-negative \p Mask elements are the same value, return that value.
/// If all elements are negative (undefined) or \p Mask contains different
/// non-negative values, return -1.
int getSplatIndex(ArrayRef<int> Mask);

/// Get splat value if the input is a splat vector or return nullptr.
/// The value may be extracted from a splat constants vector or from
/// a sequence of instructions that broadcast a single value into a vector.
Value *getSplatValue(const Value *V);

/// Return true if each element of the vector value \p V is poisoned or equal to
/// every other non-poisoned element. If an index element is specified, either
/// every element of the vector is poisoned or the element at that index is not
/// poisoned and equal to every other non-poisoned element.
/// This may be more powerful than the related getSplatValue() because it is
/// not limited by finding a scalar source value to a splatted vector.
bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0);
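
// Example (illustrative sketch, not part of the upstream header): for IR of
// the canonical splat form
//
// \code
//   %ins = insertelement <4 x float> poison, float %x, i64 0
//   %splat = shufflevector <4 x float> %ins, <4 x float> poison,
//                          <4 x i32> zeroinitializer
// \endcode
//
// getSplatValue(%splat) returns %x, while isSplatValue(%splat) returns true
// and can also prove splats for which no scalar source value is recoverable.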

/// Transform a shuffle mask's output demanded element mask into demanded
/// element masks for the 2 operands, returns false if the mask isn't valid.
/// Both \p DemandedLHS and \p DemandedRHS are initialised to [SrcWidth].
/// \p AllowUndefElts permits "-1" indices to be treated as undef.
bool getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,
                            const APInt &DemandedElts, APInt &DemandedLHS,
                            APInt &DemandedRHS, bool AllowUndefElts = false);

/// Replace each shuffle mask index with the scaled sequential indices for an
/// equivalent mask of narrowed elements. Mask elements that are less than 0
/// (sentinel values) are repeated in the output mask.
///
/// Example with Scale = 4:
///   <4 x i32> <3, 2, 0, -1> -->
///   <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1>
///
/// This is the reverse process of widening shuffle mask elements, but it always
/// succeeds because the indexes can always be multiplied (scaled up) to map to
/// narrower vector elements.
void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                           SmallVectorImpl<int> &ScaledMask);

/// Try to transform a shuffle mask by replacing elements with the scaled index
/// for an equivalent mask of widened elements. If all mask elements that would
/// map to a wider element of the new mask are the same negative number
/// (sentinel value), that element of the new mask is the same value. If any
/// element in a given slice is negative and some other element in that slice is
/// not the same value, return false (partial matches with sentinel values are
/// not allowed).
///
/// Example with Scale = 4:
///   <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1> -->
///   <4 x i32> <3, 2, 0, -1>
///
/// This is the reverse process of narrowing shuffle mask elements if it
/// succeeds. This transform is not always possible because indexes may not
/// divide evenly (scale down) to map to wider vector elements.
bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                          SmallVectorImpl<int> &ScaledMask);
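
// Example (illustrative sketch, not part of the upstream header): the two
// transforms above round-trip when widening is possible.
//
// \code
//   SmallVector<int> Narrow, Wide;
//   narrowShuffleMaskElts(4, {3, 2, 0, -1}, Narrow);
//   // Narrow == <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1>
//   bool Ok = widenShuffleMaskElts(4, Narrow, Wide);
//   // Ok == true, Wide == <3, 2, 0, -1>
// \endcode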

/// Splits and processes shuffle mask depending on the number of input and
/// output registers. The function does 2 main things: 1) splits the
/// source/destination vectors into real registers; 2) does the mask analysis
/// to identify which real registers are permuted. Then the function processes
/// the resulting registers mask using the provided action items. If no input
/// register is defined, \p NoInputAction action is used. If only 1 input
/// register is used, \p SingleInputAction is used, otherwise
/// \p ManyInputsAction is used to process 2 or more input registers and masks.
/// \param Mask Original shuffle mask.
/// \param NumOfSrcRegs Number of source registers.
/// \param NumOfDestRegs Number of destination registers.
/// \param NumOfUsedRegs Number of actually used destination registers.
void processShuffleMasks(
    ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
    unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction);

/// Compute a map of integer instructions to their minimum legal type
/// size.
///
/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
/// type (e.g. i32) whenever arithmetic is performed on them.
///
/// For targets with native i8 or i16 operations, usually InstCombine can shrink
/// the arithmetic type down again. However InstCombine refuses to create
/// illegal types, so for targets without i8 or i16 registers, the lengthening
/// and shrinking remains.
///
/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
/// their scalar equivalents do not, so during vectorization it is important to
/// remove these lengthens and truncates when deciding the profitability of
/// vectorization.
///
/// This function analyzes the given range of instructions and determines the
/// minimum type size each can be converted to. It attempts to remove or
/// minimize type size changes across each def-use chain, so for example in the
/// following code:
///
///   %1 = load i8, i8*
///   %2 = add i8 %1, 2
///   %3 = load i16, i16*
///   %4 = zext i8 %2 to i32
///   %5 = zext i16 %3 to i32
///   %6 = add i32 %4, %5
///   %7 = trunc i32 %6 to i16
///
/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
///
/// If the optional TargetTransformInfo is provided, this function tries harder
/// to do less work by only looking at illegal types.
MapVector<Instruction *, uint64_t>
computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                         const TargetTransformInfo *TTI = nullptr);

/// Compute the union of two access-group lists.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);

/// Compute the access-group list of access groups that @p Inst1 and @p Inst2
/// are both in. If either instruction does not access memory at all, it is
/// considered to be in every list.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *intersectAccessGroups(const Instruction *Inst1,
                              const Instruction *Inst2);

/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
/// MD_nontemporal, MD_access_group].
/// For K in Kinds, we get the MDNode for K from each of the
/// elements of VL, compute their "intersection" (i.e., the most generic
/// metadata value that covers all of the individual values), and set I's
/// metadata for K equal to the intersection value.
///
/// This function always sets a (possibly null) value for each K in Kinds.
Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);

/// Create a mask that filters the members of an interleave group where there
/// are gaps.
///
/// For example, the mask for \p Group with interleave-factor 3
/// and \p VF 4, that has only its first member present is:
///
///   <1,0,0,1,0,0,1,0,0,1,0,0>
///
/// Note: The result is a mask of 0's and 1's, as opposed to the other
/// create[*]Mask() utilities which create a shuffle mask (mask that
/// consists of indices).
Constant *createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
                               const InterleaveGroup<Instruction> &Group);

/// Create a mask with replicated elements.
///
/// This function creates a shuffle mask for replicating each of the \p VF
/// elements in a vector \p ReplicationFactor times. It can be used to
/// transform a mask of \p VF elements into a mask of
/// \p VF * \p ReplicationFactor elements used by a predicated
/// interleaved-group of loads/stores whose Interleaved-factor ==
/// \p ReplicationFactor.
///
/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
///
///   <0,0,0,1,1,1,2,2,2,3,3,3>
llvm::SmallVector<int, 16> createReplicatedMask(unsigned ReplicationFactor,
                                                unsigned VF);

/// Create an interleave shuffle mask.
///
/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
/// vectorization factor \p VF into a single wide vector. The mask is of the
/// form:
///
///   <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
///
/// For example, the mask for VF = 4 and NumVecs = 2 is:
///
///   <0, 4, 1, 5, 2, 6, 3, 7>.
llvm::SmallVector<int, 16> createInterleaveMask(unsigned VF, unsigned NumVecs);
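
// Example (illustrative sketch, not part of the upstream header): interleaving
// two in-scope <4 x i32> values `V0` and `V1` with an in-scope
// `IRBuilderBase Builder`.
//
// \code
//   Value *Interleaved =
//       Builder.CreateShuffleVector(V0, V1, createInterleaveMask(4, 2));
//   // Lane order: V0[0], V1[0], V0[1], V1[1], V0[2], V1[2], V0[3], V1[3]
// \endcode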

/// Create a stride shuffle mask.
///
/// This function creates a shuffle mask whose elements begin at \p Start and
/// are incremented by \p Stride. The mask can be used to deinterleave an
/// interleaved vector into separate vectors of vectorization factor \p VF. The
/// mask is of the form:
///
///   <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
///
/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
///
///   <0, 2, 4, 6>
llvm::SmallVector<int, 16> createStrideMask(unsigned Start, unsigned Stride,
                                            unsigned VF);

/// Create a sequential shuffle mask.
///
/// This function creates a shuffle mask whose elements are sequential and
/// begin at \p Start. The mask contains \p NumInts integers and is padded
/// with \p NumUndefs undef values. The mask is of the form:
///
///   <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
///
/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
///
///   <0, 1, 2, 3, undef, undef, undef, undef>
llvm::SmallVector<int, 16>
createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs);

/// Given a shuffle mask for a binary shuffle, create the equivalent shuffle
/// mask assuming both operands are identical. This assumes that the unary
/// shuffle will use elements from operand 0 (operand 1 will be unused).
llvm::SmallVector<int, 16> createUnaryMask(ArrayRef<int> Mask,
                                           unsigned NumElts);

/// Concatenate a list of vectors.
///
/// This function generates code that concatenates the vectors in \p Vecs into
/// a single large vector. The number of vectors should be greater than one,
/// and their element types should be the same. The number of elements in the
/// vectors should also be the same; however, if the last vector has fewer
/// elements, it will be padded with undefs.
Value *concatenateVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vecs);
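
// Example (illustrative sketch, not part of the upstream header): with an
// in-scope `IRBuilderBase Builder` and two <4 x i32> values `A` and `B`,
//
// \code
//   Value *Wide = concatenateVectors(Builder, {A, B}); // <8 x i32>
// \endcode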

/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be false or undef. That is, return true if all
/// lanes can be assumed inactive.
bool maskIsAllZeroOrUndef(Value *Mask);

/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be true or undef. That is, return true if all
/// lanes can be assumed active.
bool maskIsAllOneOrUndef(Value *Mask);

/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
/// with a bit set for each lane that may be active.
APInt possiblyDemandedEltsInMask(Value *Mask);
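
// Example (illustrative sketch, not part of the upstream header): for the
// constant mask
//
// \code
//   <4 x i1> <i1 true, i1 undef, i1 true, i1 true>
// \endcode
//
// maskIsAllOneOrUndef returns true, maskIsAllZeroOrUndef returns false, and
// possiblyDemandedEltsInMask returns an APInt with all four bits set, since
// every lane may be active.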

/// The group of interleaved loads/stores sharing the same stride and
/// close to each other.
///
/// Each member in this group has an index starting from 0, and the largest
/// index should be less than the interleave factor, which is equal to the
/// absolute value of the access's stride.
///
/// E.g. An interleaved load group of factor 4:
///        for (unsigned i = 0; i < 1024; i+=4) {
///          a = A[i];                           // Member of index 0
///          b = A[i+1];                         // Member of index 1
///          d = A[i+3];                         // Member of index 3
///          ...
///        }
///
///      An interleaved store group of factor 4:
///        for (unsigned i = 0; i < 1024; i+=4) {
///          ...
///          A[i]   = a;                         // Member of index 0
///          A[i+1] = b;                         // Member of index 1
///          A[i+2] = c;                         // Member of index 2
///          A[i+3] = d;                         // Member of index 3
///        }
///
/// Note: the interleaved load group could have gaps (missing members), but
/// the interleaved store group doesn't allow gaps.
template <typename InstTy> class InterleaveGroup {
public:
  InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
      : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
        InsertPos(nullptr) {}

  InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
      : Alignment(Alignment), InsertPos(Instr) {
    Factor = std::abs(Stride);
    assert(Factor > 1 && "Invalid interleave factor");

    Reverse = Stride < 0;
    Members[0] = Instr;
  }

  bool isReverse() const { return Reverse; }
  uint32_t getFactor() const { return Factor; }
  Align getAlign() const { return Alignment; }
  uint32_t getNumMembers() const { return Members.size(); }

  /// Try to insert a new member \p Instr with index \p Index and
  /// alignment \p NewAlign. The index is related to the leader and it could be
  /// negative if it is the new leader.
  ///
  /// \returns false if the instruction doesn't belong to the group.
  bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) {
    // Make sure the key fits in an int32_t.
    Optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
    if (!MaybeKey)
      return false;
    int32_t Key = *MaybeKey;

    // Skip if the key is used for either the tombstone or empty special values.
    if (DenseMapInfo<int32_t>::getTombstoneKey() == Key ||
        DenseMapInfo<int32_t>::getEmptyKey() == Key)
      return false;

    // Skip if there is already a member with the same index.
    if (Members.find(Key) != Members.end())
      return false;

    if (Key > LargestKey) {
      // The largest index is always less than the interleave factor.
      if (Index >= static_cast<int32_t>(Factor))
        return false;

      LargestKey = Key;
    } else if (Key < SmallestKey) {

      // Make sure the largest index fits in an int32_t.
      Optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
      if (!MaybeLargestIndex)
        return false;

      // The largest index is always less than the interleave factor.
      if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
        return false;

      SmallestKey = Key;
    }

    // It's always safe to select the minimum alignment.
    Alignment = std::min(Alignment, NewAlign);
    Members[Key] = Instr;
    return true;
  }

  /// Get the member with the given index \p Index.
  ///
  /// \returns nullptr if the group contains no such member.
  InstTy *getMember(uint32_t Index) const {
    int32_t Key = SmallestKey + Index;
    return Members.lookup(Key);
  }

  /// Get the index for the given member. Unlike the key in the member
  /// map, the index starts from 0.
  uint32_t getIndex(const InstTy *Instr) const {
    for (auto I : Members) {
      if (I.second == Instr)
        return I.first - SmallestKey;
    }

    llvm_unreachable("InterleaveGroup contains no such member");
  }

  InstTy *getInsertPos() const { return InsertPos; }
  void setInsertPos(InstTy *Inst) { InsertPos = Inst; }

  /// Add metadata (e.g. alias info) from the instructions in this group to \p
  /// NewInst.
  ///
  /// FIXME: this function currently does not add noalias metadata a'la
  /// addNewMetadata. To do that we need to compute the intersection of the
  /// noalias info from all members.
  void addMetadata(InstTy *NewInst) const;

  /// Returns true if this Group requires a scalar iteration to handle gaps.
  bool requiresScalarEpilogue() const {
    // If the last member of the Group exists, then a scalar epilog is not
    // needed for this group.
    if (getMember(getFactor() - 1))
      return false;

    // We have a group with gaps. It therefore can't be a reversed access,
    // because such groups get invalidated (TODO).
    assert(!isReverse() && "Group should have been invalidated");

    // This is a group of loads, with gaps, and without a last member.
    return true;
  }

private:
  uint32_t Factor; // Interleave Factor.
  bool Reverse;
  Align Alignment;
  DenseMap<int32_t, InstTy *> Members;
  int32_t SmallestKey = 0;
  int32_t LargestKey = 0;

  // To avoid breaking dependences, vectorized instructions of an interleave
  // group should be inserted at either the first load or the last store in
  // program order.
  //
  // E.g. %even = load i32             // Insert Position
  //      %add = add i32 %even         // Use of %even
  //      %odd = load i32
  //
  //      store i32 %even
  //      %odd = add i32               // Def of %odd
  //      store i32 %odd               // Insert Position
  InstTy *InsertPos;
};
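
// Example (illustrative sketch, not part of the upstream header): a factor-3
// load group built from the accesses A[3*i] and A[3*i+2] has a gap at index 1,
// so getMember(1) returns nullptr; requiresScalarEpilogue() is still false,
// because the last member (index 2) is present.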

/// Drive the analysis of interleaved memory accesses in the loop.
///
/// Use this class to analyze interleaved accesses only when we can vectorize
/// a loop. Otherwise it's meaningless to do analysis as the vectorization
/// on interleaved accesses is unsafe.
///
/// The analysis collects interleave groups and records the relationships
/// between the member and the group in a map.
class InterleavedAccessInfo {
public:
  InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
                        DominatorTree *DT, LoopInfo *LI,
                        const LoopAccessInfo *LAI)
      : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}

  ~InterleavedAccessInfo() { invalidateGroups(); }

  /// Analyze the interleaved accesses and collect them in interleave
  /// groups. Consider also predicated loads/stores in the analysis if
  /// \p EnableMaskedInterleavedGroup is true.
  void analyzeInterleaving(bool EnableMaskedInterleavedGroup);

  /// Invalidate groups, e.g., in case all blocks in loop will be predicated
  /// contrary to original assumption. Although we currently prevent group
  /// formation for predicated accesses, we may be able to relax this limitation
  /// in the future once we handle more complicated blocks. Returns true if any
  /// groups were invalidated.
  bool invalidateGroups() {
    if (InterleaveGroups.empty()) {
      assert(
          !RequiresScalarEpilogue &&
          "RequiresScalarEpilog should not be set without interleave groups");
      return false;
    }

    InterleaveGroupMap.clear();
    for (auto *Ptr : InterleaveGroups)
      delete Ptr;
    InterleaveGroups.clear();
    RequiresScalarEpilogue = false;
    return true;
  }

  /// Check if \p Instr belongs to any interleave group.
  bool isInterleaved(Instruction *Instr) const {
    return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if \p Instr doesn't belong to any group.
  InterleaveGroup<Instruction> *
  getInterleaveGroup(const Instruction *Instr) const {
    return InterleaveGroupMap.lookup(Instr);
  }

  iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
  getInterleaveGroups() {
    return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
  }

  /// Returns true if an interleaved group that may access memory
  /// out-of-bounds requires a scalar epilogue iteration for correctness.
  bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }

  /// Invalidate groups that require a scalar epilogue (due to gaps). This can
  /// happen when optimizing for size forbids a scalar epilogue, and the gap
  /// cannot be filtered by masking the load/store.
  void invalidateGroupsRequiringScalarEpilogue();

  /// Returns true if we have any interleave groups.
  bool hasGroups() const { return !InterleaveGroups.empty(); }

private:
  /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
  /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
  /// The interleaved access analysis can also add new predicates (for example
  /// by versioning strides of pointers).
  PredicatedScalarEvolution &PSE;

  Loop *TheLoop;
  DominatorTree *DT;
  LoopInfo *LI;
  const LoopAccessInfo *LAI;

  /// True if the loop may contain non-reversed interleaved groups with
  /// out-of-bounds accesses. We ensure we don't speculatively access memory
  /// out-of-bounds by executing at least one scalar epilogue iteration.
  bool RequiresScalarEpilogue = false;

  /// Holds the relationships between the members and the interleave group.
  DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;

  SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;

  /// Holds dependences among the memory accesses in the loop. It maps a source
  /// access to a set of dependent sink accesses.
  DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;

  /// The descriptor for a strided memory access.
  struct StrideDescriptor {
    StrideDescriptor() = default;
    StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
                     Align Alignment)
        : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {}

    // The access's stride. It is negative for a reverse access.
    int64_t Stride = 0;

    // The scalar expression of this access.
    const SCEV *Scev = nullptr;

    // The size of the memory object.
    uint64_t Size = 0;

    // The alignment of this access.
    Align Alignment;
  };

  /// A type for holding instructions and their stride descriptors.
  using StrideEntry = std::pair<Instruction *, StrideDescriptor>;

  /// Create a new interleave group with the given instruction \p Instr,
  /// stride \p Stride and alignment \p Alignment.
  ///
  /// \returns the newly created interleave group.
  InterleaveGroup<Instruction> *
  createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
    assert(!InterleaveGroupMap.count(Instr) &&
           "Already in an interleaved access group");
    InterleaveGroupMap[Instr] =
        new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
    InterleaveGroups.insert(InterleaveGroupMap[Instr]);
    return InterleaveGroupMap[Instr];
  }

  /// Release the group and remove all the relationships.
  void releaseGroup(InterleaveGroup<Instruction> *Group) {
    for (unsigned i = 0; i < Group->getFactor(); i++)
      if (Instruction *Member = Group->getMember(i))
        InterleaveGroupMap.erase(Member);

    InterleaveGroups.erase(Group);
    delete Group;
  }

  /// Collect all the accesses with a constant stride in program order.
  void collectConstStrideAccesses(
      MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
      const ValueToValueMap &Strides);

  /// Returns true if \p Stride is allowed in an interleaved group.
  static bool isStrided(int Stride);

  /// Returns true if \p BB is a predicated block.
  bool isPredicated(BasicBlock *BB) const {
    return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
  }

  /// Returns true if LoopAccessInfo can be used for dependence queries.
  bool areDependencesValid() const {
    return LAI && LAI->getDepChecker().getDependences();
  }

  /// Returns true if memory accesses \p A and \p B can be reordered, if
  /// necessary, when constructing interleaved groups.
  ///
  /// \p A must precede \p B in program order. We return false if reordering is
  /// not necessary or is prevented because \p A and \p B may be dependent.
  bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
                                                 StrideEntry *B) const {
    // Code motion for interleaved accesses can potentially hoist strided loads
    // and sink strided stores. The code below checks the legality of the
    // following two conditions:
    //
    // 1. Potentially moving a strided load (B) before any store (A) that
    //    precedes B, or
    //
    // 2. Potentially moving a strided store (A) after any load or store (B)
    //    that A precedes.
    //
    // It's legal to reorder A and B if we know there isn't a dependence from A
    // to B. Note that this determination is conservative since some
    // dependences could potentially be reordered safely.

    // A is potentially the source of a dependence.
    auto *Src = A->first;
    auto SrcDes = A->second;

    // B is potentially the sink of a dependence.
    auto *Sink = B->first;
    auto SinkDes = B->second;

    // Code motion for interleaved accesses can't violate WAR dependences.
    // Thus, reordering is legal if the source isn't a write.
    if (!Src->mayWriteToMemory())
      return true;

    // At least one of the accesses must be strided.
    if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
      return true;

    // If dependence information is not available from LoopAccessInfo,
    // conservatively assume the instructions can't be reordered.
    if (!areDependencesValid())
      return false;

    // If we know there is a dependence from source to sink, assume the
    // instructions can't be reordered. Otherwise, reordering is legal.
    return Dependences.find(Src) == Dependences.end() ||
           !Dependences.lookup(Src).count(Sink);
  }

  /// Collect the dependences from LoopAccessInfo.
  ///
  /// We process the dependences once during the interleaved access analysis to
  /// enable constant-time dependence queries.
  void collectDependences() {
    if (!areDependencesValid())
      return;
    auto *Deps = LAI->getDepChecker().getDependences();
    for (auto Dep : *Deps)
      Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
  }
};

} // llvm namespace

#endif
i
i
Definition: README.txt:29
llvm::InterleaveGroup::getAlign
Align getAlign() const
Definition: VectorUtils.h:634
llvm::VFInfo::Shape
VFShape Shape
Definition: VectorUtils.h:124
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::maskIsAllOneOrUndef
bool maskIsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
Definition: VectorUtils.cpp:1080
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:69
llvm::VFParameter::Alignment
Align Alignment
Definition: VectorUtils.h:66
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::VFParamKind::Vector
@ Vector
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:379
llvm::VFISAKind::AVX512
@ AVX512
llvm::InterleaveGroup::insertMember
bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign)
Try to insert a new member Instr with index Index and alignment NewAlign.
Definition: VectorUtils.h:642
llvm::ElementCount
Definition: TypeSize.h:404
llvm::getVectorIntrinsicIDForCall
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
Definition: VectorUtils.cpp:135
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
llvm::getGEPInductionOperand
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep)
Find the operand of the GEP that should be checked for consecutive stores.
Definition: VectorUtils.cpp:152
llvm::DenseMapBase::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::possiblyDemandedEltsInMask
APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
Definition: VectorUtils.cpp:1108
llvm::InterleaveGroup::InterleaveGroup
InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
Definition: VectorUtils.h:619
llvm::PredicatedScalarEvolution
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
Definition: ScalarEvolution.h:2219
llvm::VFISAKind::Unknown
@ Unknown
llvm::VFParamKind::GlobalPredicate
@ GlobalPredicate
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::InterleavedAccessInfo::InterleavedAccessInfo
InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L, DominatorTree *DT, LoopInfo *LI, const LoopAccessInfo *LAI)
Definition: VectorUtils.h:761
llvm::getSplatValue
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
Definition: VectorUtils.cpp:371
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:173
LoopAccessAnalysis.h
llvm::VFABI::getVectorVariantNames
void getVectorVariantNames(const CallInst &CI, SmallVectorImpl< std::string > &VariantMappings)
Populates a set of strings representing the Vector Function ABI variants associated to the CallInst C...
Definition: VectorUtils.cpp:1534
MapVector.h
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::maskIsAllZeroOrUndef
bool maskIsAllZeroOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
Definition: VectorUtils.cpp:1054
llvm::VFParameter::ParamKind
VFParamKind ParamKind
Definition: VectorUtils.h:64
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::isVectorIntrinsicWithScalarOpAtArg
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:101
llvm::VFShape::getScalarShape
static VFShape getScalarShape(const CallInst &CI)
Definition: VectorUtils.h:99
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::erase
bool erase(const KeyT &Val)
Definition: DenseMap.h:302
llvm::computeMinimumValueSizes
MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
Definition: VectorUtils.cpp:652
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
llvm::VFParameter::ParamPos
unsigned ParamPos
Definition: VectorUtils.h:63
llvm::narrowShuffleMaskElts
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
Definition: VectorUtils.cpp:469
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::getUniqueCastUse
Value * getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty)
If a value has only one user that is a CastInst, return it.
Definition: VectorUtils.cpp:193
llvm::createUnaryMask
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
Definition: VectorUtils.cpp:984
llvm::Optional
Definition: APInt.h:33
llvm::isVectorIntrinsicWithOverloadTypeAtArg
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, unsigned OpdIdx)
Identifies if the vector form of the intrinsic has a operand that has an overloaded type.
Definition: VectorUtils.cpp:119
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:119
llvm::VFParamKind::OMP_Uniform
@ OMP_Uniform
llvm::propagateMetadata
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
Definition: VectorUtils.cpp:877
llvm::VFShape::Parameters
SmallVector< VFParameter, 8 > Parameters
Definition: VectorUtils.h:84
llvm::Module::getFunction
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:176
llvm::uniteAccessGroups
MDNode * uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2)
Compute the union of two access-group lists.
Definition: VectorUtils.cpp:809
llvm::InterleaveGroup
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:284
llvm::VFISAKind
VFISAKind
Describes the type of Instruction Set Architecture.
Definition: VectorUtils.h:44
llvm::AArch64PACKey::DB
@ DB
Definition: AArch64BaseInfo.h:822
llvm::VFShape::hasValidParameterList
bool hasValidParameterList() const
Validation check on the Parameters in the VFShape.
Definition: VectorUtils.cpp:1555
Mappings
Inject TLI Mappings
Definition: InjectTLIMappings.cpp:171
llvm::VFISAKind::SSE
@ SSE
llvm::VFISAKind::SVE
@ SVE
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::DenseMapInfo
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: APInt.h:34
llvm::isSplatValue
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
Definition: VectorUtils.cpp:386
llvm::VFABI::mangleTLIVectorName
std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName, unsigned numArgs, ElementCount VF)
This routine mangles the given VectorName according to the LangRef specification for vector-function-...
Definition: VectorUtils.cpp:1518
llvm::InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue
void invalidateGroupsRequiringScalarEpilogue()
Invalidate groups that require a scalar epilogue (due to gaps).
Definition: VectorUtils.cpp:1477
llvm::InterleaveGroup::InterleaveGroup
InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
Definition: VectorUtils.h:623
llvm::VFParamKind
VFParamKind
Describes the type of Parameters.
Definition: VectorUtils.h:25
llvm::createInterleaveMask
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
Definition: VectorUtils.cpp:952
llvm::VFInfo::VectorName
std::string VectorName
Scalar Function Name.
Definition: VectorUtils.h:126
llvm::checkedSub
std::enable_if_t< std::is_signed< T >::value, llvm::Optional< T > > checkedSub(T LHS, T RHS)
Subtract two signed integers LHS and RHS.
Definition: CheckedArithmetic.h:57
llvm::VFDatabase
The Vector Function Database.
Definition: VectorUtils.h:208
CheckedArithmetic.h
llvm::LoopAccessInfo::blockNeedsPredication
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition: LoopAccessAnalysis.cpp:2518
llvm::ValueToValueMap
DenseMap< const Value *, Value * > ValueToValueMap
Definition: ScalarEvolutionExpressions.h:906
llvm::VFInfo
Holds the VFShape for a specific scalar to vector function mapping.
Definition: VectorUtils.h:123
llvm::isTriviallyVectorizable
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
Definition: VectorUtils.cpp:44
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::getStrideFromPointer
Value * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
Definition: VectorUtils.cpp:209
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1397
llvm::InterleavedAccessInfo::~InterleavedAccessInfo
~InterleavedAccessInfo()
Definition: VectorUtils.h:766
llvm::ToVectorTy
Type * ToVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
Definition: VectorUtils.h:299
llvm::VFParamKind::OMP_LinearVal
@ OMP_LinearVal
llvm::VFISAKind::LLVM
@ LLVM
llvm::AMDGPU::PALMD::Key
Key
PAL metadata keys.
Definition: AMDGPUMetadata.h:486
llvm::VFParamKind::OMP_LinearUValPos
@ OMP_LinearUValPos
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::VFISAKind::AVX2
@ AVX2
llvm::Instruction
Definition: Instruction.h:42
llvm::InterleavedAccessInfo
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:759
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::MemoryDepChecker::getDependences
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
Definition: LoopAccessAnalysis.h:221
llvm::VFShape
Contains the information about the kind of vectorization available.
Definition: VectorUtils.h:82
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
llvm::InterleaveGroup::isReverse
bool isReverse() const
Definition: VectorUtils.h:632
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::InterleaveGroup::getMember
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:687
llvm::InterleavedAccessInfo::getInterleaveGroups
iterator_range< SmallPtrSetIterator< llvm::InterleaveGroup< Instruction > * > > getInterleaveGroups()
Definition: VectorUtils.h:809
llvm::LinearPolySize< ElementCount >::getFixed
static ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:283
llvm::VFParamKind::OMP_LinearValPos
@ OMP_LinearValPos
llvm::InterleavedAccessInfo::isInterleaved
bool isInterleaved(Instruction *Instr) const
Check if Instr belongs to any interleave group.
Definition: VectorUtils.h:796
llvm::checkedAdd
std::enable_if_t< std::is_signed< T >::value, llvm::Optional< T > > checkedAdd(T LHS, T RHS)
Add two signed integers LHS and RHS.
Definition: CheckedArithmetic.h:48
llvm::InterleaveGroup::setInsertPos
void setInsertPos(InstTy *Inst)
Definition: VectorUtils.h:704
llvm::VFParameter
Encapsulates information needed to describe a parameter.
Definition: VectorUtils.h:62
llvm::VFABI::_LLVM_
static constexpr const char * _LLVM_
LLVM Internal VFABI ISA token for vector functions.
Definition: VectorUtils.h:132
llvm::InterleaveGroup::getInsertPos
InstTy * getInsertPos() const
Definition: VectorUtils.h:703
llvm::VFParameter::LinearStepOrPos
int LinearStepOrPos
Definition: VectorUtils.h:65
llvm::getShuffleDemandedElts
bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
Definition: VectorUtils.cpp:432
llvm::InterleaveGroup::getIndex
uint32_t getIndex(const InstTy *Instr) const
Get the index for the given member.
Definition: VectorUtils.h:694
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::VFParamKind::OMP_LinearRef
@ OMP_LinearRef
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
uint64_t
llvm::VFDatabase::getMappings
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition: VectorUtils.h:249
llvm::InterleaveGroup::getFactor
uint32_t getFactor() const
Definition: VectorUtils.h:633
llvm::VFInfo::ISA
VFISAKind ISA
Vector Function Name associated to this VFInfo.
Definition: VectorUtils.h:127
llvm::VFDatabase::getVectorizedFunction
Function * getVectorizedFunction(const VFShape &Shape) const
Definition: VectorUtils.h:268
llvm::DenseMap< int32_t, InstTy * >
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::LoopAccessInfo
Drive the analysis of memory accesses in the loop.
Definition: LoopAccessAnalysis.h:562
llvm::concatenateVectors
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Definition: VectorUtils.cpp:1026
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::InterleavedAccessInfo::analyzeInterleaving
void analyzeInterleaving(bool EnableMaskedInterleavedGroup)
Analyze the interleaved accesses and collect them in interleave groups.
Definition: VectorUtils.cpp:1209
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:60
llvm::ISD::BasicBlock
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:168
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::createBitMaskForGaps
Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
Definition: VectorUtils.cpp:923
llvm::VFInfo::ScalarName
std::string ScalarName
Scalar Function Name.
Definition: VectorUtils.h:125
llvm::VFShape::operator==
bool operator==(const VFShape &Other) const
Definition: VectorUtils.h:86
llvm::LoopAccessInfo::getDepChecker
const MemoryDepChecker & getDepChecker() const
Return the Memory Dependence Checker, which can determine the loop-independent and loop-carried dependences between memory accesses.
Definition: LoopAccessAnalysis.h:604
llvm::intersectAccessGroups
MDNode * intersectAccessGroups(const Instruction *Inst1, const Instruction *Inst2)
Compute the access-group list of access groups that Inst1 and Inst2 are both in.
Definition: VectorUtils.cpp:830
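A sketch of where intersectAccessGroups fits: when two instructions are combined into one, the result may only claim the access groups both originals were members of. The surrounding helper is hypothetical.

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// Attach the common !llvm.access.group list (if any) to a fused op.
void annotateFused(Instruction *Fused, const Instruction *A,
                   const Instruction *B) {
  if (MDNode *Groups = intersectAccessGroups(A, B))
    Fused->setMetadata(LLVMContext::MD_access_group, Groups);
}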
llvm::LoopInfo
Definition: LoopInfo.h:1108
llvm::VFParamKind::Unknown
@ Unknown
llvm::VFISAKind::AdvancedSIMD
@ AdvancedSIMD
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:50
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::cl::Sink
@ Sink
Definition: CommandLine.h:168
llvm::processShuffleMasks
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
Definition: VectorUtils.cpp:541
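A calling-convention sketch for processShuffleMasks; the roles of the three callbacks are inferred from the parameter names above (no input, one input register, several input registers per destination register), so treat the semantics as an assumption.

#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

// Classify each destination register of a split shuffle by how many
// source registers feed it (illustrative counters only).
void countRegKinds(ArrayRef<int> Mask, unsigned NumSrc, unsigned NumDst) {
  unsigned Empty = 0, Single = 0, Many = 0;
  processShuffleMasks(
      Mask, NumSrc, NumDst, /*NumOfUsedRegs=*/NumDst,
      [&]() { ++Empty; },                                   // No input.
      [&](ArrayRef<int>, unsigned, unsigned) { ++Single; }, // One source.
      [&](ArrayRef<int>, unsigned, unsigned) { ++Many; });  // Many sources.
}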
uint32_t
llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:93
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::createReplicatedMask
llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
Definition: VectorUtils.cpp:943
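A worked example of createReplicatedMask; the expected result follows the usual replication semantics (each source lane repeated ReplicationFactor times).

#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

// createReplicatedMask(3, 2) is expected to yield <0,0,0,1,1,1>:
// each of the VF=2 source lanes repeated ReplicationFactor=3 times.
SmallVector<int, 16> Replicated =
    createReplicatedMask(/*ReplicationFactor=*/3, /*VF=*/2);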
llvm::InterleaveGroup::requiresScalarEpilogue
bool requiresScalarEpilogue() const
Returns true if this Group requires a scalar iteration to handle gaps.
Definition: VectorUtils.h:715
llvm::VFABI::getVFParamKindFromString
VFParamKind getVFParamKindFromString(const StringRef Token)
Retrieve the VFParamKind from a string token.
Definition: VFABIDemangling.cpp:458
llvm::VFParamKind::OMP_LinearPos
@ OMP_LinearPos
llvm::widenShuffleMaskElts
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of widened elements.
Definition: VectorUtils.cpp:490
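A sketch of widenShuffleMaskElts going from narrow-lane indices to wide-lane indices; the failure condition stated in the comment is an assumption based on the function's description.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

// <0,1,6,7> with Scale=2 groups into wide elements <0,3>; the call is
// assumed to return false when a group of Scale narrow indices does
// not map onto one whole widened element.
bool widenExample() {
  int Narrow[] = {0, 1, 6, 7};
  SmallVector<int, 8> Wide;
  return widenShuffleMaskElts(/*Scale=*/2, Narrow, Wide);
}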
llvm::VFShape::VF
ElementCount VF
Definition: VectorUtils.h:83
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:84
llvm::CallBase::arg_size
unsigned arg_size() const
Definition: InstrTypes.h:1340
llvm::VFShape::updateParam
void updateParam(VFParameter P)
Update the parameter in position P.ParamPos to P.
Definition: VectorUtils.h:91
llvm::VFParamKind::OMP_LinearRefPos
@ OMP_LinearRefPos
llvm::InterleavedAccessInfo::hasGroups
bool hasGroups() const
Returns true if we have any interleave groups.
Definition: VectorUtils.h:823
llvm::DenseMapBase::size
unsigned size() const
Definition: DenseMap.h:99
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::findScalarElement
Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register, for example if it was inserted then extracted from the vector.
Definition: VectorUtils.cpp:285
llvm::VFParamKind::OMP_LinearUVal
@ OMP_LinearUVal
llvm::InterleavedAccessInfo::requiresScalarEpilogue
bool requiresScalarEpilogue() const
Returns true if an interleaved group that may access memory out-of-bounds requires a scalar epilogue iteration for correctness.
Definition: VectorUtils.h:815
llvm::getSplatIndex
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
Definition: VectorUtils.cpp:349
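A small example for getSplatIndex; the splat case follows directly from the description above, while the negative no-splat return value is an assumption.

#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

// All non-negative lanes are 3, so this should return 3; undef (-1)
// lanes are ignored. A mask like <0,1> is assumed to return a
// negative sentinel instead.
int splatLane() {
  int Mask[] = {3, 3, -1, 3};
  return getSplatIndex(Mask);
}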
SmallVector.h
llvm::VFABI::MappingsAttrName
static constexpr const char * MappingsAttrName
Definition: VectorUtils.h:193
llvm::stripGetElementPtr
Value * stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
If the argument is a GEP, then returns the operand identified by getGEPInductionOperand.
Definition: VectorUtils.cpp:176
llvm::VFShape::get
static VFShape get(const CallInst &CI, ElementCount EC, bool HasGlobalPred)
Definition: VectorUtils.h:108
llvm::iterator_range
A range adaptor for a pair of iterators.
Definition: iterator_range.h:30
llvm::Optional::value
constexpr const T & value() const &
Definition: Optional.h:281
llvm::SmallVectorImpl< std::string >
llvm::VFABI::tryDemangleForVFABI
Optional< VFInfo > tryDemangleForVFABI(StringRef MangledName, const Module &M)
Function to construct a VFInfo out of a mangled name that follows the Vector Function ABI (VFABI) naming format.
Definition: VFABIDemangling.cpp:317
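A sketch of tryDemangleForVFABI on a hand-written VFABI name; the sample mangled string and the decoded fields in the comments are illustrative readings of the AArch64 VFABI mangling, not output guaranteed by this API.

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// "_ZGVnN4v_sinf(_simd_sinf)": NEON ISA ('n'), unmasked ('N'), 4 lanes,
// one vector parameter ('v'), scalar name "sinf", redirected to the
// vector function "_simd_sinf" (assumed reading).
void inspect(const Module &M) {
  Optional<VFInfo> Info =
      VFABI::tryDemangleForVFABI("_ZGVnN4v_sinf(_simd_sinf)", M);
  if (Info) {
    // Expect Info->ScalarName == "sinf" and Info->ISA ==
    // VFISAKind::AdvancedSIMD (assumed).
  }
}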
llvm::createStrideMask
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
Definition: VectorUtils.cpp:963
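A worked example of createStrideMask, matching the <Start, Start+Stride, ...> pattern its name implies; useful, for instance, to de-interleave every other element.

#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

// createStrideMask(0, 2, 4) is expected to yield <0,2,4,6>: VF=4
// indices starting at 0, each Stride=2 apart.
SmallVector<int, 16> Strided =
    createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4);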
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1474
llvm::VFABI::_LLVM_Scalarize_
static constexpr const char * _LLVM_Scalarize_
Prefix for internal name redirection for vector functions that tells the compiler to scalarize the call.
Definition: VectorUtils.h:139
llvm::createSequentialMask
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
Definition: VectorUtils.cpp:971
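A worked example of createSequentialMask; the -1 entries stand for undef lanes, which is the usual shuffle-mask convention and an assumption here.

#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

// createSequentialMask(0, 4, 4) is expected to yield
// <0,1,2,3,-1,-1,-1,-1>: NumInts=4 consecutive indices from Start=0,
// padded by NumUndefs=4 undef (-1) lanes.
SmallVector<int, 16> Seq =
    createSequentialMask(/*Start=*/0, /*NumInts=*/4, /*NumUndefs=*/4);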
llvm::VFISAKind::AVX
@ AVX
llvm::InterleavedAccessInfo::invalidateGroups
bool invalidateGroups()
Invalidate groups, e.g., in case all blocks in the loop will be predicated contrary to the original assumption.
Definition: VectorUtils.h:779
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1297
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Definition: Type.cpp:668
llvm::VFParamKind::OMP_Linear
@ OMP_Linear
llvm::VFDatabase::VFDatabase
VFDatabase(CallInst &CI)
Constructor, requires a CallInst instance.
Definition: VectorUtils.h:261
llvm::InterleaveGroup::getNumMembers
uint32_t getNumMembers() const
Definition: VectorUtils.h:635
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1251
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::InterleaveGroup::addMetadata
void addMetadata(InstTy *NewInst) const
Add metadata (e.g., alias info) from the instructions in this group to NewInst.
Definition: VectorUtils.cpp:1504
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:39
llvm::VFParameter::operator==
bool operator==(const VFParameter &Other) const
Definition: VectorUtils.h:69
llvm::InterleavedAccessInfo::getInterleaveGroup
InterleaveGroup< Instruction > * getInterleaveGroup(const Instruction *Instr) const
Get the interleave group that Instr belongs to.
Definition: VectorUtils.h:804