//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines some vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_VECTORUTILS_H
#define LLVM_ANALYSIS_VECTORUTILS_H

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Support/CheckedArithmetic.h"

namespace llvm {
class TargetLibraryInfo;

/// Describes the type of Parameters
enum class VFParamKind {
  Vector,            // No semantic information.
  OMP_Linear,        // declare simd linear(i)
  OMP_LinearRef,     // declare simd linear(ref(i))
  OMP_LinearVal,     // declare simd linear(val(i))
  OMP_LinearUVal,    // declare simd linear(uval(i))
  OMP_LinearPos,     // declare simd linear(i:c) uniform(c)
  OMP_LinearValPos,  // declare simd linear(val(i:c)) uniform(c)
  OMP_LinearRefPos,  // declare simd linear(ref(i:c)) uniform(c)
  OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c)
  OMP_Uniform,       // declare simd uniform(i)
  GlobalPredicate,   // Global logical predicate that acts on all lanes
                     // of the input and output mask concurrently. For
                     // example, it is implied by the `M` token in the
                     // Vector Function ABI mangled name.
  Unknown
};

/// Describes the type of Instruction Set Architecture
enum class VFISAKind {
  AdvancedSIMD, // AArch64 Advanced SIMD (NEON)
  SVE,          // AArch64 Scalable Vector Extension
  SSE,          // x86 SSE
  AVX,          // x86 AVX
  AVX2,         // x86 AVX2
  AVX512,       // x86 AVX512
  LLVM,         // LLVM internal ISA for functions that are not
                // attached to an existing ABI via name mangling.
  Unknown       // Unknown ISA
};

/// Encapsulates information needed to describe a parameter.
///
/// The description of the parameter is not linked directly to
/// OpenMP or any other vector function description. This structure
/// is extendible to handle other paradigms that describe vector
/// functions and their parameters.
struct VFParameter {
  unsigned ParamPos;         // Parameter Position in Scalar Function.
  VFParamKind ParamKind;     // Kind of Parameter.
  int LinearStepOrPos = 0;   // Step or Position of the Parameter.
  Align Alignment = Align(); // Optional alignment in bytes, defaulted to 1.

  // Comparison operator.
  bool operator==(const VFParameter &Other) const {
    return std::tie(ParamPos, ParamKind, LinearStepOrPos, Alignment) ==
           std::tie(Other.ParamPos, Other.ParamKind, Other.LinearStepOrPos,
                    Other.Alignment);
  }
};

/// Contains the information about the kind of vectorization
/// available.
///
/// This object is independent of the paradigm used to
/// represent vector functions. In particular, it is not attached to
/// any target-specific ABI.
struct VFShape {
  ElementCount VF;                        // Vectorization factor.
  SmallVector<VFParameter, 8> Parameters; // List of parameter information.
  // Comparison operator.
  bool operator==(const VFShape &Other) const {
    return std::tie(VF, Parameters) == std::tie(Other.VF, Other.Parameters);
  }

  /// Update the parameter in position P.ParamPos to P.
  void updateParam(VFParameter P) {
    assert(P.ParamPos < Parameters.size() && "Invalid parameter position.");
    Parameters[P.ParamPos] = P;
    assert(hasValidParameterList() && "Invalid parameter list");
  }

  // Retrieve the VFShape that can be used to map a (scalar) function to itself,
  // with VF = 1.
  static VFShape getScalarShape(const CallInst &CI) {
    return VFShape::get(CI, ElementCount::getFixed(1),
                        /*HasGlobalPredicate*/ false);
  }

  // Retrieve the basic vectorization shape of the function, where all
  // parameters are mapped to VFParamKind::Vector with \p EC
  // lanes. Specifies whether the function has a Global Predicate
  // argument via \p HasGlobalPred.
  static VFShape get(const CallInst &CI, ElementCount EC, bool HasGlobalPred) {
    SmallVector<VFParameter, 8> Parameters;
    for (unsigned I = 0; I < CI.arg_size(); ++I)
      Parameters.push_back(VFParameter({I, VFParamKind::Vector}));
    if (HasGlobalPred)
      Parameters.push_back(
          VFParameter({CI.arg_size(), VFParamKind::GlobalPredicate}));

    return {EC, Parameters};
  }
  /// Validation check on the Parameters in the VFShape.
  bool hasValidParameterList() const;
};

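// Illustrative sketch (not part of the upstream API docs), assuming a
// `CallInst &CI` is in scope: build the shape of a 4-lane unmasked variant
// and re-mark its first parameter as uniform.
//
//   VFShape Shape = VFShape::get(CI, ElementCount::getFixed(4),
//                                /*HasGlobalPred=*/false);
//   VFParameter P = Shape.Parameters[0];
//   P.ParamKind = VFParamKind::OMP_Uniform;
//   Shape.updateParam(P);
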
/// Holds the VFShape for a specific scalar to vector function mapping.
struct VFInfo {
  VFShape Shape;          /// Classification of the vector function.
  std::string ScalarName; /// Scalar Function Name.
  std::string VectorName; /// Vector Function Name associated to this VFInfo.
  VFISAKind ISA;          /// Instruction Set Architecture.
};

namespace VFABI {
/// LLVM Internal VFABI ISA token for vector functions.
static constexpr char const *_LLVM_ = "_LLVM_";
/// Prefix for internal name redirection for vector functions that
/// tells the compiler to scalarize the call using the scalar name
/// of the function. For example, a mangled name like
/// `_ZGV_LLVM_N2v_foo(_LLVM_Scalarize_foo)` would tell the
/// vectorizer to vectorize the scalar call `foo`, and to scalarize
/// it once vectorization is done.
static constexpr char const *_LLVM_Scalarize_ = "_LLVM_Scalarize_";

/// Function to construct a VFInfo out of a mangled name in the
/// following format:
///
/// <VFABI_name>{(<redirection>)}
///
/// where <VFABI_name> is the name of the vector function, mangled according
/// to the rules described in the Vector Function ABI of the target vector
/// extension (or <isa> from now on). The <VFABI_name> is in the following
/// format:
///
/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]
///
/// This method supports demangling rules for the following <isa>:
///
/// * AArch64: https://developer.arm.com/docs/101129/latest
///
/// * x86 (libmvec): https://sourceware.org/glibc/wiki/libmvec and
///   https://sourceware.org/glibc/wiki/libmvec?action=AttachFile&do=view&target=VectorABI.txt
///
/// \param MangledName -> input string in the format
/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)].
/// \param M -> Module used to retrieve information about the vector
/// function that is not possible to retrieve from the mangled
/// name. At the moment, this parameter is needed only to retrieve the
/// Vectorization Factor of scalable vector functions from their
/// respective IR declarations.
Optional<VFInfo> tryDemangleForVFABI(StringRef MangledName, const Module &M);

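// For example (illustrative, following the AArch64 rules referenced above):
// the mangled name `_ZGVnN2v_foo(vector_foo)` denotes an Advanced SIMD ('n'),
// unmasked ('N'), 2-lane ('2') variant of `foo` taking one vector parameter
// ('v'), implemented by the symbol `vector_foo`.
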
/// This routine mangles the given VectorName according to the LangRef
/// specification for the vector-function-abi-variant attribute and is specific
/// to the TLI mappings. It is the responsibility of the caller to make sure
/// that this is only used if all parameters in the vector function are of
/// vector type. The returned string holds the scalar-to-vector mapping:
///    _ZGV<isa><mask><vlen><vparams>_<scalarname>(<vectorname>)
///
/// where:
///
/// <isa> = "_LLVM_"
/// <mask> = "N". Note: TLI does not support masked interfaces.
/// <vlen> = Number of concurrent lanes, stored in the `VectorizationFactor`
///          field of the `VecDesc` struct. If the number of lanes is scalable
///          then 'x' is printed instead.
/// <vparams> = "v", as many as are the numArgs.
/// <scalarname> = the name of the scalar function.
/// <vectorname> = the name of the vector function.
std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName,
                                unsigned numArgs, ElementCount VF);

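// For example (derived from the format above), a 4-lane unmasked mapping of
// the single-argument scalar function `sin` to a vector function `vec_sin`
// would be mangled as:
//
//   mangleTLIVectorName("vec_sin", "sin", /*numArgs=*/1,
//                       ElementCount::getFixed(4))
//     == "_ZGV_LLVM_N4v_sin(vec_sin)"
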
/// Retrieve the `VFParamKind` from a string token.
VFParamKind getVFParamKindFromString(const StringRef Token);

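// For example (per the VFABI parameter tokens): "v" maps to
// VFParamKind::Vector, "u" to VFParamKind::OMP_Uniform, and "l" to
// VFParamKind::OMP_Linear.
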
// Name of the attribute where the variant mappings are stored.
static constexpr char const *MappingsAttrName = "vector-function-abi-variant";

/// Populates a set of strings representing the Vector Function ABI variants
/// associated to the CallInst CI. If the CI does not contain the
/// vector-function-abi-variant attribute, we return without populating
/// VariantMappings, i.e. callers of getVectorVariantNames need not check for
/// the presence of the attribute (see InjectTLIMappings).
void getVectorVariantNames(const CallInst &CI,
                           SmallVectorImpl<std::string> &VariantMappings);
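
// For example (illustrative): a call annotated with
//   "vector-function-abi-variant"="_ZGV_LLVM_N2v_foo(vector_foo)"
// yields the single variant string "_ZGV_LLVM_N2v_foo(vector_foo)"; a
// comma-separated attribute value yields one string per variant.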
} // end namespace VFABI

/// The Vector Function Database.
///
/// Helper class used to find the vector functions associated to a
/// scalar CallInst.
class VFDatabase {
  /// The Module of the CallInst CI.
  const Module *M;
  /// The CallInst instance being queried for scalar to vector mappings.
  const CallInst &CI;
  /// List of vector function descriptors associated to the call
  /// instruction.
  const SmallVector<VFInfo, 8> ScalarToVectorMappings;

  /// Retrieve the scalar-to-vector mappings associated to the rules of
  /// a Vector Function ABI.
  static void getVFABIMappings(const CallInst &CI,
                               SmallVectorImpl<VFInfo> &Mappings) {
    if (!CI.getCalledFunction())
      return;

    const StringRef ScalarName = CI.getCalledFunction()->getName();

    SmallVector<std::string, 8> ListOfStrings;
    // The check for the vector-function-abi-variant attribute is done when
    // retrieving the vector variant names here.
    VFABI::getVectorVariantNames(CI, ListOfStrings);
    if (ListOfStrings.empty())
      return;
    for (const auto &MangledName : ListOfStrings) {
      const Optional<VFInfo> Shape =
          VFABI::tryDemangleForVFABI(MangledName, *(CI.getModule()));
      // A match is found via scalar and vector names, and also by
      // ensuring that the variant described in the attribute has a
      // corresponding definition or declaration of the vector
      // function in the Module M.
      if (Shape.hasValue() && (Shape.getValue().ScalarName == ScalarName)) {
        assert(CI.getModule()->getFunction(Shape.getValue().VectorName) &&
               "Vector function is missing.");
        Mappings.push_back(Shape.getValue());
      }
    }
  }

public:
  /// Retrieve all the VFInfo instances associated to the CallInst CI.
  static SmallVector<VFInfo, 8> getMappings(const CallInst &CI) {
    SmallVector<VFInfo, 8> Ret;

    // Get mappings from the Vector Function ABI variants.
    getVFABIMappings(CI, Ret);

    // Other non-VFABI variants should be retrieved here.

    return Ret;
  }

  /// Constructor, requires a CallInst instance.
  explicit VFDatabase(CallInst &CI)
      : M(CI.getModule()), CI(CI),
        ScalarToVectorMappings(VFDatabase::getMappings(CI)) {}
  /// \defgroup VFDatabase query interface.
  ///
  /// @{
  /// Retrieve the Function with VFShape \p Shape.
  Function *getVectorizedFunction(const VFShape &Shape) const {
    if (Shape == VFShape::getScalarShape(CI))
      return CI.getCalledFunction();

    for (const auto &Info : ScalarToVectorMappings)
      if (Info.Shape == Shape)
        return M->getFunction(Info.VectorName);

    return nullptr;
  }
  /// @}
};

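// Illustrative usage sketch (assumes a `CallInst &CI` in scope): look up a
// 2-lane unmasked variant of the callee, if the module provides one.
//
//   VFDatabase DB(CI);
//   VFShape Shape = VFShape::get(CI, ElementCount::getFixed(2),
//                                /*HasGlobalPred=*/false);
//   if (Function *VecF = DB.getVectorizedFunction(Shape)) {
//     // A vector variant exists; widen the call to use VecF.
//   }
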
template <typename T> class ArrayRef;
class DemandedBits;
class GetElementPtrInst;
template <typename InstTy> class InterleaveGroup;
class IRBuilderBase;
class Loop;
class ScalarEvolution;
class TargetTransformInfo;
class Type;
class Value;

namespace Intrinsic {
typedef unsigned ID;
}

/// A helper function for converting Scalar types to vector types. If
/// the incoming type is void, we return void. If the EC represents a
/// scalar, we return the scalar type.
inline Type *ToVectorTy(Type *Scalar, ElementCount EC) {
  if (Scalar->isVoidTy() || Scalar->isMetadataTy() || EC.isScalar())
    return Scalar;
  return VectorType::get(Scalar, EC);
}

inline Type *ToVectorTy(Type *Scalar, unsigned VF) {
  return ToVectorTy(Scalar, ElementCount::getFixed(VF));
}

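// For example, ToVectorTy(i32, 4) yields <4 x i32>, ToVectorTy(void, 4)
// yields void unchanged, and ToVectorTy(i32, 1) yields the scalar i32.
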
/// Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all scalars
/// for the scalar form of the intrinsic and all vectors (or scalars handled by
/// isVectorIntrinsicWithScalarOpAtArg) for the vector form of the intrinsic.
bool isTriviallyVectorizable(Intrinsic::ID ID);

/// Identifies if the vector form of the intrinsic has a scalar operand.
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
                                        unsigned ScalarOpdIdx);

/// Identifies if the vector form of the intrinsic has an operand that has
/// an overloaded type.
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, unsigned OpdIdx);

/// Returns intrinsic ID for call.
/// For the input call instruction it finds the mapping intrinsic and returns
/// its intrinsic ID; if it does not find one, it returns not_intrinsic.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
                                          const TargetLibraryInfo *TLI);

/// Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);

/// If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// If a value has only one user that is a CastInst, return it.
Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);

/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
Value *findScalarElement(Value *V, unsigned EltNo);

/// If all non-negative \p Mask elements are the same value, return that value.
/// If all elements are negative (undefined) or \p Mask contains different
/// non-negative values, return -1.
int getSplatIndex(ArrayRef<int> Mask);

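// For example, getSplatIndex({3, -1, 3, 3}) returns 3, while
// getSplatIndex({0, 1, 0, 1}) and getSplatIndex({-1, -1}) both return -1.
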
/// Get splat value if the input is a splat vector or return nullptr.
/// The value may be extracted from a splat constants vector or from
/// a sequence of instructions that broadcast a single value into a vector.
Value *getSplatValue(const Value *V);

/// Return true if each element of the vector value \p V is poisoned or equal to
/// every other non-poisoned element. If an index element is specified, either
/// every element of the vector is poisoned or the element at that index is not
/// poisoned and equal to every other non-poisoned element.
/// This may be more powerful than the related getSplatValue() because it is
/// not limited by finding a scalar source value to a splatted vector.
bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0);

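// For example (illustrative IR), both values below are splats of %x:
//
//   %ins = insertelement <4 x i32> poison, i32 %x, i64 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> poison,
//            <4 x i32> zeroinitializer
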
/// Replace each shuffle mask index with the scaled sequential indices for an
/// equivalent mask of narrowed elements. Mask elements that are less than 0
/// (sentinel values) are repeated in the output mask.
///
/// Example with Scale = 4:
///   <4 x i32> <3, 2, 0, -1> -->
///   <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1>
///
/// This is the reverse process of widening shuffle mask elements, but it always
/// succeeds because the indexes can always be multiplied (scaled up) to map to
/// narrower vector elements.
void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                           SmallVectorImpl<int> &ScaledMask);

/// Try to transform a shuffle mask by replacing elements with the scaled index
/// for an equivalent mask of widened elements. If all mask elements that would
/// map to a wider element of the new mask are the same negative number
/// (sentinel value), that element of the new mask is the same value. If any
/// element in a given slice is negative and some other element in that slice is
/// not the same value, return false (partial matches with sentinel values are
/// not allowed).
///
/// Example with Scale = 4:
///   <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1> -->
///   <4 x i32> <3, 2, 0, -1>
///
/// This is the reverse process of narrowing shuffle mask elements if it
/// succeeds. This transform is not always possible because indexes may not
/// divide evenly (scale down) to map to wider vector elements.
bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                          SmallVectorImpl<int> &ScaledMask);

/// Splits and processes shuffle mask depending on the number of input and
/// output registers. The function does 2 main things: 1) splits the
/// source/destination vectors into real registers; 2) does the mask analysis
/// to identify which real registers are permuted. Then the function processes
/// the resulting registers mask using provided action items. If no input
/// register is defined, \p NoInputAction action is used. If only 1 input
/// register is used, \p SingleInputAction is used, otherwise \p
/// ManyInputsAction is used to process > 2 input registers and masks.
/// \param Mask Original shuffle mask.
/// \param NumOfSrcRegs Number of source registers.
/// \param NumOfDestRegs Number of destination registers.
/// \param NumOfUsedRegs Number of actually used destination registers.
void processShuffleMasks(
    ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
    unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction);

/// Compute a map of integer instructions to their minimum legal type
/// size.
///
/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
/// type (e.g. i32) whenever arithmetic is performed on them.
///
/// For targets with native i8 or i16 operations, usually InstCombine can shrink
/// the arithmetic type down again. However InstCombine refuses to create
/// illegal types, so for targets without i8 or i16 registers, the lengthening
/// and shrinking remains.
///
/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
/// their scalar equivalents do not, so during vectorization it is important to
/// remove these extensions and truncations when deciding the profitability of
/// vectorization.
///
/// This function analyzes the given range of instructions and determines the
/// minimum type size each can be converted to. It attempts to remove or
/// minimize type size changes across each def-use chain, so for example in the
/// following code:
///
///   %1 = load i8, i8*
///   %2 = add i8 %1, 2
///   %3 = load i16, i16*
///   %4 = zext i8 %2 to i32
///   %5 = zext i16 %3 to i32
///   %6 = add i32 %4, %5
///   %7 = trunc i32 %6 to i16
///
/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
///
/// If the optional TargetTransformInfo is provided, this function tries harder
/// to do less work by only looking at illegal types.
MapVector<Instruction*, uint64_t>
computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
                         DemandedBits &DB,
                         const TargetTransformInfo *TTI=nullptr);

/// Compute the union of two access-group lists.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);

/// Compute the access-group list of access groups that @p Inst1 and @p Inst2
/// are both in. If either instruction does not access memory at all, it is
/// considered to be in every list.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *intersectAccessGroups(const Instruction *Inst1,
                              const Instruction *Inst2);

/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
/// MD_nontemporal, MD_access_group].
/// For K in Kinds, we get the MDNode for K from each of the
/// elements of VL, compute their "intersection" (i.e., the most generic
/// metadata value that covers all of the individual values), and set I's
/// metadata for K equal to the intersection value.
///
/// This function always sets a (possibly null) value for each K in Kinds.
Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);

/// Create a mask that filters the members of an interleave group where there
/// are gaps.
///
/// For example, the mask for \p Group with interleave-factor 3
/// and \p VF 4, that has only its first member present is:
///
///   <1,0,0,1,0,0,1,0,0,1,0,0>
///
/// Note: The result is a mask of 0's and 1's, as opposed to the other
/// create[*]Mask() utilities which create a shuffle mask (mask that
/// consists of indices).
Constant *createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
                               const InterleaveGroup<Instruction> &Group);

/// Create a mask with replicated elements.
///
/// This function creates a shuffle mask for replicating each of the \p VF
/// elements in a vector \p ReplicationFactor times. It can be used to
/// transform a mask of \p VF elements into a mask of
/// \p VF * \p ReplicationFactor elements used by a predicated
/// interleaved-group of loads/stores whose Interleaved-factor ==
/// \p ReplicationFactor.
///
/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
///
///   <0,0,0,1,1,1,2,2,2,3,3,3>
llvm::SmallVector<int, 16> createReplicatedMask(unsigned ReplicationFactor,
                                                unsigned VF);

/// Create an interleave shuffle mask.
///
/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
/// vectorization factor \p VF into a single wide vector. The mask is of the
/// form:
///
///   <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
///
/// For example, the mask for VF = 4 and NumVecs = 2 is:
///
///   <0, 4, 1, 5, 2, 6, 3, 7>.
llvm::SmallVector<int, 16> createInterleaveMask(unsigned VF, unsigned NumVecs);

/// Create a stride shuffle mask.
///
/// This function creates a shuffle mask whose elements begin at \p Start and
/// are incremented by \p Stride. The mask can be used to deinterleave an
/// interleaved vector into separate vectors of vectorization factor \p VF. The
/// mask is of the form:
///
///   <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
///
/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
///
///   <0, 2, 4, 6>
llvm::SmallVector<int, 16> createStrideMask(unsigned Start, unsigned Stride,
                                            unsigned VF);

/// Create a sequential shuffle mask.
///
/// This function creates shuffle mask whose elements are sequential and begin
/// at \p Start. The mask contains \p NumInts integers and is padded with \p
/// NumUndefs undef values. The mask is of the form:
///
///   <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
///
/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
///
///   <0, 1, 2, 3, undef, undef, undef, undef>
llvm::SmallVector<int, 16>
createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs);

/// Given a shuffle mask for a binary shuffle, create the equivalent shuffle
/// mask assuming both operands are identical. This assumes that the unary
/// shuffle will use elements from operand 0 (operand 1 will be unused).
llvm::SmallVector<int, 16> createUnaryMask(ArrayRef<int> Mask,
                                           unsigned NumElts);

/// Concatenate a list of vectors.
///
/// This function generates code that concatenates the vectors in \p Vecs into a
/// single large vector. The number of vectors should be greater than one, and
/// their element types should be the same. The number of elements in the
/// vectors should also be the same; however, if the last vector has fewer
/// elements, it will be padded with undefs.
Value *concatenateVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vecs);

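// For example (illustrative IR), concatenating two <2 x i32> values %a and %b
// produces the equivalent of:
//
//   %concat = shufflevector <2 x i32> %a, <2 x i32> %b,
//             <4 x i32> <i32 0, i32 1, i32 2, i32 3>
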
/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be false or undef. That is, return true if all
/// lanes can be assumed inactive.
bool maskIsAllZeroOrUndef(Value *Mask);

/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be true or undef. That is, return true if all
/// lanes can be assumed active.
bool maskIsAllOneOrUndef(Value *Mask);

/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
/// for each lane which may be active.
APInt possiblyDemandedEltsInMask(Value *Mask);

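// For example, for the constant mask <i1 1, i1 0, i1 0, i1 1>,
// possiblyDemandedEltsInMask returns an APInt with bits 0 and 3 set
// (0b1001): only lanes 0 and 3 may be active.
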
/// The group of interleaved loads/stores sharing the same stride and
/// close to each other.
///
/// Each member in this group has an index starting from 0, and the largest
/// index should be less than the interleave factor, which is equal to the
/// absolute value of the access's stride.
///
/// E.g. An interleaved load group of factor 4:
///        for (unsigned i = 0; i < 1024; i+=4) {
///          a = A[i];                           // Member of index 0
///          b = A[i+1];                         // Member of index 1
///          d = A[i+3];                         // Member of index 3
///          ...
///        }
///
///      An interleaved store group of factor 4:
///        for (unsigned i = 0; i < 1024; i+=4) {
///          ...
///          A[i]   = a;                         // Member of index 0
///          A[i+1] = b;                         // Member of index 1
///          A[i+2] = c;                         // Member of index 2
///          A[i+3] = d;                         // Member of index 3
///        }
///
/// Note: the interleaved load group could have gaps (missing members), but
/// the interleaved store group doesn't allow gaps.
template <typename InstTy> class InterleaveGroup {
public:
  InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
      : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
        InsertPos(nullptr) {}

  InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
      : Alignment(Alignment), InsertPos(Instr) {
    Factor = std::abs(Stride);
    assert(Factor > 1 && "Invalid interleave factor");

    Reverse = Stride < 0;
    Members[0] = Instr;
  }

  bool isReverse() const { return Reverse; }
  uint32_t getFactor() const { return Factor; }
  Align getAlign() const { return Alignment; }
  uint32_t getNumMembers() const { return Members.size(); }

  /// Try to insert a new member \p Instr with index \p Index and
  /// alignment \p NewAlign. The index is related to the leader and it could be
  /// negative if it is the new leader.
  ///
  /// \returns false if the instruction doesn't belong to the group.
  bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) {
    // Make sure the key fits in an int32_t.
    Optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
    if (!MaybeKey)
      return false;
    int32_t Key = *MaybeKey;

    // Skip if the key is used for either the tombstone or empty special values.
    if (DenseMapInfo<int32_t>::getTombstoneKey() == Key ||
        DenseMapInfo<int32_t>::getEmptyKey() == Key)
      return false;

    // Skip if there is already a member with the same index.
    if (Members.find(Key) != Members.end())
      return false;

    if (Key > LargestKey) {
      // The largest index is always less than the interleave factor.
      if (Index >= static_cast<int32_t>(Factor))
        return false;

      LargestKey = Key;
    } else if (Key < SmallestKey) {

      // Make sure the largest index fits in an int32_t.
      Optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
      if (!MaybeLargestIndex)
        return false;

      // The largest index is always less than the interleave factor.
      if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
        return false;

      SmallestKey = Key;
    }

    // It's always safe to select the minimum alignment.
    Alignment = std::min(Alignment, NewAlign);
    Members[Key] = Instr;
    return true;
  }

  /// Get the member with the given index \p Index.
  ///
  /// \returns nullptr if it contains no such member.
  InstTy *getMember(uint32_t Index) const {
    int32_t Key = SmallestKey + Index;
    return Members.lookup(Key);
  }

  /// Get the index for the given member. Unlike the key in the member
  /// map, the index starts from 0.
  uint32_t getIndex(const InstTy *Instr) const {
    for (auto I : Members) {
      if (I.second == Instr)
        return I.first - SmallestKey;
    }

    llvm_unreachable("InterleaveGroup contains no such member");
  }

  InstTy *getInsertPos() const { return InsertPos; }
  void setInsertPos(InstTy *Inst) { InsertPos = Inst; }

  /// Add metadata (e.g. alias info) from the instructions in this group to \p
  /// NewInst.
  ///
  /// FIXME: this function currently does not add noalias metadata a la
  /// addNewMetadata. To do that we need to compute the intersection of the
  /// noalias info from all members.
  void addMetadata(InstTy *NewInst) const;

  /// Returns true if this Group requires a scalar iteration to handle gaps.
  bool requiresScalarEpilogue() const {
    // If the last member of the Group exists, then a scalar epilog is not
    // needed for this group.
    if (getMember(getFactor() - 1))
      return false;

    // We have a group with gaps. It therefore can't be a reversed access,
    // because such groups get invalidated (TODO).
    assert(!isReverse() && "Group should have been invalidated");

    // This is a group of loads, with gaps, and without a last member.
    return true;
  }

private:
  uint32_t Factor; // Interleave Factor.
  bool Reverse;
  Align Alignment;
  DenseMap<int32_t, InstTy *> Members;
  int32_t SmallestKey = 0;
  int32_t LargestKey = 0;

  // To avoid breaking dependences, vectorized instructions of an interleave
  // group should be inserted at either the first load or the last store in
  // program order.
  //
  // E.g. %even = load i32             // Insert Position
  //      %add = add i32 %even         // Use of %even
  //      %odd = load i32
  //
  //      store i32 %even
  //      %odd = add i32               // Def of %odd
  //      store i32 %odd               // Insert Position
  InstTy *InsertPos;
};

/// Drive the analysis of interleaved memory accesses in the loop.
///
/// Use this class to analyze interleaved accesses only when we can vectorize
/// a loop. Otherwise it's meaningless to do analysis as the vectorization
/// on interleaved accesses is unsafe.
///
/// The analysis collects interleave groups and records the relationships
/// between the member and the group in a map.
class InterleavedAccessInfo {
public:
  InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
                        DominatorTree *DT, LoopInfo *LI,
                        const LoopAccessInfo *LAI)
      : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}

  ~InterleavedAccessInfo() { invalidateGroups(); }

  /// Analyze the interleaved accesses and collect them in interleave
  /// groups. Substitute symbolic strides using \p Strides.
  /// Consider also predicated loads/stores in the analysis if
  /// \p EnableMaskedInterleavedGroup is true.
  void analyzeInterleaving(bool EnableMaskedInterleavedGroup);

  /// Invalidate groups, e.g., in case all blocks in loop will be predicated
  /// contrary to original assumption. Although we currently prevent group
  /// formation for predicated accesses, we may be able to relax this limitation
  /// in the future once we handle more complicated blocks. Returns true if any
  /// groups were invalidated.
  bool invalidateGroups() {
    if (InterleaveGroups.empty()) {
      assert(
          !RequiresScalarEpilogue &&
          "RequiresScalarEpilog should not be set without interleave groups");
      return false;
    }

    InterleaveGroupMap.clear();
    for (auto *Ptr : InterleaveGroups)
      delete Ptr;
    InterleaveGroups.clear();
    RequiresScalarEpilogue = false;
    return true;
  }

  /// Check if \p Instr belongs to any interleave group.
  bool isInterleaved(Instruction *Instr) const {
    return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if it doesn't have such a group.
  InterleaveGroup<Instruction> *
  getInterleaveGroup(const Instruction *Instr) const {
    return InterleaveGroupMap.lookup(Instr);
  }

  iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
  getInterleaveGroups() {
    return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
  }

  /// Returns true if an interleaved group that may access memory
  /// out-of-bounds requires a scalar epilogue iteration for correctness.
  bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }

  /// Invalidate groups that require a scalar epilogue (due to gaps). This can
  /// happen when optimizing for size forbids a scalar epilogue, and the gap
  /// cannot be filtered by masking the load/store.
  void invalidateGroupsRequiringScalarEpilogue();

private:
  /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
  /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
  /// The interleaved access analysis can also add new predicates (for example
  /// by versioning strides of pointers).
  PredicatedScalarEvolution &PSE;

  Loop *TheLoop;
  DominatorTree *DT;
  LoopInfo *LI;
  const LoopAccessInfo *LAI;

  /// True if the loop may contain non-reversed interleaved groups with
  /// out-of-bounds accesses. We ensure we don't speculatively access memory
  /// out-of-bounds by executing at least one scalar epilogue iteration.
  bool RequiresScalarEpilogue = false;

  /// Holds the relationships between the members and the interleave group.
  DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;

  SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;

  /// Holds dependences among the memory accesses in the loop. It maps a source
  /// access to a set of dependent sink accesses.
  DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;

  /// The descriptor for a strided memory access.
  struct StrideDescriptor {
    StrideDescriptor() = default;
    StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
                     Align Alignment)
        : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {}

    // The access's stride. It is negative for a reverse access.
    int64_t Stride = 0;

    // The scalar expression of this access.
    const SCEV *Scev = nullptr;

    // The size of the memory object.
    uint64_t Size = 0;

    // The alignment of this access.
    Align Alignment;
  };

  /// A type for holding instructions and their stride descriptors.
  using StrideEntry = std::pair<Instruction *, StrideDescriptor>;

  /// Create a new interleave group with the given instruction \p Instr,
  /// stride \p Stride and alignment \p Alignment.
  ///
  /// \returns the newly created interleave group.
  InterleaveGroup<Instruction> *
  createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
    assert(!InterleaveGroupMap.count(Instr) &&
           "Already in an interleaved access group");
    InterleaveGroupMap[Instr] =
        new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
    InterleaveGroups.insert(InterleaveGroupMap[Instr]);
    return InterleaveGroupMap[Instr];
  }

  /// Release the group and remove all the relationships.
  void releaseGroup(InterleaveGroup<Instruction> *Group) {
    for (unsigned i = 0; i < Group->getFactor(); i++)
      if (Instruction *Member = Group->getMember(i))
        InterleaveGroupMap.erase(Member);

    InterleaveGroups.erase(Group);
    delete Group;
  }

  /// Collect all the accesses with a constant stride in program order.
  void collectConstStrideAccesses(
      MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
      const ValueToValueMap &Strides);

  /// Returns true if \p Stride is allowed in an interleaved group.
  static bool isStrided(int Stride);

  /// Returns true if \p BB is a predicated block.
  bool isPredicated(BasicBlock *BB) const {
    return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
  }

  /// Returns true if LoopAccessInfo can be used for dependence queries.
  bool areDependencesValid() const {
    return LAI && LAI->getDepChecker().getDependences();
  }

  /// Returns true if memory accesses \p A and \p B can be reordered, if
  /// necessary, when constructing interleaved groups.
  ///
  /// \p A must precede \p B in program order. We return false if reordering is
  /// not necessary or is prevented because \p A and \p B may be dependent.
  bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
                                                 StrideEntry *B) const {
    // Code motion for interleaved accesses can potentially hoist strided loads
    // and sink strided stores. The code below checks the legality of the
    // following two conditions:
    //
    // 1. Potentially moving a strided load (B) before any store (A) that
    //    precedes B, or
    //
    // 2. Potentially moving a strided store (A) after any load or store (B)
    //    that A precedes.
    //
    // It's legal to reorder A and B if we know there isn't a dependence from A
    // to B. Note that this determination is conservative since some
    // dependences could potentially be reordered safely.

    // A is potentially the source of a dependence.
    auto *Src = A->first;
    auto SrcDes = A->second;

    // B is potentially the sink of a dependence.
    auto *Sink = B->first;
    auto SinkDes = B->second;

    // Code motion for interleaved accesses can't violate WAR dependences.
    // Thus, reordering is legal if the source isn't a write.
    if (!Src->mayWriteToMemory())
      return true;

    // At least one of the accesses must be strided.
    if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
      return true;

    // If dependence information is not available from LoopAccessInfo,
    // conservatively assume the instructions can't be reordered.
    if (!areDependencesValid())
      return false;

    // If we know there is a dependence from source to sink, assume the
    // instructions can't be reordered. Otherwise, reordering is legal.
    return Dependences.find(Src) == Dependences.end() ||
           !Dependences.lookup(Src).count(Sink);
  }

  /// Collect the dependences from LoopAccessInfo.
  ///
  /// We process the dependences once during the interleaved access analysis to
  /// enable constant-time dependence queries.
  void collectDependences() {
    if (!areDependencesValid())
      return;
    auto *Deps = LAI->getDepChecker().getDependences();
    for (auto Dep : *Deps)
      Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
  }
};

} // llvm namespace

#endif
i
i
Definition: README.txt:29
llvm::InterleaveGroup::getAlign
Align getAlign() const
Definition: VectorUtils.h:626
llvm::VFInfo::Shape
VFShape Shape
Definition: VectorUtils.h:124
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::maskIsAllOneOrUndef
bool maskIsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
Definition: VectorUtils.cpp:1043
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:65
llvm::VFParameter::Alignment
Align Alignment
Definition: VectorUtils.h:66
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::VFParamKind::Vector
@ Vector
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:379
llvm::VFISAKind::AVX512
@ AVX512
llvm::InterleaveGroup::insertMember
bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign)
Try to insert a new member Instr with index Index and alignment NewAlign.
Definition: VectorUtils.h:634
llvm::ElementCount
Definition: TypeSize.h:404
llvm::getVectorIntrinsicIDForCall
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
Definition: VectorUtils.cpp:135
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:546
llvm::getGEPInductionOperand
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep)
Find the operand of the GEP that should be checked for consecutive stores.
Definition: VectorUtils.cpp:152
llvm::DenseMapBase::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:199
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::possiblyDemandedEltsInMask
APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
Definition: VectorUtils.cpp:1071
llvm::InterleaveGroup::InterleaveGroup
InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
Definition: VectorUtils.h:611
llvm::PredicatedScalarEvolution
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
Definition: ScalarEvolution.h:2176
llvm::VFISAKind::Unknown
@ Unknown
llvm::VFParamKind::GlobalPredicate
@ GlobalPredicate
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::InterleavedAccessInfo::InterleavedAccessInfo
InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L, DominatorTree *DT, LoopInfo *LI, const LoopAccessInfo *LAI)
Definition: VectorUtils.h:753
llvm::getSplatValue
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
Definition: VectorUtils.cpp:371
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
LoopAccessAnalysis.h
llvm::VFABI::getVectorVariantNames
void getVectorVariantNames(const CallInst &CI, SmallVectorImpl< std::string > &VariantMappings)
Populates a set of strings representing the Vector Function ABI variants associated to the CallInst C...
Definition: VectorUtils.cpp:1491
MapVector.h
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::maskIsAllZeroOrUndef
bool maskIsAllZeroOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
Definition: VectorUtils.cpp:1017
llvm::VFParameter::ParamKind
VFParamKind ParamKind
Definition: VectorUtils.h:64
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::isVectorIntrinsicWithScalarOpAtArg
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:101
llvm::VFShape::getScalarShape
static VFShape getScalarShape(const CallInst &CI)
Definition: VectorUtils.h:99
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::erase
bool erase(const KeyT &Val)
Definition: DenseMap.h:304
llvm::computeMinimumValueSizes
MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
Definition: VectorUtils.cpp:615
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
llvm::VFParameter::ParamPos
unsigned ParamPos
Definition: VectorUtils.h:63
llvm::narrowShuffleMaskElts
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
Definition: VectorUtils.cpp:432
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::getUniqueCastUse
Value * getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty)
If a value has only one user that is a CastInst, return it.
Definition: VectorUtils.cpp:193
llvm::createUnaryMask
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
Definition: VectorUtils.cpp:947
llvm::Optional
Definition: APInt.h:33
llvm::isVectorIntrinsicWithOverloadTypeAtArg
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, unsigned OpdIdx)
Identifies if the vector form of the intrinsic has a operand that has an overloaded type.
Definition: VectorUtils.cpp:119
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:147
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:119
llvm::VFParamKind::OMP_Uniform
@ OMP_Uniform
llvm::propagateMetadata
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
Definition: VectorUtils.cpp:840
llvm::VFShape::Parameters
SmallVector< VFParameter, 8 > Parameters
Definition: VectorUtils.h:84
llvm::Module::getFunction
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:175
llvm::uniteAccessGroups
MDNode * uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2)
Compute the union of two access-group lists.
Definition: VectorUtils.cpp:772
llvm::InterleaveGroup
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:284
llvm::VFISAKind
VFISAKind
Describes the type of Instruction Set Architecture.
Definition: VectorUtils.h:44
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:312
llvm::VFShape::hasValidParameterList
bool hasValidParameterList() const
Validation check on the Parameters in the VFShape.
Definition: VectorUtils.cpp:1512
Mappings
Inject TLI Mappings
Definition: InjectTLIMappings.cpp:171
llvm::VFISAKind::SSE
@ SSE
llvm::VFISAKind::SVE
@ SVE
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::DenseMapInfo
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: APInt.h:34
llvm::isSplatValue
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
Definition: VectorUtils.cpp:386
llvm::VFABI::mangleTLIVectorName
std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName, unsigned numArgs, ElementCount VF)
This routine mangles the given VectorName according to the LangRef specification for vector-function-...
Definition: VectorUtils.cpp:1475
llvm::InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue
void invalidateGroupsRequiringScalarEpilogue()
Invalidate groups that require a scalar epilogue (due to gaps).
Definition: VectorUtils.cpp:1434
llvm::InterleaveGroup::InterleaveGroup
InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
Definition: VectorUtils.h:615
llvm::VFParamKind
VFParamKind
Describes the type of Parameters.
Definition: VectorUtils.h:25
llvm::createInterleaveMask
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
Definition: VectorUtils.cpp:915
llvm::VFInfo::VectorName
std::string VectorName
Scalar Function Name.
Definition: VectorUtils.h:126
llvm::checkedSub
std::enable_if_t< std::is_signed< T >::value, llvm::Optional< T > > checkedSub(T LHS, T RHS)
Subtract two signed integers LHS and RHS.
Definition: CheckedArithmetic.h:57
llvm::VFDatabase
The Vector Function Database.
Definition: VectorUtils.h:208
CheckedArithmetic.h
llvm::LoopAccessInfo::blockNeedsPredication
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition: LoopAccessAnalysis.cpp:2334
llvm::ValueToValueMap
DenseMap< const Value *, Value * > ValueToValueMap
Definition: ScalarEvolutionExpressions.h:906
llvm::VFInfo
Holds the VFShape for a specific scalar to vector function mapping.
Definition: VectorUtils.h:123
llvm::isTriviallyVectorizable
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
Definition: VectorUtils.cpp:44
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::getStrideFromPointer
Value * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
Definition: VectorUtils.cpp:209
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1396
llvm::InterleavedAccessInfo::~InterleavedAccessInfo
~InterleavedAccessInfo()
Definition: VectorUtils.h:758
llvm::ToVectorTy
Type * ToVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
Definition: VectorUtils.h:299
llvm::VFParamKind::OMP_LinearVal
@ OMP_LinearVal
llvm::VFISAKind::LLVM
@ LLVM
llvm::AMDGPU::PALMD::Key
Key
PAL metadata keys.
Definition: AMDGPUMetadata.h:486
llvm::VFParamKind::OMP_LinearUValPos
@ OMP_LinearUValPos
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::VFISAKind::AVX2
@ AVX2
llvm::Instruction
Definition: Instruction.h:42
llvm::InterleavedAccessInfo
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:751
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::MemoryDepChecker::getDependences
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
Definition: LoopAccessAnalysis.h:220
llvm::VFShape
Contains the information about the kind of vectorization available.
Definition: VectorUtils.h:82
Align
uint64_t Align
Definition: ELFObjHandler.cpp:81
llvm::InterleaveGroup::isReverse
bool isReverse() const
Definition: VectorUtils.h:624
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::InterleaveGroup::getMember
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:679
llvm::InterleavedAccessInfo::getInterleaveGroups
iterator_range< SmallPtrSetIterator< llvm::InterleaveGroup< Instruction > * > > getInterleaveGroups()
Definition: VectorUtils.h:801
llvm::LinearPolySize< ElementCount >::getFixed
static ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:283
llvm::VFParamKind::OMP_LinearValPos
@ OMP_LinearValPos
llvm::InterleavedAccessInfo::isInterleaved
bool isInterleaved(Instruction *Instr) const
Check if Instr belongs to any interleave group.
Definition: VectorUtils.h:788
llvm::checkedAdd
std::enable_if_t< std::is_signed< T >::value, llvm::Optional< T > > checkedAdd(T LHS, T RHS)
Add two signed integers LHS and RHS.
Definition: CheckedArithmetic.h:48
llvm::InterleaveGroup::setInsertPos
void setInsertPos(InstTy *Inst)
Definition: VectorUtils.h:696
llvm::VFParameter
Encapsulates information needed to describe a parameter.
Definition: VectorUtils.h:62
llvm::VFABI::_LLVM_
static constexpr const char * _LLVM_
LLVM Internal VFABI ISA token for vector functions.
Definition: VectorUtils.h:132
llvm::InterleaveGroup::getInsertPos
InstTy * getInsertPos() const
Definition: VectorUtils.h:695
llvm::VFParameter::LinearStepOrPos
int LinearStepOrPos
Definition: VectorUtils.h:65
llvm::InterleaveGroup::getIndex
uint32_t getIndex(const InstTy *Instr) const
Get the index for the given member.
Definition: VectorUtils.h:686
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::VFParamKind::OMP_LinearRef
@ OMP_LinearRef
uint64_t
llvm::VFDatabase::getMappings
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition: VectorUtils.h:249
llvm::InterleaveGroup::getFactor
uint32_t getFactor() const
Definition: VectorUtils.h:625
llvm::VFInfo::ISA
VFISAKind ISA
Vector Function Name associated to this VFInfo.
Definition: VectorUtils.h:127
llvm::VFDatabase::getVectorizedFunction
Function * getVectorizedFunction(const VFShape &Shape) const
Definition: VectorUtils.h:268
llvm::DenseMap< int32_t, InstTy * >
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::LoopAccessInfo
Drive the analysis of memory accesses in the loop.
Definition: LoopAccessAnalysis.h:559
llvm::concatenateVectors
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Definition: VectorUtils.cpp:989
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:152
llvm::InterleavedAccessInfo::analyzeInterleaving
void analyzeInterleaving(bool EnableMaskedInterleavedGroup)
Analyze the interleaved accesses and collect them in interleave groups.
Definition: VectorUtils.cpp:1166
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::Optional::getValue
constexpr const T & getValue() const &
Definition: Optional.h:306
llvm::ISD::BasicBlock
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:163
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::createBitMaskForGaps
Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
Definition: VectorUtils.cpp:886
llvm::VFInfo::ScalarName
std::string ScalarName
Classification of the vector function.
Definition: VectorUtils.h:125
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::VFShape::operator==
bool operator==(const VFShape &Other) const
Definition: VectorUtils.h:86
llvm::LoopAccessInfo::getDepChecker
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
Definition: LoopAccessAnalysis.h:601
llvm::intersectAccessGroups
MDNode * intersectAccessGroups(const Instruction *Inst1, const Instruction *Inst2)
Compute the list of access groups that Inst1 and Inst2 are both in.
Definition: VectorUtils.cpp:793
llvm::LoopInfo
Definition: LoopInfo.h:1102
llvm::VFParamKind::Unknown
@ Unknown
llvm::VFISAKind::AdvancedSIMD
@ AdvancedSIMD
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:58
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::processShuffleMasks
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
Definition: VectorUtils.cpp:504
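A sketch of the callback protocol, matching the signature above (the reading of the unsigned arguments as register indices is an assumption; the lambdas only log, and Mask is assumed to split evenly across the destination registers):
  #include "llvm/Analysis/VectorUtils.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  void inspectMask(ArrayRef<int> Mask) {
    processShuffleMasks(
        Mask, /*NumOfSrcRegs=*/2, /*NumOfDestRegs=*/2, /*NumOfUsedRegs=*/2,
        []() { errs() << "dest register needs no input\n"; },
        [](ArrayRef<int> RegMask, unsigned, unsigned) {
          errs() << "single-input sub-mask, size " << RegMask.size() << "\n";
        },
        [](ArrayRef<int> RegMask, unsigned, unsigned) {
          errs() << "multi-input sub-mask, size " << RegMask.size() << "\n";
        });
  }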
uint32_t
llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:93
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:209
llvm::createReplicatedMask
llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
Definition: VectorUtils.cpp:906
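For instance (values follow the documented semantics; the wrapper is illustrative):
  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  SmallVector<int, 16> replicatedDemo() {
    // ReplicationFactor = 3, VF = 2 -> <0, 0, 0, 1, 1, 1>
    return createReplicatedMask(/*ReplicationFactor=*/3, /*VF=*/2);
  }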
llvm::InterleaveGroup::requiresScalarEpilogue
bool requiresScalarEpilogue() const
Returns true if this Group requires a scalar iteration to handle gaps.
Definition: VectorUtils.h:707
llvm::VFABI::getVFParamKindFromString
VFParamKind getVFParamKindFromString(const StringRef Token)
Retrieve the VFParamKind from a string token.
Definition: VFABIDemangling.cpp:458
llvm::VFParamKind::OMP_LinearPos
@ OMP_LinearPos
llvm::widenShuffleMaskElts
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of widened elements.
Definition: VectorUtils.cpp:453
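A small sketch of the widening direction (example values chosen so that each group of Scale elements is contiguous, which the transform requires):
  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  bool widenDemo() {
    int Mask[] = {0, 1, 2, 3}; // two adjacent pairs
    SmallVector<int, 8> Scaled;
    // Succeeds with Scaled == {0, 1}; a mask like {1, 2} would fail.
    return widenShuffleMaskElts(/*Scale=*/2, Mask, Scaled);
  }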
llvm::VFShape::VF
ElementCount VF
Definition: VectorUtils.h:83
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:84
llvm::CallBase::arg_size
unsigned arg_size() const
Definition: InstrTypes.h:1339
llvm::VFShape::updateParam
void updateParam(VFParameter P)
Update the parameter in position P.ParamPos to P.
Definition: VectorUtils.h:91
llvm::VFParamKind::OMP_LinearRefPos
@ OMP_LinearRefPos
llvm::DenseMapBase::size
unsigned size() const
Definition: DenseMap.h:101
llvm::findScalarElement
Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register, for example if it were inserted then extracted from the vector.
Definition: VectorUtils.cpp:285
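Illustrative use (the wrapper name is made up; behavior per the description above):
  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  // For V = insertelement <4 x i32> %vec, i32 %x, i32 1, asking for lane 1
  // yields %x directly, avoiding an extractelement; nullptr when unknown.
  Value *scalarForLane(Value *V, unsigned Lane) {
    return findScalarElement(V, Lane);
  }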
llvm::VFParamKind::OMP_LinearUVal
@ OMP_LinearUVal
llvm::InterleavedAccessInfo::requiresScalarEpilogue
bool requiresScalarEpilogue() const
Returns true if an interleaved group that may access memory out-of-bounds requires a scalar epilogue iteration for correctness.
Definition: VectorUtils.h:807
llvm::getSplatIndex
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
Definition: VectorUtils.cpp:349
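For example (values follow the stated rule; undef (-1) elements are ignored):
  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  int splatDemo() {
    int Mask[] = {2, -1, 2, 2};
    return getSplatIndex(Mask); // 2; a mixed mask such as {0, 1} gives -1
  }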
SmallVector.h
llvm::VFABI::MappingsAttrName
static constexpr const char * MappingsAttrName
Definition: VectorUtils.h:193
llvm::stripGetElementPtr
Value * stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
If the argument is a GEP, then returns the operand identified by getGEPInductionOperand.
Definition: VectorUtils.cpp:176
llvm::VFShape::get
static VFShape get(const CallInst &CI, ElementCount EC, bool HasGlobalPred)
Definition: VectorUtils.h:108
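A sketch combining VFShape::get with updateParam (see its entry above); the uniform-parameter choice is illustrative and assumes CI has at least one argument:
  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  VFShape shapeFor(const CallInst &CI) {
    VFShape Shape =
        VFShape::get(CI, ElementCount::getFixed(8), /*HasGlobalPred=*/false);
    VFParameter P;
    P.ParamPos = 0;
    P.ParamKind = VFParamKind::OMP_Uniform; // mark argument 0 uniform
    Shape.updateParam(P);
    return Shape;
  }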
llvm::iterator_range
A range adaptor for a pair of iterators.
Definition: iterator_range.h:30
llvm::SmallVectorImpl< std::string >
llvm::VFABI::tryDemangleForVFABI
Optional< VFInfo > tryDemangleForVFABI(StringRef MangledName, const Module &M)
Function to construct a VFInfo out of a mangled name in the following format: _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)].
Definition: VFABIDemangling.cpp:317
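A hedged sketch (the mangled name follows the _ZGV scheme noted above: AdvancedSIMD ISA, no mask, 4 lanes, two vector parameters; the module is consulted to validate the mapping):
  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  bool demangleDemo(const Module &M) {
    Optional<VFInfo> Info =
        VFABI::tryDemangleForVFABI("_ZGVnN4vv_foo(vector_foo)", M);
    // On success: Info->ScalarName == "foo", Info->VectorName == "vector_foo".
    return Info.hasValue();
  }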
llvm::createStrideMask
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
Definition: VectorUtils.cpp:926
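For example (values follow the documented semantics; the wrapper is illustrative):
  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  SmallVector<int, 16> strideDemo() {
    // Start = 0, Stride = 2, VF = 4 -> <0, 2, 4, 6>
    return createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4);
  }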
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1461
llvm::VFABI::_LLVM_Scalarize_
static constexpr const char * _LLVM_Scalarize_
Prefix for internal name redirection for vector function that tells the compiler to scalarize the call to the vector function.
Definition: VectorUtils.h:139
llvm::createSequentialMask
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
Definition: VectorUtils.cpp:934
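For example (values follow the documented semantics; -1 denotes undef in the result):
  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  SmallVector<int, 16> sequentialDemo() {
    // Start = 0, NumInts = 4, NumUndefs = 2 -> <0, 1, 2, 3, undef, undef>
    return createSequentialMask(/*Start=*/0, /*NumInts=*/4, /*NumUndefs=*/2);
  }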
llvm::VFISAKind::AVX
@ AVX
llvm::InterleavedAccessInfo::invalidateGroups
bool invalidateGroups()
Invalidate groups, e.g., in case all blocks in the loop will be predicated, contrary to the original assumption.
Definition: VectorUtils.h:771
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Definition: Type.cpp:668
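A short sketch showing fixed and scalable element counts (context and names are illustrative):
  #include "llvm/IR/DerivedTypes.h"
  using namespace llvm;

  void makeVectorTypes(LLVMContext &Ctx) {
    Type *I32 = Type::getInt32Ty(Ctx);
    VectorType *Fixed = VectorType::get(I32, ElementCount::getFixed(4));
    VectorType *Scalable = VectorType::get(I32, ElementCount::getScalable(4));
    (void)Fixed;    // <4 x i32>
    (void)Scalable; // <vscale x 4 x i32>
  }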
llvm::VFParamKind::OMP_Linear
@ OMP_Linear
llvm::VFDatabase::VFDatabase
VFDatabase(CallInst &CI)
Constructor, requires a CallInst instance.
Definition: VectorUtils.h:261
llvm::InterleaveGroup::getNumMembers
uint32_t getNumMembers() const
Definition: VectorUtils.h:627
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::InterleaveGroup::addMetadata
void addMetadata(InstTy *NewInst) const
Add metadata (e.g. alias info) from the instruction group to NewInst.
Definition: VectorUtils.cpp:1461
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::VFParameter::operator==
bool operator==(const VFParameter &Other) const
Definition: VectorUtils.h:69
llvm::InterleavedAccessInfo::getInterleaveGroup
InterleaveGroup< Instruction > * getInterleaveGroup(const Instruction *Instr) const
Get the interleave group that Instr belongs to.
Definition: VectorUtils.h:796