//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines some vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_VECTORUTILS_H
#define LLVM_ANALYSIS_VECTORUTILS_H

#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/CheckedArithmetic.h"

namespace llvm {

template <typename T> class ArrayRef;
class DemandedBits;
class GetElementPtrInst;
template <typename InstTy> class InterleaveGroup;
class Loop;
class ScalarEvolution;
class TargetTransformInfo;
class Type;
class Value;

namespace Intrinsic {
enum ID : unsigned;
}

/// Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all scalars
/// for the scalar form of the intrinsic and all vectors (or scalars handled by
/// hasVectorInstrinsicScalarOpd) for the vector form of the intrinsic.
bool isTriviallyVectorizable(Intrinsic::ID ID);

/// Identifies if the vector form of the intrinsic has a scalar operand.
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);

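// Illustrative sketch (not part of the original header): a vectorizer can use
// this query to keep operands scalar while widening a call; `CI`, `ID`,
// `VecArgs`, and `widen` are hypothetical.
//
//   for (unsigned OpIdx = 0; OpIdx < CI->getNumArgOperands(); ++OpIdx) {
//     Value *Arg = CI->getArgOperand(OpIdx);
//     if (hasVectorInstrinsicScalarOpd(ID, OpIdx))
//       VecArgs.push_back(Arg);        // must stay scalar in the vector form
//     else
//       VecArgs.push_back(widen(Arg)); // widen to VF lanes
//   }
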
/// Returns intrinsic ID for call.
/// For the input call instruction it finds the matching intrinsic and returns
/// its intrinsic ID; if it does not find one, it returns not_intrinsic.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
                                          const TargetLibraryInfo *TLI);

/// Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);

/// If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// If a value has only one user that is a CastInst, return it.
Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);

/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
Value *findScalarElement(Value *V, unsigned EltNo);

/// Get splat value if the input is a splat vector or return nullptr.
/// The value may be extracted from a splat constants vector or from
/// a sequence of instructions that broadcast a single value into a vector.
const Value *getSplatValue(const Value *V);

/// Return true if the input value is known to be a vector with all identical
/// elements (potentially including undefined elements).
/// This may be more powerful than the related getSplatValue() because it is
/// not limited by finding a scalar source value to a splatted vector.
bool isSplatValue(const Value *V, unsigned Depth = 0);

/// Compute a map of integer instructions to their minimum legal type
/// size.
///
/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
/// type (e.g. i32) whenever arithmetic is performed on them.
///
/// For targets with native i8 or i16 operations, usually InstCombine can shrink
/// the arithmetic type down again. However InstCombine refuses to create
/// illegal types, so for targets without i8 or i16 registers, the lengthening
/// and shrinking remains.
///
/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
/// their scalar equivalents do not, so during vectorization it is important to
/// remove these extensions and truncations when deciding the profitability of
/// vectorization.
///
/// This function analyzes the given range of instructions and determines the
/// minimum type size each can be converted to. It attempts to remove or
/// minimize type size changes across each def-use chain, so for example in the
/// following code:
///
///   %1 = load i8, i8*
///   %2 = add i8 %1, 2
///   %3 = load i16, i16*
///   %4 = zext i8 %2 to i32
///   %5 = zext i16 %3 to i32
///   %6 = add i32 %4, %5
///   %7 = trunc i32 %6 to i16
///
/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
///
/// If the optional TargetTransformInfo is provided, this function tries harder
/// to do less work by only looking at illegal types.
MapVector<Instruction *, uint64_t>
computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                         const TargetTransformInfo *TTI = nullptr);

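// Illustrative sketch (not part of the original header): querying minimum bit
// widths for a loop's blocks; `TheLoop`, `DB`, and `TTI` are hypothetical.
//
//   MapVector<Instruction *, uint64_t> MinBWs =
//       computeMinimumValueSizes(TheLoop->getBlocks(), DB, &TTI);
//   // MinBWs[I] is the narrowest width that instruction I can be computed in.
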
/// Compute the union of two access-group lists.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);

/// Compute the access-group list of access groups that @p Inst1 and @p Inst2
/// are both in. If either instruction does not access memory at all, it is
/// considered to be in every list.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *intersectAccessGroups(const Instruction *Inst1,
                              const Instruction *Inst2);

/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
/// MD_nontemporal, MD_access_group].
/// For K in Kinds, we get the MDNode for K from each of the
/// elements of VL, compute their "intersection" (i.e., the most generic
/// metadata value that covers all of the individual values), and set I's
/// metadata for K equal to the intersection value.
///
/// This function always sets a (possibly null) value for each K in Kinds.
Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);

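// Illustrative sketch (not part of the original header): after widening a
// group of scalar loads into one vector load, carry their common metadata
// over; `NewLoad` and `ScalarLoads` are hypothetical.
//
//   // ScalarLoads holds the original scalar load instructions as Value*.
//   SmallVector<Value *, 4> ScalarLoads = /* members being widened */;
//   propagateMetadata(NewLoad, ScalarLoads);
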
/// Create a mask that filters the members of an interleave group where there
/// are gaps.
///
/// For example, the mask for \p Group with interleave-factor 3
/// and \p VF 4, that has only its first member present is:
///
///   <1,0,0,1,0,0,1,0,0,1,0,0>
///
/// Note: The result is a mask of 0's and 1's, as opposed to the other
/// create[*]Mask() utilities which create a shuffle mask (mask that
/// consists of indices).
Constant *createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF,
                               const InterleaveGroup<Instruction> &Group);

/// Create a mask with replicated elements.
///
/// This function creates a shuffle mask for replicating each of the \p VF
/// elements in a vector \p ReplicationFactor times. It can be used to
/// transform a mask of \p VF elements into a mask of
/// \p VF * \p ReplicationFactor elements used by a predicated
/// interleaved-group of loads/stores whose Interleaved-factor ==
/// \p ReplicationFactor.
///
/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
///
///   <0,0,0,1,1,1,2,2,2,3,3,3>
Constant *createReplicatedMask(IRBuilder<> &Builder, unsigned ReplicationFactor,
                               unsigned VF);

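// Illustrative sketch (not part of the original header): replicating a
// per-iteration block mask for a predicated interleaved group of factor 3;
// `Builder` and `BlockMask` are hypothetical.
//
//   Constant *RepMask =
//       createReplicatedMask(Builder, /*ReplicationFactor=*/3, /*VF=*/4);
//   Value *GroupMask = Builder.CreateShuffleVector(
//       BlockMask, UndefValue::get(BlockMask->getType()), RepMask);
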
/// Create an interleave shuffle mask.
///
/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
/// vectorization factor \p VF into a single wide vector. The mask is of the
/// form:
///
///   <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
///
/// For example, the mask for VF = 4 and NumVecs = 2 is:
///
///   <0, 4, 1, 5, 2, 6, 3, 7>.
Constant *createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
                               unsigned NumVecs);

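// Illustrative sketch (not part of the original header): interleaving two
// vectors <a0,a1,a2,a3> and <b0,b1,b2,b3> into <a0,b0,a1,b1,a2,b2,a3,b3>;
// `V0` and `V1` are hypothetical.
//
//   Constant *IMask = createInterleaveMask(Builder, /*VF=*/4, /*NumVecs=*/2);
//   Value *Wide = Builder.CreateShuffleVector(V0, V1, IMask);
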
/// Create a stride shuffle mask.
///
/// This function creates a shuffle mask whose elements begin at \p Start and
/// are incremented by \p Stride. The mask can be used to deinterleave an
/// interleaved vector into separate vectors of vectorization factor \p VF. The
/// mask is of the form:
///
///   <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
///
/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
///
///   <0, 2, 4, 6>
Constant *createStrideMask(IRBuilder<> &Builder, unsigned Start,
                           unsigned Stride, unsigned VF);

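// Illustrative sketch (not part of the original header): extracting the even
// lanes of an 8-element interleaved vector; `WideVec` is hypothetical.
//
//   Constant *EvenMask =
//       createStrideMask(Builder, /*Start=*/0, /*Stride=*/2, /*VF=*/4);
//   Value *Evens = Builder.CreateShuffleVector(
//       WideVec, UndefValue::get(WideVec->getType()), EvenMask);
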
/// Create a sequential shuffle mask.
///
/// This function creates a shuffle mask whose elements are sequential and begin
/// at \p Start. The mask contains \p NumInts integers and is padded with \p
/// NumUndefs undef values. The mask is of the form:
///
///   <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
///
/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
///
///   <0, 1, 2, 3, undef, undef, undef, undef>
Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start,
                               unsigned NumInts, unsigned NumUndefs);

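// Illustrative sketch (not part of the original header): widening a <4 x i32>
// value to 8 lanes by padding with undefs; `V` is hypothetical.
//
//   Constant *PadMask = createSequentialMask(Builder, /*Start=*/0,
//                                            /*NumInts=*/4, /*NumUndefs=*/4);
//   Value *Widened = Builder.CreateShuffleVector(
//       V, UndefValue::get(V->getType()), PadMask);
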
/// Concatenate a list of vectors.
///
/// This function generates code that concatenates the vectors in \p Vecs into a
/// single large vector. The number of vectors should be greater than one, and
/// their element types should be the same. The number of elements in the
/// vectors should also be the same; however, if the last vector has fewer
/// elements, it will be padded with undefs.
Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);

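// Illustrative sketch (not part of the original header): joining per-part
// values back into one wide vector; `Parts` is a hypothetical
// SmallVector<Value *, 4> of same-typed vectors.
//
//   Value *WideVec = concatenateVectors(Builder, Parts);
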
/// Given a mask vector of the form <Y x i1>, return true if all of the
/// elements of this predicate mask are false or undef. That is, return true
/// if all lanes can be assumed inactive.
bool maskIsAllZeroOrUndef(Value *Mask);

/// Given a mask vector of the form <Y x i1>, return true if all of the
/// elements of this predicate mask are true or undef. That is, return true
/// if all lanes can be assumed active.
bool maskIsAllOneOrUndef(Value *Mask);

/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
/// for each lane which may be active.
APInt possiblyDemandedEltsInMask(Value *Mask);

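// Illustrative sketch (not part of the original header): using the mask
// queries when simplifying a masked store intrinsic; `MaskOp` is hypothetical.
//
//   if (maskIsAllZeroOrUndef(MaskOp)) {
//     // No lane is known active; the masked store can be dropped.
//   } else if (maskIsAllOneOrUndef(MaskOp)) {
//     // Every lane may be assumed active; fold to an unmasked store.
//   }
//   APInt DemandedElts = possiblyDemandedEltsInMask(MaskOp);
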
/// The group of interleaved loads/stores sharing the same stride and
/// close to each other.
///
/// Each member in this group has an index starting from 0, and the largest
/// index should be less than the interleave factor, which is equal to the
/// absolute value of the access's stride.
///
/// E.g. An interleaved load group of factor 4:
///   for (unsigned i = 0; i < 1024; i += 4) {
///     a = A[i];   // Member of index 0
///     b = A[i+1]; // Member of index 1
///     d = A[i+3]; // Member of index 3
///     ...
///   }
///
/// An interleaved store group of factor 4:
///   for (unsigned i = 0; i < 1024; i += 4) {
///     ...
///     A[i]   = a; // Member of index 0
///     A[i+1] = b; // Member of index 1
///     A[i+2] = c; // Member of index 2
///     A[i+3] = d; // Member of index 3
///   }
///
/// Note: the interleaved load group could have gaps (missing members), but
/// the interleaved store group doesn't allow gaps.
template <typename InstTy> class InterleaveGroup {
public:
  InterleaveGroup(uint32_t Factor, bool Reverse, uint32_t Align)
      : Factor(Factor), Reverse(Reverse), Align(Align), InsertPos(nullptr) {}

  InterleaveGroup(InstTy *Instr, int32_t Stride, uint32_t Align)
      : Align(Align), InsertPos(Instr) {
    assert(Align && "The alignment should be non-zero");

    Factor = std::abs(Stride);
    assert(Factor > 1 && "Invalid interleave factor");

    Reverse = Stride < 0;
    Members[0] = Instr;
  }

  bool isReverse() const { return Reverse; }
  uint32_t getFactor() const { return Factor; }
  uint32_t getAlignment() const { return Align; }
  uint32_t getNumMembers() const { return Members.size(); }

  /// Try to insert a new member \p Instr with index \p Index and
  /// alignment \p NewAlign. The index is related to the leader and it could be
  /// negative if it is the new leader.
  ///
  /// \returns false if the instruction doesn't belong to the group.
  bool insertMember(InstTy *Instr, int32_t Index, uint32_t NewAlign) {
    assert(NewAlign && "The new member's alignment should be non-zero");

    // Make sure the key fits in an int32_t.
    Optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
    if (!MaybeKey)
      return false;
    int32_t Key = *MaybeKey;

    // Skip if there is already a member with the same index.
    if (Members.find(Key) != Members.end())
      return false;

    if (Key > LargestKey) {
      // The largest index is always less than the interleave factor.
      if (Index >= static_cast<int32_t>(Factor))
        return false;

      LargestKey = Key;
    } else if (Key < SmallestKey) {
      // Make sure the largest index fits in an int32_t.
      Optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
      if (!MaybeLargestIndex)
        return false;

      // The largest index is always less than the interleave factor.
      if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
        return false;

      SmallestKey = Key;
    }

    // It's always safe to select the minimum alignment.
    Align = std::min(Align, NewAlign);
    Members[Key] = Instr;
    return true;
  }

  /// Get the member with the given index \p Index.
  ///
  /// \returns nullptr if it contains no such member.
  InstTy *getMember(uint32_t Index) const {
    int32_t Key = SmallestKey + Index;
    auto Member = Members.find(Key);
    if (Member == Members.end())
      return nullptr;

    return Member->second;
  }

  /// Get the index for the given member. Unlike the key in the member
  /// map, the index starts from 0.
  uint32_t getIndex(const InstTy *Instr) const {
    for (auto I : Members) {
      if (I.second == Instr)
        return I.first - SmallestKey;
    }

    llvm_unreachable("InterleaveGroup contains no such member");
  }

  InstTy *getInsertPos() const { return InsertPos; }
  void setInsertPos(InstTy *Inst) { InsertPos = Inst; }

  /// Add metadata (e.g. alias info) from the instructions in this group to \p
  /// NewInst.
  ///
  /// FIXME: this function currently does not add noalias metadata a'la
  /// addNewMetadata. To do that we need to compute the intersection of the
  /// noalias info from all members.
  void addMetadata(InstTy *NewInst) const;

  /// Returns true if this Group requires a scalar iteration to handle gaps.
  bool requiresScalarEpilogue() const {
    // If the last member of the Group exists, then a scalar epilog is not
    // needed for this group.
    if (getMember(getFactor() - 1))
      return false;

    // We have a group with gaps. It therefore cannot be a group of stores,
    // and it can't be a reversed access, because such groups get invalidated.
    assert(!getMember(0)->mayWriteToMemory() &&
           "Group should have been invalidated");
    assert(!isReverse() && "Group should have been invalidated");

    // This is a group of loads, with gaps, and without a last member.
    return true;
  }

private:
  uint32_t Factor; // Interleave Factor.
  bool Reverse;
  uint32_t Align;
  DenseMap<int32_t, InstTy *> Members;
  int32_t SmallestKey = 0;
  int32_t LargestKey = 0;

  // To avoid breaking dependences, vectorized instructions of an interleave
  // group should be inserted at either the first load or the last store in
  // program order.
  //
  // E.g. %even = load i32  // Insert Position
  //      %add = add i32 %even  // Use of %even
  //      %odd = load i32
  //
  //      store i32 %even
  //      %odd = add i32  // Def of %odd
  //      store i32 %odd  // Insert Position
  InstTy *InsertPos;
};

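// Illustrative sketch (not part of the original header): how an analysis can
// grow a group from a leader access; the load instructions are hypothetical.
//
//   InterleaveGroup<Instruction> Group(LeaderLoad, /*Stride=*/3, /*Align=*/4);
//   Group.insertMember(SecondLoad, /*Index=*/1, /*NewAlign=*/4);
//   Group.insertMember(ThirdLoad, /*Index=*/2, /*NewAlign=*/4);
//   // Now Group.getFactor() == 3 and Group.getNumMembers() == 3; the last
//   // member (index 2) is present, so no scalar epilogue is required.
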
/// Drive the analysis of interleaved memory accesses in the loop.
///
/// Use this class to analyze interleaved accesses only when we can vectorize
/// a loop. Otherwise it's meaningless to do analysis as the vectorization
/// on interleaved accesses is unsafe.
///
/// The analysis collects interleave groups and records the relationships
/// between the member and the group in a map.
class InterleavedAccessInfo {
public:
  InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
                        DominatorTree *DT, LoopInfo *LI,
                        const LoopAccessInfo *LAI)
      : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}

  ~InterleavedAccessInfo() { reset(); }

  /// Analyze the interleaved accesses and collect them in interleave
  /// groups. Substitute symbolic strides using \p Strides.
  /// Consider also predicated loads/stores in the analysis if
  /// \p EnableMaskedInterleavedGroup is true.
  void analyzeInterleaving(bool EnableMaskedInterleavedGroup);

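  // Illustrative sketch (not part of the original header): driving the
  // analysis from a vectorizer; `PSE`, `L`, `DT`, `LI`, `LAI`, and
  // `SomeMemInstr` are hypothetical.
  //
  //   InterleavedAccessInfo IAI(PSE, L, DT, LI, LAI);
  //   IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/false);
  //   if (auto *Group = IAI.getInterleaveGroup(SomeMemInstr))
  //     assert(Group->getFactor() > 1 && "factor equals the absolute stride");
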
  /// Invalidate groups, e.g., in case all blocks in loop will be predicated
  /// contrary to original assumption. Although we currently prevent group
  /// formation for predicated accesses, we may be able to relax this limitation
  /// in the future once we handle more complicated blocks.
  void reset() {
    SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
    // Avoid releasing a pointer twice.
    for (auto &I : InterleaveGroupMap)
      DelSet.insert(I.second);
    for (auto *Ptr : DelSet)
      delete Ptr;
    InterleaveGroupMap.clear();
    RequiresScalarEpilogue = false;
  }

  /// Check if \p Instr belongs to any interleave group.
  bool isInterleaved(Instruction *Instr) const {
    return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if it doesn't have such a group.
  InterleaveGroup<Instruction> *
  getInterleaveGroup(const Instruction *Instr) const {
    if (InterleaveGroupMap.count(Instr))
      return InterleaveGroupMap.find(Instr)->second;
    return nullptr;
  }

  iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
  getInterleaveGroups() {
    return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
  }

  /// Returns true if an interleaved group that may access memory
  /// out-of-bounds requires a scalar epilogue iteration for correctness.
  bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }

  /// Invalidate groups that require a scalar epilogue (due to gaps). This can
  /// happen when optimizing for size forbids a scalar epilogue, and the gap
  /// cannot be filtered by masking the load/store.
  void invalidateGroupsRequiringScalarEpilogue();

private:
  /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
  /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
  /// The interleaved access analysis can also add new predicates (for example
  /// by versioning strides of pointers).
  PredicatedScalarEvolution &PSE;

  Loop *TheLoop;
  DominatorTree *DT;
  LoopInfo *LI;
  const LoopAccessInfo *LAI;

  /// True if the loop may contain non-reversed interleaved groups with
  /// out-of-bounds accesses. We ensure we don't speculatively access memory
  /// out-of-bounds by executing at least one scalar epilogue iteration.
  bool RequiresScalarEpilogue = false;

  /// Holds the relationships between the members and the interleave group.
  DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;

  SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;

  /// Holds dependences among the memory accesses in the loop. It maps a source
  /// access to a set of dependent sink accesses.
  DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;

  /// The descriptor for a strided memory access.
  struct StrideDescriptor {
    StrideDescriptor() = default;
    StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
                     unsigned Align)
        : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {}

    // The access's stride. It is negative for a reverse access.
    int64_t Stride = 0;

    // The scalar expression of this access.
    const SCEV *Scev = nullptr;

    // The size of the memory object.
    uint64_t Size = 0;

    // The alignment of this access.
    unsigned Align = 0;
  };

  /// A type for holding instructions and their stride descriptors.
  using StrideEntry = std::pair<Instruction *, StrideDescriptor>;

  /// Create a new interleave group with the given instruction \p Instr,
  /// stride \p Stride and alignment \p Align.
  ///
  /// \returns the newly created interleave group.
  InterleaveGroup<Instruction> *
  createInterleaveGroup(Instruction *Instr, int Stride, unsigned Align) {
    assert(!InterleaveGroupMap.count(Instr) &&
           "Already in an interleaved access group");
    InterleaveGroupMap[Instr] =
        new InterleaveGroup<Instruction>(Instr, Stride, Align);
    InterleaveGroups.insert(InterleaveGroupMap[Instr]);
    return InterleaveGroupMap[Instr];
  }

  /// Release the group and remove all the relationships.
  void releaseGroup(InterleaveGroup<Instruction> *Group) {
    for (unsigned i = 0; i < Group->getFactor(); i++)
      if (Instruction *Member = Group->getMember(i))
        InterleaveGroupMap.erase(Member);

    InterleaveGroups.erase(Group);
    delete Group;
  }

  /// Collect all the accesses with a constant stride in program order.
  void collectConstStrideAccesses(
      MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
      const ValueToValueMap &Strides);

  /// Returns true if \p Stride is allowed in an interleaved group.
  static bool isStrided(int Stride);

  /// Returns true if \p BB is a predicated block.
  bool isPredicated(BasicBlock *BB) const {
    return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
  }

  /// Returns true if LoopAccessInfo can be used for dependence queries.
  bool areDependencesValid() const {
    return LAI && LAI->getDepChecker().getDependences();
  }

  /// Returns true if memory accesses \p A and \p B can be reordered, if
  /// necessary, when constructing interleaved groups.
  ///
  /// \p A must precede \p B in program order. We return false if reordering is
  /// not necessary or is prevented because \p A and \p B may be dependent.
  bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
                                                 StrideEntry *B) const {
    // Code motion for interleaved accesses can potentially hoist strided loads
    // and sink strided stores. The code below checks the legality of the
    // following two conditions:
    //
    // 1. Potentially moving a strided load (B) before any store (A) that
    //    precedes B, or
    //
    // 2. Potentially moving a strided store (A) after any load or store (B)
    //    that A precedes.
    //
    // It's legal to reorder A and B if we know there isn't a dependence from A
    // to B. Note that this determination is conservative since some
    // dependences could potentially be reordered safely.

    // A is potentially the source of a dependence.
    auto *Src = A->first;
    auto SrcDes = A->second;

    // B is potentially the sink of a dependence.
    auto *Sink = B->first;
    auto SinkDes = B->second;

    // Code motion for interleaved accesses can't violate WAR dependences.
    // Thus, reordering is legal if the source isn't a write.
    if (!Src->mayWriteToMemory())
      return true;

    // At least one of the accesses must be strided.
    if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
      return true;

    // If dependence information is not available from LoopAccessInfo,
    // conservatively assume the instructions can't be reordered.
    if (!areDependencesValid())
      return false;

    // If we know there is a dependence from source to sink, assume the
    // instructions can't be reordered. Otherwise, reordering is legal.
    return Dependences.find(Src) == Dependences.end() ||
           !Dependences.lookup(Src).count(Sink);
  }

  /// Collect the dependences from LoopAccessInfo.
  ///
  /// We process the dependences once during the interleaved access analysis to
  /// enable constant-time dependence queries.
  void collectDependences() {
    if (!areDependencesValid())
      return;
    auto *Deps = LAI->getDepChecker().getDependences();
    for (auto Dep : *Deps)
      Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
  }
};

} // llvm namespace

#endif