LLVM 9.0.0svn
VectorUtils.h
1 //===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines some vectorizer utilities.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_ANALYSIS_VECTORUTILS_H
14 #define LLVM_ANALYSIS_VECTORUTILS_H
15 
16 #include "llvm/ADT/MapVector.h"
17 #include "llvm/Analysis/LoopAccessAnalysis.h"
18 #include "llvm/Analysis/TargetLibraryInfo.h"
19 #include "llvm/IR/IRBuilder.h"
20 #include "llvm/Support/CheckedArithmetic.h"
21 
22 namespace llvm {
23 
24 template <typename T> class ArrayRef;
25 class DemandedBits;
26 class GetElementPtrInst;
27 template <typename InstTy> class InterleaveGroup;
28 class Loop;
29 class ScalarEvolution;
30 class TargetTransformInfo;
31 class Type;
32 class Value;
33 
34 namespace Intrinsic {
35 enum ID : unsigned;
36 }
37 
38 /// Identify if the intrinsic is trivially vectorizable.
39 /// This method returns true if the intrinsic's argument types are all
40 /// scalars for the scalar form of the intrinsic and all vectors for
41 /// the vector form of the intrinsic.
42 bool isTriviallyVectorizable(Intrinsic::ID ID);
43 
44 /// Identifies if the intrinsic has a scalar operand. It checks for
45 /// ctlz, cttz and powi special intrinsics whose argument is scalar.
46 bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);
47 
48 /// Returns intrinsic ID for call.
49 /// For the input call instruction it finds the corresponding intrinsic and
50 /// returns its intrinsic ID; if no mapping is found, it returns not_intrinsic.
51 Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
52                                           const TargetLibraryInfo *TLI);
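A minimal sketch of the typical query pattern, assuming an instruction reference I and a TargetLibraryInfo pointer TLI are already in scope:

    if (auto *CI = dyn_cast<CallInst>(&I)) {
      Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
      if (ID != Intrinsic::not_intrinsic && isTriviallyVectorizable(ID)) {
        // The call maps to an intrinsic that can simply be widened lane-wise.
      }
    }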
53 
54 /// Find the operand of the GEP that should be checked for consecutive
55 /// stores. This ignores trailing indices that have no effect on the final
56 /// pointer.
57 unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);
58 
59 /// If the argument is a GEP, then returns the operand identified by
60 /// getGEPInductionOperand. However, if there is some other non-loop-invariant
61 /// operand, it returns that instead.
62 Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);
63 
64 /// If a value has only one user that is a CastInst, return it.
65 Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);
66 
67 /// Get the stride of a pointer access in a loop. Looks for symbolic
68 /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
69 Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);
70 
71 /// Given a vector and an element number, see if the scalar value is
72 /// already around as a register, for example if it were inserted then extracted
73 /// from the vector.
74 Value *findScalarElement(Value *V, unsigned EltNo);
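A small sketch, assuming V is an IR value of vector type built earlier in the function:

    // If V was produced by 'insertelement <4 x i32> %vec, i32 %x, i32 2',
    // asking for element 2 returns %x without creating an extractelement.
    Value *Elt = findScalarElement(V, /*EltNo=*/2);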
75 
76 /// Get splat value if the input is a splat vector or return nullptr.
77 /// The value may be extracted from a splat constants vector or from
78 /// a sequence of instructions that broadcast a single value into a vector.
79 const Value *getSplatValue(const Value *V);
80 
81 /// Return true if the input value is known to be a vector with all identical
82 /// elements (potentially including undefined elements).
83 /// This may be more powerful than the related getSplatValue() because it is
84 /// not limited by finding a scalar source value to a splatted vector.
85 bool isSplatValue(const Value *V, unsigned Depth = 0);
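A minimal usage sketch for these splat helpers, assuming an LLVMContext Ctx is available:

    // Build the constant splat <4 x i32> <7, 7, 7, 7> and query it.
    Type *Int32Ty = Type::getInt32Ty(Ctx);
    Constant *Splat = ConstantVector::getSplat(4, ConstantInt::get(Int32Ty, 7));
    const Value *Scalar = getSplatValue(Splat); // the i32 7 constant
    bool AllSame = isSplatValue(Splat);         // true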
86 
87 /// Compute a map of integer instructions to their minimum legal type
88 /// size.
89 ///
90 /// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
91 /// type (e.g. i32) whenever arithmetic is performed on them.
92 ///
93 /// For targets with native i8 or i16 operations, usually InstCombine can shrink
94 /// the arithmetic type down again. However, InstCombine refuses to create
95 /// illegal types, so for targets without i8 or i16 registers, the lengthening
96 /// and shrinking remain.
97 ///
98 /// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
99 /// their scalar equivalents do not, so during vectorization it is important to
100 /// remove these extensions and truncations when deciding the profitability of
101 /// vectorization.
102 ///
103 /// This function analyzes the given range of instructions and determines the
104 /// minimum type size each can be converted to. It attempts to remove or
105 /// minimize type size changes across each def-use chain, so for example in the
106 /// following code:
107 ///
108 /// %1 = load i8, i8*
109 /// %2 = add i8 %1, 2
110 /// %3 = load i16, i16*
111 /// %4 = zext i8 %2 to i32
112 /// %5 = zext i16 %3 to i32
113 /// %6 = add i32 %4, %5
114 /// %7 = trunc i32 %6 to i16
115 ///
116 /// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
117 /// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
118 ///
119 /// If the optional TargetTransformInfo is provided, this function tries harder
120 /// to do less work by only looking at illegal types.
121 MapVector<Instruction*, uint64_t>
122 computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
123                          DemandedBits &DB,
124                          const TargetTransformInfo *TTI=nullptr);
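A hypothetical call site inside a vectorizer, assuming the loop's blocks (Blocks), a DemandedBits analysis (DB) and a TargetTransformInfo (TTI) are already available:

    // Map each integer instruction to the smallest bit width it can legally
    // be narrowed to when vectorizing.
    MapVector<Instruction *, uint64_t> MinBWs =
        computeMinimumValueSizes(Blocks, DB, &TTI);
    for (const auto &KV : MinBWs)
      errs() << *KV.first << " can be narrowed to i" << KV.second << "\n";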
125 
126 /// Compute the union of two access-group lists.
127 ///
128 /// If the list contains just one access group, it is returned directly. If the
129 /// list is empty, returns nullptr.
130 MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);
131 
132 /// Compute the access-group list of access groups that @p Inst1 and @p Inst2
133 /// are both in. If either instruction does not access memory at all, it is
134 /// considered to be in every list.
135 ///
136 /// If the list contains just one access group, it is returned directly. If the
137 /// list is empty, returns nullptr.
138 MDNode *intersectAccessGroups(const Instruction *Inst1,
139                               const Instruction *Inst2);
140 
141 /// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
142 /// MD_nontemporal, MD_access_group].
143 /// For K in Kinds, we get the MDNode for K from each of the
144 /// elements of VL, compute their "intersection" (i.e., the most generic
145 /// metadata value that covers all of the individual values), and set I's
146 /// metadata for K equal to the intersection value.
147 ///
148 /// This function always sets a (possibly null) value for each K in Kinds.
149 Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);
150 
151 /// Create a mask that filters the members of an interleave group where there
152 /// are gaps.
153 ///
154 /// For example, the mask for \p Group with interleave-factor 3
155 /// and \p VF 4, that has only its first member present is:
156 ///
157 /// <1,0,0,1,0,0,1,0,0,1,0,0>
158 ///
159 /// Note: The result is a mask of 0's and 1's, as opposed to the other
160 /// create[*]Mask() utilities which create a shuffle mask (mask that
161 /// consists of indices).
162 Constant *createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF,
163  const InterleaveGroup<Instruction> &Group);
164 
165 /// Create a mask with replicated elements.
166 ///
167 /// This function creates a shuffle mask for replicating each of the \p VF
168 /// elements in a vector \p ReplicationFactor times. It can be used to
169 /// transform a mask of \p VF elements into a mask of
170 /// \p VF * \p ReplicationFactor elements used by a predicated
171 /// interleaved-group of loads/stores whose Interleaved-factor ==
172 /// \p ReplicationFactor.
173 ///
174 /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
175 ///
176 /// <0,0,0,1,1,1,2,2,2,3,3,3>
177 Constant *createReplicatedMask(IRBuilder<> &Builder, unsigned ReplicationFactor,
178  unsigned VF);
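A short sketch of how such a mask is typically consumed, assuming an IRBuilder<> Builder and a <4 x i1> predicate value BlockMask are in scope:

    // <0,0,0,1,1,1,2,2,2,3,3,3>: replicate each of the 4 lanes 3 times.
    Constant *RepMask =
        createReplicatedMask(Builder, /*ReplicationFactor=*/3, /*VF=*/4);
    Value *WideMask = Builder.CreateShuffleVector(
        BlockMask, UndefValue::get(BlockMask->getType()), RepMask);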
179 
180 /// Create an interleave shuffle mask.
181 ///
182 /// This function creates a shuffle mask for interleaving \p NumVecs vectors of
183 /// vectorization factor \p VF into a single wide vector. The mask is of the
184 /// form:
185 ///
186 /// <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
187 ///
188 /// For example, the mask for VF = 4 and NumVecs = 2 is:
189 ///
190 /// <0, 4, 1, 5, 2, 6, 3, 7>.
191 Constant *createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
192  unsigned NumVecs);
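A sketch pairing the mask with a shuffle, assuming Builder plus two <4 x float> values V0 and V1 are in scope:

    // <0,4,1,5,2,6,3,7>: interleave V0 and V1 into one 8-element vector.
    Constant *IMask = createInterleaveMask(Builder, /*VF=*/4, /*NumVecs=*/2);
    Value *Interleaved = Builder.CreateShuffleVector(V0, V1, IMask);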
193 
194 /// Create a stride shuffle mask.
195 ///
196 /// This function creates a shuffle mask whose elements begin at \p Start and
197 /// are incremented by \p Stride. The mask can be used to deinterleave an
198 /// interleaved vector into separate vectors of vectorization factor \p VF. The
199 /// mask is of the form:
200 ///
201 /// <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
202 ///
203 /// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
204 ///
205 /// <0, 2, 4, 6>
206 Constant *createStrideMask(IRBuilder<> &Builder, unsigned Start,
207  unsigned Stride, unsigned VF);
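A sketch deinterleaving the even lanes from an <8 x float> value Interleaved (Builder and Interleaved assumed in scope):

    // <0,2,4,6>: extract the even elements into a <4 x float> value.
    Constant *EvenMask =
        createStrideMask(Builder, /*Start=*/0, /*Stride=*/2, /*VF=*/4);
    Value *Evens = Builder.CreateShuffleVector(
        Interleaved, UndefValue::get(Interleaved->getType()), EvenMask);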
208 
209 /// Create a sequential shuffle mask.
210 ///
211 /// This function creates a shuffle mask whose elements are sequential and begin
212 /// at \p Start. The mask contains \p NumInts integers and is padded with \p
213 /// NumUndefs undef values. The mask is of the form:
214 ///
215 /// <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
216 ///
217 /// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
218 ///
219 /// <0, 1, 2, 3, undef, undef, undef, undef>
220 Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start,
221  unsigned NumInts, unsigned NumUndefs);
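A one-line sketch with the same assumed Builder:

    // <0,1,2,3,undef,undef,undef,undef>: select the first 4 lanes, pad to 8.
    Constant *SeqMask =
        createSequentialMask(Builder, /*Start=*/0, /*NumInts=*/4, /*NumUndefs=*/4);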
222 
223 /// Concatenate a list of vectors.
224 ///
225 /// This function generates code that concatenates the vectors in \p Vecs into a
226 /// single large vector. The number of vectors should be greater than one, and
227 /// their element types should be the same. The number of elements in the
228 /// vectors should also be the same; however, if the last vector has fewer
229 /// elements, it will be padded with undefs.
230 Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);
231 
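A sketch with two <4 x i32> values A and B assumed in scope:

    // Emit the shuffles that produce one <8 x i32> value from A and B.
    Value *Parts[] = {A, B};
    Value *Wide = concatenateVectors(Builder, Parts);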
232 /// Given a mask vector of the form <Y x i1>, return true if all of the
233 /// elements of this predicate mask are false or undef. That is, return true
234 /// if all lanes can be assumed inactive.
235 bool maskIsAllZeroOrUndef(Value *Mask);
236 
237 /// Given a mask vector of the form <Y x i1>, return true if all of the
238 /// elements of this predicate mask are true or undef. That is, return true
239 /// if all lanes can be assumed active.
240 bool maskIsAllOneOrUndef(Value *Mask);
241 
242 /// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
243 /// for each lane which may be active.
244 APInt possiblyDemandedEltsInMask(Value *Mask);
245 
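A sketch querying a constant all-true <4 x i1> mask (Builder assumed in scope):

    Constant *AllTrue =
        Constant::getAllOnesValue(VectorType::get(Builder.getInt1Ty(), 4));
    bool NoneActive = maskIsAllZeroOrUndef(AllTrue);       // false
    bool AllActive  = maskIsAllOneOrUndef(AllTrue);        // true
    APInt Demanded  = possiblyDemandedEltsInMask(AllTrue); // 0b1111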
246 /// The group of interleaved loads/stores sharing the same stride and
247 /// close to each other.
248 ///
249 /// Each member in this group has an index starting from 0, and the largest
250 /// index should be less than the interleave factor, which is equal to the absolute
251 /// value of the access's stride.
252 ///
253 /// E.g. An interleaved load group of factor 4:
254 /// for (unsigned i = 0; i < 1024; i+=4) {
255 /// a = A[i]; // Member of index 0
256 /// b = A[i+1]; // Member of index 1
257 /// d = A[i+3]; // Member of index 3
258 /// ...
259 /// }
260 ///
261 /// An interleaved store group of factor 4:
262 /// for (unsigned i = 0; i < 1024; i+=4) {
263 /// ...
264 /// A[i] = a; // Member of index 0
265 /// A[i+1] = b; // Member of index 1
266 /// A[i+2] = c; // Member of index 2
267 /// A[i+3] = d; // Member of index 3
268 /// }
269 ///
270 /// Note: the interleaved load group could have gaps (missing members), but
271 /// the interleaved store group doesn't allow gaps.
272 template <typename InstTy> class InterleaveGroup {
273 public:
274  InterleaveGroup(uint32_t Factor, bool Reverse, uint32_t Align)
275  : Factor(Factor), Reverse(Reverse), Align(Align), InsertPos(nullptr) {}
276 
277  InterleaveGroup(InstTy *Instr, int32_t Stride, uint32_t Align)
278  : Align(Align), InsertPos(Instr) {
279  assert(Align && "The alignment should be non-zero");
280 
281  Factor = std::abs(Stride);
282  assert(Factor > 1 && "Invalid interleave factor");
283 
284  Reverse = Stride < 0;
285  Members[0] = Instr;
286  }
287 
288  bool isReverse() const { return Reverse; }
289  uint32_t getFactor() const { return Factor; }
290  uint32_t getAlignment() const { return Align; }
291  uint32_t getNumMembers() const { return Members.size(); }
292 
293  /// Try to insert a new member \p Instr with index \p Index and
294  /// alignment \p NewAlign. The index is relative to the leader; it can be
295  /// negative if the new member becomes the leader.
296  ///
297  /// \returns false if the instruction doesn't belong to the group.
298  bool insertMember(InstTy *Instr, int32_t Index, uint32_t NewAlign) {
299  assert(NewAlign && "The new member's alignment should be non-zero");
300 
301  // Make sure the key fits in an int32_t.
302  Optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
303  if (!MaybeKey)
304  return false;
305  int32_t Key = *MaybeKey;
306 
307  // Skip if there is already a member with the same index.
308  if (Members.find(Key) != Members.end())
309  return false;
310 
311  if (Key > LargestKey) {
312  // The largest index is always less than the interleave factor.
313  if (Index >= static_cast<int32_t>(Factor))
314  return false;
315 
316  LargestKey = Key;
317  } else if (Key < SmallestKey) {
318 
319  // Make sure the largest index fits in an int32_t.
320  Optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
321  if (!MaybeLargestIndex)
322  return false;
323 
324  // The largest index is always less than the interleave factor.
325  if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
326  return false;
327 
328  SmallestKey = Key;
329  }
330 
331  // It's always safe to select the minimum alignment.
332  Align = std::min(Align, NewAlign);
333  Members[Key] = Instr;
334  return true;
335  }
336 
337  /// Get the member with the given index \p Index
338  ///
339  /// \returns nullptr if the group contains no such member.
340  InstTy *getMember(uint32_t Index) const {
341  int32_t Key = SmallestKey + Index;
342  auto Member = Members.find(Key);
343  if (Member == Members.end())
344  return nullptr;
345 
346  return Member->second;
347  }
348 
349  /// Get the index for the given member. Unlike the key in the member
350  /// map, the index starts from 0.
351  uint32_t getIndex(const InstTy *Instr) const {
352  for (auto I : Members) {
353  if (I.second == Instr)
354  return I.first - SmallestKey;
355  }
356 
357  llvm_unreachable("InterleaveGroup contains no such member");
358  }
359 
360  InstTy *getInsertPos() const { return InsertPos; }
361  void setInsertPos(InstTy *Inst) { InsertPos = Inst; }
362 
363  /// Add metadata (e.g. alias info) from the instructions in this group to \p
364  /// NewInst.
365  ///
366  /// FIXME: this function currently does not add noalias metadata the way
367  /// addNewMetadata does. To do that we need to compute the intersection of the
368  /// noalias info from all members.
369  void addMetadata(InstTy *NewInst) const;
370 
371  /// Returns true if this Group requires a scalar iteration to handle gaps.
372  bool requiresScalarEpilogue() const {
373  // If the last member of the Group exists, then a scalar epilogue is not
374  // needed for this group.
375  if (getMember(getFactor() - 1))
376  return false;
377 
378  // We have a group with gaps. It therefore cannot be a group of stores,
379  // and it can't be a reversed access, because such groups get invalidated.
380  assert(!getMember(0)->mayWriteToMemory() &&
381  "Group should have been invalidated");
382  assert(!isReverse() && "Group should have been invalidated");
383 
384  // This is a group of loads, with gaps, and without a last member.
385  return true;
386  }
387 
388 private:
389  uint32_t Factor; // Interleave Factor.
390  bool Reverse;
391  uint32_t Align;
392  DenseMap<int32_t, InstTy *> Members;
393  int32_t SmallestKey = 0;
394  int32_t LargestKey = 0;
395 
396  // To avoid breaking dependences, vectorized instructions of an interleave
397  // group should be inserted at either the first load or the last store in
398  // program order.
399  //
400  // E.g. %even = load i32 // Insert Position
401  // %add = add i32 %even // Use of %even
402  // %odd = load i32
403  //
404  // store i32 %even
405  // %odd = add i32 // Def of %odd
406  // store i32 %odd // Insert Position
407  InstTy *InsertPos;
408 };
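A sketch of how a group is built up, assuming LoadA and LoadB are two adjacent 4-byte-aligned loads forming a stride-2 access pattern:

    InterleaveGroup<Instruction> Group(LoadA, /*Stride=*/2, /*Align=*/4);
    if (Group.insertMember(LoadB, /*Index=*/1, /*NewAlign=*/4)) {
      // Factor 2 with both members present, so no gaps remain.
      Instruction *Leader = Group.getMember(0); // LoadA
      unsigned IdxB = Group.getIndex(LoadB);    // 1
    }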
409 
410 /// Drive the analysis of interleaved memory accesses in the loop.
411 ///
412 /// Use this class to analyze interleaved accesses only when we can vectorize
413 /// a loop. Otherwise it's meaningless to do analysis as the vectorization
414 /// on interleaved accesses is unsafe.
415 ///
416 /// The analysis collects interleave groups and records the relationships
417 /// between the member and the group in a map.
418 class InterleavedAccessInfo {
419 public:
420  InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
421                        DominatorTree *DT, LoopInfo *LI,
422  const LoopAccessInfo *LAI)
423  : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
424 
425  ~InterleavedAccessInfo() { reset(); }
426 
427  /// Analyze the interleaved accesses and collect them in interleave
428  /// groups. Substitute symbolic strides using \p Strides.
429  /// Consider also predicated loads/stores in the analysis if
430  /// \p EnableMaskedInterleavedGroup is true.
431  void analyzeInterleaving(bool EnableMaskedInterleavedGroup);
432 
433  /// Invalidate groups, e.g., in case all blocks in the loop will be predicated
434  /// contrary to the original assumption. Although we currently prevent group
435  /// formation for predicated accesses, we may be able to relax this limitation
436  /// in the future once we handle more complicated blocks.
437  void reset() {
438  SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
439  // Avoid releasing a pointer twice.
440  for (auto &I : InterleaveGroupMap)
441  DelSet.insert(I.second);
442  for (auto *Ptr : DelSet)
443  delete Ptr;
444  InterleaveGroupMap.clear();
445  RequiresScalarEpilogue = false;
446  }
447 
448 
449  /// Check if \p Instr belongs to any interleave group.
450  bool isInterleaved(Instruction *Instr) const {
451  return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
452  }
453 
454  /// Get the interleave group that \p Instr belongs to.
455  ///
456  /// \returns nullptr if \p Instr does not belong to any group.
457  InterleaveGroup<Instruction> *
458  getInterleaveGroup(const Instruction *Instr) const {
459  if (InterleaveGroupMap.count(Instr))
460  return InterleaveGroupMap.find(Instr)->second;
461  return nullptr;
462  }
463 
464  iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
465  getInterleaveGroups() {
466  return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
467  }
468 
469  /// Returns true if an interleaved group that may access memory
470  /// out-of-bounds requires a scalar epilogue iteration for correctness.
471  bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }
472 
473  /// Invalidate groups that require a scalar epilogue (due to gaps). This can
474  /// happen when optimizing for size forbids a scalar epilogue, and the gap
475  /// cannot be filtered by masking the load/store.
476  void invalidateGroupsRequiringScalarEpilogue();
477 
478 private:
479  /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
480  /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
481  /// The interleaved access analysis can also add new predicates (for example
482  /// by versioning strides of pointers).
483  PredicatedScalarEvolution &PSE;
484 
485  Loop *TheLoop;
486  DominatorTree *DT;
487  LoopInfo *LI;
488  const LoopAccessInfo *LAI;
489 
490  /// True if the loop may contain non-reversed interleaved groups with
491  /// out-of-bounds accesses. We ensure we don't speculatively access memory
492  /// out-of-bounds by executing at least one scalar epilogue iteration.
493  bool RequiresScalarEpilogue = false;
494 
495  /// Holds the relationships between the members and the interleave group.
496  DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;
497 
498  SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;
499 
500  /// Holds dependences among the memory accesses in the loop. It maps a source
501  /// access to a set of dependent sink accesses.
502  DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;
503 
504  /// The descriptor for a strided memory access.
505  struct StrideDescriptor {
506  StrideDescriptor() = default;
507  StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
508  unsigned Align)
509  : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {}
510 
511  // The access's stride. It is negative for a reverse access.
512  int64_t Stride = 0;
513 
514  // The scalar expression of this access.
515  const SCEV *Scev = nullptr;
516 
517  // The size of the memory object.
518  uint64_t Size = 0;
519 
520  // The alignment of this access.
521  unsigned Align = 0;
522  };
523 
524  /// A type for holding instructions and their stride descriptors.
525  using StrideEntry = std::pair<Instruction *, StrideDescriptor>;
526 
527  /// Create a new interleave group with the given instruction \p Instr,
528  /// stride \p Stride and alignment \p Align.
529  ///
530  /// \returns the newly created interleave group.
531  InterleaveGroup<Instruction> *
532  createInterleaveGroup(Instruction *Instr, int Stride, unsigned Align) {
533  assert(!InterleaveGroupMap.count(Instr) &&
534  "Already in an interleaved access group");
535  InterleaveGroupMap[Instr] =
536  new InterleaveGroup<Instruction>(Instr, Stride, Align);
537  InterleaveGroups.insert(InterleaveGroupMap[Instr]);
538  return InterleaveGroupMap[Instr];
539  }
540 
541  /// Release the group and remove all the relationships.
542  void releaseGroup(InterleaveGroup<Instruction> *Group) {
543  for (unsigned i = 0; i < Group->getFactor(); i++)
544  if (Instruction *Member = Group->getMember(i))
545  InterleaveGroupMap.erase(Member);
546 
547  InterleaveGroups.erase(Group);
548  delete Group;
549  }
550 
551  /// Collect all the accesses with a constant stride in program order.
552  void collectConstStrideAccesses(
553      MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
554      const ValueToValueMap &Strides);
555 
556  /// Returns true if \p Stride is allowed in an interleaved group.
557  static bool isStrided(int Stride);
558 
559  /// Returns true if \p BB is a predicated block.
560  bool isPredicated(BasicBlock *BB) const {
561  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
562  }
563 
564  /// Returns true if LoopAccessInfo can be used for dependence queries.
565  bool areDependencesValid() const {
566  return LAI && LAI->getDepChecker().getDependences();
567  }
568 
569  /// Returns true if memory accesses \p A and \p B can be reordered, if
570  /// necessary, when constructing interleaved groups.
571  ///
572  /// \p A must precede \p B in program order. We return false only if reordering
573  /// might be needed and is prevented because \p A and \p B may be dependent.
574  bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
575  StrideEntry *B) const {
576  // Code motion for interleaved accesses can potentially hoist strided loads
577  // and sink strided stores. The code below checks the legality of the
578  // following two conditions:
579  //
580  // 1. Potentially moving a strided load (B) before any store (A) that
581  // precedes B, or
582  //
583  // 2. Potentially moving a strided store (A) after any load or store (B)
584  // that A precedes.
585  //
586  // It's legal to reorder A and B if we know there isn't a dependence from A
587  // to B. Note that this determination is conservative since some
588  // dependences could potentially be reordered safely.
589 
590  // A is potentially the source of a dependence.
591  auto *Src = A->first;
592  auto SrcDes = A->second;
593 
594  // B is potentially the sink of a dependence.
595  auto *Sink = B->first;
596  auto SinkDes = B->second;
597 
598  // Code motion for interleaved accesses can't violate WAR dependences.
599  // Thus, reordering is legal if the source isn't a write.
600  if (!Src->mayWriteToMemory())
601  return true;
602 
603  // At least one of the accesses must be strided.
604  if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
605  return true;
606 
607  // If dependence information is not available from LoopAccessInfo,
608  // conservatively assume the instructions can't be reordered.
609  if (!areDependencesValid())
610  return false;
611 
612  // If we know there is a dependence from source to sink, assume the
613  // instructions can't be reordered. Otherwise, reordering is legal.
614  return Dependences.find(Src) == Dependences.end() ||
615  !Dependences.lookup(Src).count(Sink);
616  }
617 
618  /// Collect the dependences from LoopAccessInfo.
619  ///
620  /// We process the dependences once during the interleaved access analysis to
621  /// enable constant-time dependence queries.
622  void collectDependences() {
623  if (!areDependencesValid())
624  return;
625  auto *Deps = LAI->getDepChecker().getDependences();
626  for (auto Dep : *Deps)
627  Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
628  }
629 };
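A hypothetical driver inside a vectorizer, assuming the usual per-loop analyses PSE, L, DT, LI and LAI are available:

    InterleavedAccessInfo IAI(PSE, L, DT, LI, LAI);
    IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/false);
    // If a scalar tail iteration is not acceptable (e.g. when optimizing for
    // size), drop the groups that would need one.
    if (IAI.requiresScalarEpilogue())
      IAI.invalidateGroupsRequiringScalarEpilogue();
    for (auto *Group : IAI.getInterleaveGroups())
      assert(Group->getFactor() > 1 && "groups always have factor > 1");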
630 
631 } // llvm namespace
632 
633 #endif