LLVM  9.0.0svn
LoopAccessAnalysis.cpp
Go to the documentation of this file.
1 //===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // The implementation for the loop memory dependence that was originally
11 // developed for the loop vectorizer.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/SmallSet.h"
25 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/Analysis/LoopInfo.h"
39 #include "llvm/IR/BasicBlock.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DataLayout.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/IR/DerivedTypes.h"
44 #include "llvm/IR/DiagnosticInfo.h"
45 #include "llvm/IR/Dominators.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/IRBuilder.h"
48 #include "llvm/IR/InstrTypes.h"
49 #include "llvm/IR/Instruction.h"
50 #include "llvm/IR/Instructions.h"
51 #include "llvm/IR/Operator.h"
52 #include "llvm/IR/PassManager.h"
53 #include "llvm/IR/Type.h"
54 #include "llvm/IR/Value.h"
55 #include "llvm/IR/ValueHandle.h"
56 #include "llvm/Pass.h"
57 #include "llvm/Support/Casting.h"
59 #include "llvm/Support/Debug.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <cstdlib>
66 #include <iterator>
67 #include <utility>
68 #include <vector>
69 
70 using namespace llvm;
71 
72 #define DEBUG_TYPE "loop-accesses"
73 
75 VectorizationFactor("force-vector-width", cl::Hidden,
76  cl::desc("Sets the SIMD width. Zero is autoselect."),
79 
81 VectorizationInterleave("force-vector-interleave", cl::Hidden,
82  cl::desc("Sets the vectorization interleave count. "
83  "Zero is autoselect."),
87 
89  "runtime-memory-check-threshold", cl::Hidden,
90  cl::desc("When performing memory disambiguation checks at runtime do not "
91  "generate more than this number of comparisons (default = 8)."),
94 
95 /// The maximum iterations used to merge memory checks
97  "memory-check-merge-threshold", cl::Hidden,
98  cl::desc("Maximum number of comparisons done when trying to merge "
99  "runtime memory checks. (default = 100)"),
100  cl::init(100));
101 
102 /// Maximum SIMD width.
103 const unsigned VectorizerParams::MaxVectorWidth = 64;
104 
105 /// We collect dependences up to this threshold.
106 static cl::opt<unsigned>
107  MaxDependences("max-dependences", cl::Hidden,
108  cl::desc("Maximum number of dependences collected by "
109  "loop-access analysis (default = 100)"),
110  cl::init(100));
111 
112 /// This enables versioning on the strides of symbolically striding memory
113 /// accesses in code like the following.
114 /// for (i = 0; i < N; ++i)
115 /// A[i * Stride1] += B[i * Stride2] ...
116 ///
117 /// Will be roughly translated to
118 /// if (Stride1 == 1 && Stride2 == 1) {
119 /// for (i = 0; i < N; i+=4)
120 /// A[i:i+3] += ...
121 /// } else
122 /// ...
124  "enable-mem-access-versioning", cl::init(true), cl::Hidden,
125  cl::desc("Enable symbolic stride memory access versioning"));
126 
127 /// Enable store-to-load forwarding conflict detection. This option can
128 /// be disabled for correctness testing.
130  "store-to-load-forwarding-conflict-detection", cl::Hidden,
131  cl::desc("Enable conflict detection in loop-access analysis"),
132  cl::init(true));
133 
135  return ::VectorizationInterleave.getNumOccurrences() > 0;
136 }
137 
139  if (auto *CI = dyn_cast<CastInst>(V))
140  if (CI->getOperand(0)->getType()->isIntegerTy())
141  return CI->getOperand(0);
142  return V;
143 }
144 
146  const ValueToValueMap &PtrToStride,
147  Value *Ptr, Value *OrigPtr) {
148  const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
149 
150  // If there is an entry in the map return the SCEV of the pointer with the
151  // symbolic stride replaced by one.
153  PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
154  if (SI != PtrToStride.end()) {
155  Value *StrideVal = SI->second;
156 
157  // Strip casts.
158  StrideVal = stripIntegerCast(StrideVal);
159 
160  ScalarEvolution *SE = PSE.getSE();
161  const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
162  const auto *CT =
163  static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));
164 
165  PSE.addPredicate(*SE->getEqualPredicate(U, CT));
166  auto *Expr = PSE.getSCEV(Ptr);
167 
168  LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
169  << " by: " << *Expr << "\n");
170  return Expr;
171  }
172 
173  // Otherwise, just return the SCEV of the original pointer.
174  return OrigSCEV;
175 }
176 
177 /// Calculate Start and End points of memory access.
178 /// Let's assume A is the first access and B is a memory access on N-th loop
179 /// iteration. Then B is calculated as:
180 /// B = A + Step*N .
181 /// Step value may be positive or negative.
182 /// N is a calculated back-edge taken count:
183 /// N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0
184 /// Start and End points are calculated in the following way:
185 /// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
186 /// where SizeOfElt is the size of single memory access in bytes.
187 ///
188 /// There is no conflict when the intervals are disjoint:
189 /// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
190 void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
191  unsigned DepSetId, unsigned ASId,
192  const ValueToValueMap &Strides,
194  // Get the stride replaced scev.
195  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
196  ScalarEvolution *SE = PSE.getSE();
197 
198  const SCEV *ScStart;
199  const SCEV *ScEnd;
200 
201  if (SE->isLoopInvariant(Sc, Lp))
202  ScStart = ScEnd = Sc;
203  else {
204  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
205  assert(AR && "Invalid addrec expression");
206  const SCEV *Ex = PSE.getBackedgeTakenCount();
207 
208  ScStart = AR->getStart();
209  ScEnd = AR->evaluateAtIteration(Ex, *SE);
210  const SCEV *Step = AR->getStepRecurrence(*SE);
211 
212  // For expressions with negative step, the upper bound is ScStart and the
213  // lower bound is ScEnd.
214  if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
215  if (CStep->getValue()->isNegative())
216  std::swap(ScStart, ScEnd);
217  } else {
218  // Fallback case: the step is not constant, but we can still
219  // get the upper and lower bounds of the interval by using min/max
220  // expressions.
221  ScStart = SE->getUMinExpr(ScStart, ScEnd);
222  ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
223  }
224  // Add the size of the pointed element to ScEnd.
225  unsigned EltSize =
227  const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize);
228  ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
229  }
230 
231  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
232 }
233 
235 RuntimePointerChecking::generateChecks() const {
237 
238  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
239  for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
240  const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I];
241  const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J];
242 
243  if (needsChecking(CGI, CGJ))
244  Checks.push_back(std::make_pair(&CGI, &CGJ));
245  }
246  }
247  return Checks;
248 }
249 
/// Populate \p Checks for this loop: first merge individual pointers into
/// checking groups (using the dependence equivalence classes in \p DepCands
/// when \p UseDependencies is true), then emit one runtime check per pair of
/// groups that may conflict.
void RuntimePointerChecking::generateChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  // Callers must not have generated checks already; this runs once per loop.
  assert(Checks.empty() && "Checks is not empty");
  groupChecks(DepCands, UseDependencies);
  Checks = generateChecks();
}
256 
258  const CheckingPtrGroup &N) const {
259  for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
260  for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
261  if (needsChecking(M.Members[I], N.Members[J]))
262  return true;
263  return false;
264 }
265 
266 /// Compare \p I and \p J and return the minimum.
267 /// Return nullptr in case we couldn't find an answer.
268 static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
269  ScalarEvolution *SE) {
270  const SCEV *Diff = SE->getMinusSCEV(J, I);
271  const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);
272 
273  if (!C)
274  return nullptr;
275  if (C->getValue()->isNegative())
276  return J;
277  return I;
278 }
279 
281  const SCEV *Start = RtCheck.Pointers[Index].Start;
282  const SCEV *End = RtCheck.Pointers[Index].End;
283 
284  // Compare the starts and ends with the known minimum and maximum
285  // of this set. We need to know how we compare against the min/max
286  // of the set in order to be able to emit memchecks.
287  const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE);
288  if (!Min0)
289  return false;
290 
291  const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE);
292  if (!Min1)
293  return false;
294 
295  // Update the low bound expression if we've found a new min value.
296  if (Min0 == Start)
297  Low = Start;
298 
299  // Update the high bound expression if we've found a new max value.
300  if (Min1 != End)
301  High = End;
302 
303  Members.push_back(Index);
304  return true;
305 }
306 
307 void RuntimePointerChecking::groupChecks(
308  MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
309  // We build the groups from dependency candidates equivalence classes
310  // because:
311  // - We know that pointers in the same equivalence class share
312  // the same underlying object and therefore there is a chance
313  // that we can compare pointers
314  // - We wouldn't be able to merge two pointers for which we need
315  // to emit a memcheck. The classes in DepCands are already
316  // conveniently built such that no two pointers in the same
317  // class need checking against each other.
318 
319  // We use the following (greedy) algorithm to construct the groups
320  // For every pointer in the equivalence class:
321  // For each existing group:
322  // - if the difference between this pointer and the min/max bounds
323  // of the group is a constant, then make the pointer part of the
324  // group and update the min/max bounds of that group as required.
325 
326  CheckingGroups.clear();
327 
328  // If we need to check two pointers to the same underlying object
329  // with a non-constant difference, we shouldn't perform any pointer
330  // grouping with those pointers. This is because we can easily get
331  // into cases where the resulting check would return false, even when
332  // the accesses are safe.
333  //
334  // The following example shows this:
335  // for (i = 0; i < 1000; ++i)
336  // a[5000 + i * m] = a[i] + a[i + 9000]
337  //
338  // Here grouping gives a check of (5000, 5000 + 1000 * m) against
339  // (0, 10000) which is always false. However, if m is 1, there is no
340  // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
341  // us to perform an accurate check in this case.
342  //
343  // The above case requires that we have an UnknownDependence between
344  // accesses to the same underlying object. This cannot happen unless
345  // FoundNonConstantDistanceDependence is set, and therefore UseDependencies
346  // is also false. In this case we will use the fallback path and create
347  // separate checking groups for all pointers.
348 
349  // If we don't have the dependency partitions, construct a new
350  // checking pointer group for each pointer. This is also required
351  // for correctness, because in this case we can have checking between
352  // pointers to the same underlying object.
353  if (!UseDependencies) {
354  for (unsigned I = 0; I < Pointers.size(); ++I)
355  CheckingGroups.push_back(CheckingPtrGroup(I, *this));
356  return;
357  }
358 
359  unsigned TotalComparisons = 0;
360 
361  DenseMap<Value *, unsigned> PositionMap;
362  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
363  PositionMap[Pointers[Index].PointerValue] = Index;
364 
365  // We need to keep track of what pointers we've already seen so we
366  // don't process them twice.
368 
369  // Go through all equivalence classes, get the "pointer check groups"
370  // and add them to the overall solution. We use the order in which accesses
371  // appear in 'Pointers' to enforce determinism.
372  for (unsigned I = 0; I < Pointers.size(); ++I) {
373  // We've seen this pointer before, and therefore already processed
374  // its equivalence class.
375  if (Seen.count(I))
376  continue;
377 
378  MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
379  Pointers[I].IsWritePtr);
380 
382  auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));
383 
384  // Because DepCands is constructed by visiting accesses in the order in
385  // which they appear in alias sets (which is deterministic) and the
386  // iteration order within an equivalence class member is only dependent on
387  // the order in which unions and insertions are performed on the
388  // equivalence class, the iteration order is deterministic.
389  for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
390  MI != ME; ++MI) {
391  unsigned Pointer = PositionMap[MI->getPointer()];
392  bool Merged = false;
393  // Mark this pointer as seen.
394  Seen.insert(Pointer);
395 
396  // Go through all the existing sets and see if we can find one
397  // which can include this pointer.
398  for (CheckingPtrGroup &Group : Groups) {
399  // Don't perform more than a certain amount of comparisons.
400  // This should limit the cost of grouping the pointers to something
401  // reasonable. If we do end up hitting this threshold, the algorithm
402  // will create separate groups for all remaining pointers.
403  if (TotalComparisons > MemoryCheckMergeThreshold)
404  break;
405 
406  TotalComparisons++;
407 
408  if (Group.addPointer(Pointer)) {
409  Merged = true;
410  break;
411  }
412  }
413 
414  if (!Merged)
415  // We couldn't add this pointer to any existing set or the threshold
416  // for the number of comparisons has been reached. Create a new group
417  // to hold the current pointer.
418  Groups.push_back(CheckingPtrGroup(Pointer, *this));
419  }
420 
421  // We've computed the grouped checks for this partition.
422  // Save the results and continue with the next one.
423  llvm::copy(Groups, std::back_inserter(CheckingGroups));
424  }
425 }
426 
428  const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
429  unsigned PtrIdx2) {
430  return (PtrToPartition[PtrIdx1] != -1 &&
431  PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
432 }
433 
434 bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
435  const PointerInfo &PointerI = Pointers[I];
436  const PointerInfo &PointerJ = Pointers[J];
437 
438  // No need to check if two readonly pointers intersect.
439  if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
440  return false;
441 
442  // Only need to check pointers between two different dependency sets.
443  if (PointerI.DependencySetId == PointerJ.DependencySetId)
444  return false;
445 
446  // Only need to check pointers in the same alias set.
447  if (PointerI.AliasSetId != PointerJ.AliasSetId)
448  return false;
449 
450  return true;
451 }
452 
454  raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks,
455  unsigned Depth) const {
456  unsigned N = 0;
457  for (const auto &Check : Checks) {
458  const auto &First = Check.first->Members, &Second = Check.second->Members;
459 
460  OS.indent(Depth) << "Check " << N++ << ":\n";
461 
462  OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
463  for (unsigned K = 0; K < First.size(); ++K)
464  OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";
465 
466  OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
467  for (unsigned K = 0; K < Second.size(); ++K)
468  OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
469  }
470 }
471 
473 
474  OS.indent(Depth) << "Run-time memory checks:\n";
475  printChecks(OS, Checks, Depth);
476 
477  OS.indent(Depth) << "Grouped accesses:\n";
478  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
479  const auto &CG = CheckingGroups[I];
480 
481  OS.indent(Depth + 2) << "Group " << &CG << ":\n";
482  OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
483  << ")\n";
484  for (unsigned J = 0; J < CG.Members.size(); ++J) {
485  OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
486  << "\n";
487  }
488  }
489 }
490 
491 namespace {
492 
493 /// Analyses memory accesses in a loop.
494 ///
495 /// Checks whether run time pointer checks are needed and builds sets for data
496 /// dependence checking.
497 class AccessAnalysis {
498 public:
499  /// Read or write access location.
500  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
501  typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
502 
503  AccessAnalysis(const DataLayout &Dl, Loop *TheLoop, AliasAnalysis *AA,
506  : DL(Dl), TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA),
507  IsRTCheckAnalysisNeeded(false), PSE(PSE) {}
508 
509  /// Register a load and whether it is only read from.
510  void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
511  Value *Ptr = const_cast<Value*>(Loc.Ptr);
512  AST.add(Ptr, LocationSize::unknown(), Loc.AATags);
513  Accesses.insert(MemAccessInfo(Ptr, false));
514  if (IsReadOnly)
515  ReadOnlyPtr.insert(Ptr);
516  }
517 
518  /// Register a store.
519  void addStore(MemoryLocation &Loc) {
520  Value *Ptr = const_cast<Value*>(Loc.Ptr);
521  AST.add(Ptr, LocationSize::unknown(), Loc.AATags);
522  Accesses.insert(MemAccessInfo(Ptr, true));
523  }
524 
525  /// Check if we can emit a run-time no-alias check for \p Access.
526  ///
527  /// Returns true if we can emit a run-time no alias check for \p Access.
528  /// If we can check this access, this also adds it to a dependence set and
529  /// adds a run-time to check for it to \p RtCheck. If \p Assume is true,
530  /// we will attempt to use additional run-time checks in order to get
531  /// the bounds of the pointer.
532  bool createCheckForAccess(RuntimePointerChecking &RtCheck,
533  MemAccessInfo Access,
534  const ValueToValueMap &Strides,
535  DenseMap<Value *, unsigned> &DepSetId,
536  Loop *TheLoop, unsigned &RunningDepId,
537  unsigned ASId, bool ShouldCheckStride,
538  bool Assume);
539 
540  /// Check whether we can check the pointers at runtime for
541  /// non-intersection.
542  ///
543  /// Returns true if we need no check or if we do and we can generate them
544  /// (i.e. the pointers have computable bounds).
545  bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
546  Loop *TheLoop, const ValueToValueMap &Strides,
547  bool ShouldCheckWrap = false);
548 
549  /// Goes over all memory accesses, checks whether a RT check is needed
550  /// and builds sets of dependent accesses.
551  void buildDependenceSets() {
552  processMemAccesses();
553  }
554 
555  /// Initial processing of memory accesses determined that we need to
556  /// perform dependency checking.
557  ///
558  /// Note that this can later be cleared if we retry memcheck analysis without
559  /// dependency checking (i.e. FoundNonConstantDistanceDependence).
560  bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
561 
562  /// We decided that no dependence analysis would be used. Reset the state.
563  void resetDepChecks(MemoryDepChecker &DepChecker) {
564  CheckDeps.clear();
565  DepChecker.clearDependences();
566  }
567 
568  MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
569 
570 private:
571  typedef SetVector<MemAccessInfo> PtrAccessSet;
572 
573  /// Go over all memory access and check whether runtime pointer checks
574  /// are needed and build sets of dependency check candidates.
575  void processMemAccesses();
576 
577  /// Set of all accesses.
578  PtrAccessSet Accesses;
579 
580  const DataLayout &DL;
581 
582  /// The loop being checked.
583  const Loop *TheLoop;
584 
585  /// List of accesses that need a further dependence check.
586  MemAccessInfoList CheckDeps;
587 
588  /// Set of pointers that are read only.
589  SmallPtrSet<Value*, 16> ReadOnlyPtr;
590 
591  /// An alias set tracker to partition the access set by underlying object and
592  //intrinsic property (such as TBAA metadata).
593  AliasSetTracker AST;
594 
595  LoopInfo *LI;
596 
597  /// Sets of potentially dependent accesses - members of one set share an
598  /// underlying pointer. The set "CheckDeps" identfies which sets really need a
599  /// dependence check.
601 
602  /// Initial processing of memory accesses determined that we may need
603  /// to add memchecks. Perform the analysis to determine the necessary checks.
604  ///
605  /// Note that, this is different from isDependencyCheckNeeded. When we retry
606  /// memcheck analysis without dependency checking
607  /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is
608  /// cleared while this remains set if we have potentially dependent accesses.
609  bool IsRTCheckAnalysisNeeded;
610 
611  /// The SCEV predicate containing all the SCEV-related assumptions.
613 };
614 
615 } // end anonymous namespace
616 
617 /// Check whether a pointer can participate in a runtime bounds check.
618 /// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
619 /// by adding run-time checks (overflow checks) if necessary.
621  const ValueToValueMap &Strides, Value *Ptr,
622  Loop *L, bool Assume) {
623  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
624 
625  // The bounds for loop-invariant pointer is trivial.
626  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
627  return true;
628 
629  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
630 
631  if (!AR && Assume)
632  AR = PSE.getAsAddRec(Ptr);
633 
634  if (!AR)
635  return false;
636 
637  return AR->isAffine();
638 }
639 
640 /// Check whether a pointer address cannot wrap.
642  const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
643  const SCEV *PtrScev = PSE.getSCEV(Ptr);
644  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
645  return true;
646 
647  int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
648  if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
649  return true;
650 
651  return false;
652 }
653 
654 bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
655  MemAccessInfo Access,
656  const ValueToValueMap &StridesMap,
657  DenseMap<Value *, unsigned> &DepSetId,
658  Loop *TheLoop, unsigned &RunningDepId,
659  unsigned ASId, bool ShouldCheckWrap,
660  bool Assume) {
661  Value *Ptr = Access.getPointer();
662 
663  if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
664  return false;
665 
666  // When we run after a failing dependency check we have to make sure
667  // we don't have wrapping pointers.
668  if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
669  auto *Expr = PSE.getSCEV(Ptr);
670  if (!Assume || !isa<SCEVAddRecExpr>(Expr))
671  return false;
672  PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
673  }
674 
675  // The id of the dependence set.
676  unsigned DepId;
677 
678  if (isDependencyCheckNeeded()) {
679  Value *Leader = DepCands.getLeaderValue(Access).getPointer();
680  unsigned &LeaderId = DepSetId[Leader];
681  if (!LeaderId)
682  LeaderId = RunningDepId++;
683  DepId = LeaderId;
684  } else
685  // Each access has its own dependence set.
686  DepId = RunningDepId++;
687 
688  bool IsWrite = Access.getInt();
689  RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
690  LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
691 
692  return true;
693  }
694 
695 bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
696  ScalarEvolution *SE, Loop *TheLoop,
697  const ValueToValueMap &StridesMap,
698  bool ShouldCheckWrap) {
699  // Find pointers with computable bounds. We are going to use this information
700  // to place a runtime bound check.
701  bool CanDoRT = true;
702 
703  bool NeedRTCheck = false;
704  if (!IsRTCheckAnalysisNeeded) return true;
705 
706  bool IsDepCheckNeeded = isDependencyCheckNeeded();
707 
708  // We assign a consecutive id to access from different alias sets.
709  // Accesses between different groups doesn't need to be checked.
710  unsigned ASId = 1;
711  for (auto &AS : AST) {
712  int NumReadPtrChecks = 0;
713  int NumWritePtrChecks = 0;
714  bool CanDoAliasSetRT = true;
715 
716  // We assign consecutive id to access from different dependence sets.
717  // Accesses within the same set don't need a runtime check.
718  unsigned RunningDepId = 1;
720 
722 
723  for (auto A : AS) {
724  Value *Ptr = A.getValue();
725  bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
726  MemAccessInfo Access(Ptr, IsWrite);
727 
728  if (IsWrite)
729  ++NumWritePtrChecks;
730  else
731  ++NumReadPtrChecks;
732 
733  if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
734  RunningDepId, ASId, ShouldCheckWrap, false)) {
735  LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
736  Retries.push_back(Access);
737  CanDoAliasSetRT = false;
738  }
739  }
740 
741  // If we have at least two writes or one write and a read then we need to
742  // check them. But there is no need to checks if there is only one
743  // dependence set for this alias set.
744  //
745  // Note that this function computes CanDoRT and NeedRTCheck independently.
746  // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer
747  // for which we couldn't find the bounds but we don't actually need to emit
748  // any checks so it does not matter.
749  bool NeedsAliasSetRTCheck = false;
750  if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2))
751  NeedsAliasSetRTCheck = (NumWritePtrChecks >= 2 ||
752  (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1));
753 
754  // We need to perform run-time alias checks, but some pointers had bounds
755  // that couldn't be checked.
756  if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
757  // Reset the CanDoSetRt flag and retry all accesses that have failed.
758  // We know that we need these checks, so we can now be more aggressive
759  // and add further checks if required (overflow checks).
760  CanDoAliasSetRT = true;
761  for (auto Access : Retries)
762  if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
763  TheLoop, RunningDepId, ASId,
764  ShouldCheckWrap, /*Assume=*/true)) {
765  CanDoAliasSetRT = false;
766  break;
767  }
768  }
769 
770  CanDoRT &= CanDoAliasSetRT;
771  NeedRTCheck |= NeedsAliasSetRTCheck;
772  ++ASId;
773  }
774 
775  // If the pointers that we would use for the bounds comparison have different
776  // address spaces, assume the values aren't directly comparable, so we can't
777  // use them for the runtime check. We also have to assume they could
778  // overlap. In the future there should be metadata for whether address spaces
779  // are disjoint.
780  unsigned NumPointers = RtCheck.Pointers.size();
781  for (unsigned i = 0; i < NumPointers; ++i) {
782  for (unsigned j = i + 1; j < NumPointers; ++j) {
783  // Only need to check pointers between two different dependency sets.
784  if (RtCheck.Pointers[i].DependencySetId ==
785  RtCheck.Pointers[j].DependencySetId)
786  continue;
787  // Only need to check pointers in the same alias set.
788  if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
789  continue;
790 
791  Value *PtrI = RtCheck.Pointers[i].PointerValue;
792  Value *PtrJ = RtCheck.Pointers[j].PointerValue;
793 
794  unsigned ASi = PtrI->getType()->getPointerAddressSpace();
795  unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
796  if (ASi != ASj) {
797  LLVM_DEBUG(
798  dbgs() << "LAA: Runtime check would require comparison between"
799  " different address spaces\n");
800  return false;
801  }
802  }
803  }
804 
805  if (NeedRTCheck && CanDoRT)
806  RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
807 
808  LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
809  << " pointer comparisons.\n");
810 
811  RtCheck.Need = NeedRTCheck;
812 
813  bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT;
814  if (!CanDoRTIfNeeded)
815  RtCheck.reset();
816  return CanDoRTIfNeeded;
817 }
818 
819 void AccessAnalysis::processMemAccesses() {
820  // We process the set twice: first we process read-write pointers, last we
821  // process read-only pointers. This allows us to skip dependence tests for
822  // read-only pointers.
823 
824  LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
825  LLVM_DEBUG(dbgs() << " AST: "; AST.dump());
826  LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
827  LLVM_DEBUG({
828  for (auto A : Accesses)
829  dbgs() << "\t" << *A.getPointer() << " (" <<
830  (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
831  "read-only" : "read")) << ")\n";
832  });
833 
834  // The AliasSetTracker has nicely partitioned our pointers by metadata
835  // compatibility and potential for underlying-object overlap. As a result, we
836  // only need to check for potential pointer dependencies within each alias
837  // set.
838  for (auto &AS : AST) {
839  // Note that both the alias-set tracker and the alias sets themselves used
840  // linked lists internally and so the iteration order here is deterministic
841  // (matching the original instruction order within each set).
842 
843  bool SetHasWrite = false;
844 
845  // Map of pointers to last access encountered.
846  typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
847  UnderlyingObjToAccessMap ObjToLastAccess;
848 
849  // Set of access to check after all writes have been processed.
850  PtrAccessSet DeferredAccesses;
851 
852  // Iterate over each alias set twice, once to process read/write pointers,
853  // and then to process read-only pointers.
854  for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
855  bool UseDeferred = SetIteration > 0;
856  PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
857 
858  for (auto AV : AS) {
859  Value *Ptr = AV.getValue();
860 
861  // For a single memory access in AliasSetTracker, Accesses may contain
862  // both read and write, and they both need to be handled for CheckDeps.
863  for (auto AC : S) {
864  if (AC.getPointer() != Ptr)
865  continue;
866 
867  bool IsWrite = AC.getInt();
868 
869  // If we're using the deferred access set, then it contains only
870  // reads.
871  bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
872  if (UseDeferred && !IsReadOnlyPtr)
873  continue;
874  // Otherwise, the pointer must be in the PtrAccessSet, either as a
875  // read or a write.
876  assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
877  S.count(MemAccessInfo(Ptr, false))) &&
878  "Alias-set pointer not in the access set?");
879 
880  MemAccessInfo Access(Ptr, IsWrite);
881  DepCands.insert(Access);
882 
883  // Memorize read-only pointers for later processing and skip them in
884  // the first round (they need to be checked after we have seen all
885  // write pointers). Note: we also mark pointer that are not
886  // consecutive as "read-only" pointers (so that we check
887  // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
888  if (!UseDeferred && IsReadOnlyPtr) {
889  DeferredAccesses.insert(Access);
890  continue;
891  }
892 
893  // If this is a write - check other reads and writes for conflicts. If
894  // this is a read only check other writes for conflicts (but only if
895  // there is no other write to the ptr - this is an optimization to
896  // catch "a[i] = a[i] + " without having to do a dependence check).
897  if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
898  CheckDeps.push_back(Access);
899  IsRTCheckAnalysisNeeded = true;
900  }
901 
902  if (IsWrite)
903  SetHasWrite = true;
904 
905  // Create sets of pointers connected by a shared alias set and
906  // underlying object.
907  typedef SmallVector<Value *, 16> ValueVector;
908  ValueVector TempObjects;
909 
910  GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
911  LLVM_DEBUG(dbgs()
912  << "Underlying objects for pointer " << *Ptr << "\n");
913  for (Value *UnderlyingObj : TempObjects) {
914  // nullptr never alias, don't join sets for pointer that have "null"
915  // in their UnderlyingObjects list.
916  if (isa<ConstantPointerNull>(UnderlyingObj) &&
918  TheLoop->getHeader()->getParent(),
919  UnderlyingObj->getType()->getPointerAddressSpace()))
920  continue;
921 
922  UnderlyingObjToAccessMap::iterator Prev =
923  ObjToLastAccess.find(UnderlyingObj);
924  if (Prev != ObjToLastAccess.end())
925  DepCands.unionSets(Access, Prev->second);
926 
927  ObjToLastAccess[UnderlyingObj] = Access;
928  LLVM_DEBUG(dbgs() << " " << *UnderlyingObj << "\n");
929  }
930  }
931  }
932  }
933  }
934 }
935 
936 static bool isInBoundsGep(Value *Ptr) {
937  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
938  return GEP->isInBounds();
939  return false;
940 }
941 
942 /// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
943 /// i.e. monotonically increasing/decreasing.
944 static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
945  PredicatedScalarEvolution &PSE, const Loop *L) {
946  // FIXME: This should probably only return true for NUW.
948  return true;
949 
950  // Scalar evolution does not propagate the non-wrapping flags to values that
951  // are derived from a non-wrapping induction variable because non-wrapping
952  // could be flow-sensitive.
953  //
954  // Look through the potentially overflowing instruction to try to prove
955  // non-wrapping for the *specific* value of Ptr.
956 
957  // The arithmetic implied by an inbounds GEP can't overflow.
958  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
959  if (!GEP || !GEP->isInBounds())
960  return false;
961 
962  // Make sure there is only one non-const index and analyze that.
963  Value *NonConstIndex = nullptr;
964  for (Value *Index : make_range(GEP->idx_begin(), GEP->idx_end()))
965  if (!isa<ConstantInt>(Index)) {
966  if (NonConstIndex)
967  return false;
968  NonConstIndex = Index;
969  }
970  if (!NonConstIndex)
971  // The recurrence is on the pointer, ignore for now.
972  return false;
973 
974  // The index in GEP is signed. It is non-wrapping if it's derived from a NSW
975  // AddRec using a NSW operation.
976  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
977  if (OBO->hasNoSignedWrap() &&
978  // Assume constant for other the operand so that the AddRec can be
979  // easily found.
980  isa<ConstantInt>(OBO->getOperand(1))) {
981  auto *OpScev = PSE.getSCEV(OBO->getOperand(0));
982 
983  if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
984  return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
985  }
986 
987  return false;
988 }
989 
990 /// Check whether the access through \p Ptr has a constant stride.
992  const Loop *Lp, const ValueToValueMap &StridesMap,
993  bool Assume, bool ShouldCheckWrap) {
994  Type *Ty = Ptr->getType();
995  assert(Ty->isPointerTy() && "Unexpected non-ptr");
996 
997  // Make sure that the pointer does not point to aggregate types.
998  auto *PtrTy = cast<PointerType>(Ty);
999  if (PtrTy->getElementType()->isAggregateType()) {
1000  LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
1001  << *Ptr << "\n");
1002  return 0;
1003  }
1004 
1005  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
1006 
1007  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
1008  if (Assume && !AR)
1009  AR = PSE.getAsAddRec(Ptr);
1010 
1011  if (!AR) {
1012  LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
1013  << " SCEV: " << *PtrScev << "\n");
1014  return 0;
1015  }
1016 
1017  // The accesss function must stride over the innermost loop.
1018  if (Lp != AR->getLoop()) {
1019  LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
1020  << *Ptr << " SCEV: " << *AR << "\n");
1021  return 0;
1022  }
1023 
1024  // The address calculation must not wrap. Otherwise, a dependence could be
1025  // inverted.
1026  // An inbounds getelementptr that is a AddRec with a unit stride
1027  // cannot wrap per definition. The unit stride requirement is checked later.
1028  // An getelementptr without an inbounds attribute and unit stride would have
1029  // to access the pointer value "0" which is undefined behavior in address
1030  // space 0, therefore we can also vectorize this case.
1031  bool IsInBoundsGEP = isInBoundsGep(Ptr);
1032  bool IsNoWrapAddRec = !ShouldCheckWrap ||
1034  isNoWrapAddRec(Ptr, AR, PSE, Lp);
1035  if (!IsNoWrapAddRec && !IsInBoundsGEP &&
1037  PtrTy->getAddressSpace())) {
1038  if (Assume) {
1040  IsNoWrapAddRec = true;
1041  LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
1042  << "LAA: Pointer: " << *Ptr << "\n"
1043  << "LAA: SCEV: " << *AR << "\n"
1044  << "LAA: Added an overflow assumption\n");
1045  } else {
1046  LLVM_DEBUG(
1047  dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
1048  << *Ptr << " SCEV: " << *AR << "\n");
1049  return 0;
1050  }
1051  }
1052 
1053  // Check the step is constant.
1054  const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
1055 
1056  // Calculate the pointer stride and check if it is constant.
1057  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
1058  if (!C) {
1059  LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
1060  << " SCEV: " << *AR << "\n");
1061  return 0;
1062  }
1063 
1064  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
1065  int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
1066  const APInt &APStepVal = C->getAPInt();
1067 
1068  // Huge step value - give up.
1069  if (APStepVal.getBitWidth() > 64)
1070  return 0;
1071 
1072  int64_t StepVal = APStepVal.getSExtValue();
1073 
1074  // Strided access.
1075  int64_t Stride = StepVal / Size;
1076  int64_t Rem = StepVal % Size;
1077  if (Rem)
1078  return 0;
1079 
1080  // If the SCEV could wrap but we have an inbounds gep with a unit stride we
1081  // know we can't "wrap around the address space". In case of address space
1082  // zero we know that this won't happen without triggering undefined behavior.
1083  if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
1084  (IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
1085  PtrTy->getAddressSpace()))) {
1086  if (Assume) {
1087  // We can avoid this case by adding a run-time check.
1088  LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
1089  << "inbouds or in address space 0 may wrap:\n"
1090  << "LAA: Pointer: " << *Ptr << "\n"
1091  << "LAA: SCEV: " << *AR << "\n"
1092  << "LAA: Added an overflow assumption\n");
1094  } else
1095  return 0;
1096  }
1097 
1098  return Stride;
1099 }
1100 
1102  ScalarEvolution &SE,
1103  SmallVectorImpl<unsigned> &SortedIndices) {
1105  VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
1106  "Expected list of pointer operands.");
1108  OffValPairs.reserve(VL.size());
1109 
1110  // Walk over the pointers, and map each of them to an offset relative to
1111  // first pointer in the array.
1112  Value *Ptr0 = VL[0];
1113  const SCEV *Scev0 = SE.getSCEV(Ptr0);
1114  Value *Obj0 = GetUnderlyingObject(Ptr0, DL);
1115 
1117  for (auto *Ptr : VL) {
1118  // TODO: Outline this code as a special, more time consuming, version of
1119  // computeConstantDifference() function.
1120  if (Ptr->getType()->getPointerAddressSpace() !=
1121  Ptr0->getType()->getPointerAddressSpace())
1122  return false;
1123  // If a pointer refers to a different underlying object, bail - the
1124  // pointers are by definition incomparable.
1125  Value *CurrObj = GetUnderlyingObject(Ptr, DL);
1126  if (CurrObj != Obj0)
1127  return false;
1128 
1129  const SCEV *Scev = SE.getSCEV(Ptr);
1130  const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Scev, Scev0));
1131  // The pointers may not have a constant offset from each other, or SCEV
1132  // may just not be smart enough to figure out they do. Regardless,
1133  // there's nothing we can do.
1134  if (!Diff)
1135  return false;
1136 
1137  // Check if the pointer with the same offset is found.
1138  int64_t Offset = Diff->getAPInt().getSExtValue();
1139  if (!Offsets.insert(Offset).second)
1140  return false;
1141  OffValPairs.emplace_back(Offset, Ptr);
1142  }
1143  SortedIndices.clear();
1144  SortedIndices.resize(VL.size());
1145  std::iota(SortedIndices.begin(), SortedIndices.end(), 0);
1146 
1147  // Sort the memory accesses and keep the order of their uses in UseOrder.
1148  std::stable_sort(SortedIndices.begin(), SortedIndices.end(),
1149  [&OffValPairs](unsigned Left, unsigned Right) {
1150  return OffValPairs[Left].first < OffValPairs[Right].first;
1151  });
1152 
1153  // Check if the order is consecutive already.
1154  if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) {
1155  return I == SortedIndices[I];
1156  }))
1157  SortedIndices.clear();
1158 
1159  return true;
1160 }
1161 
1162 /// Take the address space operand from the Load/Store instruction.
1163 /// Returns -1 if this is not a valid Load/Store instruction.
1164 static unsigned getAddressSpaceOperand(Value *I) {
1165  if (LoadInst *L = dyn_cast<LoadInst>(I))
1166  return L->getPointerAddressSpace();
1167  if (StoreInst *S = dyn_cast<StoreInst>(I))
1168  return S->getPointerAddressSpace();
1169  return -1;
1170 }
1171 
1172 /// Returns true if the memory operations \p A and \p B are consecutive.
1174  ScalarEvolution &SE, bool CheckType) {
1175  Value *PtrA = getLoadStorePointerOperand(A);
1176  Value *PtrB = getLoadStorePointerOperand(B);
1177  unsigned ASA = getAddressSpaceOperand(A);
1178  unsigned ASB = getAddressSpaceOperand(B);
1179 
1180  // Check that the address spaces match and that the pointers are valid.
1181  if (!PtrA || !PtrB || (ASA != ASB))
1182  return false;
1183 
1184  // Make sure that A and B are different pointers.
1185  if (PtrA == PtrB)
1186  return false;
1187 
1188  // Make sure that A and B have the same type if required.
1189  if (CheckType && PtrA->getType() != PtrB->getType())
1190  return false;
1191 
1192  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
1193  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
1194  APInt Size(IdxWidth, DL.getTypeStoreSize(Ty));
1195 
1196  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
1197  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
1198  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
1199 
1200  // OffsetDelta = OffsetB - OffsetA;
1201  const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);
1202  const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);
1203  const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
1204  const SCEVConstant *OffsetDeltaC = dyn_cast<SCEVConstant>(OffsetDeltaSCEV);
1205  const APInt &OffsetDelta = OffsetDeltaC->getAPInt();
1206  // Check if they are based on the same pointer. That makes the offsets
1207  // sufficient.
1208  if (PtrA == PtrB)
1209  return OffsetDelta == Size;
1210 
1211  // Compute the necessary base pointer delta to have the necessary final delta
1212  // equal to the size.
1213  // BaseDelta = Size - OffsetDelta;
1214  const SCEV *SizeSCEV = SE.getConstant(Size);
1215  const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV);
1216 
1217  // Otherwise compute the distance with SCEV between the base pointers.
1218  const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
1219  const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
1220  const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta);
1221  return X == PtrSCEVB;
1222 }
1223 
1226  switch (Type) {
1227  case NoDep:
1228  case Forward:
1229  case BackwardVectorizable:
1230  return VectorizationSafetyStatus::Safe;
1231 
1232  case Unknown:
1233  return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
1234  case ForwardButPreventsForwarding:
1235  case Backward:
1236  case BackwardVectorizableButPreventsForwarding:
1237  return VectorizationSafetyStatus::Unsafe;
1238  }
1239  llvm_unreachable("unexpected DepType!");
1240 }
1241 
1243  switch (Type) {
1244  case NoDep:
1245  case Forward:
1246  case ForwardButPreventsForwarding:
1247  case Unknown:
1248  return false;
1249 
1250  case BackwardVectorizable:
1251  case Backward:
1252  case BackwardVectorizableButPreventsForwarding:
1253  return true;
1254  }
1255  llvm_unreachable("unexpected DepType!");
1256 }
1257 
1259  return isBackward() || Type == Unknown;
1260 }
1261 
1263  switch (Type) {
1264  case Forward:
1265  case ForwardButPreventsForwarding:
1266  return true;
1267 
1268  case NoDep:
1269  case Unknown:
1270  case BackwardVectorizable:
1271  case Backward:
1272  case BackwardVectorizableButPreventsForwarding:
1273  return false;
1274  }
1275  llvm_unreachable("unexpected DepType!");
1276 }
1277 
1278 bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
1279  uint64_t TypeByteSize) {
1280  // If loads occur at a distance that is not a multiple of a feasible vector
1281  // factor store-load forwarding does not take place.
1282  // Positive dependences might cause troubles because vectorizing them might
1283  // prevent store-load forwarding making vectorized code run a lot slower.
1284  // a[i] = a[i-3] ^ a[i-8];
1285  // The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
1286  // hence on your typical architecture store-load forwarding does not take
1287  // place. Vectorizing in such cases does not make sense.
1288  // Store-load forwarding distance.
1289 
1290  // After this many iterations store-to-load forwarding conflicts should not
1291  // cause any slowdowns.
1292  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
1293  // Maximum vector factor.
1294  uint64_t MaxVFWithoutSLForwardIssues = std::min(
1295  VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);
1296 
1297  // Compute the smallest VF at which the store and load would be misaligned.
1298  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
1299  VF *= 2) {
1300  // If the number of vector iteration between the store and the load are
1301  // small we could incur conflicts.
1302  if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
1303  MaxVFWithoutSLForwardIssues = (VF >>= 1);
1304  break;
1305  }
1306  }
1307 
1308  if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
1309  LLVM_DEBUG(
1310  dbgs() << "LAA: Distance " << Distance
1311  << " that could cause a store-load forwarding conflict\n");
1312  return true;
1313  }
1314 
1315  if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
1316  MaxVFWithoutSLForwardIssues !=
1317  VectorizerParams::MaxVectorWidth * TypeByteSize)
1318  MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
1319  return false;
1320 }
1321 
1322 void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
1323  if (Status < S)
1324  Status = S;
1325 }
1326 
1327 /// Given a non-constant (unknown) dependence-distance \p Dist between two
1328 /// memory accesses, that have the same stride whose absolute value is given
1329 /// in \p Stride, and that have the same type size \p TypeByteSize,
1330 /// in a loop whose takenCount is \p BackedgeTakenCount, check if it is
1331 /// possible to prove statically that the dependence distance is larger
1332 /// than the range that the accesses will travel through the execution of
1333 /// the loop. If so, return true; false otherwise. This is useful for
1334 /// example in loops such as the following (PR31098):
1335 /// for (i = 0; i < D; ++i) {
1336 /// = out[i];
1337 /// out[i+D] =
1338 /// }
1340  const SCEV &BackedgeTakenCount,
1341  const SCEV &Dist, uint64_t Stride,
1342  uint64_t TypeByteSize) {
1343 
1344  // If we can prove that
1345  // (**) |Dist| > BackedgeTakenCount * Step
1346  // where Step is the absolute stride of the memory accesses in bytes,
1347  // then there is no dependence.
1348  //
1349  // Ratioanle:
1350  // We basically want to check if the absolute distance (|Dist/Step|)
1351  // is >= the loop iteration count (or > BackedgeTakenCount).
1352  // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
1353  // Section 4.2.1); Note, that for vectorization it is sufficient to prove
1354  // that the dependence distance is >= VF; This is checked elsewhere.
1355  // But in some cases we can prune unknown dependence distances early, and
1356  // even before selecting the VF, and without a runtime test, by comparing
1357  // the distance against the loop iteration count. Since the vectorized code
1358  // will be executed only if LoopCount >= VF, proving distance >= LoopCount
1359  // also guarantees that distance >= VF.
1360  //
1361  const uint64_t ByteStride = Stride * TypeByteSize;
1362  const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
1363  const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);
1364 
1365  const SCEV *CastedDist = &Dist;
1366  const SCEV *CastedProduct = Product;
1367  uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
1368  uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());
1369 
1370  // The dependence distance can be positive/negative, so we sign extend Dist;
1371  // The multiplication of the absolute stride in bytes and the
1372  // backdgeTakenCount is non-negative, so we zero extend Product.
1373  if (DistTypeSize > ProductTypeSize)
1374  CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
1375  else
1376  CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
1377 
1378  // Is Dist - (BackedgeTakenCount * Step) > 0 ?
1379  // (If so, then we have proven (**) because |Dist| >= Dist)
1380  const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
1381  if (SE.isKnownPositive(Minus))
1382  return true;
1383 
1384  // Second try: Is -Dist - (BackedgeTakenCount * Step) > 0 ?
1385  // (If so, then we have proven (**) because |Dist| >= -1*Dist)
1386  const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
1387  Minus = SE.getMinusSCEV(NegDist, CastedProduct);
1388  if (SE.isKnownPositive(Minus))
1389  return true;
1390 
1391  return false;
1392 }
1393 
/// Check the dependence for two accesses with the same stride \p Stride.
/// \p Distance is the positive distance and \p TypeByteSize is type size in
/// bytes.
///
/// \returns true if they are independent.
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
                                          uint64_t TypeByteSize) {
  assert(Stride > 1 && "The stride must be greater than 1");
  assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
  assert(Distance > 0 && "The distance must be non-zero");

  // A distance that is not a whole number of elements can never be proven
  // independent here.
  if (Distance % TypeByteSize != 0)
    return false;

  // Scale the distance into element units.
  const uint64_t ScaledDist = Distance / TypeByteSize;

  // The accesses are independent exactly when the scaled distance is NOT a
  // multiple of the stride, because then the two access streams interleave
  // without ever touching the same element.  E.g.
  //     for (i = 0; i < 1024 ; i += 4)
  //       A[i+2] = A[i] + 1;
  // (scaled distance 2, stride 4):
  //     | A[0] |      |      |      | A[4] |      |      |      |
  //     |      |      | A[2] |      |      |      | A[6] |      |
  // whereas
  //     for (i = 0; i < 1024 ; i += 3)
  //       A[i+4] = A[i] + 1;
  // (scaled distance 4, stride 3):
  //     | A[0] |      |      | A[3] |      |      | A[6] |      |      |
  //     |      |      |      |      | A[4] |      |      | A[7] |      |
  return (ScaledDist % Stride) != 0;
}
1429 
1431 MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
1432  const MemAccessInfo &B, unsigned BIdx,
1433  const ValueToValueMap &Strides) {
1434  assert (AIdx < BIdx && "Must pass arguments in program order");
1435 
1436  Value *APtr = A.getPointer();
1437  Value *BPtr = B.getPointer();
1438  bool AIsWrite = A.getInt();
1439  bool BIsWrite = B.getInt();
1440 
1441  // Two reads are independent.
1442  if (!AIsWrite && !BIsWrite)
1443  return Dependence::NoDep;
1444 
1445  // We cannot check pointers in different address spaces.
1446  if (APtr->getType()->getPointerAddressSpace() !=
1447  BPtr->getType()->getPointerAddressSpace())
1448  return Dependence::Unknown;
1449 
1450  int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
1451  int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);
1452 
1453  const SCEV *Src = PSE.getSCEV(APtr);
1454  const SCEV *Sink = PSE.getSCEV(BPtr);
1455 
1456  // If the induction step is negative we have to invert source and sink of the
1457  // dependence.
1458  if (StrideAPtr < 0) {
1459  std::swap(APtr, BPtr);
1460  std::swap(Src, Sink);
1461  std::swap(AIsWrite, BIsWrite);
1462  std::swap(AIdx, BIdx);
1463  std::swap(StrideAPtr, StrideBPtr);
1464  }
1465 
1466  const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);
1467 
1468  LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
1469  << "(Induction step: " << StrideAPtr << ")\n");
1470  LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
1471  << *InstMap[BIdx] << ": " << *Dist << "\n");
1472 
1473  // Need accesses with constant stride. We don't want to vectorize
1474  // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
1475  // the address space.
1476  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
1477  LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
1478  return Dependence::Unknown;
1479  }
1480 
1481  Type *ATy = APtr->getType()->getPointerElementType();
1482  Type *BTy = BPtr->getType()->getPointerElementType();
1483  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
1484  uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
1485  uint64_t Stride = std::abs(StrideAPtr);
1486  const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
1487  if (!C) {
1488  if (TypeByteSize == DL.getTypeAllocSize(BTy) &&
1489  isSafeDependenceDistance(DL, *(PSE.getSE()),
1490  *(PSE.getBackedgeTakenCount()), *Dist, Stride,
1491  TypeByteSize))
1492  return Dependence::NoDep;
1493 
1494  LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
1495  FoundNonConstantDistanceDependence = true;
1496  return Dependence::Unknown;
1497  }
1498 
1499  const APInt &Val = C->getAPInt();
1500  int64_t Distance = Val.getSExtValue();
1501 
1502  // Attempt to prove strided accesses independent.
1503  if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
1504  areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
1505  LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
1506  return Dependence::NoDep;
1507  }
1508 
1509  // Negative distances are not plausible dependencies.
1510  if (Val.isNegative()) {
1511  bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
1512  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1513  (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
1514  ATy != BTy)) {
1515  LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
1516  return Dependence::ForwardButPreventsForwarding;
1517  }
1518 
1519  LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
1520  return Dependence::Forward;
1521  }
1522 
1523  // Write to the same location with the same size.
1524  // Could be improved to assert type sizes are the same (i32 == float, etc).
1525  if (Val == 0) {
1526  if (ATy == BTy)
1527  return Dependence::Forward;
1528  LLVM_DEBUG(
1529  dbgs() << "LAA: Zero dependence difference but different types\n");
1530  return Dependence::Unknown;
1531  }
1532 
1533  assert(Val.isStrictlyPositive() && "Expect a positive value");
1534 
1535  if (ATy != BTy) {
1536  LLVM_DEBUG(
1537  dbgs()
1538  << "LAA: ReadWrite-Write positive dependency with different types\n");
1539  return Dependence::Unknown;
1540  }
1541 
1542  // Bail out early if passed-in parameters make vectorization not feasible.
1543  unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
1545  unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
1547  // The minimum number of iterations for a vectorized/unrolled version.
1548  unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
1549 
1550  // It's not vectorizable if the distance is smaller than the minimum distance
1551  // needed for a vectroized/unrolled version. Vectorizing one iteration in
1552  // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
1553  // TypeByteSize (No need to plus the last gap distance).
1554  //
1555  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1556  // foo(int *A) {
1557  // int *B = (int *)((char *)A + 14);
1558  // for (i = 0 ; i < 1024 ; i += 2)
1559  // B[i] = A[i] + 1;
1560  // }
1561  //
1562  // Two accesses in memory (stride is 2):
1563  // | A[0] | | A[2] | | A[4] | | A[6] | |
1564  // | B[0] | | B[2] | | B[4] |
1565  //
1566  // Distance needs for vectorizing iterations except the last iteration:
1567  // 4 * 2 * (MinNumIter - 1). Distance needs for the last iteration: 4.
1568  // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
1569  //
1570  // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
1571  // 12, which is less than distance.
1572  //
1573  // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
1574  // the minimum distance needed is 28, which is greater than distance. It is
1575  // not safe to do vectorization.
1576  uint64_t MinDistanceNeeded =
1577  TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
1578  if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
1579  LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "
1580  << Distance << '\n');
1581  return Dependence::Backward;
1582  }
1583 
1584  // Unsafe if the minimum distance needed is greater than max safe distance.
1585  if (MinDistanceNeeded > MaxSafeDepDistBytes) {
1586  LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
1587  << MinDistanceNeeded << " size in bytes");
1588  return Dependence::Backward;
1589  }
1590 
1591  // Positive distance bigger than max vectorization factor.
1592  // FIXME: Should use max factor instead of max distance in bytes, which could
1593  // not handle different types.
1594  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1595  // void foo (int *A, char *B) {
1596  // for (unsigned i = 0; i < 1024; i++) {
1597  // A[i+2] = A[i] + 1;
1598  // B[i+2] = B[i] + 1;
1599  // }
1600  // }
1601  //
1602  // This case is currently unsafe according to the max safe distance. If we
1603  // analyze the two accesses on array B, the max safe dependence distance
1604  // is 2. Then we analyze the accesses on array A, the minimum distance needed
1605  // is 8, which is less than 2 and forbidden vectorization, But actually
1606  // both A and B could be vectorized by 2 iterations.
1607  MaxSafeDepDistBytes =
1608  std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
1609 
1610  bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
1611  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1612  couldPreventStoreLoadForward(Distance, TypeByteSize))
1613  return Dependence::BackwardVectorizableButPreventsForwarding;
1614 
1615  uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
1616  LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
1617  << " with max VF = " << MaxVF << '\n');
1618  uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
1619  MaxSafeRegisterWidth = std::min(MaxSafeRegisterWidth, MaxVFInBits);
1620  return Dependence::BackwardVectorizable;
1621 }
1622 
1624  MemAccessInfoList &CheckDeps,
1625  const ValueToValueMap &Strides) {
1626 
1627  MaxSafeDepDistBytes = -1;
1629  for (MemAccessInfo CurAccess : CheckDeps) {
1630  if (Visited.count(CurAccess))
1631  continue;
1632 
1633  // Get the relevant memory access set.
1635  AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
1636 
1637  // Check accesses within this set.
1639  AccessSets.member_begin(I);
1641  AccessSets.member_end();
1642 
1643  // Check every access pair.
1644  while (AI != AE) {
1645  Visited.insert(*AI);
1647  while (OI != AE) {
1648  // Check every accessing instruction pair in program order.
1649  for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
1650  I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
1651  for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
1652  I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
1653  auto A = std::make_pair(&*AI, *I1);
1654  auto B = std::make_pair(&*OI, *I2);
1655 
1656  assert(*I1 != *I2);
1657  if (*I1 > *I2)
1658  std::swap(A, B);
1659 
1661  isDependent(*A.first, A.second, *B.first, B.second, Strides);
1662  mergeInStatus(Dependence::isSafeForVectorization(Type));
1663 
1664  // Gather dependences unless we accumulated MaxDependences
1665  // dependences. In that case return as soon as we find the first
1666  // unsafe dependence. This puts a limit on this quadratic
1667  // algorithm.
1668  if (RecordDependences) {
1669  if (Type != Dependence::NoDep)
1670  Dependences.push_back(Dependence(A.second, B.second, Type));
1671 
1672  if (Dependences.size() >= MaxDependences) {
1673  RecordDependences = false;
1674  Dependences.clear();
1675  LLVM_DEBUG(dbgs()
1676  << "Too many dependences, stopped recording\n");
1677  }
1678  }
1679  if (!RecordDependences && !isSafeForVectorization())
1680  return false;
1681  }
1682  ++OI;
1683  }
1684  AI++;
1685  }
1686  }
1687 
1688  LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
1689  return isSafeForVectorization();
1690 }
1691 
1694  MemAccessInfo Access(Ptr, isWrite);
1695  auto &IndexVector = Accesses.find(Access)->second;
1696 
1698  transform(IndexVector,
1699  std::back_inserter(Insts),
1700  [&](unsigned Idx) { return this->InstMap[Idx]; });
1701  return Insts;
1702 }
1703 
1704 const char *MemoryDepChecker::Dependence::DepName[] = {
1705  "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
1706  "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
1707 
1709  raw_ostream &OS, unsigned Depth,
1710  const SmallVectorImpl<Instruction *> &Instrs) const {
1711  OS.indent(Depth) << DepName[Type] << ":\n";
1712  OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
1713  OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
1714 }
1715 
1716 bool LoopAccessInfo::canAnalyzeLoop() {
1717  // We need to have a loop header.
1718  LLVM_DEBUG(dbgs() << "LAA: Found a loop in "
1719  << TheLoop->getHeader()->getParent()->getName() << ": "
1720  << TheLoop->getHeader()->getName() << '\n');
1721 
1722  // We can only analyze innermost loops.
1723  if (!TheLoop->empty()) {
1724  LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
1725  recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
1726  return false;
1727  }
1728 
1729  // We must have a single backedge.
1730  if (TheLoop->getNumBackEdges() != 1) {
1731  LLVM_DEBUG(
1732  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1733  recordAnalysis("CFGNotUnderstood")
1734  << "loop control flow is not understood by analyzer";
1735  return false;
1736  }
1737 
1738  // We must have a single exiting block.
1739  if (!TheLoop->getExitingBlock()) {
1740  LLVM_DEBUG(
1741  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1742  recordAnalysis("CFGNotUnderstood")
1743  << "loop control flow is not understood by analyzer";
1744  return false;
1745  }
1746 
1747  // We only handle bottom-tested loops, i.e. loop in which the condition is
1748  // checked at the end of each iteration. With that we can assume that all
1749  // instructions in the loop are executed the same number of times.
1750  if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
1751  LLVM_DEBUG(
1752  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1753  recordAnalysis("CFGNotUnderstood")
1754  << "loop control flow is not understood by analyzer";
1755  return false;
1756  }
1757 
1758  // ScalarEvolution needs to be able to find the exit count.
1759  const SCEV *ExitCount = PSE->getBackedgeTakenCount();
1760  if (ExitCount == PSE->getSE()->getCouldNotCompute()) {
1761  recordAnalysis("CantComputeNumberOfIterations")
1762  << "could not determine number of loop iterations";
1763  LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
1764  return false;
1765  }
1766 
1767  return true;
1768 }
1769 
// Analyze all memory accesses in the loop and record in CanVecMem whether
// they allow vectorization.  Walks every block collecting simple loads and
// stores (rejecting any other instruction that reads or writes memory),
// classifies the pointers involved, and then runs the memory-dependence
// checker -- retrying with run-time pointer checks when static analysis
// alone cannot prove safety.
1770 void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
1771  const TargetLibraryInfo *TLI,
1772  DominatorTree *DT) {
1773  typedef SmallPtrSet<Value*, 16> ValueSet;
1774 
1775  // Holds the Load and Store instructions.
// NOTE(review): the declarations of the 'Loads' and 'Stores' SmallVectors
// were elided in this extract -- confirm against the original source.
1778 
1779  // Holds all the different accesses in the loop.
1780  unsigned NumReads = 0;
1781  unsigned NumReadWrites = 0;
1782 
1783  PtrRtChecking->Pointers.clear();
1784  PtrRtChecking->Need = false;
1785 
1786  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
1787 
1788  // For each block.
1789  for (BasicBlock *BB : TheLoop->blocks()) {
1790  // Scan the BB and collect legal loads and stores.
1791  for (Instruction &I : *BB) {
1792  // If this is a load, save it. If this instruction can read from memory
1793  // but is not a load, then we quit. Notice that we don't handle function
1794  // calls that read or write.
1795  if (I.mayReadFromMemory()) {
1796  // Many math library functions read the rounding mode. We will only
1797  // vectorize a loop if it contains known function calls that don't set
1798  // the flag. Therefore, it is safe to ignore this read from memory.
1799  auto *Call = dyn_cast<CallInst>(&I);
1800  if (Call && getVectorIntrinsicIDForCall(Call, TLI))
1801  continue;
1802 
1803  // If the function has an explicit vectorized counterpart, we can safely
1804  // assume that it can be vectorized.
1805  if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
1806  TLI->isFunctionVectorizable(Call->getCalledFunction()->getName()))
1807  continue;
1808 
1809  auto *Ld = dyn_cast<LoadInst>(&I);
1810  if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
1811  recordAnalysis("NonSimpleLoad", Ld)
1812  << "read with atomic ordering or volatile read";
1813  LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
1814  CanVecMem = false;
1815  return;
1816  }
1817  NumLoads++;
1818  Loads.push_back(Ld);
1819  DepChecker->addAccess(Ld);
// NOTE(review): an elided line here presumably guards the call below with
// a versioning flag -- confirm against the original source.
1821  collectStridedAccess(Ld);
1822  continue;
1823  }
1824 
1825  // Save 'store' instructions. Abort if other instructions write to memory.
1826  if (I.mayWriteToMemory()) {
1827  auto *St = dyn_cast<StoreInst>(&I);
1828  if (!St) {
1829  recordAnalysis("CantVectorizeInstruction", St)
1830  << "instruction cannot be vectorized";
1831  CanVecMem = false;
1832  return;
1833  }
1834  if (!St->isSimple() && !IsAnnotatedParallel) {
1835  recordAnalysis("NonSimpleStore", St)
1836  << "write with atomic ordering or volatile write";
1837  LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
1838  CanVecMem = false;
1839  return;
1840  }
1841  NumStores++;
1842  Stores.push_back(St);
1843  DepChecker->addAccess(St);
// NOTE(review): an elided line here presumably guards the call below with
// a versioning flag -- confirm against the original source.
1845  collectStridedAccess(St);
1846  }
1847  } // Next instr.
1848  } // Next block.
1849 
1850  // Now we have two lists that hold the loads and the stores.
1851  // Next, we find the pointers that they use.
1852 
1853  // Check if we see any stores. If there are no stores, then we don't
1854  // care if the pointers are *restrict*.
1855  if (!Stores.size()) {
1856  LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
1857  CanVecMem = true;
1858  return;
1859  }
1860 
1861  MemoryDepChecker::DepCandidates DependentAccesses;
1862  AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
1863  TheLoop, AA, LI, DependentAccesses, *PSE);
1864 
1865  // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
1866  // multiple times on the same object. If the ptr is accessed twice, once
1867  // for read and once for write, it will only appear once (on the write
1868  // list). This is okay, since we are going to check for conflicts between
1869  // writes and between reads and writes, but not between reads and reads.
1870  ValueSet Seen;
1871 
1872  // Record uniform store addresses to identify if we have multiple stores
1873  // to the same address.
1874  ValueSet UniformStores;
1875 
1876  for (StoreInst *ST : Stores) {
1877  Value *Ptr = ST->getPointerOperand();
1878 
// A second store to the same loop-invariant address marks the loop as
// having a dependence involving a loop-invariant address.
1879  if (isUniform(Ptr))
1880  HasDependenceInvolvingLoopInvariantAddress |=
1881  !UniformStores.insert(Ptr).second;
1882 
1883  // If we did *not* see this pointer before, insert it to the read-write
1884  // list. At this phase it is only a 'write' list.
1885  if (Seen.insert(Ptr).second) {
1886  ++NumReadWrites;
1887 
// NOTE(review): 'Loc' is presumably initialized from the store on an
// elided line here (e.g. a MemoryLocation for ST) -- confirm.
1889  // The TBAA metadata could have a control dependency on the predication
1890  // condition, so we cannot rely on it when determining whether or not we
1891  // need runtime pointer checks.
1892  if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
1893  Loc.AATags.TBAA = nullptr;
1894 
1895  Accesses.addStore(Loc);
1896  }
1897  }
1898 
1899  if (IsAnnotatedParallel) {
1900  LLVM_DEBUG(
1901  dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
1902  << "checks.\n");
1903  CanVecMem = true;
1904  return;
1905  }
1906 
1907  for (LoadInst *LD : Loads) {
1908  Value *Ptr = LD->getPointerOperand();
1909  // If we did *not* see this pointer before, insert it to the
1910  // read list. If we *did* see it before, then it is already in
1911  // the read-write list. This allows us to vectorize expressions
1912  // such as A[i] += x; Because the address of A[i] is a read-write
1913  // pointer. This only works if the index of A[i] is consecutive.
1914  // If the address of i is unknown (for example A[B[i]]) then we may
1915  // read a few words, modify, and write a few words, and some of the
1916  // words may be written to the same address.
1917  bool IsReadOnlyPtr = false;
1918  if (Seen.insert(Ptr).second ||
1919  !getPtrStride(*PSE, Ptr, TheLoop, SymbolicStrides)) {
1920  ++NumReads;
1921  IsReadOnlyPtr = true;
1922  }
1923 
1924  // See if there is an unsafe dependency between a load to a uniform address and
1925  // store to the same uniform address.
1926  if (UniformStores.count(Ptr)) {
1927  LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
1928  "load and uniform store to the same address!\n");
1929  HasDependenceInvolvingLoopInvariantAddress = true;
1930  }
1931 
// NOTE(review): 'Loc' is presumably initialized from the load on an
// elided line here (e.g. a MemoryLocation for LD) -- confirm.
1933  // The TBAA metadata could have a control dependency on the predication
1934  // condition, so we cannot rely on it when determining whether or not we
1935  // need runtime pointer checks.
1936  if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
1937  Loc.AATags.TBAA = nullptr;
1938 
1939  Accesses.addLoad(Loc, IsReadOnlyPtr);
1940  }
1941 
1942  // If we write (or read-write) to a single destination and there are no
1943  // other reads in this loop then is it safe to vectorize.
1944  if (NumReadWrites == 1 && NumReads == 0) {
1945  LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
1946  CanVecMem = true;
1947  return;
1948  }
1949 
1950  // Build dependence sets and check whether we need a runtime pointer bounds
1951  // check.
1952  Accesses.buildDependenceSets();
1953 
1954  // Find pointers with computable bounds. We are going to use this information
1955  // to place a runtime bound check.
1956  bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
1957  TheLoop, SymbolicStrides);
1958  if (!CanDoRTIfNeeded) {
1959  recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds";
1960  LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
1961  << "the array bounds.\n");
1962  CanVecMem = false;
1963  return;
1964  }
1965 
1966  LLVM_DEBUG(
1967  dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
1968 
1969  CanVecMem = true;
1970  if (Accesses.isDependencyCheckNeeded()) {
1971  LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
1972  CanVecMem = DepChecker->areDepsSafe(
1973  DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
1974  MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
1975 
// Static dependence checking failed; fall back to run-time memory checks
// if the dependence checker says that might resolve the conflicts.
1976  if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
1977  LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
1978 
1979  // Clear the dependency checks. We assume they are not needed.
1980  Accesses.resetDepChecks(*DepChecker);
1981 
1982  PtrRtChecking->reset();
1983  PtrRtChecking->Need = true;
1984 
1985  auto *SE = PSE->getSE();
1986  CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop,
1987  SymbolicStrides, true);
1988 
1989  // Check that we found the bounds for the pointer.
1990  if (!CanDoRTIfNeeded) {
1991  recordAnalysis("CantCheckMemDepsAtRunTime")
1992  << "cannot check memory dependencies at runtime";
1993  LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
1994  CanVecMem = false;
1995  return;
1996  }
1997 
1998  CanVecMem = true;
1999  }
2000  }
2001 
2002  if (CanVecMem)
2003  LLVM_DEBUG(
2004  dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
2005  << (PtrRtChecking->Need ? "" : " don't")
2006  << " need runtime memory checks.\n");
2007  else {
2008  recordAnalysis("UnsafeMemDep")
2009  << "unsafe dependent memory operations in loop. Use "
2010  "#pragma loop distribute(enable) to allow loop distribution "
2011  "to attempt to isolate the offending operations into a separate "
2012  "loop";
2013  LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
2014  }
2015 }
2016 
// Return true if BB is conditionally executed within the loop and would
// therefore need predication if the loop body were flattened: any block
// that does not dominate the loop latch may be skipped on some iterations.
// NOTE(review): the leading signature line (taking BB and TheLoop) was
// elided in this extract.
2018  DominatorTree *DT) {
2019  assert(TheLoop->contains(BB) && "Unknown block used");
2020 
2021  // Blocks that do not dominate the latch need predication.
2022  BasicBlock* Latch = TheLoop->getLoopLatch();
2023  return !DT->dominates(BB, Latch);
2024 }
2025 
2026 OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
2027  Instruction *I) {
2028  assert(!Report && "Multiple reports generated");
2029 
2030  Value *CodeRegion = TheLoop->getHeader();
2031  DebugLoc DL = TheLoop->getStartLoc();
2032 
2033  if (I) {
2034  CodeRegion = I->getParent();
2035  // If there is no debug location attached to the instruction, revert back to
2036  // using the loop's.
2037  if (I->getDebugLoc())
2038  DL = I->getDebugLoc();
2039  }
2040 
2041  Report = make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
2042  CodeRegion);
2043  return *Report;
2044 }
2045 
// A value is "uniform" for this analysis if ScalarEvolution can prove it is
// invariant in TheLoop.
// NOTE(review): the 'bool LoopAccessInfo::isUniform(Value *V) const {'
// signature line was elided in this extract.
2047  auto *SE = PSE->getSE();
2048  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
2049  // never considered uniform.
2050  // TODO: Is this really what we want? Even without FP SCEV, we may want some
2051  // trivially loop-invariant FP values to be considered uniform.
2052  if (!SE->isSCEVable(V->getType()))
2053  return false;
2054  return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
2055 }
2056 
2057 // FIXME: this function is currently a duplicate of the one in
2058 // LoopVectorize.cpp.
// Helper for tracking the first instruction materialized in Loc's block:
// once FirstInst has been set it is returned unchanged ("sticky");
// otherwise V is returned if it is an Instruction in the same block as
// Loc, and nullptr if it is not (e.g. a constant or an out-of-block value).
2060  Instruction *Loc) {
2061  if (FirstInst)
2062  return FirstInst;
2063  if (Instruction *I = dyn_cast<Instruction>(V))
2064  return I->getParent() == Loc->getParent() ? I : nullptr;
2065  return nullptr;
2066 }
2067 
2068 namespace {
2069 
2070 /// IR Values for the lower and upper bounds of a pointer evolution. We
2071 /// need to use value-handles because SCEV expansion can invalidate previously
2072 /// expanded values. Thus expansion of a pointer can invalidate the bounds for
2073 /// a previous one.
2074 struct PointerBounds {
  // First accessed byte of the range (inclusive); see expandBounds below.
2075  TrackingVH<Value> Start;
  // One past the last accessed byte -- the range [Start, End) is half-open.
2076  TrackingVH<Value> End;
2077 };
2078 
2079 } // end anonymous namespace
2080 
2081 /// Expand code for the lower and upper bound of the pointer group \p CG
2082 /// in \p TheLoop. \return the values for the bounds.
2083 static PointerBounds
// NOTE(review): the parameter line naming 'CG' (the checking pointer
// group) and 'TheLoop' was elided in this extract.
2085  Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE,
2086  const RuntimePointerChecking &PtrRtChecking) {
2087  Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue;
2088  const SCEV *Sc = SE->getSCEV(Ptr);
2089 
2090  unsigned AS = Ptr->getType()->getPointerAddressSpace();
2091  LLVMContext &Ctx = Loc->getContext();
2092 
2093  // Use this type for pointer arithmetic.
2094  Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
2095 
  // A loop-invariant pointer covers the single-byte range [Ptr, Ptr+1).
2096  if (SE->isLoopInvariant(Sc, TheLoop)) {
2097  LLVM_DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:"
2098  << *Ptr << "\n");
2099  // Ptr could be in the loop body. If so, expand a new one at the correct
2100  // location.
2101  Instruction *Inst = dyn_cast<Instruction>(Ptr);
2102  Value *NewPtr = (Inst && TheLoop->contains(Inst))
2103  ? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
2104  : Ptr;
2105  // We must return a half-open range, which means incrementing Sc.
2106  const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy));
2107  Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc);
2108  return {NewPtr, NewPtrPlusOne};
2109  } else {
  // Otherwise expand the group's precomputed Low/High SCEV bounds at Loc.
2110  Value *Start = nullptr, *End = nullptr;
2111  LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
2112  Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
2113  End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
2114  LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High
2115  << "\n");
2116  return {Start, End};
2117  }
2118 }
2119 
2120 /// Turns a collection of checks into a collection of expanded upper and
2121 /// lower bounds for both pointers in the check.
// NOTE(review): the function signature (returning a SmallVector of
// PointerBounds pairs and taking 'PointerChecks') and the lambda header
// inside the transform were elided in this extract.
2124  Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp,
2125  const RuntimePointerChecking &PtrRtChecking) {
2127 
2128  // Here we're relying on the SCEV Expander's cache to only emit code for the
2129  // same bounds once.
2130  transform(
2131  PointerChecks, std::back_inserter(ChecksWithBounds),
  // Map each pointer-check pair through expandBounds above.
2133  PointerBounds
2134  First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking),
2135  Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking);
2136  return std::make_pair(First, Second);
2137  });
2138 
2139  return ChecksWithBounds;
2140 }
2141 
// Emit IR at Loc that evaluates all pairwise pointer-overlap checks and
// folds them into a single conflict flag.  Returns (first emitted
// instruction in Loc's block, final conflict value); the caller branches
// on the conflict value to select the vectorized or scalar loop.
2142 std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks(
2143  Instruction *Loc,
// NOTE(review): the parameter line declaring 'PointerChecks' was elided in
// this extract.
2145  const {
2146  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2147  auto *SE = PSE->getSE();
2148  SCEVExpander Exp(*SE, DL, "induction");
2149  auto ExpandedChecks =
2150  expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, *PtrRtChecking);
2151 
2152  LLVMContext &Ctx = Loc->getContext();
2153  Instruction *FirstInst = nullptr;
2154  IRBuilder<> ChkBuilder(Loc);
2155  // Our instructions might fold to a constant.
2156  Value *MemoryRuntimeCheck = nullptr;
2157 
2158  for (const auto &Check : ExpandedChecks) {
2159  const PointerBounds &A = Check.first, &B = Check.second;
2160  // Check if two pointers (A and B) conflict where conflict is computed as:
2161  // start(A) <= end(B) && start(B) <= end(A)
2162  unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
2163  unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
2164 
  // Note: per this assert, A.End is in AS1 and B.End in AS0, which is why
  // the End bitcasts below use the "opposite" pointer type.
2165  assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
2166  (AS1 == A.End->getType()->getPointerAddressSpace()) &&
2167  "Trying to bounds check pointers with different address spaces");
2168 
2169  Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
2170  Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
2171 
2172  Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
2173  Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
2174  Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
2175  Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
2176 
2177  // [A|B].Start points to the first accessed byte under base [A|B].
2178  // [A|B].End points to the last accessed byte, plus one.
2179  // There is no conflict when the intervals are disjoint:
2180  // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
2181  //
2182  // bound0 = (B.Start < A.End)
2183  // bound1 = (A.Start < B.End)
2184  // IsConflict = bound0 & bound1
2185  Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
2186  FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
2187  Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
2188  FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
2189  Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
2190  FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
  // OR this pair's result into the running conflict reduction.
2191  if (MemoryRuntimeCheck) {
2192  IsConflict =
2193  ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
2194  FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
2195  }
2196  MemoryRuntimeCheck = IsConflict;
2197  }
2198 
2199  if (!MemoryRuntimeCheck)
2200  return std::make_pair(nullptr, nullptr);
2201 
2202  // We have to do this trickery because the IRBuilder might fold the check to a
2203  // constant expression in which case there is no Instruction anchored in a
2204  // the block.
2205  Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
2206  ConstantInt::getTrue(Ctx));
2207  ChkBuilder.Insert(Check, "memcheck.conflict");
2208  FirstInst = getFirstInst(FirstInst, Check, Loc);
2209  return std::make_pair(FirstInst, Check);
2210 }
2211 
2212 std::pair<Instruction *, Instruction *>
// Public entry point: emit the run-time pointer checks at Loc, but only
// when the analysis decided they are required; otherwise return a null
// pair.
// NOTE(review): the 'LoopAccessInfo::addRuntimeChecks(Instruction *Loc)
// const {' signature line was elided in this extract.
2214  if (!PtrRtChecking->Need)
2215  return std::make_pair(nullptr, nullptr);
2216 
2217  return addRuntimeChecks(Loc, PtrRtChecking->getChecks());
2218 }
2219 
2220 void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
2221  Value *Ptr = nullptr;
2222  if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess))
2223  Ptr = LI->getPointerOperand();
2224  else if (StoreInst *SI = dyn_cast<StoreInst>(MemAccess))
2225  Ptr = SI->getPointerOperand();
2226  else
2227  return;
2228 
2229  Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
2230  if (!Stride)
2231  return;
2232 
2233  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
2234  "versioning:");
2235  LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
2236 
2237  // Avoid adding the "Stride == 1" predicate when we know that
2238  // Stride >= Trip-Count. Such a predicate will effectively optimize a single
2239  // or zero iteration loop, as Trip-Count <= Stride == 1.
2240  //
2241  // TODO: We are currently not making a very informed decision on when it is
2242  // beneficial to apply stride versioning. It might make more sense that the
2243  // users of this analysis (such as the vectorizer) will trigger it, based on
2244  // their specific cost considerations; For example, in cases where stride
2245  // versioning does not help resolving memory accesses/dependences, the
2246  // vectorizer should evaluate the cost of the runtime test, and the benefit
2247  // of various possible stride specializations, considering the alternatives
2248  // of using gather/scatters (if available).
2249 
2250  const SCEV *StrideExpr = PSE->getSCEV(Stride);
2251  const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
2252 
2253  // Match the types so we can compare the stride and the BETakenCount.
2254  // The Stride can be positive/negative, so we sign extend Stride;
2255  // The backdgeTakenCount is non-negative, so we zero extend BETakenCount.
2256  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2257  uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
2258  uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
2259  const SCEV *CastedStride = StrideExpr;
2260  const SCEV *CastedBECount = BETakenCount;
2261  ScalarEvolution *SE = PSE->getSE();
2262  if (BETypeSize >= StrideTypeSize)
2263  CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
2264  else
2265  CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
2266  const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
2267  // Since TripCount == BackEdgeTakenCount + 1, checking:
2268  // "Stride >= TripCount" is equivalent to checking:
2269  // Stride - BETakenCount > 0
2270  if (SE->isKnownPositive(StrideMinusBETaken)) {
2271  LLVM_DEBUG(
2272  dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
2273  "Stride==1 predicate will imply that the loop executes "
2274  "at most once.\n");
2275  return;
2276  }
2277  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.");
2278 
2279  SymbolicStrides[Ptr] = Stride;
2280  StrideSet.insert(Stride);
2281 }
2282 
// LoopAccessInfo constructor: build the per-loop helpers (predicated
// ScalarEvolution, run-time pointer checking, memory-dependence checker),
// then run the analysis when the loop shape is analyzable.
// NOTE(review): the first line of the constructor signature (taking L and
// SE) was elided in this extract.
2284  const TargetLibraryInfo *TLI, AliasAnalysis *AA,
2285  DominatorTree *DT, LoopInfo *LI)
2286  : PSE(llvm::make_unique<PredicatedScalarEvolution>(*SE, *L)),
2287  PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
2288  DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
2289  NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
2290  HasDependenceInvolvingLoopInvariantAddress(false) {
2291  if (canAnalyzeLoop())
2292  analyzeLoop(AA, LI, TLI, DT);
2293 }
2294 
2295 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
2296  if (CanVecMem) {
2297  OS.indent(Depth) << "Memory dependences are safe";
2298  if (MaxSafeDepDistBytes != -1ULL)
2299  OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
2300  << " bytes";
2301  if (PtrRtChecking->Need)
2302  OS << " with run-time checks";
2303  OS << "\n";
2304  }
2305 
2306  if (Report)
2307  OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
2308 
2309  if (auto *Dependences = DepChecker->getDependences()) {
2310  OS.indent(Depth) << "Dependences:\n";
2311  for (auto &Dep : *Dependences) {
2312  Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
2313  OS << "\n";
2314  }
2315  } else
2316  OS.indent(Depth) << "Too many dependences, not recorded\n";
2317 
2318  // List the pair of accesses need run-time checks to prove independence.
2319  PtrRtChecking->print(OS, Depth);
2320  OS << "\n";
2321 
2322  OS.indent(Depth) << "Non vectorizable stores to invariant address were "
2323  << (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
2324  << "found in loop.\n";
2325 
2326  OS.indent(Depth) << "SCEV assumptions:\n";
2327  PSE->getUnionPredicate().print(OS, Depth);
2328 
2329  OS << "\n";
2330 
2331  OS.indent(Depth) << "Expressions re-written:\n";
2332  PSE->print(OS, Depth);
2333 }
2334 
// Return the cached LoopAccessInfo for L, computing and memoizing it on
// first use.
// NOTE(review): the 'const LoopAccessInfo &LoopAccessLegacyAnalysis::
// getInfo(Loop *L) {' signature line was elided in this extract.
2336  auto &LAI = LoopAccessInfoMap[L];
2337 
2338  if (!LAI)
2339  LAI = llvm::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
2340 
2341  return *LAI.get();
2342 }
2343 
// Print the analysis results for every loop in the function, visiting
// loops depth-first.  getInfo() is non-const (it populates the cache),
// hence the const_cast on 'this'.
// NOTE(review): the signature line of this const print method was elided
// in this extract.
2345  LoopAccessLegacyAnalysis &LAA = *const_cast<LoopAccessLegacyAnalysis *>(this);
2346 
2347  for (Loop *TopLevelLoop : *LI)
2348  for (Loop *L : depth_first(TopLevelLoop)) {
2349  OS.indent(2) << L->getHeader()->getName() << ":\n";
2350  auto &LAI = LAA.getInfo(L);
2351  LAI.print(OS, 4);
2352  }
2353 }
2354 
// Cache pointers to the analyses this pass depends on; the real work is
// done lazily in getInfo().  Returns false because the IR is not modified.
// NOTE(review): the 'bool LoopAccessLegacyAnalysis::runOnFunction(...)'
// signature line was elided in this extract.
2356  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2357  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2358  TLI = TLIP ? &TLIP->getTLI() : nullptr;
2359  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2360  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2361  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2362 
2363  return false;
2364 }
2365 
2371 
  // This is an analysis pass: it invalidates nothing.
  // NOTE(review): the signature and the addRequired<> lines of
  // getAnalysisUsage were elided in this extract.
2372  AU.setPreservesAll();
2373 }
2374 
2376 static const char laa_name[] = "Loop Access Analysis";
2377 #define LAA_NAME "loop-accesses"
2378 
2385 
2387 
2390  return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
2391 }
2392 
2393 namespace llvm {
2394 
// Factory function for the legacy pass manager.
// NOTE(review): the 'Pass *createLAAPass() {' signature line was elided in
// this extract.
2396  return new LoopAccessLegacyAnalysis();
2397  }
2398 
2399 } // end namespace llvm
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, const SCEV &BackedgeTakenCount, const SCEV &Dist, uint64_t Stride, uint64_t TypeByteSize)
Given a non-constant (unknown) dependence-distance Dist between two memory accesses, that have the same stride whose absolute value is given in Stride, and that have the same type size TypeByteSize, in a loop whose takenCount is BackedgeTakenCount, check if it is possible to prove statically that the dependence distance is larger than the range that the accesses will travel through the execution of the loop.
#define LAA_NAME
APInt abs() const
Get the absolute value;.
Definition: APInt.h:1800
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:81
static unsigned RuntimeMemoryCheckThreshold
performing memory disambiguation checks at runtime do not make more than this number of comparisons...
static bool Check(DecodeStatus &Out, DecodeStatus In)
static const char laa_name[]
Value * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
uint64_t CallInst * C
#define DEBUG_TYPE
static cl::opt< unsigned, true > VectorizationInterleave("force-vector-interleave", cl::Hidden, cl::desc("Sets the vectorization interleave count. " "Zero is autoselect."), cl::location(VectorizerParams::VectorizationInterleave))
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:259
void printChecks(raw_ostream &OS, const SmallVectorImpl< PointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
void setNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Proves that V doesn't overflow by adding SCEV predicate.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
Definition: DataLayout.h:373
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:225
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1563
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:250
bool isBackward() const
Lexically backward dependence.
const SCEV * getConstant(ConstantInt *V)
static bool isInBoundsGep(Value *Ptr)
This class represents lattice values for constants.
Definition: AllocatorList.h:24
static cl::opt< unsigned > MemoryCheckMergeThreshold("memory-check-merge-threshold", cl::Hidden, cl::desc("Maximum number of comparisons done when trying to merge " "runtime memory checks. (default = 100)"), cl::init(100))
The maximum iterations used to merge memory checks.
MDNode * TBAA
The tag for type-based alias analysis.
Definition: Metadata.h:658
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1855
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
static constexpr LocationSize unknown()
const ElemTy & getLeaderValue(const ElemTy &V) const
getLeaderValue - Return the leader for the specified value that is in the set.
void push_back(const T &Elt)
Definition: SmallVector.h:218
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
The main scalar evolution driver.
This class represents a function call, abstracting a target machine's calling convention.
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of its element size.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
bool IsWritePtr
Holds the information if this pointer is used for writing to memory.
Offsets
Offsets in bytes from the start of the input buffer.
Definition: SIInstrInfo.h:1025
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:705
void reset()
Reset the state of the pointer runtime information.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1186
Pass * createLAAPass()
void print(raw_ostream &OS, unsigned Depth=0) const
Print the list run-time memory checks necessary.
A debug info location.
Definition: DebugLoc.h:34
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
F(f)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503
block Block Frequency true
An instruction for reading from memory.
Definition: Instructions.h:168
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
Hexagon Common GEP
std::enable_if<!std::is_array< T >::value, std::unique_ptr< T > >::type make_unique(Args &&... args)
Constructs a new T() with the given args and returns a unique_ptr<T> which owns the object...
Definition: STLExtras.h:1349
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
static unsigned VectorizationFactor
VF as overridden by the user.
void reserve(size_type N)
Definition: SmallVector.h:376
uint64_t High
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, uint64_t TypeByteSize)
Check the dependence for two accesses with the same stride Stride.
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1509
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides, PredicatedScalarEvolution &PSE)
Insert a pointer and calculate the start and end SCEVs.
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:134
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:227
Type * getPointerElementType() const
Definition: Type.h:376
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:371
static const unsigned MaxVectorWidth
Maximum SIMD width.
bool isPossiblyBackward() const
May be a lexically backward dependence type (includes Unknown).
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
Diagnostic information for optimization analysis remarks.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:743
const SCEVPredicate * getEqualPredicate(const SCEV *LHS, const SCEV *RHS)
unsigned getNumberOfChecks() const
Returns the number of run-time checks required according to needsChecking.
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
This file implements a class to represent arbitrary precision integral constant values and operations...
static const char * DepName[]
String version of the types.
const APInt & getAPInt() const
BlockT * getHeader() const
Definition: LoopInfo.h:100
ConstantInt * getValue() const
Key
PAL metadata keys.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1732
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1575
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
member_iterator member_begin(iterator I) const
This node represents a polynomial recurrence on the trip count of the specified loop.
static cl::opt< bool > EnableForwardingConflictDetection("store-to-load-forwarding-conflict-detection", cl::Hidden, cl::desc("Enable conflict detection in loop-access analysis"), cl::init(true))
Enable store-to-load forwarding conflict detection.
iterator insert(const ElemTy &Data)
insert - Insert a new value into the union/find set, ignoring the request if the value already exists...
const SCEVAddRecExpr * getAsAddRec(Value *V)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
This header provides classes for managing per-loop analyses.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
iterator findValue(const ElemTy &V) const
findValue - Return an iterator to the specified value.
Value * getLoadStorePointerOperand(Value *V)
A helper function that returns the pointer operand of a load or store instruction.
An instruction for storing to memory.
Definition: Instructions.h:321
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
static Instruction * getFirstInst(Instruction *FirstInst, Value *V, Instruction *Loc)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:145
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1182
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:176
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
std::pair< Instruction *, Instruction * > addRuntimeChecks(Instruction *Loc) const
Add code that checks at runtime if the accessed arrays overlap.
bool isForward() const
Lexically forward dependence.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:854
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:364
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
bool needsChecking(const CheckingPtrGroup &M, const CheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:391
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
static cl::opt< unsigned > MaxDependences("max-dependences", cl::Hidden, cl::desc("Maximum number of dependences collected by " "loop-access analysis (default = 100)"), cl::init(100))
We collect dependences up to this threshold.
Value handle that tracks a Value across RAUW.
Definition: ValueHandle.h:337
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
This analysis provides dependence information for the memory accesses of a loop.
void print(raw_ostream &OS, unsigned Depth=0) const
Print the information about the memory accesses in the loop.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:224
static bool hasComputableBounds(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, Loop *L, bool Assume)
Check whether a pointer can participate in a runtime bounds check.
bool addPointer(unsigned Index)
Tries to add the pointer recorded in RtCheck at index Index to this pointer checking group...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
member_iterator member_end() const
Represent the analysis usage information of a pass.
VectorizationSafetyStatus
Type to keep track of the status of the dependence check.
unsigned getAddressSpace() const
Definition: Globals.cpp:111
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr, Value *OrigPtr=nullptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one...
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:365
std::pair< const CheckingPtrGroup *, const CheckingPtrGroup * > PointerCheck
A memcheck which made up of a pair of grouped pointers.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
size_t size() const
Definition: SmallVector.h:53
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:220
Value * stripIntegerCast(Value *V)
const SCEV * getMulExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned AliasSetId
Holds the id of the disjoint alias set to which this pointer belongs.
bool isNegative() const
Definition: Constants.h:188
DepType
The type of the dependence.
SmallVector< unsigned, 2 > Members
Indices of all the pointers that constitute this grouping.
const Value * Ptr
The address of the start of the location.
Representation for a specific memory location.
bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps, const ValueToValueMap &Strides)
Check whether the dependencies between the accesses are safe.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
static const X86InstrFMA3Group Groups[]
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Returns true if we've proved that V doesn't wrap by means of a SCEV predicate.
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, PredicatedScalarEvolution &PSE, const Loop *L)
Return true if an AddRec pointer Ptr is unsigned non-wrapping, i.e.
bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
Type * getType() const
Return the LLVM type of this SCEV expression.
bool isUniform(Value *V) const
Returns true if the value V is uniform within the loop.
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass...
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:130
bool isFunctionVectorizable(StringRef F, unsigned VF) const
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:249
static const SCEV * getMinFromExprs(const SCEV *I, const SCEV *J, ScalarEvolution *SE)
Compare I and J and return the minimum.
Provides information about what library functions are available for the current target.
void print(raw_ostream &OS, unsigned Depth, const SmallVectorImpl< Instruction *> &Instrs) const
Print the dependence.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:48
Drive the analysis of memory accesses in the loop.
const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:578
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:1437
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
const LoopAccessInfo & getInfo(Loop *L)
Query the result of the loop access information for the loop L.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
Accumulate offsets from stripInBoundsConstantOffsets().
Definition: Value.cpp:547
Class for arbitrary precision integers.
Definition: APInt.h:70
void generateChecks(MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies)
Generate the checks and store it.
void setPreservesAll()
Set by analyses that do not transform their input at all.
This class uses information about analyze scalars to rewrite expressions in canonical form...
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:436
const SCEV * getBackedgeTakenCount()
Get the (predicated) backedge count for the analyzed loop.
This analysis provides dependence information for the memory accesses of a loop.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
static PointerBounds expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop, Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE, const RuntimePointerChecking &PtrRtChecking)
Expand code for the lower and upper bound of the pointer group CG in TheLoop.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:311
Dependence between memory access instructions.
void emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:652
This class represents an analyzed expression in the program.
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
static cl::opt< bool > EnableMemAccessVersioning("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symbolic stride memory access versioning"))
This enables versioning on the strides of symbolically striding memory accesses in code like the foll...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:465
This file provides utility analysis objects describing memory locations.
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1213
iterator end()
Definition: DenseMap.h:109
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
static cl::opt< unsigned, true > VectorizationFactor("force-vector-width", cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."), cl::location(VectorizerParams::VectorizationFactor))
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
uint32_t Size
Definition: Profile.cpp:47
static unsigned getAddressSpaceOperand(Value *I)
Take the address space operand from the Load/Store instruction.
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1164
OutputIt transform(R &&Range, OutputIt d_first, UnaryPredicate P)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere...
Definition: STLExtras.h:1268
bool empty() const
Definition: LoopInfo.h:146
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Definition: IRBuilder.h:794
static LLVM_ATTRIBUTE_ALWAYS_INLINE bool CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL)
void GetUnderlyingObjects(Value *V, SmallVectorImpl< Value *> &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instruct...
iterator_range< df_iterator< T > > depth_first(const T &G)
static VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
typename std::set< ECValue >::const_iterator iterator
iterator* - Provides a way to iterate over all values in the set.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool isAnnotatedParallel() const
Returns true if the loop is annotated parallel.
Definition: LoopInfo.cpp:290
const SCEV * getUMinExpr(const SCEV *LHS, const SCEV *RHS)
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM Value Representation.
Definition: Value.h:73
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:419
A vector that has set insertion semantics.
Definition: SetVector.h:41
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:970
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:260
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
This header defines various interfaces for pass management in LLVM.
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Find the set of instructions that read or write via Ptr.
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
#define LLVM_DEBUG(X)
Definition: Debug.h:123
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1238
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
bool sortPtrAccesses(ArrayRef< Value *> VL, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)
Attempt to sort the pointers in VL and return the sorted indices in SortedIndices, if reordering is required.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:50
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: PassManager.h:71
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:439
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:274
static cl::opt< unsigned, true > RuntimeMemoryCheckThreshold("runtime-memory-check-threshold", cl::Hidden, cl::desc("When performing memory disambiguation checks at runtime do not " "generate more than this number of comparisons (default = 8)."), cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8))
void print(raw_ostream &OS, const Module *M=nullptr) const override
Print the result of the analysis when invoked with -analyze.
unsigned DependencySetId
Holds the id of the set of pointers that could be dependent because of a shared underlying object...
static bool isNoWrap(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, Loop *L)
Check whether a pointer address cannot wrap.
const BasicBlock * getParent() const
Definition: Instruction.h:67
This class represents a constant integer value.
bool Need
This flag indicates if we need to add the runtime check.
LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI)
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:165
void resize(size_type N)
Definition: SmallVector.h:351