//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The implementation of the loop memory dependence analysis that was
// originally developed for the loop vectorizer.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "loop-accesses"

static cl::opt<unsigned, true>
VectorizationFactor("force-vector-width", cl::Hidden,
                    cl::desc("Sets the SIMD width. Zero is autoselect."),
                    cl::location(VectorizerParams::VectorizationFactor));
unsigned VectorizerParams::VectorizationFactor;

static cl::opt<unsigned, true>
VectorizationInterleave("force-vector-interleave", cl::Hidden,
                        cl::desc("Sets the vectorization interleave count. "
                                 "Zero is autoselect."),
                        cl::location(
                            VectorizerParams::VectorizationInterleave));
unsigned VectorizerParams::VectorizationInterleave;

static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
    "runtime-memory-check-threshold", cl::Hidden,
    cl::desc("When performing memory disambiguation checks at runtime do not "
             "generate more than this number of comparisons (default = 8)."),
    cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
unsigned VectorizerParams::RuntimeMemoryCheckThreshold;

/// The maximum iterations used to merge memory checks.
static cl::opt<unsigned> MemoryCheckMergeThreshold(
    "memory-check-merge-threshold", cl::Hidden,
    cl::desc("Maximum number of comparisons done when trying to merge "
             "runtime memory checks. (default = 100)"),
    cl::init(100));

/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;

/// We collect dependences up to this threshold.
static cl::opt<unsigned>
    MaxDependences("max-dependences", cl::Hidden,
                   cl::desc("Maximum number of dependences collected by "
                            "loop-access analysis (default = 100)"),
                   cl::init(100));

/// This enables versioning on the strides of symbolically striding memory
/// accesses in code like the following.
///   for (i = 0; i < N; ++i)
///     A[i * Stride1] += B[i * Stride2] ...
///
/// Will be roughly translated to
///    if (Stride1 == 1 && Stride2 == 1) {
///      for (i = 0; i < N; i += 4)
///        A[i:i+3] += ...
///    } else
///      ...
static cl::opt<bool> EnableMemAccessVersioning(
    "enable-mem-access-versioning", cl::init(true), cl::Hidden,
    cl::desc("Enable symbolic stride memory access versioning"));

/// Enable store-to-load forwarding conflict detection. This option can
/// be disabled for correctness testing.
static cl::opt<bool> EnableForwardingConflictDetection(
    "store-to-load-forwarding-conflict-detection", cl::Hidden,
    cl::desc("Enable conflict detection in loop-access analysis"),
    cl::init(true));

bool VectorizerParams::isInterleaveForced() {
  return ::VectorizationInterleave.getNumOccurrences() > 0;
}

Value *llvm::stripIntegerCast(Value *V) {
  if (auto *CI = dyn_cast<CastInst>(V))
    if (CI->getOperand(0)->getType()->isIntegerTy())
      return CI->getOperand(0);
  return V;
}

const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
                                            const ValueToValueMap &PtrToStride,
                                            Value *Ptr) {
  const SCEV *OrigSCEV = PSE.getSCEV(Ptr);

  // If there is an entry in the map return the SCEV of the pointer with the
  // symbolic stride replaced by one.
  ValueToValueMap::const_iterator SI = PtrToStride.find(Ptr);
  if (SI == PtrToStride.end())
    // For a non-symbolic stride, just return the original expression.
    return OrigSCEV;

  Value *StrideVal = stripIntegerCast(SI->second);

  ScalarEvolution *SE = PSE.getSE();
  const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
  const auto *CT =
      static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));

  PSE.addPredicate(*SE->getEqualPredicate(U, CT));
  auto *Expr = PSE.getSCEV(Ptr);

  LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
                    << " by: " << *Expr << "\n");
  return Expr;
}
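
// For illustration: given an access A[i * Stride] with 4-byte elements, the
// pointer SCEV is roughly {%A,+,(4 * %Stride)}<%loop>. Once the predicate
// %Stride == 1 has been added above, the rewritten expression becomes
// {%A,+,4}<%loop>, a form that getPtrStride below recognizes as unit stride.
// (A sketch only; the exact SCEV printing may differ.)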

RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
    unsigned Index, RuntimePointerChecking &RtCheck)
    : High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start),
      AddressSpace(RtCheck.Pointers[Index]
                       .PointerValue->getType()
                       ->getPointerAddressSpace()) {
  Members.push_back(Index);
}

/// Calculate Start and End points of memory access.
/// Let's assume A is the first access and B is a memory access on N-th loop
/// iteration. Then B is calculated as:
///   B = A + Step*N .
/// Step value may be positive or negative.
/// N is a calculated back-edge taken count:
///     N = (TripCount > 0) ? RoundDown(TripCount - 1, VF) : 0
/// Start and End points are calculated in the following way:
/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
/// where SizeOfElt is the size of single memory access in bytes.
///
/// There is no conflict when the intervals are disjoint:
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
                                    unsigned DepSetId, unsigned ASId,
                                    const ValueToValueMap &Strides,
                                    PredicatedScalarEvolution &PSE) {
  // Get the stride replaced scev.
  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
  ScalarEvolution *SE = PSE.getSE();

  const SCEV *ScStart;
  const SCEV *ScEnd;

  if (SE->isLoopInvariant(Sc, Lp)) {
    ScStart = ScEnd = Sc;
  } else {
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
    assert(AR && "Invalid addrec expression");
    const SCEV *Ex = PSE.getBackedgeTakenCount();

    ScStart = AR->getStart();
    ScEnd = AR->evaluateAtIteration(Ex, *SE);
    const SCEV *Step = AR->getStepRecurrence(*SE);

    // For expressions with negative step, the upper bound is ScStart and the
    // lower bound is ScEnd.
    if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
      if (CStep->getValue()->isNegative())
        std::swap(ScStart, ScEnd);
    } else {
      // Fallback case: the step is not constant, but we can still
      // get the upper and lower bounds of the interval by using min/max
      // expressions.
      ScStart = SE->getUMinExpr(ScStart, ScEnd);
      ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
    }
  }
  // Add the size of the pointed element to ScEnd.
  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
  Type *IdxTy = DL.getIndexType(Ptr->getType());
  const SCEV *EltSizeSCEV =
      SE->getStoreSizeOfExpr(IdxTy, Ptr->getType()->getPointerElementType());
  ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);

  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
}
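
// A quick worked example (illustrative): for a 4-byte access {%A,+,4}<%loop>
// with a backedge-taken count of 99, ScStart is %A and ScEnd evaluates to
// %A + 396; adding SizeOfElt gives the half-open interval [%A, %A + 400).
// Two pointers need a runtime check exactly when such intervals overlap.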

SmallVector<RuntimePointerCheck, 4>
RuntimePointerChecking::generateChecks() const {
  SmallVector<RuntimePointerCheck, 4> Checks;

  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
      const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I];
      const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J];

      if (needsChecking(CGI, CGJ))
        Checks.push_back(std::make_pair(&CGI, &CGJ));
    }
  }
  return Checks;
}

void RuntimePointerChecking::generateChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  assert(Checks.empty() && "Checks is not empty");
  groupChecks(DepCands, UseDependencies);
  Checks = generateChecks();
}

bool RuntimePointerChecking::needsChecking(
    const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const {
  for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
    for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
      if (needsChecking(M.Members[I], N.Members[J]))
        return true;
  return false;
}

/// Compare \p I and \p J and return the minimum.
/// Return nullptr in case we couldn't find an answer.
static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
                                   ScalarEvolution *SE) {
  const SCEV *Diff = SE->getMinusSCEV(J, I);
  const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);

  if (!C)
    return nullptr;
  if (C->getValue()->isNegative())
    return J;
  return I;
}
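
// For illustration: with I = (%A + 8) and J = (%A + 24), Diff is the constant
// 16, which is non-negative, so I is the minimum. If the operands differ by a
// symbolic quantity, no constant difference exists, nullptr is returned, and
// addPointer below refuses to merge the pointer into the group.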

bool RuntimeCheckingPtrGroup::addPointer(unsigned Index,
                                         RuntimePointerChecking &RtCheck) {
  return addPointer(
      Index, RtCheck.Pointers[Index].Start, RtCheck.Pointers[Index].End,
      RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(),
      *RtCheck.SE);
}

bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
                                         const SCEV *End, unsigned AS,
                                         ScalarEvolution &SE) {
  assert(AddressSpace == AS &&
         "all pointers in a checking group must be in the same address space");

  // Compare the starts and ends with the known minimum and maximum
  // of this set. We need to know how we compare against the min/max
  // of the set in order to be able to emit memchecks.
  const SCEV *Min0 = getMinFromExprs(Start, Low, &SE);
  if (!Min0)
    return false;

  const SCEV *Min1 = getMinFromExprs(End, High, &SE);
  if (!Min1)
    return false;

  // Update the low bound expression if we've found a new min value.
  if (Min0 == Start)
    Low = Start;

  // Update the high bound expression if we've found a new max value.
  if (Min1 != End)
    High = End;

  Members.push_back(Index);
  return true;
}
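
// For illustration: if a group currently spans [%A + 4, %A + 20) and a new
// pointer spans [%A, %A + 8), both start and end differences are constants,
// so the pointer is merged and the bounds widen to [%A, %A + 20). One runtime
// check against the widened interval then covers every member of the group.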

void RuntimePointerChecking::groupChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  // We build the groups from dependency candidates equivalence classes
  // because:
  //    - We know that pointers in the same equivalence class share
  //      the same underlying object and therefore there is a chance
  //      that we can compare pointers
  //    - We wouldn't be able to merge two pointers for which we need
  //      to emit a memcheck. The classes in DepCands are already
  //      conveniently built such that no two pointers in the same
  //      class need checking against each other.

  // We use the following (greedy) algorithm to construct the groups
  // For every pointer in the equivalence class:
  //   For each existing group:
  //   - if the difference between this pointer and the min/max bounds
  //     of the group is a constant, then make the pointer part of the
  //     group and update the min/max bounds of that group as required.

  CheckingGroups.clear();

  // If we need to check two pointers to the same underlying object
  // with a non-constant difference, we shouldn't perform any pointer
  // grouping with those pointers. This is because we can easily get
  // into cases where the resulting check would return false, even when
  // the accesses are safe.
  //
  // The following example shows this:
  // for (i = 0; i < 1000; ++i)
  //   a[5000 + i * m] = a[i] + a[i + 9000]
  //
  // Here grouping gives a check of (5000, 5000 + 1000 * m) against
  // (0, 10000) which is always false. However, if m is 1, there is no
  // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
  // us to perform an accurate check in this case.
  //
  // The above case requires that we have an UnknownDependence between
  // accesses to the same underlying object. This cannot happen unless
  // FoundNonConstantDistanceDependence is set, and therefore UseDependencies
  // is also false. In this case we will use the fallback path and create
  // separate checking groups for all pointers.

  // If we don't have the dependency partitions, construct a new
  // checking pointer group for each pointer. This is also required
  // for correctness, because in this case we can have checking between
  // pointers to the same underlying object.
  if (!UseDependencies) {
    for (unsigned I = 0; I < Pointers.size(); ++I)
      CheckingGroups.push_back(RuntimeCheckingPtrGroup(I, *this));
    return;
  }

  unsigned TotalComparisons = 0;

  DenseMap<Value *, unsigned> PositionMap;
  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
    PositionMap[Pointers[Index].PointerValue] = Index;

  // We need to keep track of what pointers we've already seen so we
  // don't process them twice.
  SmallSet<unsigned, 2> Seen;

  // Go through all equivalence classes, get the "pointer check groups"
  // and add them to the overall solution. We use the order in which accesses
  // appear in 'Pointers' to enforce determinism.
  for (unsigned I = 0; I < Pointers.size(); ++I) {
    // We've seen this pointer before, and therefore already processed
    // its equivalence class.
    if (Seen.count(I))
      continue;

    MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
                                           Pointers[I].IsWritePtr);

    SmallVector<RuntimeCheckingPtrGroup, 2> Groups;
    auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));

    // Because DepCands is constructed by visiting accesses in the order in
    // which they appear in alias sets (which is deterministic) and the
    // iteration order within an equivalence class member is only dependent on
    // the order in which unions and insertions are performed on the
    // equivalence class, the iteration order is deterministic.
    for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
         MI != ME; ++MI) {
      auto PointerI = PositionMap.find(MI->getPointer());
      assert(PointerI != PositionMap.end() &&
             "pointer in equivalence class not found in PositionMap");
      unsigned Pointer = PointerI->second;
      bool Merged = false;
      // Mark this pointer as seen.
      Seen.insert(Pointer);

      // Go through all the existing sets and see if we can find one
      // which can include this pointer.
      for (RuntimeCheckingPtrGroup &Group : Groups) {
        // Don't perform more than a certain amount of comparisons.
        // This should limit the cost of grouping the pointers to something
        // reasonable. If we do end up hitting this threshold, the algorithm
        // will create separate groups for all remaining pointers.
        if (TotalComparisons > MemoryCheckMergeThreshold)
          break;

        TotalComparisons++;

        if (Group.addPointer(Pointer, *this)) {
          Merged = true;
          break;
        }
      }

      if (!Merged)
        // We couldn't add this pointer to any existing set or the threshold
        // for the number of comparisons has been reached. Create a new group
        // to hold the current pointer.
        Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this));
    }

    // We've computed the grouped checks for this partition.
    // Save the results and continue with the next one.
    llvm::copy(Groups, std::back_inserter(CheckingGroups));
  }
}
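
// Sketch of the effect (illustrative): suppose one equivalence class holds
// pointers %a, %a + 16, and %b. The first two merge into one group with
// widened bounds because their distance is a constant, while %b, at an
// unknown offset, starts a group of its own. generateChecks then emits one
// group-vs-group check instead of comparing each pointer pair separately.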

bool RuntimePointerChecking::arePointersInSamePartition(
    const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
    unsigned PtrIdx2) {
  return (PtrToPartition[PtrIdx1] != -1 &&
          PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
}

bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
  const PointerInfo &PointerI = Pointers[I];
  const PointerInfo &PointerJ = Pointers[J];

  // No need to check if two readonly pointers intersect.
  if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
    return false;

  // Only need to check pointers between two different dependency sets.
  if (PointerI.DependencySetId == PointerJ.DependencySetId)
    return false;

  // Only need to check pointers in the same alias set.
  if (PointerI.AliasSetId != PointerJ.AliasSetId)
    return false;

  return true;
}

void RuntimePointerChecking::printChecks(
    raw_ostream &OS, const SmallVectorImpl<RuntimePointerCheck> &Checks,
    unsigned Depth) const {
  unsigned N = 0;
  for (const auto &Check : Checks) {
    const auto &First = Check.first->Members, &Second = Check.second->Members;

    OS.indent(Depth) << "Check " << N++ << ":\n";

    OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
    for (unsigned K = 0; K < First.size(); ++K)
      OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";

    OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
    for (unsigned K = 0; K < Second.size(); ++K)
      OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
  }
}

void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {

  OS.indent(Depth) << "Run-time memory checks:\n";
  printChecks(OS, Checks, Depth);

  OS.indent(Depth) << "Grouped accesses:\n";
  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    const auto &CG = CheckingGroups[I];

    OS.indent(Depth + 2) << "Group " << &CG << ":\n";
    OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
                         << ")\n";
    for (unsigned J = 0; J < CG.Members.size(); ++J) {
      OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
                           << "\n";
    }
  }
}

namespace {

/// Analyses memory accesses in a loop.
///
/// Checks whether run time pointer checks are needed and builds sets for data
/// dependence checking.
class AccessAnalysis {
public:
  /// Read or write access location.
  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
  typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;

  AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI,
                 MemoryDepChecker::DepCandidates &DA,
                 PredicatedScalarEvolution &PSE)
      : TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA),
        IsRTCheckAnalysisNeeded(false), PSE(PSE) {}

  /// Register a load and whether it is only read from.
  void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
    Value *Ptr = const_cast<Value *>(Loc.Ptr);
    AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
    Accesses.insert(MemAccessInfo(Ptr, false));
    if (IsReadOnly)
      ReadOnlyPtr.insert(Ptr);
  }

  /// Register a store.
  void addStore(MemoryLocation &Loc) {
    Value *Ptr = const_cast<Value *>(Loc.Ptr);
    AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
    Accesses.insert(MemAccessInfo(Ptr, true));
  }

  /// Check if we can emit a run-time no-alias check for \p Access.
  ///
  /// Returns true if we can emit a run-time no alias check for \p Access.
  /// If we can check this access, this also adds it to a dependence set and
  /// adds a run-time check for it to \p RtCheck. If \p Assume is true,
  /// we will attempt to use additional run-time checks in order to get
  /// the bounds of the pointer.
  bool createCheckForAccess(RuntimePointerChecking &RtCheck,
                            MemAccessInfo Access,
                            const ValueToValueMap &Strides,
                            DenseMap<Value *, unsigned> &DepSetId,
                            Loop *TheLoop, unsigned &RunningDepId,
                            unsigned ASId, bool ShouldCheckStride,
                            bool Assume);

  /// Check whether we can check the pointers at runtime for
  /// non-intersection.
  ///
  /// Returns true if we need no check or if we do and we can generate them
  /// (i.e. the pointers have computable bounds).
  bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
                       Loop *TheLoop, const ValueToValueMap &Strides,
                       bool ShouldCheckWrap = false);

  /// Goes over all memory accesses, checks whether a RT check is needed
  /// and builds sets of dependent accesses.
  void buildDependenceSets() {
    processMemAccesses();
  }

  /// Initial processing of memory accesses determined that we need to
  /// perform dependency checking.
  ///
  /// Note that this can later be cleared if we retry memcheck analysis without
  /// dependency checking (i.e. FoundNonConstantDistanceDependence).
  bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }

  /// We decided that no dependence analysis would be used. Reset the state.
  void resetDepChecks(MemoryDepChecker &DepChecker) {
    CheckDeps.clear();
    DepChecker.clearDependences();
  }

  MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }

private:
  typedef SetVector<MemAccessInfo> PtrAccessSet;

  /// Go over all memory accesses and check whether runtime pointer checks
  /// are needed and build sets of dependency check candidates.
  void processMemAccesses();

  /// Set of all accesses.
  PtrAccessSet Accesses;

  /// The loop being checked.
  const Loop *TheLoop;

  /// List of accesses that need a further dependence check.
  MemAccessInfoList CheckDeps;

  /// Set of pointers that are read only.
  SmallPtrSet<Value*, 16> ReadOnlyPtr;

  /// An alias set tracker to partition the access set by underlying object and
  /// intrinsic property (such as TBAA metadata).
  AliasSetTracker AST;

  LoopInfo *LI;

  /// Sets of potentially dependent accesses - members of one set share an
  /// underlying pointer. The set "CheckDeps" identifies which sets really need
  /// a dependence check.
  MemoryDepChecker::DepCandidates &DepCands;

  /// Initial processing of memory accesses determined that we may need
  /// to add memchecks. Perform the analysis to determine the necessary checks.
  ///
  /// Note that, this is different from isDependencyCheckNeeded. When we retry
  /// memcheck analysis without dependency checking
  /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is
  /// cleared while this remains set if we have potentially dependent accesses.
  bool IsRTCheckAnalysisNeeded;

  /// The SCEV predicate containing all the SCEV-related assumptions.
  PredicatedScalarEvolution &PSE;
};

} // end anonymous namespace

/// Check whether a pointer can participate in a runtime bounds check.
/// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
/// by adding run-time checks (overflow checks) if necessary.
static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
                                const ValueToValueMap &Strides, Value *Ptr,
                                Loop *L, bool Assume) {
  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);

  // The bounds for a loop-invariant pointer are trivial.
  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
    return true;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);

  if (!AR && Assume)
    AR = PSE.getAsAddRec(Ptr);

  if (!AR)
    return false;

  return AR->isAffine();
}
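
// For illustration: a pointer evolving as {%A,+,4}<%loop> is an affine AddRec
// and has computable bounds, whereas an access like A[i * i] produces a
// non-affine (quadratic) recurrence, so AR->isAffine() fails and no runtime
// bound can be formed for it.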

/// Check whether a pointer address cannot wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE,
                     const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
  const SCEV *PtrScev = PSE.getSCEV(Ptr);
  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
    return true;

  Type *AccessTy = Ptr->getType()->getPointerElementType();
  int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides);
  if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
    return true;

  return false;
}

bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
                                          MemAccessInfo Access,
                                          const ValueToValueMap &StridesMap,
                                          DenseMap<Value *, unsigned> &DepSetId,
                                          Loop *TheLoop, unsigned &RunningDepId,
                                          unsigned ASId, bool ShouldCheckWrap,
                                          bool Assume) {
  Value *Ptr = Access.getPointer();

  if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
    return false;

  // When we run after a failing dependency check we have to make sure
  // we don't have wrapping pointers.
  if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
    auto *Expr = PSE.getSCEV(Ptr);
    if (!Assume || !isa<SCEVAddRecExpr>(Expr))
      return false;
    PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
  }

  // The id of the dependence set.
  unsigned DepId;

  if (isDependencyCheckNeeded()) {
    Value *Leader = DepCands.getLeaderValue(Access).getPointer();
    unsigned &LeaderId = DepSetId[Leader];
    if (!LeaderId)
      LeaderId = RunningDepId++;
    DepId = LeaderId;
  } else
    // Each access has its own dependence set.
    DepId = RunningDepId++;

  bool IsWrite = Access.getInt();
  RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
  LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');

  return true;
}

bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
                                     ScalarEvolution *SE, Loop *TheLoop,
                                     const ValueToValueMap &StridesMap,
                                     bool ShouldCheckWrap) {
  // Find pointers with computable bounds. We are going to use this information
  // to place a runtime bound check.
  bool CanDoRT = true;

  bool MayNeedRTCheck = false;
  if (!IsRTCheckAnalysisNeeded) return true;

  bool IsDepCheckNeeded = isDependencyCheckNeeded();

  // We assign a consecutive id to accesses from different alias sets.
  // Accesses between different groups don't need to be checked.
  unsigned ASId = 0;
  for (auto &AS : AST) {
    int NumReadPtrChecks = 0;
    int NumWritePtrChecks = 0;
    bool CanDoAliasSetRT = true;
    ++ASId;

    // We assign consecutive ids to accesses from different dependence sets.
    // Accesses within the same set don't need a runtime check.
    unsigned RunningDepId = 1;
    DenseMap<Value *, unsigned> DepSetId;

    SmallVector<MemAccessInfo, 4> Retries;

    // First, count how many write and read accesses are in the alias set. Also
    // collect MemAccessInfos for later.
    SmallVector<MemAccessInfo, 4> AccessInfos;
    for (const auto &A : AS) {
      Value *Ptr = A.getValue();
      bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));

      if (IsWrite)
        ++NumWritePtrChecks;
      else
        ++NumReadPtrChecks;
      AccessInfos.emplace_back(Ptr, IsWrite);
    }

    // We do not need runtime checks for this alias set, if there are no writes
    // or a single write and no reads.
    if (NumWritePtrChecks == 0 ||
        (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) {
      assert((AS.size() <= 1 ||
              all_of(AS,
                     [this](auto AC) {
                       MemAccessInfo AccessWrite(AC.getValue(), true);
                       return DepCands.findValue(AccessWrite) == DepCands.end();
                     })) &&
             "Can only skip updating CanDoRT below, if all entries in AS "
             "are reads or there is at most 1 entry");
      continue;
    }

    for (auto &Access : AccessInfos) {
      if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
                                RunningDepId, ASId, ShouldCheckWrap, false)) {
        LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
                          << *Access.getPointer() << '\n');
        Retries.push_back(Access);
        CanDoAliasSetRT = false;
      }
    }

    // Note that this function computes CanDoRT and MayNeedRTCheck
    // independently. For example CanDoRT=false, MayNeedRTCheck=false means that
    // we have a pointer for which we couldn't find the bounds but we don't
    // actually need to emit any checks so it does not matter.
    //
    // We need runtime checks for this alias set, if there are at least 2
    // dependence sets (in which case RunningDepId > 2) or if we need to re-try
    // any bound checks (because in that case the number of dependence sets is
    // incomplete).
    bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty();

    // We need to perform run-time alias checks, but some pointers had bounds
    // that couldn't be checked.
    if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
      // Reset the CanDoSetRt flag and retry all accesses that have failed.
      // We know that we need these checks, so we can now be more aggressive
      // and add further checks if required (overflow checks).
      CanDoAliasSetRT = true;
      for (auto Access : Retries)
        if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
                                  TheLoop, RunningDepId, ASId,
                                  ShouldCheckWrap, /*Assume=*/true)) {
          CanDoAliasSetRT = false;
          break;
        }
    }

    CanDoRT &= CanDoAliasSetRT;
    MayNeedRTCheck |= NeedsAliasSetRTCheck;
    ++ASId;
  }

  // If the pointers that we would use for the bounds comparison have different
  // address spaces, assume the values aren't directly comparable, so we can't
  // use them for the runtime check. We also have to assume they could
  // overlap. In the future there should be metadata for whether address spaces
  // are disjoint.
  unsigned NumPointers = RtCheck.Pointers.size();
  for (unsigned i = 0; i < NumPointers; ++i) {
    for (unsigned j = i + 1; j < NumPointers; ++j) {
      // Only need to check pointers between two different dependency sets.
      if (RtCheck.Pointers[i].DependencySetId ==
          RtCheck.Pointers[j].DependencySetId)
        continue;
      // Only need to check pointers in the same alias set.
      if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
        continue;

      Value *PtrI = RtCheck.Pointers[i].PointerValue;
      Value *PtrJ = RtCheck.Pointers[j].PointerValue;

      unsigned ASi = PtrI->getType()->getPointerAddressSpace();
      unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
      if (ASi != ASj) {
        LLVM_DEBUG(
            dbgs() << "LAA: Runtime check would require comparison between"
                      " different address spaces\n");
        return false;
      }
    }
  }

  if (MayNeedRTCheck && CanDoRT)
    RtCheck.generateChecks(DepCands, IsDepCheckNeeded);

  LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
                    << " pointer comparisons.\n");

  // If we can do run-time checks, but there are no checks, no runtime checks
  // are needed. This can happen when all pointers point to the same underlying
  // object for example.
  RtCheck.Need = CanDoRT ? RtCheck.getNumberOfChecks() != 0 : MayNeedRTCheck;

  bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT;
  if (!CanDoRTIfNeeded)
    RtCheck.reset();
  return CanDoRTIfNeeded;
}

void AccessAnalysis::processMemAccesses() {
  // We process the set twice: first we process read-write pointers, last we
  // process read-only pointers. This allows us to skip dependence tests for
  // read-only pointers.

  LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
  LLVM_DEBUG(dbgs() << "  AST: "; AST.dump());
  LLVM_DEBUG(dbgs() << "LAA:   Accesses(" << Accesses.size() << "):\n");
  LLVM_DEBUG({
    for (auto A : Accesses)
      dbgs() << "\t" << *A.getPointer() << " ("
             << (A.getInt() ? "write"
                            : (ReadOnlyPtr.count(A.getPointer()) ? "read-only"
                                                                 : "read"))
             << ")\n";
  });

  // The AliasSetTracker has nicely partitioned our pointers by metadata
  // compatibility and potential for underlying-object overlap. As a result, we
  // only need to check for potential pointer dependencies within each alias
  // set.
  for (const auto &AS : AST) {
    // Note that both the alias-set tracker and the alias sets themselves use
    // linked lists internally and so the iteration order here is deterministic
    // (matching the original instruction order within each set).

    bool SetHasWrite = false;

    // Map of pointers to last access encountered.
    typedef DenseMap<const Value*, MemAccessInfo> UnderlyingObjToAccessMap;
    UnderlyingObjToAccessMap ObjToLastAccess;

    // Set of accesses to check after all writes have been processed.
    PtrAccessSet DeferredAccesses;

    // Iterate over each alias set twice, once to process read/write pointers,
    // and then to process read-only pointers.
    for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
      bool UseDeferred = SetIteration > 0;
      PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;

      for (const auto &AV : AS) {
        Value *Ptr = AV.getValue();

        // For a single memory access in AliasSetTracker, Accesses may contain
        // both read and write, and they both need to be handled for CheckDeps.
        for (const auto &AC : S) {
          if (AC.getPointer() != Ptr)
            continue;

          bool IsWrite = AC.getInt();

          // If we're using the deferred access set, then it contains only
          // reads.
          bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
          if (UseDeferred && !IsReadOnlyPtr)
            continue;
          // Otherwise, the pointer must be in the PtrAccessSet, either as a
          // read or a write.
          assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
                  S.count(MemAccessInfo(Ptr, false))) &&
                 "Alias-set pointer not in the access set?");

          MemAccessInfo Access(Ptr, IsWrite);
          DepCands.insert(Access);

          // Memorize read-only pointers for later processing and skip them in
          // the first round (they need to be checked after we have seen all
          // write pointers). Note: we also mark pointers that are not
          // consecutive as "read-only" pointers (so that we check
          // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
          if (!UseDeferred && IsReadOnlyPtr) {
            DeferredAccesses.insert(Access);
            continue;
          }

          // If this is a write - check other reads and writes for conflicts. If
          // this is a read only check other writes for conflicts (but only if
          // there is no other write to the ptr - this is an optimization to
          // catch "a[i] = a[i] + " without having to do a dependence check).
          if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
            CheckDeps.push_back(Access);
            IsRTCheckAnalysisNeeded = true;
          }

          if (IsWrite)
            SetHasWrite = true;

          // Create sets of pointers connected by a shared alias set and
          // underlying object.
          typedef SmallVector<const Value *, 16> ValueVector;
          ValueVector TempObjects;

          getUnderlyingObjects(Ptr, TempObjects, LI);
          LLVM_DEBUG(dbgs()
                     << "Underlying objects for pointer " << *Ptr << "\n");
          for (const Value *UnderlyingObj : TempObjects) {
            // nullptr never aliases, so don't join sets for pointers that have
            // "null" in their UnderlyingObjects list.
            if (isa<ConstantPointerNull>(UnderlyingObj) &&
                !NullPointerIsDefined(
                    TheLoop->getHeader()->getParent(),
                    UnderlyingObj->getType()->getPointerAddressSpace()))
              continue;

            UnderlyingObjToAccessMap::iterator Prev =
                ObjToLastAccess.find(UnderlyingObj);
            if (Prev != ObjToLastAccess.end())
              DepCands.unionSets(Access, Prev->second);

            ObjToLastAccess[UnderlyingObj] = Access;
            LLVM_DEBUG(dbgs() << "  " << *UnderlyingObj << "\n");
          }
        }
      }
    }
  }
}

static bool isInBoundsGep(Value *Ptr) {
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
    return GEP->isInBounds();
  return false;
}

/// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
/// i.e. monotonically increasing/decreasing.
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
                           PredicatedScalarEvolution &PSE, const Loop *L) {
  // FIXME: This should probably only return true for NUW.
  if (AR->getNoWrapFlags(SCEV::NoWrapMask))
    return true;

  // Scalar evolution does not propagate the non-wrapping flags to values that
  // are derived from a non-wrapping induction variable because non-wrapping
  // could be flow-sensitive.
  //
  // Look through the potentially overflowing instruction to try to prove
  // non-wrapping for the *specific* value of Ptr.

  // The arithmetic implied by an inbounds GEP can't overflow.
  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
  if (!GEP || !GEP->isInBounds())
    return false;

  // Make sure there is only one non-const index and analyze that.
  Value *NonConstIndex = nullptr;
  for (Value *Index : GEP->indices())
    if (!isa<ConstantInt>(Index)) {
      if (NonConstIndex)
        return false;
      NonConstIndex = Index;
    }
  if (!NonConstIndex)
    // The recurrence is on the pointer, ignore for now.
    return false;

  // The index in GEP is signed. It is non-wrapping if it's derived from a NSW
  // AddRec using a NSW operation.
  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
    if (OBO->hasNoSignedWrap() &&
        // Assume constant for the other operand so that the AddRec can be
        // easily found.
        isa<ConstantInt>(OBO->getOperand(1))) {
      auto *OpScev = PSE.getSCEV(OBO->getOperand(0));

      if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
        return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
    }

  return false;
}

/// Check whether the access through \p Ptr has a constant stride.
int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy,
                           Value *Ptr, const Loop *Lp,
                           const ValueToValueMap &StridesMap, bool Assume,
                           bool ShouldCheckWrap) {
  Type *Ty = Ptr->getType();
  assert(Ty->isPointerTy() && "Unexpected non-ptr");
  unsigned AddrSpace = Ty->getPointerAddressSpace();

  // Make sure we're not accessing an aggregate type.
  // TODO: Why? This doesn't make any sense.
  if (AccessTy->isAggregateType()) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
                      << *Ptr << "\n");
    return 0;
  }

  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
  if (Assume && !AR)
    AR = PSE.getAsAddRec(Ptr);

  if (!AR) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
                      << " SCEV: " << *PtrScev << "\n");
    return 0;
  }

  // The access function must stride over the innermost loop.
  if (Lp != AR->getLoop()) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
                      << *Ptr << " SCEV: " << *AR << "\n");
    return 0;
  }

  // The address calculation must not wrap. Otherwise, a dependence could be
  // inverted.
  // An inbounds getelementptr that is an AddRec with a unit stride
  // cannot wrap per definition. The unit stride requirement is checked later.
  // A getelementptr without an inbounds attribute and unit stride would have
  // to access the pointer value "0" which is undefined behavior in address
  // space 0, therefore we can also vectorize this case.
  bool IsInBoundsGEP = isInBoundsGep(Ptr);
  bool IsNoWrapAddRec =
      !ShouldCheckWrap ||
      PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
      isNoWrapAddRec(Ptr, AR, PSE, Lp);
  if (!IsNoWrapAddRec && !IsInBoundsGEP &&
      NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace)) {
    if (Assume) {
      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
      IsNoWrapAddRec = true;
      LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
                        << "LAA:   Pointer: " << *Ptr << "\n"
                        << "LAA:   SCEV: " << *AR << "\n"
                        << "LAA:   Added an overflow assumption\n");
    } else {
      LLVM_DEBUG(
          dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
                 << *Ptr << " SCEV: " << *AR << "\n");
      return 0;
    }
  }

  // Check the step is constant.
  const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());

  // Calculate the pointer stride and check if it is constant.
  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
  if (!C) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
                      << " SCEV: " << *AR << "\n");
    return 0;
  }

  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
  int64_t Size = DL.getTypeAllocSize(AccessTy);
  const APInt &APStepVal = C->getAPInt();

  // Huge step value - give up.
  if (APStepVal.getBitWidth() > 64)
    return 0;

  int64_t StepVal = APStepVal.getSExtValue();

  // Strided access.
  int64_t Stride = StepVal / Size;
  int64_t Rem = StepVal % Size;
  if (Rem)
    return 0;

  // If the SCEV could wrap but we have an inbounds gep with a unit stride we
  // know we can't "wrap around the address space". In case of address space
  // zero we know that this won't happen without triggering undefined behavior.
  if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
      (IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
                                              AddrSpace))) {
    if (Assume) {
      // We can avoid this case by adding a run-time check.
      LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
                        << "inbounds or in address space 0 may wrap:\n"
                        << "LAA:   Pointer: " << *Ptr << "\n"
                        << "LAA:   SCEV: " << *AR << "\n"
                        << "LAA:   Added an overflow assumption\n");
      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
    } else
      return 0;
  }

  return Stride;
}
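
// For illustration: an access A[2 * i] over i32 elements has a constant SCEV
// step of 8 bytes; with Size = 4 this yields Stride = 2 and Rem = 0. A step
// of 6 bytes over the same type leaves Rem = 2, so the access is reported as
// non-strided by returning 0.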

Optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
                                    Value *PtrB, const DataLayout &DL,
                                    ScalarEvolution &SE, bool StrictCheck,
                                    bool CheckType) {
  assert(PtrA && PtrB && "Expected non-nullptr pointers.");
  assert(cast<PointerType>(PtrA->getType())
             ->isOpaqueOrPointeeTypeMatches(ElemTyA) && "Wrong PtrA type");
  assert(cast<PointerType>(PtrB->getType())
             ->isOpaqueOrPointeeTypeMatches(ElemTyB) && "Wrong PtrB type");

  // Make sure that A and B are different pointers.
  if (PtrA == PtrB)
    return 0;

  // Make sure that the element types are the same if required.
  if (CheckType && ElemTyA != ElemTyB)
    return None;

  unsigned ASA = PtrA->getType()->getPointerAddressSpace();
  unsigned ASB = PtrB->getType()->getPointerAddressSpace();

  // Check that the address spaces match.
  if (ASA != ASB)
    return None;
  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);

  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
  Value *PtrA1 = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
  Value *PtrB1 = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);

  int Val;
  if (PtrA1 == PtrB1) {
    // Retrieve the address space again as pointer stripping now tracks through
    // `addrspacecast`.
    ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace();
    ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace();
    // Check that the address spaces match and that the pointers are valid.
    if (ASA != ASB)
      return None;

    IdxWidth = DL.getIndexSizeInBits(ASA);
    OffsetA = OffsetA.sextOrTrunc(IdxWidth);
    OffsetB = OffsetB.sextOrTrunc(IdxWidth);

    OffsetB -= OffsetA;
    Val = OffsetB.getSExtValue();
  } else {
    // Otherwise compute the distance with SCEV between the base pointers.
    const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
    const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
    const auto *Diff =
        dyn_cast<SCEVConstant>(SE.getMinusSCEV(PtrSCEVB, PtrSCEVA));
    if (!Diff)
      return None;
    Val = Diff->getAPInt().getSExtValue();
  }
  int Size = DL.getTypeStoreSize(ElemTyA);
  int Dist = Val / Size;

  // Ensure that the calculated distance matches the type-based one after all
  // the bitcasts removal in the provided pointers.
  if (!StrictCheck || Dist * Size == Val)
    return Dist;
  return None;
}
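
// Usage sketch (hypothetical values, for illustration only):
//   Optional<int> D = getPointersDiff(Int32Ty, PtrA, Int32Ty, PtrB, DL, SE,
//                                     /*StrictCheck=*/true);
//   if (D && *D == 1)
//     ...; // PtrB points at the element immediately after PtrA.
// For i32 pointers 12 bytes apart, Val = 12 and Size = 4 give a distance of
// 3; with StrictCheck, a byte distance such as 10 that is not a multiple of
// the element size yields None instead.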

bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
                           const DataLayout &DL, ScalarEvolution &SE,
                           SmallVectorImpl<unsigned> &SortedIndices) {
  assert(llvm::all_of(
             VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
         "Expected list of pointer operands.");
  // Walk over the pointers, and map each of them to an offset relative to
  // first pointer in the array.
  Value *Ptr0 = VL[0];

  using DistOrdPair = std::pair<int64_t, int>;
  auto Compare = [](const DistOrdPair &L, const DistOrdPair &R) {
    return L.first < R.first;
  };
  std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
  Offsets.emplace(0, 0);
  int Cnt = 1;
  bool IsConsecutive = true;
  for (auto *Ptr : VL.drop_front()) {
    Optional<int> Diff = getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
                                         /*StrictCheck=*/true);
    if (!Diff)
      return false;

    // Check if the pointer with the same offset is found.
    int64_t Offset = *Diff;
    auto Res = Offsets.emplace(Offset, Cnt);
    if (!Res.second)
      return false;
    // Consecutive order if the inserted element is the last one.
    IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end();
    ++Cnt;
  }
  SortedIndices.clear();
  if (!IsConsecutive) {
    // Fill SortedIndices array only if it is non-consecutive.
    SortedIndices.resize(VL.size());
    Cnt = 0;
    for (const std::pair<int64_t, int> &Pair : Offsets) {
      SortedIndices[Cnt] = Pair.second;
      ++Cnt;
    }
  }
  return true;
}
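
// For illustration: if VL holds pointers at element offsets 2, 0, and 1 from
// a common base, every pairwise difference is constant, so the call succeeds
// with SortedIndices = {1, 2, 0} (the offset-sorted order). A repeated offset
// or a non-constant difference makes the function return false.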

/// Returns true if the memory operations \p A and \p B are consecutive.
bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
                               ScalarEvolution &SE, bool CheckType) {
  Value *PtrA = getLoadStorePointerOperand(A);
  Value *PtrB = getLoadStorePointerOperand(B);
  if (!PtrA || !PtrB)
    return false;
  Type *ElemTyA = getLoadStoreType(A);
  Type *ElemTyB = getLoadStoreType(B);
  Optional<int> Diff = getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
                                       /*StrictCheck=*/true, CheckType);
  return Diff && *Diff == 1;
}

static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
                          function_ref<void(Value *)> AddPointer) {
  SmallPtrSet<Value *, 8> Visited;
  SmallVector<Value *> WorkList;
  WorkList.push_back(StartPtr);

  while (!WorkList.empty()) {
    Value *Ptr = WorkList.pop_back_val();
    if (!Visited.insert(Ptr).second)
      continue;
    auto *PN = dyn_cast<PHINode>(Ptr);
    // SCEV does not look through non-header PHIs inside the loop. Such phis
    // can be analyzed by adding separate accesses for each incoming pointer
    // value.
    if (PN && InnermostLoop.contains(PN->getParent()) &&
        PN->getParent() != InnermostLoop.getHeader()) {
      for (const Use &Inc : PN->incoming_values())
        WorkList.push_back(Inc);
    } else
      AddPointer(Ptr);
  }
}

void MemoryDepChecker::addAccess(StoreInst *SI) {
  visitPointers(SI->getPointerOperand(), *InnermostLoop,
                [this, SI](Value *Ptr) {
                  Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
                  InstMap.push_back(SI);
                  ++AccessIdx;
                });
}

void MemoryDepChecker::addAccess(LoadInst *LI) {
  visitPointers(LI->getPointerOperand(), *InnermostLoop,
                [this, LI](Value *Ptr) {
                  Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
                  InstMap.push_back(LI);
                  ++AccessIdx;
                });
}

MemoryDepChecker::VectorizationSafetyStatus
MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
  switch (Type) {
  case NoDep:
  case Forward:
  case BackwardVectorizable:
    return VectorizationSafetyStatus::Safe;

  case Unknown:
    return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
  case ForwardButPreventsForwarding:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return VectorizationSafetyStatus::Unsafe;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isBackward() const {
  switch (Type) {
  case NoDep:
  case Forward:
  case ForwardButPreventsForwarding:
  case Unknown:
    return false;

  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return true;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
  return isBackward() || Type == Unknown;
}

bool MemoryDepChecker::Dependence::isForward() const {
  switch (Type) {
  case Forward:
  case ForwardButPreventsForwarding:
    return true;

  case NoDep:
  case Unknown:
  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return false;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
                                                    uint64_t TypeByteSize) {
  // If loads occur at a distance that is not a multiple of a feasible vector
  // factor store-load forwarding does not take place.
  // Positive dependences might cause troubles because vectorizing them might
  // prevent store-load forwarding making vectorized code run a lot slower.
  //   a[i] = a[i-3] ^ a[i-8];
  // The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
  // hence on your typical architecture store-load forwarding does not take
  // place. Vectorizing in such cases does not make sense.
  // Store-load forwarding distance.

  // After this many iterations store-to-load forwarding conflicts should not
  // cause any slowdowns.
  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
  // Maximum vector factor.
  uint64_t MaxVFWithoutSLForwardIssues = std::min(
      VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);

  // Compute the smallest VF at which the store and load would be misaligned.
  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
       VF *= 2) {
    // If the number of vector iterations between the store and the load are
    // small we could incur conflicts.
    if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
      MaxVFWithoutSLForwardIssues = (VF >> 1);
      break;
    }
  }

  if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
    LLVM_DEBUG(
        dbgs() << "LAA: Distance " << Distance
               << " that could cause a store-load forwarding conflict\n");
    return true;
  }

  if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
      MaxVFWithoutSLForwardIssues !=
          VectorizerParams::MaxVectorWidth * TypeByteSize)
    MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
  return false;
}
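
// A worked example (illustrative): with 4-byte elements and Distance = 12,
// the loop tries VF = 8 first; 12 % 8 != 0 and 12 / 8 = 1 is far below the
// 32-iteration threshold, so MaxVFWithoutSLForwardIssues shrinks to 4 bytes.
// That is less than 2 * TypeByteSize, so the distance is reported as a
// store-to-load forwarding conflict.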

void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
  if (Status < S)
    Status = S;
}

/// Given a non-constant (unknown) dependence-distance \p Dist between two
/// memory accesses, that have the same stride whose absolute value is given
/// in \p Stride, and that have the same type size \p TypeByteSize,
/// in a loop whose takenCount is \p BackedgeTakenCount, check if it is
/// possible to prove statically that the dependence distance is larger
/// than the range that the accesses will travel through the execution of
/// the loop. If so, return true; false otherwise. This is useful for
/// example in loops such as the following (PR31098):
///     for (i = 0; i < D; ++i) {
///                = out[i];
///       out[i+D] =
///     }
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
                                     const SCEV &BackedgeTakenCount,
                                     const SCEV &Dist, uint64_t Stride,
                                     uint64_t TypeByteSize) {

  // If we can prove that
  //      (**) |Dist| > BackedgeTakenCount * Step
  // where Step is the absolute stride of the memory accesses in bytes,
  // then there is no dependence.
  //
  // Rationale:
  // We basically want to check if the absolute distance (|Dist/Step|)
  // is >= the loop iteration count (or > BackedgeTakenCount).
  // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
  // Section 4.2.1); Note, that for vectorization it is sufficient to prove
  // that the dependence distance is >= VF; This is checked elsewhere.
  // But in some cases we can prune unknown dependence distances early, and
  // even before selecting the VF, and without a runtime test, by comparing
  // the distance against the loop iteration count. Since the vectorized code
  // will be executed only if LoopCount >= VF, proving distance >= LoopCount
  // also guarantees that distance >= VF.
  //
  const uint64_t ByteStride = Stride * TypeByteSize;
  const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
  const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);

  const SCEV *CastedDist = &Dist;
  const SCEV *CastedProduct = Product;
  uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
  uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());

  // The dependence distance can be positive/negative, so we sign extend Dist;
  // The multiplication of the absolute stride in bytes and the
  // backedgeTakenCount is non-negative, so we zero extend Product.
  if (DistTypeSize > ProductTypeSize)
    CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
  else
    CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());

  // Is Dist - (BackedgeTakenCount * Step) > 0 ?
  // (If so, then we have proven (**) because |Dist| >= Dist)
  const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
  if (SE.isKnownPositive(Minus))
    return true;

  // Second try: Is -Dist - (BackedgeTakenCount * Step) > 0 ?
  // (If so, then we have proven (**) because |Dist| >= -1*Dist)
  const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
  Minus = SE.getMinusSCEV(NegDist, CastedProduct);
  if (SE.isKnownPositive(Minus))
    return true;

  return false;
}
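
// For illustration, in the PR31098 loop above with i32 elements: Dist is
// 4 * D bytes, Stride is 1, and BackedgeTakenCount is D - 1, so
// Dist - BTC * ByteStride = 4 * D - 4 * (D - 1) = 4 > 0, and SCEV proves the
// two accesses can never overlap within a single execution of the loop.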

/// Check the dependence for two accesses with the same stride \p Stride.
/// \p Distance is the positive distance and \p TypeByteSize is type size in
/// bytes.
///
/// \returns true if they are independent.
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
                                          uint64_t TypeByteSize) {
  assert(Stride > 1 && "The stride must be greater than 1");
  assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
  assert(Distance > 0 && "The distance must be non-zero");

  // Skip if the distance is not a multiple of type byte size.
  if (Distance % TypeByteSize)
    return false;

  uint64_t ScaledDist = Distance / TypeByteSize;

  // No dependence if the scaled distance is not a multiple of the stride.
  // E.g.
  //      for (i = 0; i < 1024 ; i += 4)
  //        A[i+2] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 2, stride is 4):
  //     | A[0] |      |      |      | A[4] |      |      |      |
  //     |      |      | A[2] |      |      |      | A[6] |      |
  //
  // E.g.
  //      for (i = 0; i < 1024 ; i += 3)
  //        A[i+4] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 4, stride is 3):
  //     | A[0] |      |      | A[3] |      |      | A[6] |      |      |
  //     |      |      |      |      | A[4] |      |      | A[7] |      |
  return ScaledDist % Stride;
}

MemoryDepChecker::Dependence::DepType
MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
                              const MemAccessInfo &B, unsigned BIdx,
                              const ValueToValueMap &Strides) {
  assert(AIdx < BIdx && "Must pass arguments in program order");

  Value *APtr = A.getPointer();
  Value *BPtr = B.getPointer();
  bool AIsWrite = A.getInt();
  bool BIsWrite = B.getInt();
  Type *ATy = APtr->getType()->getPointerElementType();
  Type *BTy = BPtr->getType()->getPointerElementType();

  // Two reads are independent.
  if (!AIsWrite && !BIsWrite)
    return Dependence::NoDep;

  // We cannot check pointers in different address spaces.
  if (APtr->getType()->getPointerAddressSpace() !=
      BPtr->getType()->getPointerAddressSpace())
    return Dependence::Unknown;

  int64_t StrideAPtr =
      getPtrStride(PSE, ATy, APtr, InnermostLoop, Strides, true);
  int64_t StrideBPtr =
      getPtrStride(PSE, BTy, BPtr, InnermostLoop, Strides, true);

  const SCEV *Src = PSE.getSCEV(APtr);
  const SCEV *Sink = PSE.getSCEV(BPtr);

  // If the induction step is negative we have to invert source and sink of the
  // dependence.
  if (StrideAPtr < 0) {
    std::swap(APtr, BPtr);
    std::swap(ATy, BTy);
    std::swap(Src, Sink);
    std::swap(AIsWrite, BIsWrite);
    std::swap(AIdx, BIdx);
    std::swap(StrideAPtr, StrideBPtr);
  }

  const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);

  LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
                    << "(Induction step: " << StrideAPtr << ")\n");
  LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
                    << *InstMap[BIdx] << ": " << *Dist << "\n");

  // Need accesses with constant stride. We don't want to vectorize
  // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
  // the address space.
  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
    LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
    return Dependence::Unknown;
  }

  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
  uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
  uint64_t Stride = std::abs(StrideAPtr);
  const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
  if (!C) {
    if (!isa<SCEVCouldNotCompute>(Dist) &&
        TypeByteSize == DL.getTypeAllocSize(BTy) &&
        isSafeDependenceDistance(DL, *(PSE.getSE()),
                                 *(PSE.getBackedgeTakenCount()), *Dist, Stride,
                                 TypeByteSize))
      return Dependence::NoDep;

    LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
    FoundNonConstantDistanceDependence = true;
    return Dependence::Unknown;
  }
1585 
1586  const APInt &Val = C->getAPInt();
1587  int64_t Distance = Val.getSExtValue();
1588 
1589  // Attempt to prove strided accesses independent.
1590  if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
1591  areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
1592  LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
1593  return Dependence::NoDep;
1594  }
1595 
1596  // Negative distances are not plausible dependencies.
1597  if (Val.isNegative()) {
1598  bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
1599  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1600  (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
1601  ATy != BTy)) {
1602  LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
1603  return Dependence::ForwardButPreventsForwarding;
1604  }
1605 
1606  LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
1607  return Dependence::Forward;
1608  }
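  // As a sketch (hypothetical loop, not from this file): in
  //   for (i = 0; i < N; ++i) { int x = A[i+1]; A[i] = x; }
  // the load of A[i+1] precedes the store to A[i], so with 4-byte ints
  // Dist = -4 and Dependence::Forward is returned. The store-to-load
  // forwarding check above only fires when the earlier access is the
  // write (a true data dependence), which is not the case here.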
1609 
1610  // Write to the same location with the same size.
1611  // Could be improved to assert type sizes are the same (i32 == float, etc).
1612  if (Val == 0) {
1613  if (ATy == BTy)
1614  return Dependence::Forward;
1615  LLVM_DEBUG(
1616  dbgs() << "LAA: Zero dependence difference but different types\n");
1617  return Dependence::Unknown;
1618  }
1619 
1620  assert(Val.isStrictlyPositive() && "Expect a positive value");
1621 
1622  if (ATy != BTy) {
1623  LLVM_DEBUG(
1624  dbgs()
1625  << "LAA: ReadWrite-Write positive dependency with different types\n");
1626  return Dependence::Unknown;
1627  }
1628 
1629  // Bail out early if passed-in parameters make vectorization not feasible.
1630  unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
1631  VectorizerParams::VectorizationFactor : 1);
1632  unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
1633  VectorizerParams::VectorizationInterleave : 1);
1634  // The minimum number of iterations for a vectorized/unrolled version.
1635  unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
1636 
1637  // It's not vectorizable if the distance is smaller than the minimum distance
1638  // needed for a vectorized/unrolled version. Vectorizing one iteration in
1639  // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
1640  // TypeByteSize (no need to add the last gap distance).
1641  //
1642  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1643  // foo(int *A) {
1644  // int *B = (int *)((char *)A + 14);
1645  // for (i = 0 ; i < 1024 ; i += 2)
1646  // B[i] = A[i] + 1;
1647  // }
1648  //
1649  // Two accesses in memory (stride is 2):
1650  // | A[0] | | A[2] | | A[4] | | A[6] | |
1651  // | B[0] | | B[2] | | B[4] |
1652  //
1653  // Distance needed for vectorizing iterations except the last iteration:
1654  // 4 * 2 * (MinNumIter - 1). Distance needed for the last iteration: 4.
1655  // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
1656  //
1657  // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
1658  // 12, which is less than the distance of 14.
1659  //
1660  // If MinNumIter is 4 (say, if a user forces the vectorization factor to be
1661  // 4), the minimum distance needed is 28, which is greater than the distance
1662  // of 14. It is not safe to vectorize.
1663  uint64_t MinDistanceNeeded =
1664  TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
1665  if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
1666  LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "
1667  << Distance << '\n');
1668  return Dependence::Backward;
1669  }
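  // Plugging the example above into the formula (Distance = 14,
  // TypeByteSize = 4, Stride = 2):
  //   MinNumIter == 2: MinDistanceNeeded = 4 * 2 * (2 - 1) + 4 = 12 <= 14, OK.
  //   MinNumIter == 4: MinDistanceNeeded = 4 * 2 * (4 - 1) + 4 = 28 > 14,
  //                    so Dependence::Backward is returned.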
1670 
1671  // Unsafe if the minimum distance needed is greater than max safe distance.
1672  if (MinDistanceNeeded > MaxSafeDepDistBytes) {
1673  LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
1674  << MinDistanceNeeded << " size in bytes");
1675  return Dependence::Backward;
1676  }
1677 
1678  // Positive distance bigger than max vectorization factor.
1679  // FIXME: Should use max factor instead of max distance in bytes, which
1680  // cannot handle different types.
1681  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1682  // void foo (int *A, char *B) {
1683  // for (unsigned i = 0; i < 1024; i++) {
1684  // A[i+2] = A[i] + 1;
1685  // B[i+2] = B[i] + 1;
1686  // }
1687  // }
1688  //
1689  // This case is currently unsafe according to the max safe distance. If we
1690  // analyze the two accesses on array B first, the max safe dependence
1691  // distance is 2. Then we analyze the accesses on array A, whose minimum
1692  // distance needed is 8; that exceeds 2, so vectorization is rejected. But
1693  // actually both A and B could safely be vectorized by 2 iterations.
1694  MaxSafeDepDistBytes =
1695  std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
1696 
1697  bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
1698  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1699  couldPreventStoreLoadForward(Distance, TypeByteSize))
1700  return Dependence::BackwardVectorizableButPreventsForwarding;
1701 
1702  uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
1703  LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
1704  << " with max VF = " << MaxVF << '\n');
1705  uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
1706  MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits);
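  // A numeric sketch (values assumed for illustration only): with
  // MaxSafeDepDistBytes = 16, TypeByteSize = 4 and Stride = 1, MaxVF is
  // 16 / (4 * 1) = 4 lanes, and MaxVFInBits = 4 * 4 * 8 = 128 bits, which
  // then caps MaxSafeVectorWidthInBits at 128.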
1707  return Dependence::BackwardVectorizable;
1708 }
1709 
1710 bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
1711  MemAccessInfoList &CheckDeps,
1712  const ValueToValueMap &Strides) {
1713 
1714  MaxSafeDepDistBytes = -1;
1715  SmallPtrSet<MemAccessInfo, 8> Visited;
1716  for (MemAccessInfo CurAccess : CheckDeps) {
1717  if (Visited.count(CurAccess))
1718  continue;
1719 
1720  // Get the relevant memory access set.
1721  EquivalenceClasses<MemAccessInfo>::iterator I =
1722  AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
1723 
1724  // Check accesses within this set.
1725  EquivalenceClasses<MemAccessInfo>::member_iterator AI =
1726  AccessSets.member_begin(I);
1727  EquivalenceClasses<MemAccessInfo>::member_iterator AE =
1728  AccessSets.member_end();
1729 
1730  // Check every access pair.
1731  while (AI != AE) {
1732  Visited.insert(*AI);
1733  bool AIIsWrite = AI->getInt();
1734  // Check loads only against the next equivalence class, but stores also
1735  // against other stores in the same equivalence class - to the same address.
1736  EquivalenceClasses<MemAccessInfo>::member_iterator OI =
1737  (AIIsWrite ? AI : std::next(AI));
1738  while (OI != AE) {
1739  // Check every accessing instruction pair in program order.
1740  for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
1741  I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
1742  // Scan all accesses of another equivalence class, but only the next
1743  // accesses of the same equivalence class.
1744  for (std::vector<unsigned>::iterator
1745  I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),
1746  I2E = (OI == AI ? I1E : Accesses[*OI].end());
1747  I2 != I2E; ++I2) {
1748  auto A = std::make_pair(&*AI, *I1);
1749  auto B = std::make_pair(&*OI, *I2);
1750 
1751  assert(*I1 != *I2);
1752  if (*I1 > *I2)
1753  std::swap(A, B);
1754 
1755  Dependence::DepType Type =
1756  isDependent(*A.first, A.second, *B.first, B.second, Strides);
1757  mergeInStatus(Dependence::isSafeForVectorization(Type));
1758 
1759  // Gather dependences unless we accumulated MaxDependences
1760  // dependences. In that case return as soon as we find the first
1761  // unsafe dependence. This puts a limit on this quadratic
1762  // algorithm.
1763  if (RecordDependences) {
1764  if (Type != Dependence::NoDep)
1765  Dependences.push_back(Dependence(A.second, B.second, Type));
1766 
1767  if (Dependences.size() >= MaxDependences) {
1768  RecordDependences = false;
1769  Dependences.clear();
1770  LLVM_DEBUG(dbgs()
1771  << "Too many dependences, stopped recording\n");
1772  }
1773  }
1774  if (!RecordDependences && !isSafeForVectorization())
1775  return false;
1776  }
1777  ++OI;
1778  }
1779  AI++;
1780  }
1781  }
1782 
1783  LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
1784  return isSafeForVectorization();
1785 }
1786 
1787 SmallVector<Instruction *, 4>
1788 MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
1789  MemAccessInfo Access(Ptr, isWrite);
1790  auto &IndexVector = Accesses.find(Access)->second;
1791 
1792  SmallVector<Instruction *, 4> Insts;
1793  transform(IndexVector,
1794  std::back_inserter(Insts),
1795  [&](unsigned Idx) { return this->InstMap[Idx]; });
1796  return Insts;
1797 }
1798 
1799 const char *MemoryDepChecker::Dependence::DepName[] = {
1800  "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
1801  "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
1802 
1803 void MemoryDepChecker::Dependence::print(
1804  raw_ostream &OS, unsigned Depth,
1805  const SmallVectorImpl<Instruction *> &Instrs) const {
1806  OS.indent(Depth) << DepName[Type] << ":\n";
1807  OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
1808  OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
1809 }
1810 
1811 bool LoopAccessInfo::canAnalyzeLoop() {
1812  // We need to have a loop header.
1813  LLVM_DEBUG(dbgs() << "LAA: Found a loop in "
1814  << TheLoop->getHeader()->getParent()->getName() << ": "
1815  << TheLoop->getHeader()->getName() << '\n');
1816 
1817  // We can only analyze innermost loops.
1818  if (!TheLoop->isInnermost()) {
1819  LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
1820  recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
1821  return false;
1822  }
1823 
1824  // We must have a single backedge.
1825  if (TheLoop->getNumBackEdges() != 1) {
1826  LLVM_DEBUG(
1827  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1828  recordAnalysis("CFGNotUnderstood")
1829  << "loop control flow is not understood by analyzer";
1830  return false;
1831  }
1832 
1833  // ScalarEvolution needs to be able to find the exit count.
1834  const SCEV *ExitCount = PSE->getBackedgeTakenCount();
1835  if (isa<SCEVCouldNotCompute>(ExitCount)) {
1836  recordAnalysis("CantComputeNumberOfIterations")
1837  << "could not determine number of loop iterations";
1838  LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
1839  return false;
1840  }
1841 
1842  return true;
1843 }
1844 
1845 void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
1846  const TargetLibraryInfo *TLI,
1847  DominatorTree *DT) {
1848  typedef SmallPtrSet<Value*, 16> ValueSet;
1849 
1850  // Holds the Load and Store instructions.
1851  SmallVector<LoadInst *, 16> Loads;
1852  SmallVector<StoreInst *, 16> Stores;
1853 
1854  // Holds all the different accesses in the loop.
1855  unsigned NumReads = 0;
1856  unsigned NumReadWrites = 0;
1857 
1858  bool HasComplexMemInst = false;
1859 
1860  // A runtime check is only legal to insert if there are no convergent calls.
1861  HasConvergentOp = false;
1862 
1863  PtrRtChecking->Pointers.clear();
1864  PtrRtChecking->Need = false;
1865 
1866  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
1867 
1868  const bool EnableMemAccessVersioningOfLoop =
1869  EnableMemAccessVersioning &&
1870  !TheLoop->getHeader()->getParent()->hasOptSize();
1871 
1872  // For each block.
1873  for (BasicBlock *BB : TheLoop->blocks()) {
1874  // Scan the BB and collect legal loads and stores. Also detect any
1875  // convergent instructions.
1876  for (Instruction &I : *BB) {
1877  if (auto *Call = dyn_cast<CallBase>(&I)) {
1878  if (Call->isConvergent())
1879  HasConvergentOp = true;
1880  }
1881 
1882  // If both a non-vectorizable memory instruction and a convergent operation
1883  // have been found in this loop, there is no reason to continue the search.
1884  if (HasComplexMemInst && HasConvergentOp) {
1885  CanVecMem = false;
1886  return;
1887  }
1888 
1889  // Avoid hitting recordAnalysis multiple times.
1890  if (HasComplexMemInst)
1891  continue;
1892 
1893  // If this is a load, save it. If this instruction can read from memory
1894  // but is not a load, then we quit. Notice that we don't handle function
1895  // calls that read or write.
1896  if (I.mayReadFromMemory()) {
1897  // Many math library functions read the rounding mode. We will only
1898  // vectorize a loop if it contains known function calls that don't set
1899  // the flag. Therefore, it is safe to ignore this read from memory.
1900  auto *Call = dyn_cast<CallInst>(&I);
1901  if (Call && getVectorIntrinsicIDForCall(Call, TLI))
1902  continue;
1903 
1904  // If the function has an explicit vectorized counterpart, we can safely
1905  // assume that it can be vectorized.
1906  if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
1907  !VFDatabase::getMappings(*Call).empty())
1908  continue;
1909 
1910  auto *Ld = dyn_cast<LoadInst>(&I);
1911  if (!Ld) {
1912  recordAnalysis("CantVectorizeInstruction", Ld)
1913  << "instruction cannot be vectorized";
1914  HasComplexMemInst = true;
1915  continue;
1916  }
1917  if (!Ld->isSimple() && !IsAnnotatedParallel) {
1918  recordAnalysis("NonSimpleLoad", Ld)
1919  << "read with atomic ordering or volatile read";
1920  LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
1921  HasComplexMemInst = true;
1922  continue;
1923  }
1924  NumLoads++;
1925  Loads.push_back(Ld);
1926  DepChecker->addAccess(Ld);
1927  if (EnableMemAccessVersioningOfLoop)
1928  collectStridedAccess(Ld);
1929  continue;
1930  }
1931 
1932  // Save 'store' instructions. Abort if other instructions write to memory.
1933  if (I.mayWriteToMemory()) {
1934  auto *St = dyn_cast<StoreInst>(&I);
1935  if (!St) {
1936  recordAnalysis("CantVectorizeInstruction", St)
1937  << "instruction cannot be vectorized";
1938  HasComplexMemInst = true;
1939  continue;
1940  }
1941  if (!St->isSimple() && !IsAnnotatedParallel) {
1942  recordAnalysis("NonSimpleStore", St)
1943  << "write with atomic ordering or volatile write";
1944  LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
1945  HasComplexMemInst = true;
1946  continue;
1947  }
1948  NumStores++;
1949  Stores.push_back(St);
1950  DepChecker->addAccess(St);
1951  if (EnableMemAccessVersioningOfLoop)
1952  collectStridedAccess(St);
1953  }
1954  } // Next instr.
1955  } // Next block.
1956 
1957  if (HasComplexMemInst) {
1958  CanVecMem = false;
1959  return;
1960  }
1961 
1962  // Now we have two lists that hold the loads and the stores.
1963  // Next, we find the pointers that they use.
1964 
1965  // Check if we see any stores. If there are no stores, then we don't
1966  // care if the pointers are *restrict*.
1967  if (!Stores.size()) {
1968  LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
1969  CanVecMem = true;
1970  return;
1971  }
1972 
1973  MemoryDepChecker::DepCandidates DependentAccesses;
1974  AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE);
1975 
1976  // Holds the analyzed pointers. We don't want to call getUnderlyingObjects
1977  // multiple times on the same object. If the ptr is accessed twice, once
1978  // for read and once for write, it will only appear once (on the write
1979  // list). This is okay, since we are going to check for conflicts between
1980  // writes and between reads and writes, but not between reads and reads.
1981  ValueSet Seen;
1982 
1983  // Record uniform store addresses to identify if we have multiple stores
1984  // to the same address.
1985  ValueSet UniformStores;
1986 
1987  for (StoreInst *ST : Stores) {
1988  Value *Ptr = ST->getPointerOperand();
1989 
1990  if (isUniform(Ptr))
1991  HasDependenceInvolvingLoopInvariantAddress |=
1992  !UniformStores.insert(Ptr).second;
1993 
1994  // If we did *not* see this pointer before, insert it into the read-write
1995  // list. At this phase it is only a 'write' list.
1996  if (Seen.insert(Ptr).second) {
1997  ++NumReadWrites;
1998 
1999  MemoryLocation Loc = MemoryLocation::get(ST);
2000  // The TBAA metadata could have a control dependency on the predication
2001  // condition, so we cannot rely on it when determining whether or not we
2002  // need runtime pointer checks.
2003  if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
2004  Loc.AATags.TBAA = nullptr;
2005 
2006  visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2007  [&Accesses, Loc](Value *Ptr) {
2008  MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2009  Accesses.addStore(NewLoc);
2010  });
2011  }
2012  }
2013 
2014  if (IsAnnotatedParallel) {
2015  LLVM_DEBUG(
2016  dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
2017  << "checks.\n");
2018  CanVecMem = true;
2019  return;
2020  }
2021 
2022  for (LoadInst *LD : Loads) {
2023  Value *Ptr = LD->getPointerOperand();
2024  // If we did *not* see this pointer before, insert it into the
2025  // read list. If we *did* see it before, then it is already in
2026  // the read-write list. This allows us to vectorize expressions
2027  // such as A[i] += x, because the address of A[i] is a read-write
2028  // pointer. This only works if the index of A[i] is consecutive.
2029  // If the address of i is unknown (for example A[B[i]]) then we may
2030  // read a few words, modify, and write a few words, and some of the
2031  // words may be written to the same address.
2032  bool IsReadOnlyPtr = false;
2033  if (Seen.insert(Ptr).second ||
2034  !getPtrStride(*PSE, LD->getType(), Ptr, TheLoop, SymbolicStrides)) {
2035  ++NumReads;
2036  IsReadOnlyPtr = true;
2037  }
2038 
2039  // See if there is an unsafe dependency between a load from a uniform address
2040  // and a store to the same uniform address.
2041  if (UniformStores.count(Ptr)) {
2042  LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
2043  "load and uniform store to the same address!\n");
2044  HasDependenceInvolvingLoopInvariantAddress = true;
2045  }
2046 
2047  MemoryLocation Loc = MemoryLocation::get(LD);
2048  // The TBAA metadata could have a control dependency on the predication
2049  // condition, so we cannot rely on it when determining whether or not we
2050  // need runtime pointer checks.
2051  if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
2052  Loc.AATags.TBAA = nullptr;
2053 
2054  visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2055  [&Accesses, Loc, IsReadOnlyPtr](Value *Ptr) {
2056  MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2057  Accesses.addLoad(NewLoc, IsReadOnlyPtr);
2058  });
2059  }
2060 
2061  // If we write (or read-write) to a single destination and there are no
2062  // other reads in this loop then it is safe to vectorize.
2063  if (NumReadWrites == 1 && NumReads == 0) {
2064  LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
2065  CanVecMem = true;
2066  return;
2067  }
2068 
2069  // Build dependence sets and check whether we need a runtime pointer bounds
2070  // check.
2071  Accesses.buildDependenceSets();
2072 
2073  // Find pointers with computable bounds. We are going to use this information
2074  // to place a runtime bound check.
2075  bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
2076  TheLoop, SymbolicStrides);
2077  if (!CanDoRTIfNeeded) {
2078  recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds";
2079  LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
2080  << "the array bounds.\n");
2081  CanVecMem = false;
2082  return;
2083  }
2084 
2085  LLVM_DEBUG(
2086  dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");
2087 
2088  CanVecMem = true;
2089  if (Accesses.isDependencyCheckNeeded()) {
2090  LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
2091  CanVecMem = DepChecker->areDepsSafe(
2092  DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
2093  MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
2094 
2095  if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
2096  LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
2097 
2098  // Clear the dependency checks. We assume they are not needed.
2099  Accesses.resetDepChecks(*DepChecker);
2100 
2101  PtrRtChecking->reset();
2102  PtrRtChecking->Need = true;
2103 
2104  auto *SE = PSE->getSE();
2105  CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop,
2106  SymbolicStrides, true);
2107 
2108  // Check that we found the bounds for the pointer.
2109  if (!CanDoRTIfNeeded) {
2110  recordAnalysis("CantCheckMemDepsAtRunTime")
2111  << "cannot check memory dependencies at runtime";
2112  LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
2113  CanVecMem = false;
2114  return;
2115  }
2116 
2117  CanVecMem = true;
2118  }
2119  }
2120 
2121  if (HasConvergentOp) {
2122  recordAnalysis("CantInsertRuntimeCheckWithConvergent")
2123  << "cannot add control dependency to convergent operation";
2124  LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
2125  "would be needed with a convergent operation\n");
2126  CanVecMem = false;
2127  return;
2128  }
2129 
2130  if (CanVecMem)
2131  LLVM_DEBUG(
2132  dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
2133  << (PtrRtChecking->Need ? "" : " don't")
2134  << " need runtime memory checks.\n");
2135  else {
2136  recordAnalysis("UnsafeMemDep")
2137  << "unsafe dependent memory operations in loop. Use "
2138  "#pragma loop distribute(enable) to allow loop distribution "
2139  "to attempt to isolate the offending operations into a separate "
2140  "loop";
2141  LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
2142  }
2143 }
2144 
2145 bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
2146  DominatorTree *DT) {
2147  assert(TheLoop->contains(BB) && "Unknown block used");
2148 
2149  // Blocks that do not dominate the latch need predication.
2150  BasicBlock* Latch = TheLoop->getLoopLatch();
2151  return !DT->dominates(BB, Latch);
2152 }
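// A minimal illustration (hypothetical loop, not from this file): in
//   for (i = 0; i < n; i++) { if (c[i]) a[i] = 0; }
// the block holding the conditional store does not dominate the loop latch,
// so this helper returns true for it; analyzeLoop uses that result to drop
// control-dependent TBAA metadata when building memory locations.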
2153 
2154 OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
2155  Instruction *I) {
2156  assert(!Report && "Multiple reports generated");
2157 
2158  Value *CodeRegion = TheLoop->getHeader();
2159  DebugLoc DL = TheLoop->getStartLoc();
2160 
2161  if (I) {
2162  CodeRegion = I->getParent();
2163  // If there is no debug location attached to the instruction, fall back to
2164  // using the loop's.
2165  if (I->getDebugLoc())
2166  DL = I->getDebugLoc();
2167  }
2168 
2169  Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
2170  CodeRegion);
2171  return *Report;
2172 }
2173 
2174 bool LoopAccessInfo::isUniform(Value *V) const {
2175  auto *SE = PSE->getSE();
2176  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
2177  // never considered uniform.
2178  // TODO: Is this really what we want? Even without FP SCEV, we may want some
2179  // trivially loop-invariant FP values to be considered uniform.
2180  if (!SE->isSCEVable(V->getType()))
2181  return false;
2182  return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
2183 }
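// For example (hypothetical values, not from this file): if the analyzed loop
// indexes with induction variable i, the SCEV of &A[0] is loop-invariant and
// isUniform returns true, while the SCEV of &A[i] varies per iteration and
// isUniform returns false.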
2184 
2185 void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
2186  Value *Ptr = getLoadStorePointerOperand(MemAccess);
2187  if (!Ptr)
2188  return;
2189 
2190  Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
2191  if (!Stride)
2192  return;
2193 
2194  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
2195  "versioning:");
2196  LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
2197 
2198  // Avoid adding the "Stride == 1" predicate when we know that
2199  // Stride >= Trip-Count. Such a predicate will effectively optimize a single
2200  // or zero iteration loop, since Stride >= Trip-Count and Stride == 1 imply Trip-Count <= 1.
2201  //
2202  // TODO: We are currently not making a very informed decision on when it is
2203  // beneficial to apply stride versioning. It might make more sense that the
2204  // users of this analysis (such as the vectorizer) will trigger it, based on
2205  // their specific cost considerations; For example, in cases where stride
2206  // versioning does not help resolving memory accesses/dependences, the
2207  // vectorizer should evaluate the cost of the runtime test, and the benefit
2208  // of various possible stride specializations, considering the alternatives
2209  // of using gather/scatters (if available).
2210 
2211  const SCEV *StrideExpr = PSE->getSCEV(Stride);
2212  const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
2213 
2214  // Match the types so we can compare the stride and the BETakenCount.
2215  // The Stride can be positive/negative, so we sign extend Stride;
2216  // the backedgeTakenCount is non-negative, so we zero extend BETakenCount.
2217  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2218  uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
2219  uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
2220  const SCEV *CastedStride = StrideExpr;
2221  const SCEV *CastedBECount = BETakenCount;
2222  ScalarEvolution *SE = PSE->getSE();
2223  if (BETypeSize >= StrideTypeSize)
2224  CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
2225  else
2226  CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
2227  const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
2228  // Since TripCount == BackEdgeTakenCount + 1, checking:
2229  // "Stride >= TripCount" is equivalent to checking:
2230  // Stride - BETakenCount > 0
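  // A numeric sketch (values assumed for illustration): with BETakenCount = 4
  // (TripCount = 5), a symbolic Stride of 8 gives Stride - BETakenCount = 4,
  // known positive, so versioning is skipped; a Stride of 3 gives 3 - 4 = -1,
  // and the "Stride == 1" predicate may still be worthwhile.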
2231  if (SE->isKnownPositive(StrideMinusBETaken)) {
2232  LLVM_DEBUG(
2233  dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
2234  "Stride==1 predicate will imply that the loop executes "
2235  "at most once.\n");
2236  return;
2237  }
2238  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.");
2239 
2240  SymbolicStrides[Ptr] = Stride;
2241  StrideSet.insert(Stride);
2242 }
2243 
2244 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
2245  const TargetLibraryInfo *TLI, AAResults *AA,
2246  DominatorTree *DT, LoopInfo *LI)
2247  : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
2248  PtrRtChecking(std::make_unique<RuntimePointerChecking>(SE)),
2249  DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
2250  NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
2251  HasConvergentOp(false),
2252  HasDependenceInvolvingLoopInvariantAddress(false) {
2253  if (canAnalyzeLoop())
2254  analyzeLoop(AA, LI, TLI, DT);
2255 }
2256 
2257 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
2258  if (CanVecMem) {
2259  OS.indent(Depth) << "Memory dependences are safe";
2260  if (MaxSafeDepDistBytes != -1ULL)
2261  OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
2262  << " bytes";
2263  if (PtrRtChecking->Need)
2264  OS << " with run-time checks";
2265  OS << "\n";
2266  }
2267 
2268  if (HasConvergentOp)
2269  OS.indent(Depth) << "Has convergent operation in loop\n";
2270 
2271  if (Report)
2272  OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
2273 
2274  if (auto *Dependences = DepChecker->getDependences()) {
2275  OS.indent(Depth) << "Dependences:\n";
2276  for (auto &Dep : *Dependences) {
2277  Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
2278  OS << "\n";
2279  }
2280  } else
2281  OS.indent(Depth) << "Too many dependences, not recorded\n";
2282 
2283  // List the pairs of accesses that need run-time checks to prove independence.
2284  PtrRtChecking->print(OS, Depth);
2285  OS << "\n";
2286 
2287  OS.indent(Depth) << "Non vectorizable stores to invariant address were "
2288  << (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
2289  << "found in loop.\n";
2290 
2291  OS.indent(Depth) << "SCEV assumptions:\n";
2292  PSE->getUnionPredicate().print(OS, Depth);
2293 
2294  OS << "\n";
2295 
2296  OS.indent(Depth) << "Expressions re-written:\n";
2297  PSE->print(OS, Depth);
2298 }
2299 
2300 LoopAccessLegacyAnalysis::LoopAccessLegacyAnalysis() : FunctionPass(ID) {
2301  initializeLoopAccessLegacyAnalysisPass(*PassRegistry::getPassRegistry());
2302 }
2303 
2304 const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
2305  auto &LAI = LoopAccessInfoMap[L];
2306 
2307  if (!LAI)
2308  LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
2309 
2310  return *LAI.get();
2311 }
2312 
2313 void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
2314  LoopAccessLegacyAnalysis &LAA = *const_cast<LoopAccessLegacyAnalysis *>(this);
2315 
2316  for (Loop *TopLevelLoop : *LI)
2317  for (Loop *L : depth_first(TopLevelLoop)) {
2318  OS.indent(2) << L->getHeader()->getName() << ":\n";
2319  auto &LAI = LAA.getInfo(L);
2320  LAI.print(OS, 4);
2321  }
2322 }
2323 
2324 bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
2325  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2326  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2327  TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
2328  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2329  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2330  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2331 
2332  return false;
2333 }
2334 
2335 void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
2336  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
2337  AU.addRequiredTransitive<AAResultsWrapperPass>();
2338  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
2339  AU.addRequiredTransitive<LoopInfoWrapperPass>();
2340 
2341  AU.setPreservesAll();
2342 }
2343 
2344 char LoopAccessLegacyAnalysis::ID = 0;
2345 static const char laa_name[] = "Loop Access Analysis";
2346 #define LAA_NAME "loop-accesses"
2347 
2348 INITIALIZE_PASS_BEGIN(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2349 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2350 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2351 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
2352 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
2353 INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2354 
2355 AnalysisKey LoopAccessAnalysis::Key;
2356 
2357 LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM,
2358  LoopStandardAnalysisResults &AR) {
2359  return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
2360 }
2361 
2362 namespace llvm {
2363 
2364  Pass *createLAAPass() {
2365  return new LoopAccessLegacyAnalysis();
2366  }
2367 
2368 } // end namespace llvm
llvm::sortPtrAccesses
bool sortPtrAccesses(ArrayRef< Value * > VL, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)
Attempt to sort the pointers in VL and return the sorted indices in SortedIndices,...
Definition: LoopAccessAnalysis.cpp:1206
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::MemoryDepChecker::clearDependences
void clearDependences()
Definition: LoopAccessAnalysis.h:228
llvm::APInt::isStrictlyPositive
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:341
set
We currently generate a but we really shouldn eax ecx xorl edx divl ecx eax divl ecx movl eax ret A similar code sequence works for division We currently compile i32 v2 eax eax jo LBB1_2 atomic and others It is also currently not done for read modify write instructions It is also current not done if the OF or CF flags are needed The shift operators have the complication that when the shift count is EFLAGS is not set
Definition: README.txt:1277
llvm::LoopAccessLegacyAnalysis::ID
static char ID
Definition: LoopAccessAnalysis.h:716
llvm::MemoryDepChecker::Dependence::isBackward
bool isBackward() const
Lexically backward dependence.
Definition: LoopAccessAnalysis.cpp:1325
llvm::MemoryDepChecker
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
Definition: LoopAccessAnalysis.h:87
llvm::LoopAccessLegacyAnalysis
This analysis provides dependence information for the memory accesses of a loop.
Definition: LoopAccessAnalysis.h:714
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:103
llvm::MemoryLocation::get
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Definition: MemoryLocation.cpp:37
llvm::ScalarEvolution::getNegativeSCEV
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
Definition: ScalarEvolution.cpp:4113
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::sys::path::const_iterator::end
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
getMinFromExprs
static const SCEV * getMinFromExprs(const SCEV *I, const SCEV *J, ScalarEvolution *SE)
Compare I and J and return the minimum.
Definition: LoopAccessAnalysis.cpp:271
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::EquivalenceClasses::end
iterator end() const
Definition: EquivalenceClasses.h:147
llvm::MemoryDepChecker::VectorizationSafetyStatus::Safe
@ Safe
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::LoopAccessInfo::isUniform
bool isUniform(Value *V) const
Returns true if the value V is uniform within the loop.
Definition: LoopAccessAnalysis.cpp:2174
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:379
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:228
llvm::MemoryLocation::Ptr
const Value * Ptr
The address of the start of the location.
Definition: MemoryLocation.h:217
llvm::SCEVAddRecExpr::getStart
const SCEV * getStart() const
Definition: ScalarEvolutionExpressions.h:363
llvm::getVectorIntrinsicIDForCall
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
Definition: VectorUtils.cpp:130
llvm::RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup
RuntimeCheckingPtrGroup(unsigned Index, RuntimePointerChecking &RtCheck)
Create a new pointer checking group containing a single pointer, with index Index in RtCheck.
Definition: LoopAccessAnalysis.cpp:170
llvm::Function
Definition: Function.h:61
llvm::cl::location
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:459
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
EnableMemAccessVersioning
static cl::opt< bool > EnableMemAccessVersioning("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symbolic stride memory access versioning"))
This enables versioning on the strides of symbolically striding memory accesses in code like the foll...
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
llvm::PredicatedScalarEvolution
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
Definition: ScalarEvolution.h:2097
High
uint64_t High
Definition: NVVMIntrRange.cpp:61
llvm::SmallVector< RuntimePointerCheck, 4 >
CheckType
static LLVM_ATTRIBUTE_ALWAYS_INLINE bool CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL)
Definition: SelectionDAGISel.cpp:2586
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1467
llvm::EquivalenceClasses
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
Definition: EquivalenceClasses.h:58
llvm::PredicatedScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount()
Get the (predicated) backedge count for the analyzed loop.
Definition: ScalarEvolution.cpp:13299
ErrorHandling.h
LoopAccessAnalysis.h
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:734
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:633
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:461
llvm::VectorizerParams::VectorizationInterleave
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.
Definition: LoopAccessAnalysis.h:44
llvm::LoopAccessLegacyAnalysis::getInfo
const LoopAccessInfo & getInfo(Loop *L)
Query the result of the loop access information for the loop L.
Definition: LoopAccessAnalysis.cpp:2304
MaxDependences
static cl::opt< unsigned > MaxDependences("max-dependences", cl::Hidden, cl::desc("Maximum number of dependences collected by " "loop-access analysis (default = 100)"), cl::init(100))
We collect dependences up to this threshold.
ValueTracking.h
OptimizationRemarkEmitter.h
llvm::SCEV::NoWrapMask
@ NoWrapMask
Definition: ScalarEvolution.h:138
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::MemoryDepChecker::Dependence::isPossiblyBackward
bool isPossiblyBackward() const
May be a lexically backward dependence type (includes Unknown).
Definition: LoopAccessAnalysis.cpp:1341
llvm::AliasSetTracker
Definition: AliasSetTracker.h:329
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::LoopAccessAnalysis
This analysis provides dependence information for the memory accesses of a loop.
Definition: LoopAccessAnalysis.h:756
APInt.h
llvm::getLoadStoreType
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
Definition: Instructions.h:5346
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
llvm::DenseMapIterator
Definition: DenseMap.h:56
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::getPtrStride
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
Definition: LoopAccessAnalysis.cpp:1029
DenseMap.h
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1403
llvm::RuntimePointerChecking::needsChecking
bool needsChecking(const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
Definition: LoopAccessAnalysis.cpp:260
Groups
static const X86InstrFMA3Group Groups[]
Definition: X86InstrFMA3Info.cpp:73
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1268
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
llvm::copy
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1602
llvm::MemoryDepChecker::areDepsSafe
bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps, const ValueToValueMap &Strides)
Check whether the dependencies between the accesses are safe.
Definition: LoopAccessAnalysis.cpp:1710
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
llvm::isConsecutiveAccess
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
Definition: LoopAccessAnalysis.cpp:1253
llvm::Optional< int >
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::SmallPtrSet< Value *, 16 >
llvm::RuntimePointerChecking::getNumberOfChecks
unsigned getNumberOfChecks() const
Returns the number of run-time checks required according to needsChecking.
Definition: LoopAccessAnalysis.h:437
Operator.h
STLExtras.h
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:267
llvm::PredicatedScalarEvolution::getAsAddRec
const SCEVAddRecExpr * getAsAddRec(Value *V)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
Definition: ScalarEvolution.cpp:13361
llvm::MemoryDepChecker::Dependence::NoDep
@ NoDep
Definition: LoopAccessAnalysis.h:112
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
DepthFirstIterator.h
llvm::stripIntegerCast
Value * stripIntegerCast(Value *V)
Definition: LoopAccessAnalysis.cpp:136
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::MemoryDepChecker::Dependence::DepType
DepType
The type of the dependence.
Definition: LoopAccessAnalysis.h:110
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
LoopAnalysisManager.h
llvm::MemoryDepChecker::Dependence::isForward
bool isForward() const
Lexically forward dependence.
Definition: LoopAccessAnalysis.cpp:1345
AliasAnalysis.h
PointerIntPair.h
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ScalarEvolution::getMulExpr
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2986
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:115
Instruction.h
CommandLine.h
llvm::MemoryLocation::AATags
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
Definition: MemoryLocation.h:230
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1335
llvm::LoopAccessLegacyAnalysis::print
void print(raw_ostream &OS, const Module *M=nullptr) const override
Print the result of the analysis when invoked with -analyze.
Definition: LoopAccessAnalysis.cpp:2313
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1551
llvm::APInt::isNegative
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:314
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::ScalarEvolution::getOne
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
Definition: ScalarEvolution.h:629
Constants.h
llvm::MemoryDepChecker::VectorizationSafetyStatus
VectorizationSafetyStatus
Type to keep track of the status of the dependence check.
Definition: LoopAccessAnalysis.h:97
llvm::AAResults
Definition: AliasAnalysis.h:456
isNoWrapAddRec
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, PredicatedScalarEvolution &PSE, const Loop *L)
Return true if an AddRec pointer Ptr is unsigned non-wrapping, i.e.
Definition: LoopAccessAnalysis.cpp:982
llvm::LoopAccessInfo::blockNeedsPredication
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition: LoopAccessAnalysis.cpp:2145
llvm::MemoryDepChecker::Dependence::BackwardVectorizable
@ BackwardVectorizable
Definition: LoopAccessAnalysis.h:132
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::Loop::isAnnotatedParallel
bool isAnnotatedParallel() const
Returns true if the loop is annotated parallel.
Definition: LoopInfo.cpp:566
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::getStrideFromPointer
Value * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
Definition: VectorUtils.cpp:204
llvm::ScalarEvolution::getUMaxExpr
const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
Definition: ScalarEvolution.cpp:3833
InstrTypes.h
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::MemoryDepChecker::Dependence::print
void print(raw_ostream &OS, unsigned Depth, const SmallVectorImpl< Instruction * > &Instrs) const
Print the dependence.
Definition: LoopAccessAnalysis.cpp:1803
llvm::PredicatedScalarEvolution::hasNoOverflow
bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Returns true if we've proved that V doesn't wrap by means of a SCEV predicate.
Definition: ScalarEvolution.cpp:13345
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::AMDGPU::PALMD::Key
Key
PAL metadata keys.
Definition: AMDGPUMetadata.h:481
TargetLibraryInfo.h
llvm::RuntimePointerChecking::insert
void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides, PredicatedScalarEvolution &PSE)
Insert a pointer and calculate the start and end SCEVs.
Definition: LoopAccessAnalysis.cpp:192
false
Definition: StackSlotColoring.cpp:142
llvm::EquivalenceClasses::member_end
member_iterator member_end() const
Definition: EquivalenceClasses.h:157
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
First
into llvm powi allowing the code generator to produce balanced multiplication trees First
Definition: README.txt:54
llvm::Instruction
Definition: Instruction.h:45
isInBoundsGep
static bool isInBoundsGep(Value *Ptr)
Definition: LoopAccessAnalysis.cpp:974
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::M68kBeads::DA
@ DA
Definition: M68kBaseInfo.h:59
llvm::MemoryDepChecker::addAccess
void addAccess(StoreInst *SI)
Register the location (instructions are given increasing numbers) of a write access.
Definition: LoopAccessAnalysis.cpp:1289
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1453
llvm::MemoryDepChecker::Dependence::BackwardVectorizableButPreventsForwarding
@ BackwardVectorizableButPreventsForwarding
Definition: LoopAccessAnalysis.h:134
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:606
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2066
DebugLoc.h
SmallPtrSet.h
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:144
llvm::EquivalenceClasses::getLeaderValue
const ElemTy & getLeaderValue(const ElemTy &V) const
getLeaderValue - Return the leader for the specified value that is in the set.
Definition: EquivalenceClasses.h:170
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:153
llvm::getPointersDiff
Optional< int > getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB, Value *PtrB, const DataLayout &DL, ScalarEvolution &SE, bool StrictCheck=false, bool CheckType=true)
Returns the distance between the pointers PtrA and PtrB iff they are compatible and it is possible to...
Definition: LoopAccessAnalysis.cpp:1140
llvm::ScalarEvolution::getEqualPredicate
const SCEVPredicate * getEqualPredicate(const SCEV *LHS, const SCEV *RHS)
Definition: ScalarEvolution.cpp:12963
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
VectorizationInterleave
static cl::opt< unsigned, true > VectorizationInterleave("force-vector-interleave", cl::Hidden, cl::desc("Sets the vectorization interleave count. " "Zero is autoselect."), cl::location(VectorizerParams::VectorizationInterleave))
llvm::RuntimePointerChecking::reset
void reset()
Reset the state of the pointer runtime information.
Definition: LoopAccessAnalysis.h:402
llvm::None
const NoneType None
Definition: None.h:23
Type.h
llvm::RuntimePointerChecking::PointerInfo::AliasSetId
unsigned AliasSetId
Holds the id of the disjoint alias set to which this pointer belongs.
Definition: LoopAccessAnalysis.h:387
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
llvm::MemoryDepChecker::getInstructionsForAccess
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Find the set of instructions that read or write via Ptr.
Definition: LoopAccessAnalysis.cpp:1788
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4066
isSafeDependenceDistance
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, const SCEV &BackedgeTakenCount, const SCEV &Dist, uint64_t Stride, uint64_t TypeByteSize)
Given a non-constant (unknown) dependence-distance Dist between two memory accesses,...
Definition: LoopAccessAnalysis.cpp:1422
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:168
Check
static bool Check(DecodeStatus &Out, DecodeStatus In)
Definition: AArch64Disassembler.cpp:242
llvm::RuntimePointerChecking::PointerInfo
Definition: LoopAccessAnalysis.h:372
VectorUtils.h
llvm::MemoryDepChecker::VectorizationSafetyStatus::Unsafe
@ Unsafe
llvm::initializeLoopAccessLegacyAnalysisPass
void initializeLoopAccessLegacyAnalysisPass(PassRegistry &)
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1434
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:304
llvm::getUnderlyingObjects
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
Definition: ValueTracking.cpp:4422
llvm::SmallSet::count
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
llvm::ArrayRef::drop_front
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:204
llvm::RuntimePointerChecking
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
Definition: LoopAccessAnalysis.h:368
llvm::VectorizerParams::VectorizationFactor
static unsigned VectorizationFactor
VF as overridden by the user.
Definition: LoopAccessAnalysis.h:42
llvm::RuntimePointerChecking::Need
bool Need
This flag indicates if we need to add the runtime check.
Definition: LoopAccessAnalysis.h:448
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
uint64_t
llvm::MemoryDepChecker::Dependence
Dependece between memory access instructions.
Definition: LoopAccessAnalysis.h:108
llvm::VFDatabase::getMappings
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition: VectorUtils.h:249
llvm::PredicatedScalarEvolution::setNoOverflow
void setNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Proves that V doesn't overflow by adding SCEV predicate.
Definition: ScalarEvolution.cpp:13329
llvm::MemoryDepChecker::Dependence::Backward
@ Backward
Definition: LoopAccessAnalysis.h:129
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::RuntimePointerChecking::PointerInfo::DependencySetId
unsigned DependencySetId
Holds the id of the set of pointers that could be dependent because of a shared underlying object.
Definition: LoopAccessAnalysis.h:385
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::RuntimePointerChecking::CheckingGroups
SmallVector< RuntimeCheckingPtrGroup, 2 > CheckingGroups
Holds a partitioning of pointers into "check groups".
Definition: LoopAccessAnalysis.h:454
laa_name
static const char laa_name[]
Definition: LoopAccessAnalysis.cpp:2345
MemoryLocation.h
llvm::DenseMap< const Value *, Value * >
llvm::SCEV::FlagNSW
@ FlagNSW
Definition: ScalarEvolution.h:137
llvm::AnalysisKey
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: PassManager.h:72
llvm::RuntimeCheckingPtrGroup
A grouping of pointers.
Definition: LoopAccessAnalysis.h:329
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:928
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::LoopAccessInfo
Drive the analysis of memory accesses in the loop.
Definition: LoopAccessAnalysis.h:515
llvm::VectorizerParams::MaxVectorWidth
static const unsigned MaxVectorWidth
Maximum SIMD width.
Definition: LoopAccessAnalysis.h:39
llvm::SCEVWrapPredicate::IncrementNUSW
@ IncrementNUSW
Definition: ScalarEvolution.h:348
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:47
visitPointers
static void visitPointers(Value *StartPtr, const Loop &InnermostLoop, function_ref< void(Value *)> AddPointer)
Definition: LoopAccessAnalysis.cpp:1266
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::ScalarEvolution::getUMinExpr
const SCEV * getUMinExpr(const SCEV *LHS, const SCEV *RHS)
Definition: ScalarEvolution.cpp:3852
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::PredicatedScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
Definition: ScalarEvolution.cpp:13280
llvm::MemoryDepChecker::isSafeForVectorization
bool isSafeForVectorization() const
No memory dependence was encountered that would inhibit vectorization.
Definition: LoopAccessAnalysis.h:194
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::VectorizerParams::RuntimeMemoryCheckThreshold
static unsigned RuntimeMemoryCheckThreshold
\When performing memory disambiguation checks at runtime do not make more than this number of compari...
Definition: LoopAccessAnalysis.h:50
iterator_range.h
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
LAA_NAME
#define LAA_NAME
Definition: LoopAccessAnalysis.cpp:2346
llvm::ScalarEvolution::getNoopOrSignExtend
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
Definition: ScalarEvolution.cpp:4286
llvm::RuntimeCheckingPtrGroup::High
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
Definition: LoopAccessAnalysis.h:351
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::RuntimeCheckingPtrGroup::Low
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.
Definition: LoopAccessAnalysis.h:354
Status
Definition: SIModeRegister.cpp:28
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
Compare
QP Compare Ordered outs ins xscmpudp No builtin are required Or llvm fcmp order unorder compare DP QP Compare builtin are required DP Compare
Definition: README_P9.txt:309
llvm::SCEVNAryExpr::getNoWrapFlags
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
Definition: ScalarEvolutionExpressions.h:213
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:100
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::replaceSymbolicStrideSCEV
const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
Definition: LoopAccessAnalysis.cpp:143
DataLayout.h
EnableForwardingConflictDetection
static cl::opt< bool > EnableForwardingConflictDetection("store-to-load-forwarding-conflict-detection", cl::Hidden, cl::desc("Enable conflict detection in loop-access analysis"), cl::init(true))
Enable store-to-load forwarding conflict detection.
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::MemoryDepChecker::MemAccessInfo
PointerIntPair< Value *, 1, bool > MemAccessInfo
Definition: LoopAccessAnalysis.h:89
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:446
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::transform
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1648
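A short sketch of the range-based wrapper in action (illustrative values):
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;
static SmallVector<int, 4> doubled(ArrayRef<int> In) {
  SmallVector<int, 4> Out(In.size());
  // Same as std::transform, but takes the whole input range at once.
  transform(In, Out.begin(), [](int V) { return 2 * V; });
  return Out;
}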
llvm::cl::Sink
@ Sink
Definition: CommandLine.h:171
llvm::ScalarEvolution::getStoreSizeOfExpr
const SCEV * getStoreSizeOfExpr(Type *IntTy, Type *StoreTy)
Return an expression for the store size of StoreTy that is type IntTy.
Definition: ScalarEvolution.cpp:3883
isNoWrap
static bool isNoWrap(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, Loop *L)
Check whether a pointer address cannot wrap.
Definition: LoopAccessAnalysis.cpp:655
MemoryCheckMergeThreshold
static cl::opt< unsigned > MemoryCheckMergeThreshold("memory-check-merge-threshold", cl::Hidden, cl::desc("Maximum number of comparisons done when trying to merge " "runtime memory checks. (default = 100)"), cl::init(100))
The maximum iterations used to merge memory checks.
llvm::RuntimeCheckingPtrGroup::addPointer
bool addPointer(unsigned Index, RuntimePointerChecking &RtCheck)
Tries to add the pointer recorded in RtCheck at index Index to this pointer checking group.
Definition: LoopAccessAnalysis.cpp:283
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopAccessAnalysis.cpp:70
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:775
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
ValueHandle.h
llvm::VectorizerParams::isInterleaveForced
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
Definition: LoopAccessAnalysis.cpp:132
llvm::RuntimePointerChecking::RuntimeCheckingPtrGroup
friend struct RuntimeCheckingPtrGroup
Definition: LoopAccessAnalysis.h:369
llvm::LoopAccessLegacyAnalysis::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition: LoopAccessAnalysis.cpp:2335
llvm::Value::stripAndAccumulateInBoundsConstantOffsets
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition: Value.h:723
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:671
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:12619
RuntimeMemoryCheckThreshold
static cl::opt< unsigned, true > RuntimeMemoryCheckThreshold("runtime-memory-check-threshold", cl::Hidden, cl::desc("When performing memory disambiguation checks at runtime do not " "generate more than this number of comparisons (default = 8)."), cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8))
llvm::ScalarEvolution::isKnownPositive
bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
Definition: ScalarEvolution.cpp:9826
llvm::depth_first
iterator_range< df_iterator< T > > depth_first(const T &G)
Definition: DepthFirstIterator.h:229
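A minimal sketch of a depth-first CFG walk (the counting function is hypothetical):
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"
using namespace llvm;
// Count the basic blocks reachable from F's entry block.
static unsigned countReachable(Function &F) {
  unsigned Count = 0;
  for (BasicBlock *BB : depth_first(&F)) {
    (void)BB; // only the visit order matters here
    ++Count;
  }
  return Count;
}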
llvm::MemoryDepChecker::Dependence::Unknown
@ Unknown
Definition: LoopAccessAnalysis.h:114
llvm::createLAAPass
Pass * createLAAPass()
Definition: LoopAccessAnalysis.cpp:2364
llvm::RuntimePointerChecking::Pointers
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
Definition: LoopAccessAnalysis.h:451
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
llvm::RuntimePointerChecking::arePointersInSamePartition
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
Definition: LoopAccessAnalysis.cpp:443
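A hedged demo of the partition query. The mapping below is made up; in practice LoopDistribute fills it, and (as I read the helper) a sentinel of -1 marks a pointer used by multiple partitions, which never compares as "same partition":
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
using namespace llvm;
static bool demoSamePartition() {
  // Index i holds the partition of pointer i.
  SmallVector<int, 4> PtrToPartition = {0, 0, 1, -1};
  // Pointers 0 and 1 share partition 0, so no runtime check is needed
  // between them.
  return RuntimePointerChecking::arePointersInSamePartition(PtrToPartition,
                                                            0, 1);
}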
llvm::EquivalenceClasses::iterator
typename std::set< ECValue >::const_iterator iterator
iterator* - Provides a way to iterate over all values in the set.
Definition: EquivalenceClasses.h:144
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:364
llvm::LoopAccessLegacyAnalysis::LoopAccessLegacyAnalysis
LoopAccessLegacyAnalysis()
Definition: LoopAccessAnalysis.cpp:2300
std
Definition: BitVector.h:838
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
Definition: ScalarEvolution.cpp:4199
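A minimal sketch of the subtraction pattern the dependence checks build on (names are illustrative):
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;
// The dependence distance as a SCEV: Sink - Src.
static const SCEV *depDistance(ScalarEvolution &SE, const SCEV *Src,
                               const SCEV *Sink) {
  return SE.getMinusSCEV(Sink, Src);
}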
llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition: PassAnalysisSupport.h:130
llvm::LocationSize::beforeOrAfterPointer
constexpr static LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Definition: MemoryLocation.h:129
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:83
VectorizationFactor
static cl::opt< unsigned, true > VectorizationFactor("force-vector-width", cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."), cl::location(VectorizerParams::VectorizationFactor))
llvm::APInt::sextOrTrunc
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:976
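A small sketch of width normalization (the 64-bit target width is an illustrative choice):
#include "llvm/ADT/APInt.h"
using namespace llvm;
// Sign-extends narrower values, truncates wider ones; a no-op at 64 bits.
static APInt to64Bits(const APInt &Offset) {
  return Offset.sextOrTrunc(64);
}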
llvm::RuntimePointerChecking::printChecks
void printChecks(raw_ostream &OS, const SmallVectorImpl< RuntimePointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
Definition: LoopAccessAnalysis.cpp:469
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:352
Casting.h
DiagnosticInfo.h
Function.h
llvm::LoopAccessInfo::LoopAccessInfo
LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT, LoopInfo *LI)
Definition: LoopAccessAnalysis.cpp:2244
llvm::MemoryDepChecker::Dependence::DepName
static const char * DepName[]
String version of the types.
Definition: LoopAccessAnalysis.h:138
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::RuntimePointerChecking::PointerInfo::IsWritePtr
bool IsWritePtr
Holds whether this pointer is used for writing to memory.
Definition: LoopAccessAnalysis.h:382
llvm::ScalarEvolution::isSCEVable
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition: ScalarEvolution.cpp:3932
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:219
EquivalenceClasses.h
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::PredicatedScalarEvolution::addPredicate
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
Definition: ScalarEvolution.cpp:13308
llvm::LoopBase::getNumBackEdges
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:250
llvm::AAMDNodes::TBAA
MDNode * TBAA
The tag for type-based alias analysis.
Definition: Metadata.h:668
hasComputableBounds
static bool hasComputableBounds(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, Loop *L, bool Assume)
Check whether a pointer can participate in a runtime bounds check.
Definition: LoopAccessAnalysis.cpp:634
llvm::MemoryDepChecker::Dependence::Forward
@ Forward
Definition: LoopAccessAnalysis.h:124
areStridedAccessesIndependent
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, uint64_t TypeByteSize)
Check the dependence for two accesses with the same stride, Stride.
Definition: LoopAccessAnalysis.cpp:1482
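A worked sketch of a scaled-distance independence test of this kind. This mirrors my reading of the helper rather than reusing it, so treat the exact rules as an assumption:
#include <cstdint>
// Distance is in bytes; Stride is in elements of TypeByteSize bytes.
static bool stridedIndependent(uint64_t Distance, uint64_t Stride,
                               uint64_t TypeByteSize) {
  if (Distance % TypeByteSize)     // not a whole number of elements:
    return false;                  // conservatively report "may depend"
  uint64_t ScaledDist = Distance / TypeByteSize;
  return ScaledDist % Stride != 0; // off the stride grid => independent
}
// Example: stride-3 accesses over 4-byte elements, 8 bytes apart, give
// ScaledDist = 2 and 2 % 3 != 0, so the two streams never touch the same
// element.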
llvm::APInt::abs
APInt abs() const
Get the absolute value.
Definition: APInt.h:1670
ScalarEvolutionExpressions.h
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
llvm::ScalarEvolution::getZeroExtendExpr
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
Definition: ScalarEvolution.cpp:1577
Instructions.h
llvm::PointerIntPair< Value *, 1, bool >
INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:51
SmallVector.h
llvm::raw_ostream::indent
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
Definition: raw_ostream.cpp:497
Dominators.h
llvm::Type::getPointerElementType
Type * getPointerElementType() const
Definition: Type.h:380
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1281
llvm::LoopAccessInfo::print
void print(raw_ostream &OS, unsigned Depth=0) const
Print the information about the memory accesses in the loop.
Definition: LoopAccessAnalysis.cpp:2257
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::SmallVectorImpl< int >
llvm::MemoryDepChecker::Dependence::isSafeForVectorization
static VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
Definition: LoopAccessAnalysis.cpp:1308
llvm::EquivalenceClasses::insert
iterator insert(const ElemTy &Data)
insert - Insert a new value into the union/find set, ignoring the request if the value already exists...
Definition: EquivalenceClasses.h:199
llvm::AnalysisUsage::addRequiredTransitive
AnalysisUsage & addRequiredTransitive()
Definition: PassAnalysisSupport.h:81
llvm::GlobalValue::getType
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:271
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:401
DerivedTypes.h
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:379
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::getLoadStorePointerOperand
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Definition: Instructions.h:5301
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2419
llvm::EquivalenceClasses::unionSets
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
Definition: EquivalenceClasses.h:217
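A compact union-find demo (values are illustrative):
#include "llvm/ADT/EquivalenceClasses.h"
using namespace llvm;
static bool demoUnionFind() {
  EquivalenceClasses<int> EC;
  EC.unionSets(1, 2); // merge the classes of 1 and 2
  EC.unionSets(2, 3); // 1, 2, and 3 now share one leader
  return EC.getLeaderValue(1) == EC.getLeaderValue(3); // true
}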
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
AliasSetTracker.h
llvm::Type::isAggregateType
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:267
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::cl::desc
Definition: CommandLine.h:414
raw_ostream.h
llvm::SI::KernelInputOffsets::Offsets
Offsets
Offsets in bytes from the start of the input buffer.
Definition: SIInstrInfo.h:1263
llvm::NullPointerIsDefined
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:1965
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::PredicatedScalarEvolution::getSE
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
Definition: ScalarEvolution.h:2129
llvm::EquivalenceClasses::member_iterator
Definition: EquivalenceClasses.h:251
Value.h
llvm::RuntimePointerChecking::print
void print(raw_ostream &OS, unsigned Depth=0) const
Print the list of run-time memory checks necessary.
Definition: LoopAccessAnalysis.cpp:488
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1284
llvm::RuntimeCheckingPtrGroup::Members
SmallVector< unsigned, 2 > Members
Indices of all the pointers that constitute this grouping.
Definition: LoopAccessAnalysis.h:356
InitializePasses.h
llvm::EquivalenceClasses::findValue
iterator findValue(const ElemTy &V) const
findValue - Return an iterator to the specified value.
Definition: EquivalenceClasses.h:163
llvm::EquivalenceClasses::member_begin
member_iterator member_begin(iterator I) const
Definition: EquivalenceClasses.h:153
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::MemoryDepChecker::Dependence::ForwardButPreventsForwarding
@ ForwardButPreventsForwarding
Definition: LoopAccessAnalysis.h:127
llvm::RuntimePointerChecking::generateChecks
void generateChecks(MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies)
Generate the checks and store them.
Definition: LoopAccessAnalysis.cpp:253
llvm::MemoryLocation
Representation for a specific memory location.
Definition: MemoryLocation.h:209
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:370
llvm::LoopAccessLegacyAnalysis::runOnFunction
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
Definition: LoopAccessAnalysis.cpp:2324
SetVector.h
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SCEVAddRecExpr::evaluateAtIteration
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
Definition: ScalarEvolution.cpp:1025
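For an affine recurrence {Start,+,Step}, the value at iteration It is Start + Step * It; a hedged wrapper showing the call:
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;
// Ask SCEV for the recurrence's value at a given iteration count.
static const SCEV *valueAt(ScalarEvolution &SE, const SCEVAddRecExpr *AR,
                           const SCEV *It) {
  return AR->evaluateAtIteration(It, SE);
}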
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
llvm::MemoryDepChecker::VectorizationSafetyStatus::PossiblySafeWithRtChecks
@ PossiblySafeWithRtChecks