//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation ------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The implementation for the loop memory dependence analysis that was
// originally developed for the loop vectorizer.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <set>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "loop-accesses"

static cl::opt<unsigned, true>
VectorizationFactor("force-vector-width", cl::Hidden,
                    cl::desc("Sets the SIMD width. Zero is autoselect."),
                    cl::location(VectorizerParams::VectorizationFactor));
unsigned VectorizerParams::VectorizationFactor;

static cl::opt<unsigned, true>
VectorizationInterleave("force-vector-interleave", cl::Hidden,
                        cl::desc("Sets the vectorization interleave count. "
                                 "Zero is autoselect."),
                        cl::location(
                            VectorizerParams::VectorizationInterleave));
unsigned VectorizerParams::VectorizationInterleave;

static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
    "runtime-memory-check-threshold", cl::Hidden,
    cl::desc("When performing memory disambiguation checks at runtime do not "
             "generate more than this number of comparisons (default = 8)."),
    cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
unsigned VectorizerParams::RuntimeMemoryCheckThreshold;

/// The maximum iterations used to merge memory checks.
static cl::opt<unsigned> MemoryCheckMergeThreshold(
    "memory-check-merge-threshold", cl::Hidden,
    cl::desc("Maximum number of comparisons done when trying to merge "
             "runtime memory checks. (default = 100)"),
    cl::init(100));

/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;

/// We collect dependences up to this threshold.
static cl::opt<unsigned>
    MaxDependences("max-dependences", cl::Hidden,
                   cl::desc("Maximum number of dependences collected by "
                            "loop-access analysis (default = 100)"),
                   cl::init(100));

/// This enables versioning on the strides of symbolically striding memory
/// accesses in code like the following.
///   for (i = 0; i < N; ++i)
///     A[i * Stride1] += B[i * Stride2] ...
///
/// Will be roughly translated to
///    if (Stride1 == 1 && Stride2 == 1) {
///      for (i = 0; i < N; i += 4)
///        A[i:i+3] += ...
///    } else
///      ...
static cl::opt<bool> EnableMemAccessVersioning(
    "enable-mem-access-versioning", cl::init(true), cl::Hidden,
    cl::desc("Enable symbolic stride memory access versioning"));

/// Enable store-to-load forwarding conflict detection. This option can
/// be disabled for correctness testing.
static cl::opt<bool> EnableForwardingConflictDetection(
    "store-to-load-forwarding-conflict-detection", cl::Hidden,
    cl::desc("Enable conflict detection in loop-access analysis"),
    cl::init(true));

bool VectorizerParams::isInterleaveForced() {
  return ::VectorizationInterleave.getNumOccurrences() > 0;
}

Value *llvm::stripIntegerCast(Value *V) {
  if (auto *CI = dyn_cast<CastInst>(V))
    if (CI->getOperand(0)->getType()->isIntegerTy())
      return CI->getOperand(0);
  return V;
}

const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
                                            const ValueToValueMap &PtrToStride,
                                            Value *Ptr, Value *OrigPtr) {
  const SCEV *OrigSCEV = PSE.getSCEV(Ptr);

  // If there is an entry in the map return the SCEV of the pointer with the
  // symbolic stride replaced by one.
  ValueToValueMap::const_iterator SI =
      PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
  if (SI == PtrToStride.end())
    // For a non-symbolic stride, just return the original expression.
    return OrigSCEV;

  Value *StrideVal = stripIntegerCast(SI->second);

  ScalarEvolution *SE = PSE.getSE();
  const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
  const auto *CT =
      static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));

  PSE.addPredicate(*SE->getEqualPredicate(U, CT));
  auto *Expr = PSE.getSCEV(Ptr);

  LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
                    << " by: " << *Expr << "\n");
  return Expr;
}
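
// Illustrative sketch (an expository addition, not part of upstream LLVM):
// given a loop
//   for (i = 0; i < N; ++i) A[i * Stride] = 0;
// the SCEV of &A[i * Stride] for 4-byte elements is {A,+,(4 * Stride)}<L>.
// If the caller has recorded Stride in PtrToStride, then
//   const SCEV *OneStride = replaceSymbolicStrideSCEV(PSE, PtrToStride, Ptr);
// versions the analysis on "Stride == 1" and returns the simplified
// recurrence {A,+,4}<L>. The equality predicate accumulates in PSE and must
// be tested at runtime by whoever versions the loop.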

RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
    unsigned Index, RuntimePointerChecking &RtCheck)
    : RtCheck(RtCheck), High(RtCheck.Pointers[Index].End),
      Low(RtCheck.Pointers[Index].Start) {
  Members.push_back(Index);
}

/// Calculate Start and End points of memory access.
/// Let's assume A is the first access and B is a memory access on N-th loop
/// iteration. Then B is calculated as:
///   B = A + Step*N .
/// Step value may be positive or negative.
/// N is a calculated back-edge taken count:
///     N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0
/// Start and End points are calculated in the following way:
/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
/// where SizeOfElt is the size of single memory access in bytes.
///
/// There is no conflict when the intervals are disjoint:
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
                                    unsigned DepSetId, unsigned ASId,
                                    const ValueToValueMap &Strides,
                                    PredicatedScalarEvolution &PSE) {
  // Get the stride replaced scev.
  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
  ScalarEvolution *SE = PSE.getSE();

  const SCEV *ScStart;
  const SCEV *ScEnd;

  if (SE->isLoopInvariant(Sc, Lp))
    ScStart = ScEnd = Sc;
  else {
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
    assert(AR && "Invalid addrec expression");
    const SCEV *Ex = PSE.getBackedgeTakenCount();

    ScStart = AR->getStart();
    ScEnd = AR->evaluateAtIteration(Ex, *SE);
    const SCEV *Step = AR->getStepRecurrence(*SE);

    // For expressions with negative step, the upper bound is ScStart and the
    // lower bound is ScEnd.
    if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
      if (CStep->getValue()->isNegative())
        std::swap(ScStart, ScEnd);
    } else {
      // Fallback case: the step is not constant, but we can still
      // get the upper and lower bounds of the interval by using min/max
      // expressions.
      ScStart = SE->getUMinExpr(ScStart, ScEnd);
      ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
    }
    // Add the size of the pointed element to ScEnd.
    auto &DL = Lp->getHeader()->getModule()->getDataLayout();
    Type *IdxTy = DL.getIndexType(Ptr->getType());
    const SCEV *EltSizeSCEV =
        SE->getStoreSizeOfExpr(IdxTy, Ptr->getType()->getPointerElementType());
    ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
  }

  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
}
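
// Worked example (expository addition): for a 4-byte access A[i], i = 0..N-1,
// Sc is {A,+,4}<Lp> and the backedge-taken count is N-1, giving
//   ScStart = A,  ScEnd = A + 4*(N-1) + 4 = A + 4*N.
// For the negative-step access A[N-1-i] the addrec is {A+4*(N-1),+,-4}<Lp>;
// evaluateAtIteration yields A, the bounds are swapped, and the resulting
// interval is again [A, A + 4*N). Two such intervals need no check when
//   P2.Start >= P1.End || P1.Start >= P2.End.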

SmallVector<RuntimePointerCheck, 4>
RuntimePointerChecking::generateChecks() const {
  SmallVector<RuntimePointerCheck, 4> Checks;

  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
      const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I];
      const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J];

      if (needsChecking(CGI, CGJ))
        Checks.push_back(std::make_pair(&CGI, &CGJ));
    }
  }
  return Checks;
}

void RuntimePointerChecking::generateChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  assert(Checks.empty() && "Checks is not empty");
  groupChecks(DepCands, UseDependencies);
  Checks = generateChecks();
}

bool RuntimePointerChecking::needsChecking(
    const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const {
  for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
    for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
      if (needsChecking(M.Members[I], N.Members[J]))
        return true;
  return false;
}

/// Compare \p I and \p J and return the minimum.
/// Return nullptr in case we couldn't find an answer.
static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
                                   ScalarEvolution *SE) {
  const SCEV *Diff = SE->getMinusSCEV(J, I);
  const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);

  if (!C)
    return nullptr;
  if (C->getValue()->isNegative())
    return J;
  return I;
}
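
// Example (expository addition): with I = (A + 8) and J = (A + 4) the
// difference J - I is the constant -4, so J is the minimum and is returned.
// If the two expressions have unrelated bases (say A and B + 4), the
// difference is not a SCEVConstant, nullptr is returned, and the caller
// gives up on merging.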

bool RuntimeCheckingPtrGroup::addPointer(unsigned Index) {
  const SCEV *Start = RtCheck.Pointers[Index].Start;
  const SCEV *End = RtCheck.Pointers[Index].End;

  // Compare the starts and ends with the known minimum and maximum
  // of this set. We need to know how we compare against the min/max
  // of the set in order to be able to emit memchecks.
  const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE);
  if (!Min0)
    return false;

  const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE);
  if (!Min1)
    return false;

  // Update the low bound expression if we've found a new min value.
  if (Min0 == Start)
    Low = Start;

  // Update the high bound expression if we've found a new max value.
  if (Min1 != End)
    High = End;

  Members.push_back(Index);
  return true;
}

void RuntimePointerChecking::groupChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  // We build the groups from dependency candidates equivalence classes
  // because:
  //    - We know that pointers in the same equivalence class share
  //      the same underlying object and therefore there is a chance
  //      that we can compare pointers
  //    - We wouldn't be able to merge two pointers for which we need
  //      to emit a memcheck. The classes in DepCands are already
  //      conveniently built such that no two pointers in the same
  //      class need checking against each other.

  // We use the following (greedy) algorithm to construct the groups
  // For every pointer in the equivalence class:
  //   For each existing group:
  //   - if the difference between this pointer and the min/max bounds
  //     of the group is a constant, then make the pointer part of the
  //     group and update the min/max bounds of that group as required.

  CheckingGroups.clear();

  // If we need to check two pointers to the same underlying object
  // with a non-constant difference, we shouldn't perform any pointer
  // grouping with those pointers. This is because we can easily get
  // into cases where the resulting check would return false, even when
  // the accesses are safe.
  //
  // The following example shows this:
  // for (i = 0; i < 1000; ++i)
  //   a[5000 + i * m] = a[i] + a[i + 9000]
  //
  // Here grouping gives a check of (5000, 5000 + 1000 * m) against
  // (0, 10000) which is always false. However, if m is 1, there is no
  // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
  // us to perform an accurate check in this case.
  //
  // The above case requires that we have an UnknownDependence between
  // accesses to the same underlying object. This cannot happen unless
  // FoundNonConstantDistanceDependence is set, and therefore UseDependencies
  // is also false. In this case we will use the fallback path and create
  // separate checking groups for all pointers.

  // If we don't have the dependency partitions, construct a new
  // checking pointer group for each pointer. This is also required
  // for correctness, because in this case we can have checking between
  // pointers to the same underlying object.
  if (!UseDependencies) {
    for (unsigned I = 0; I < Pointers.size(); ++I)
      CheckingGroups.push_back(RuntimeCheckingPtrGroup(I, *this));
    return;
  }

  unsigned TotalComparisons = 0;

  DenseMap<Value *, unsigned> PositionMap;
  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
    PositionMap[Pointers[Index].PointerValue] = Index;

  // We need to keep track of what pointers we've already seen so we
  // don't process them twice.
  SmallSet<unsigned, 2> Seen;

  // Go through all equivalence classes, get the "pointer check groups"
  // and add them to the overall solution. We use the order in which accesses
  // appear in 'Pointers' to enforce determinism.
  for (unsigned I = 0; I < Pointers.size(); ++I) {
    // We've seen this pointer before, and therefore already processed
    // its equivalence class.
    if (Seen.count(I))
      continue;

    MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
                                           Pointers[I].IsWritePtr);

    SmallVector<RuntimeCheckingPtrGroup, 2> Groups;
    auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));

    // Because DepCands is constructed by visiting accesses in the order in
    // which they appear in alias sets (which is deterministic) and the
    // iteration order within an equivalence class member is only dependent on
    // the order in which unions and insertions are performed on the
    // equivalence class, the iteration order is deterministic.
    for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
         MI != ME; ++MI) {
      auto PointerI = PositionMap.find(MI->getPointer());
      assert(PointerI != PositionMap.end() &&
             "pointer in equivalence class not found in PositionMap");
      unsigned Pointer = PointerI->second;
      bool Merged = false;
      // Mark this pointer as seen.
      Seen.insert(Pointer);

      // Go through all the existing sets and see if we can find one
      // which can include this pointer.
      for (RuntimeCheckingPtrGroup &Group : Groups) {
        // Don't perform more than a certain amount of comparisons.
        // This should limit the cost of grouping the pointers to something
        // reasonable.  If we do end up hitting this threshold, the algorithm
        // will create separate groups for all remaining pointers.
        if (TotalComparisons > MemoryCheckMergeThreshold)
          break;

        TotalComparisons++;

        if (Group.addPointer(Pointer)) {
          Merged = true;
          break;
        }
      }

      if (!Merged)
        // We couldn't add this pointer to any existing set or the threshold
        // for the number of comparisons has been reached. Create a new group
        // to hold the current pointer.
        Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this));
    }

    // We've computed the grouped checks for this partition.
    // Save the results and continue with the next one.
    llvm::copy(Groups, std::back_inserter(CheckingGroups));
  }
}

bool RuntimePointerChecking::arePointersInSamePartition(
    const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
    unsigned PtrIdx2) {
  return (PtrToPartition[PtrIdx1] != -1 &&
          PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
}

bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
  const PointerInfo &PointerI = Pointers[I];
  const PointerInfo &PointerJ = Pointers[J];

  // No need to check if two readonly pointers intersect.
  if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
    return false;

  // Only need to check pointers between two different dependency sets.
  if (PointerI.DependencySetId == PointerJ.DependencySetId)
    return false;

  // Only need to check pointers in the same alias set.
  if (PointerI.AliasSetId != PointerJ.AliasSetId)
    return false;

  return true;
}

void RuntimePointerChecking::printChecks(
    raw_ostream &OS, const SmallVectorImpl<RuntimePointerCheck> &Checks,
    unsigned Depth) const {
  unsigned N = 0;
  for (const auto &Check : Checks) {
    const auto &First = Check.first->Members, &Second = Check.second->Members;

    OS.indent(Depth) << "Check " << N++ << ":\n";

    OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
    for (unsigned K = 0; K < First.size(); ++K)
      OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";

    OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
    for (unsigned K = 0; K < Second.size(); ++K)
      OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
  }
}

void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {

  OS.indent(Depth) << "Run-time memory checks:\n";
  printChecks(OS, Checks, Depth);

  OS.indent(Depth) << "Grouped accesses:\n";
  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    const auto &CG = CheckingGroups[I];

    OS.indent(Depth + 2) << "Group " << &CG << ":\n";
    OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
                         << ")\n";
    for (unsigned J = 0; J < CG.Members.size(); ++J) {
      OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
                           << "\n";
    }
  }
}

namespace {

/// Analyses memory accesses in a loop.
///
/// Checks whether run time pointer checks are needed and builds sets for data
/// dependence checking.
class AccessAnalysis {
public:
  /// Read or write access location.
  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
  typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;

  AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI,
                 MemoryDepChecker::DepCandidates &DA,
                 PredicatedScalarEvolution &PSE)
      : TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA),
        IsRTCheckAnalysisNeeded(false), PSE(PSE) {}

  /// Register a load and whether it is only read from.
  void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
    Value *Ptr = const_cast<Value*>(Loc.Ptr);
    AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
    Accesses.insert(MemAccessInfo(Ptr, false));
    if (IsReadOnly)
      ReadOnlyPtr.insert(Ptr);
  }

  /// Register a store.
  void addStore(MemoryLocation &Loc) {
    Value *Ptr = const_cast<Value*>(Loc.Ptr);
    AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
    Accesses.insert(MemAccessInfo(Ptr, true));
  }

  /// Check if we can emit a run-time no-alias check for \p Access.
  ///
  /// Returns true if we can emit a run-time no alias check for \p Access.
  /// If we can check this access, this also adds it to a dependence set and
  /// adds a run-time check for it to \p RtCheck. If \p Assume is true,
  /// we will attempt to use additional run-time checks in order to get
  /// the bounds of the pointer.
  bool createCheckForAccess(RuntimePointerChecking &RtCheck,
                            MemAccessInfo Access,
                            const ValueToValueMap &Strides,
                            DenseMap<Value *, unsigned> &DepSetId,
                            Loop *TheLoop, unsigned &RunningDepId,
                            unsigned ASId, bool ShouldCheckStride,
                            bool Assume);

  /// Check whether we can check the pointers at runtime for
  /// non-intersection.
  ///
  /// Returns true if we need no check or if we do and we can generate them
  /// (i.e. the pointers have computable bounds).
  bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
                       Loop *TheLoop, const ValueToValueMap &Strides,
                       bool ShouldCheckWrap = false);

  /// Goes over all memory accesses, checks whether a RT check is needed
  /// and builds sets of dependent accesses.
  void buildDependenceSets() {
    processMemAccesses();
  }

  /// Initial processing of memory accesses determined that we need to
  /// perform dependency checking.
  ///
  /// Note that this can later be cleared if we retry memcheck analysis without
  /// dependency checking (i.e. FoundNonConstantDistanceDependence).
  bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }

  /// We decided that no dependence analysis would be used. Reset the state.
  void resetDepChecks(MemoryDepChecker &DepChecker) {
    CheckDeps.clear();
    DepChecker.clearDependences();
  }

  MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }

private:
  typedef SetVector<MemAccessInfo> PtrAccessSet;

  /// Go over all memory accesses and check whether runtime pointer checks
  /// are needed and build sets of dependency check candidates.
  void processMemAccesses();

  /// Set of all accesses.
  PtrAccessSet Accesses;

  /// The loop being checked.
  const Loop *TheLoop;

  /// List of accesses that need a further dependence check.
  MemAccessInfoList CheckDeps;

  /// Set of pointers that are read only.
  SmallPtrSet<Value*, 16> ReadOnlyPtr;

  /// An alias set tracker to partition the access set by underlying object and
  /// intrinsic property (such as TBAA metadata).
  AliasSetTracker AST;

  LoopInfo *LI;

  /// Sets of potentially dependent accesses - members of one set share an
  /// underlying pointer. The set "CheckDeps" identifies which sets really need
  /// a dependence check.
  MemoryDepChecker::DepCandidates &DepCands;

  /// Initial processing of memory accesses determined that we may need
  /// to add memchecks. Perform the analysis to determine the necessary checks.
  ///
  /// Note that, this is different from isDependencyCheckNeeded. When we retry
  /// memcheck analysis without dependency checking
  /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is
  /// cleared while this remains set if we have potentially dependent accesses.
  bool IsRTCheckAnalysisNeeded;

  /// The SCEV predicate containing all the SCEV-related assumptions.
  PredicatedScalarEvolution &PSE;
};

} // end anonymous namespace

/// Check whether a pointer can participate in a runtime bounds check.
/// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
/// by adding run-time checks (overflow checks) if necessary.
static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
                                const ValueToValueMap &Strides, Value *Ptr,
                                Loop *L, bool Assume) {
  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);

  // The bounds for a loop-invariant pointer are trivial.
  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
    return true;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);

  if (!AR && Assume)
    AR = PSE.getAsAddRec(Ptr);

  if (!AR)
    return false;

  return AR->isAffine();
}

/// Check whether a pointer address cannot wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE,
                     const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
  const SCEV *PtrScev = PSE.getSCEV(Ptr);
  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
    return true;

  int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
  if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
    return true;

  return false;
}

bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
                                          MemAccessInfo Access,
                                          const ValueToValueMap &StridesMap,
                                          DenseMap<Value *, unsigned> &DepSetId,
                                          Loop *TheLoop, unsigned &RunningDepId,
                                          unsigned ASId, bool ShouldCheckWrap,
                                          bool Assume) {
  Value *Ptr = Access.getPointer();

  if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
    return false;

  // When we run after a failing dependency check we have to make sure
  // we don't have wrapping pointers.
  if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
    auto *Expr = PSE.getSCEV(Ptr);
    if (!Assume || !isa<SCEVAddRecExpr>(Expr))
      return false;
    PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
  }

  // The id of the dependence set.
  unsigned DepId;

  if (isDependencyCheckNeeded()) {
    Value *Leader = DepCands.getLeaderValue(Access).getPointer();
    unsigned &LeaderId = DepSetId[Leader];
    if (!LeaderId)
      LeaderId = RunningDepId++;
    DepId = LeaderId;
  } else
    // Each access has its own dependence set.
    DepId = RunningDepId++;

  bool IsWrite = Access.getInt();
  RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
  LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');

  return true;
}

bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
                                     ScalarEvolution *SE, Loop *TheLoop,
                                     const ValueToValueMap &StridesMap,
                                     bool ShouldCheckWrap) {
  // Find pointers with computable bounds. We are going to use this information
  // to place a runtime bound check.
  bool CanDoRT = true;

  bool MayNeedRTCheck = false;
  if (!IsRTCheckAnalysisNeeded) return true;

  bool IsDepCheckNeeded = isDependencyCheckNeeded();

  // We assign a consecutive id to accesses from different alias sets.
  // Accesses between different groups don't need to be checked.
  unsigned ASId = 0;
  for (auto &AS : AST) {
    int NumReadPtrChecks = 0;
    int NumWritePtrChecks = 0;
    bool CanDoAliasSetRT = true;
    ++ASId;

    // We assign consecutive ids to accesses from different dependence sets.
    // Accesses within the same set don't need a runtime check.
    unsigned RunningDepId = 1;
    DenseMap<Value *, unsigned> DepSetId;

    SmallVector<MemAccessInfo, 4> Retries;

    // First, count how many write and read accesses are in the alias set. Also
    // collect MemAccessInfos for later.
    SmallVector<MemAccessInfo, 4> AccessInfos;
    for (const auto &A : AS) {
      Value *Ptr = A.getValue();
      bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));

      if (IsWrite)
        ++NumWritePtrChecks;
      else
        ++NumReadPtrChecks;
      AccessInfos.emplace_back(Ptr, IsWrite);
    }

    // We do not need runtime checks for this alias set, if there are no writes
    // or a single write and no reads.
    if (NumWritePtrChecks == 0 ||
        (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) {
      assert((AS.size() <= 1 ||
              all_of(AS,
                     [this](auto AC) {
                       MemAccessInfo AccessWrite(AC.getValue(), true);
                       return DepCands.findValue(AccessWrite) == DepCands.end();
                     })) &&
             "Can only skip updating CanDoRT below, if all entries in AS "
             "are reads or there is at most 1 entry");
      continue;
    }

    for (auto &Access : AccessInfos) {
      if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
                                RunningDepId, ASId, ShouldCheckWrap, false)) {
        LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
                          << *Access.getPointer() << '\n');
        Retries.push_back(Access);
        CanDoAliasSetRT = false;
      }
    }

    // Note that this function computes CanDoRT and MayNeedRTCheck
    // independently. For example CanDoRT=false, MayNeedRTCheck=false means that
    // we have a pointer for which we couldn't find the bounds but we don't
    // actually need to emit any checks so it does not matter.
    //
    // We need runtime checks for this alias set, if there are at least 2
    // dependence sets (in which case RunningDepId > 2) or if we need to re-try
    // any bound checks (because in that case the number of dependence sets is
    // incomplete).
    bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty();

    // We need to perform run-time alias checks, but some pointers had bounds
    // that couldn't be checked.
    if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
      // Reset the CanDoSetRt flag and retry all accesses that have failed.
      // We know that we need these checks, so we can now be more aggressive
      // and add further checks if required (overflow checks).
      CanDoAliasSetRT = true;
      for (auto Access : Retries)
        if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
                                  TheLoop, RunningDepId, ASId,
                                  ShouldCheckWrap, /*Assume=*/true)) {
          CanDoAliasSetRT = false;
          break;
        }
    }

    CanDoRT &= CanDoAliasSetRT;
    MayNeedRTCheck |= NeedsAliasSetRTCheck;
    ++ASId;
  }

  // If the pointers that we would use for the bounds comparison have different
  // address spaces, assume the values aren't directly comparable, so we can't
  // use them for the runtime check. We also have to assume they could
  // overlap. In the future there should be metadata for whether address spaces
  // are disjoint.
  unsigned NumPointers = RtCheck.Pointers.size();
  for (unsigned i = 0; i < NumPointers; ++i) {
    for (unsigned j = i + 1; j < NumPointers; ++j) {
      // Only need to check pointers between two different dependency sets.
      if (RtCheck.Pointers[i].DependencySetId ==
          RtCheck.Pointers[j].DependencySetId)
        continue;
      // Only need to check pointers in the same alias set.
      if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
        continue;

      Value *PtrI = RtCheck.Pointers[i].PointerValue;
      Value *PtrJ = RtCheck.Pointers[j].PointerValue;

      unsigned ASi = PtrI->getType()->getPointerAddressSpace();
      unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
      if (ASi != ASj) {
        LLVM_DEBUG(
            dbgs() << "LAA: Runtime check would require comparison between"
                      " different address spaces\n");
        return false;
      }
    }
  }

  if (MayNeedRTCheck && CanDoRT)
    RtCheck.generateChecks(DepCands, IsDepCheckNeeded);

  LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
                    << " pointer comparisons.\n");

  // If we can do run-time checks, but there are no checks, no runtime checks
  // are needed. This can happen when all pointers point to the same underlying
  // object for example.
  RtCheck.Need = CanDoRT ? RtCheck.getNumberOfChecks() != 0 : MayNeedRTCheck;

  bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT;
  if (!CanDoRTIfNeeded)
    RtCheck.reset();
  return CanDoRTIfNeeded;
}

void AccessAnalysis::processMemAccesses() {
  // We process the set twice: first we process read-write pointers, last we
  // process read-only pointers. This allows us to skip dependence tests for
  // read-only pointers.

  LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
  LLVM_DEBUG(dbgs() << "  AST: "; AST.dump());
  LLVM_DEBUG(dbgs() << "LAA:   Accesses(" << Accesses.size() << "):\n");
  LLVM_DEBUG({
    for (auto A : Accesses)
      dbgs() << "\t" << *A.getPointer() << " ("
             << (A.getInt() ? "write"
                            : (ReadOnlyPtr.count(A.getPointer()) ? "read-only"
                                                                 : "read"))
             << ")\n";
  });

  // The AliasSetTracker has nicely partitioned our pointers by metadata
  // compatibility and potential for underlying-object overlap. As a result, we
  // only need to check for potential pointer dependencies within each alias
  // set.
  for (const auto &AS : AST) {
    // Note that both the alias-set tracker and the alias sets themselves use
    // linked lists internally and so the iteration order here is deterministic
    // (matching the original instruction order within each set).

    bool SetHasWrite = false;

    // Map of pointers to last access encountered.
    typedef DenseMap<const Value*, MemAccessInfo> UnderlyingObjToAccessMap;
    UnderlyingObjToAccessMap ObjToLastAccess;

    // Set of accesses to check after all writes have been processed.
    PtrAccessSet DeferredAccesses;

    // Iterate over each alias set twice, once to process read/write pointers,
    // and then to process read-only pointers.
    for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
      bool UseDeferred = SetIteration > 0;
      PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;

      for (const auto &AV : AS) {
        Value *Ptr = AV.getValue();

        // For a single memory access in AliasSetTracker, Accesses may contain
        // both read and write, and they both need to be handled for CheckDeps.
        for (const auto &AC : S) {
          if (AC.getPointer() != Ptr)
            continue;

          bool IsWrite = AC.getInt();

          // If we're using the deferred access set, then it contains only
          // reads.
          bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
          if (UseDeferred && !IsReadOnlyPtr)
            continue;
          // Otherwise, the pointer must be in the PtrAccessSet, either as a
          // read or a write.
          assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
                  S.count(MemAccessInfo(Ptr, false))) &&
                 "Alias-set pointer not in the access set?");

          MemAccessInfo Access(Ptr, IsWrite);
          DepCands.insert(Access);

          // Memorize read-only pointers for later processing and skip them in
          // the first round (they need to be checked after we have seen all
          // write pointers). Note: we also mark pointers that are not
          // consecutive as "read-only" pointers (so that we check
          // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
          if (!UseDeferred && IsReadOnlyPtr) {
            DeferredAccesses.insert(Access);
            continue;
          }

          // If this is a write - check other reads and writes for conflicts. If
          // this is a read only check other writes for conflicts (but only if
          // there is no other write to the ptr - this is an optimization to
          // catch "a[i] = a[i] + " without having to do a dependence check).
          if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
            CheckDeps.push_back(Access);
            IsRTCheckAnalysisNeeded = true;
          }

          if (IsWrite)
            SetHasWrite = true;

          // Create sets of pointers connected by a shared alias set and
          // underlying object.
          typedef SmallVector<const Value *, 16> ValueVector;
          ValueVector TempObjects;

          getUnderlyingObjects(Ptr, TempObjects, LI);
          LLVM_DEBUG(dbgs()
                     << "Underlying objects for pointer " << *Ptr << "\n");
          for (const Value *UnderlyingObj : TempObjects) {
            // nullptr never aliases; don't join sets for pointers that have
            // "null" in their UnderlyingObjects list.
            if (isa<ConstantPointerNull>(UnderlyingObj) &&
                !NullPointerIsDefined(
                    TheLoop->getHeader()->getParent(),
                    UnderlyingObj->getType()->getPointerAddressSpace()))
              continue;

            UnderlyingObjToAccessMap::iterator Prev =
                ObjToLastAccess.find(UnderlyingObj);
            if (Prev != ObjToLastAccess.end())
              DepCands.unionSets(Access, Prev->second);

            ObjToLastAccess[UnderlyingObj] = Access;
            LLVM_DEBUG(dbgs() << "  " << *UnderlyingObj << "\n");
          }
        }
      }
    }
  }
}

static bool isInBoundsGep(Value *Ptr) {
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
    return GEP->isInBounds();
  return false;
}

/// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
/// i.e. monotonically increasing/decreasing.
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
                           PredicatedScalarEvolution &PSE, const Loop *L) {
  // FIXME: This should probably only return true for NUW.
  if (AR->getNoWrapFlags(SCEV::NoWrapMask))
    return true;

  // Scalar evolution does not propagate the non-wrapping flags to values that
  // are derived from a non-wrapping induction variable because non-wrapping
  // could be flow-sensitive.
  //
  // Look through the potentially overflowing instruction to try to prove
  // non-wrapping for the *specific* value of Ptr.

  // The arithmetic implied by an inbounds GEP can't overflow.
  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
  if (!GEP || !GEP->isInBounds())
    return false;

  // Make sure there is only one non-const index and analyze that.
  Value *NonConstIndex = nullptr;
  for (Value *Index : GEP->indices())
    if (!isa<ConstantInt>(Index)) {
      if (NonConstIndex)
        return false;
      NonConstIndex = Index;
    }
  if (!NonConstIndex)
    // The recurrence is on the pointer, ignore for now.
    return false;

  // The index in GEP is signed. It is non-wrapping if it's derived from a NSW
  // AddRec using a NSW operation.
  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
    if (OBO->hasNoSignedWrap() &&
        // Assume constant for the other operand so that the AddRec can be
        // easily found.
        isa<ConstantInt>(OBO->getOperand(1))) {
      auto *OpScev = PSE.getSCEV(OBO->getOperand(0));

      if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
        return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
    }

  return false;
}

/// Check whether the access through \p Ptr has a constant stride.
int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
                           const Loop *Lp, const ValueToValueMap &StridesMap,
                           bool Assume, bool ShouldCheckWrap) {
  Type *Ty = Ptr->getType();
  assert(Ty->isPointerTy() && "Unexpected non-ptr");

  // Make sure that the pointer does not point to aggregate types.
  auto *PtrTy = cast<PointerType>(Ty);
  if (PtrTy->getElementType()->isAggregateType()) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
                      << *Ptr << "\n");
    return 0;
  }

  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
  if (Assume && !AR)
    AR = PSE.getAsAddRec(Ptr);

  if (!AR) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
                      << " SCEV: " << *PtrScev << "\n");
    return 0;
  }

  // The access function must stride over the innermost loop.
  if (Lp != AR->getLoop()) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
                      << *Ptr << " SCEV: " << *AR << "\n");
    return 0;
  }

  // The address calculation must not wrap. Otherwise, a dependence could be
  // inverted.
  // An inbounds getelementptr that is an AddRec with a unit stride
  // cannot wrap per definition. The unit stride requirement is checked later.
  // A getelementptr without an inbounds attribute and unit stride would have
  // to access the pointer value "0" which is undefined behavior in address
  // space 0, therefore we can also vectorize this case.
  bool IsInBoundsGEP = isInBoundsGep(Ptr);
  bool IsNoWrapAddRec = !ShouldCheckWrap ||
    PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
    isNoWrapAddRec(Ptr, AR, PSE, Lp);
  if (!IsNoWrapAddRec && !IsInBoundsGEP &&
      NullPointerIsDefined(Lp->getHeader()->getParent(),
                           PtrTy->getAddressSpace())) {
    if (Assume) {
      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
      IsNoWrapAddRec = true;
      LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
                        << "LAA:   Pointer: " << *Ptr << "\n"
                        << "LAA:   SCEV: " << *AR << "\n"
                        << "LAA:   Added an overflow assumption\n");
    } else {
      LLVM_DEBUG(
          dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
                 << *Ptr << " SCEV: " << *AR << "\n");
      return 0;
    }
  }

  // Check the step is constant.
  const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());

  // Calculate the pointer stride and check if it is constant.
  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
  if (!C) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
                      << " SCEV: " << *AR << "\n");
    return 0;
  }

  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
  int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
  const APInt &APStepVal = C->getAPInt();

  // Huge step value - give up.
  if (APStepVal.getBitWidth() > 64)
    return 0;

  int64_t StepVal = APStepVal.getSExtValue();

  // Strided access.
  int64_t Stride = StepVal / Size;
  int64_t Rem = StepVal % Size;
  if (Rem)
    return 0;

  // If the SCEV could wrap but we have an inbounds gep with a unit stride we
  // know we can't "wrap around the address space". In case of address space
  // zero we know that this won't happen without triggering undefined behavior.
  if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
      (IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
                                              PtrTy->getAddressSpace()))) {
    if (Assume) {
      // We can avoid this case by adding a run-time check.
      LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
                        << "inbounds or in address space 0 may wrap:\n"
                        << "LAA:   Pointer: " << *Ptr << "\n"
                        << "LAA:   SCEV: " << *AR << "\n"
                        << "LAA:   Added an overflow assumption\n");
      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
    } else
      return 0;
  }

  return Stride;
}
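
// Worked example (expository addition): for "for (i = 0; i < n; ++i) A[2*i]"
// over i32, the pointer SCEV is {A,+,8}<Lp>; the constant step 8 divided by
// the 4-byte element size yields a stride of 2. A step of 6 over i32
// (non-zero remainder) or a non-constant step both make the function return
// 0, meaning no usable constant stride.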

Optional<int> llvm::getPointersDiff(Value *PtrA, Value *PtrB,
                                    const DataLayout &DL, ScalarEvolution &SE,
                                    bool StrictCheck, bool CheckType) {
  assert(PtrA && PtrB && "Expected non-nullptr pointers.");
  // Make sure that A and B are different pointers.
  if (PtrA == PtrB)
    return 0;

  // Make sure that PtrA and PtrB have the same type if required.
  if (CheckType && PtrA->getType() != PtrB->getType())
    return None;

  unsigned ASA = PtrA->getType()->getPointerAddressSpace();
  unsigned ASB = PtrB->getType()->getPointerAddressSpace();

  // Check that the address spaces match.
  if (ASA != ASB)
    return None;
  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);

  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
  Value *PtrA1 = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
  Value *PtrB1 = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);

  int Val;
  if (PtrA1 == PtrB1) {
    // Retrieve the address space again as pointer stripping now tracks through
    // `addrspacecast`.
    ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace();
    ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace();
    // Check that the address spaces match and that the pointers are valid.
    if (ASA != ASB)
      return None;

    IdxWidth = DL.getIndexSizeInBits(ASA);
    OffsetA = OffsetA.sextOrTrunc(IdxWidth);
    OffsetB = OffsetB.sextOrTrunc(IdxWidth);

    OffsetB -= OffsetA;
    Val = OffsetB.getSExtValue();
  } else {
    // Otherwise compute the distance with SCEV between the base pointers.
    const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
    const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
    const auto *Diff =
        dyn_cast<SCEVConstant>(SE.getMinusSCEV(PtrSCEVB, PtrSCEVA));
    if (!Diff)
      return None;
    Val = Diff->getAPInt().getSExtValue();
  }
  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
  int Size = DL.getTypeStoreSize(Ty);
  int Dist = Val / Size;

  // Ensure that the calculated distance matches the type-based one after all
  // the bitcasts removal in the provided pointers.
  if (!StrictCheck || Dist * Size == Val)
    return Dist;
  return None;
}
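
// Example (expository addition): for PtrA = &A[1] and PtrB = &A[4] over i32,
// both strip to the common base A with OffsetA = 4 and OffsetB = 16, so
// Val = 12, Size = 4 and the returned distance is 3. Under StrictCheck the
// result is accepted only because 3 * 4 == 12; a byte distance that is not
// a multiple of the element size yields None.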

bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
                           ScalarEvolution &SE,
                           SmallVectorImpl<unsigned> &SortedIndices) {
  assert(llvm::all_of(
             VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
         "Expected list of pointer operands.");
  // Walk over the pointers, and map each of them to an offset relative to
  // first pointer in the array.
  Value *Ptr0 = VL[0];

  using DistOrdPair = std::pair<int64_t, int>;
  auto Compare = [](const DistOrdPair &L, const DistOrdPair &R) {
    return L.first < R.first;
  };
  std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
  Offsets.emplace(0, 0);
  int Cnt = 1;
  bool IsConsecutive = true;
  for (auto *Ptr : VL.drop_front()) {
    Optional<int> Diff =
        getPointersDiff(Ptr0, Ptr, DL, SE, /*StrictCheck=*/true);
    if (!Diff)
      return false;

    // Check if the pointer with the same offset is found.
    int64_t Offset = *Diff;
    auto Res = Offsets.emplace(Offset, Cnt);
    if (!Res.second)
      return false;
    // Consecutive order if the inserted element is the last one.
    IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end();
    ++Cnt;
  }
  SortedIndices.clear();
  if (!IsConsecutive) {
    // Fill SortedIndices array only if it is non-consecutive.
    SortedIndices.resize(VL.size());
    Cnt = 0;
    for (const std::pair<int64_t, int> &Pair : Offsets) {
      SortedIndices[Cnt] = Pair.second;
      ++Cnt;
    }
  }
  return true;
}
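
#if 0
// Usage sketch (expository addition, not part of upstream LLVM; assumes a
// DataLayout DL, a ScalarEvolution SE and a pointer list VL are in scope):
SmallVector<unsigned, 8> SortedIndices;
if (sortPtrAccesses(VL, DL, SE, SortedIndices)) {
  // All pairwise distances to VL[0] were constant and distinct.
  // SortedIndices is empty when VL is already in increasing address order;
  // otherwise SortedIndices[I] is the position in VL of the I-th lowest
  // pointer.
}
#endif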

/// Returns true if the memory operations \p A and \p B are consecutive.
bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
                               ScalarEvolution &SE, bool CheckType) {
  Value *PtrA = getLoadStorePointerOperand(A);
  Value *PtrB = getLoadStorePointerOperand(B);
  if (!PtrA || !PtrB)
    return false;
  Optional<int> Diff =
      getPointersDiff(PtrA, PtrB, DL, SE, /*StrictCheck=*/true, CheckType);
  return Diff && *Diff == 1;
}

MemoryDepChecker::VectorizationSafetyStatus
MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
  switch (Type) {
  case NoDep:
  case Forward:
  case BackwardVectorizable:
    return VectorizationSafetyStatus::Safe;

  case Unknown:
    return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
  case ForwardButPreventsForwarding:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return VectorizationSafetyStatus::Unsafe;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isBackward() const {
  switch (Type) {
  case NoDep:
  case Forward:
  case ForwardButPreventsForwarding:
  case Unknown:
    return false;

  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return true;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
  return isBackward() || Type == Unknown;
}

bool MemoryDepChecker::Dependence::isForward() const {
  switch (Type) {
  case Forward:
  case ForwardButPreventsForwarding:
    return true;

  case NoDep:
  case Unknown:
  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return false;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
                                                    uint64_t TypeByteSize) {
  // If loads occur at a distance that is not a multiple of a feasible vector
  // factor store-load forwarding does not take place.
  // Positive dependences might cause troubles because vectorizing them might
  // prevent store-load forwarding making vectorized code run a lot slower.
  //   a[i] = a[i-3] ^ a[i-8];
  //   The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
  //   hence on your typical architecture store-load forwarding does not take
  //   place. Vectorizing in such cases does not make sense.
  // Store-load forwarding distance.

  // After this many iterations store-to-load forwarding conflicts should not
  // cause any slowdowns.
  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
  // Maximum vector factor.
  uint64_t MaxVFWithoutSLForwardIssues = std::min(
      VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);

  // Compute the smallest VF at which the store and load would be misaligned.
  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
       VF *= 2) {
    // If the number of vector iteration between the store and the load are
    // small we could incur conflicts.
    if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
      MaxVFWithoutSLForwardIssues = (VF >> 1);
      break;
    }
  }

  if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
    LLVM_DEBUG(
        dbgs() << "LAA: Distance " << Distance
               << " that could cause a store-load forwarding conflict\n");
    return true;
  }

  if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
      MaxVFWithoutSLForwardIssues !=
          VectorizerParams::MaxVectorWidth * TypeByteSize)
    MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
  return false;
}
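
// Worked example (expository addition): for "a[i] = a[i-3] ^ ..." over i32,
// Distance = 12 and TypeByteSize = 4, so forwarding must survive
// 8 * 4 = 32 iterations. The first candidate VF of 8 bytes already
// misaligns (12 % 8 != 0 and 12 / 8 < 32), clamping
// MaxVFWithoutSLForwardIssues to 4 bytes, which is below 2 * TypeByteSize,
// so a conflict is reported. For Distance = 24 the clamp happens at VF = 16
// instead, leaving a safe dependence distance of 8 bytes, i.e. a VF of two
// elements.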

void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
  if (Status < S)
    Status = S;
}

/// Given a non-constant (unknown) dependence-distance \p Dist between two
/// memory accesses, that have the same stride whose absolute value is given
/// in \p Stride, and that have the same type size \p TypeByteSize,
/// in a loop whose takenCount is \p BackedgeTakenCount, check if it is
/// possible to prove statically that the dependence distance is larger
/// than the range that the accesses will travel through the execution of
/// the loop. If so, return true; false otherwise. This is useful for
/// example in loops such as the following (PR31098):
///     for (i = 0; i < D; ++i) {
///                = out[i];
///       out[i+D] =
///     }
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
                                     const SCEV &BackedgeTakenCount,
                                     const SCEV &Dist, uint64_t Stride,
                                     uint64_t TypeByteSize) {

  // If we can prove that
  //      (**) |Dist| > BackedgeTakenCount * Step
  // where Step is the absolute stride of the memory accesses in bytes,
  // then there is no dependence.
  //
  // Rationale:
  // We basically want to check if the absolute distance (|Dist/Step|)
  // is >= the loop iteration count (or > BackedgeTakenCount).
  // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
  // Section 4.2.1); Note, that for vectorization it is sufficient to prove
  // that the dependence distance is >= VF; This is checked elsewhere.
  // But in some cases we can prune unknown dependence distances early, and
  // even before selecting the VF, and without a runtime test, by comparing
  // the distance against the loop iteration count. Since the vectorized code
  // will be executed only if LoopCount >= VF, proving distance >= LoopCount
  // also guarantees that distance >= VF.
  //
  const uint64_t ByteStride = Stride * TypeByteSize;
  const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
  const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);

  const SCEV *CastedDist = &Dist;
  const SCEV *CastedProduct = Product;
  uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
  uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());

  // The dependence distance can be positive/negative, so we sign extend Dist;
  // The multiplication of the absolute stride in bytes and the
  // backedgeTakenCount is non-negative, so we zero extend Product.
  if (DistTypeSize > ProductTypeSize)
    CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
  else
    CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());

  // Is  Dist - (BackedgeTakenCount * Step) > 0 ?
  // (If so, then we have proven (**) because |Dist| >= Dist)
  const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
  if (SE.isKnownPositive(Minus))
    return true;

  // Second try: Is  -Dist - (BackedgeTakenCount * Step) > 0 ?
  // (If so, then we have proven (**) because |Dist| >= -1*Dist)
  const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
  Minus = SE.getMinusSCEV(NegDist, CastedProduct);
  if (SE.isKnownPositive(Minus))
    return true;

  return false;
}
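
// Worked example (expository addition): in the PR31098-style loop above with
// i32 elements, Dist is the symbolic 4*D, Stride = 1, TypeByteSize = 4 and
// BackedgeTakenCount = D - 1, so
//   Dist - BackedgeTakenCount * ByteStride = 4*D - 4*(D - 1) = 4 > 0,
// which proves (**) and discards the unknown dependence without any runtime
// check.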

/// Check the dependence for two accesses with the same stride \p Stride.
/// \p Distance is the positive distance and \p TypeByteSize is type size in
/// bytes.
///
/// \returns true if they are independent.
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
                                          uint64_t TypeByteSize) {
  assert(Stride > 1 && "The stride must be greater than 1");
  assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
  assert(Distance > 0 && "The distance must be non-zero");

  // Skip if the distance is not multiple of type byte size.
  if (Distance % TypeByteSize)
    return false;

  uint64_t ScaledDist = Distance / TypeByteSize;

  // No dependence if the scaled distance is not multiple of the stride.
  // E.g.
  //      for (i = 0; i < 1024 ; i += 4)
  //        A[i+2] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 2, stride is 4):
  //     | A[0] |      |      |      | A[4] |      |      |      |
  //     |      |      | A[2] |      |      |      | A[6] |      |
  //
  // E.g.
  //      for (i = 0; i < 1024 ; i += 3)
  //        A[i+4] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 4, stride is 3):
  //     | A[0] |      |      | A[3] |      |      | A[6] |      |      |
  //     |      |      |      |      | A[4] |      |      | A[7] |      |
  return ScaledDist % Stride;
}
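
// Worked example (expository addition): for "A[i+2] = A[i]" with i += 4 over
// i32, Distance = 8, ScaledDist = 2, Stride = 4 and 2 % 4 != 0, so the two
// streams never touch the same element and true is returned. For
// "A[i+4] = A[i]" with i += 2, ScaledDist = 4 and Stride = 2 give
// 4 % 2 == 0: the iterations collide and the accesses stay dependent.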
1451 
1453 MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
1454  const MemAccessInfo &B, unsigned BIdx,
1455  const ValueToValueMap &Strides) {
1456  assert (AIdx < BIdx && "Must pass arguments in program order");
1457 
1458  Value *APtr = A.getPointer();
1459  Value *BPtr = B.getPointer();
1460  bool AIsWrite = A.getInt();
1461  bool BIsWrite = B.getInt();
1462 
1463  // Two reads are independent.
1464  if (!AIsWrite && !BIsWrite)
1465  return Dependence::NoDep;
1466 
1467  // We cannot check pointers in different address spaces.
1468  if (APtr->getType()->getPointerAddressSpace() !=
1469  BPtr->getType()->getPointerAddressSpace())
1470  return Dependence::Unknown;
1471 
1472  int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
1473  int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);
1474 
1475  const SCEV *Src = PSE.getSCEV(APtr);
1476  const SCEV *Sink = PSE.getSCEV(BPtr);
1477 
1478  // If the induction step is negative we have to invert source and sink of the
1479  // dependence.
1480  if (StrideAPtr < 0) {
1481  std::swap(APtr, BPtr);
1482  std::swap(Src, Sink);
1483  std::swap(AIsWrite, BIsWrite);
1484  std::swap(AIdx, BIdx);
1485  std::swap(StrideAPtr, StrideBPtr);
1486  }
1487 
1488  const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);
1489 
1490  LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
1491  << "(Induction step: " << StrideAPtr << ")\n");
1492  LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
1493  << *InstMap[BIdx] << ": " << *Dist << "\n");
1494 
1495  // Need accesses with constant stride. We don't want to vectorize
1496  // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
1497  // the address space.
1498  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
1499  LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
1500  return Dependence::Unknown;
1501  }
1502 
1503  Type *ATy = APtr->getType()->getPointerElementType();
1504  Type *BTy = BPtr->getType()->getPointerElementType();
1505  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
1506  uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
1507  uint64_t Stride = std::abs(StrideAPtr);
1508  const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
1509  if (!C) {
1510  if (TypeByteSize == DL.getTypeAllocSize(BTy) &&
1511  isSafeDependenceDistance(DL, *(PSE.getSE()),
1512  *(PSE.getBackedgeTakenCount()), *Dist, Stride,
1513  TypeByteSize))
1514  return Dependence::NoDep;
1515 
1516  LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
1517  FoundNonConstantDistanceDependence = true;
1518  return Dependence::Unknown;
1519  }
1520 
1521  const APInt &Val = C->getAPInt();
1522  int64_t Distance = Val.getSExtValue();
1523 
1524  // Attempt to prove strided accesses independent.
1525  if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
1526  areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
1527  LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
1528  return Dependence::NoDep;
1529  }
1530 
1531  // Negative distances are not plausible dependencies.
1532  if (Val.isNegative()) {
1533  bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
1534  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1535  (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
1536  ATy != BTy)) {
1537  LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
1539  }
1540 
1541  LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
1542  return Dependence::Forward;
1543  }
1544 
1545  // Write to the same location with the same size.
1546  // Could be improved to assert type sizes are the same (i32 == float, etc).
1547  if (Val == 0) {
1548  if (ATy == BTy)
1549  return Dependence::Forward;
1550  LLVM_DEBUG(
1551  dbgs() << "LAA: Zero dependence difference but different types\n");
1552  return Dependence::Unknown;
1553  }
1554 
1555  assert(Val.isStrictlyPositive() && "Expect a positive value");
1556 
1557  if (ATy != BTy) {
1558  LLVM_DEBUG(
1559  dbgs()
1560  << "LAA: ReadWrite-Write positive dependency with different types\n");
1561  return Dependence::Unknown;
1562  }
1563 
1564  // Bail out early if passed-in parameters make vectorization not feasible.
1565  unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
1566  VectorizerParams::VectorizationFactor : 1);
1567  unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
1568  VectorizerParams::VectorizationInterleave : 1);
1569  // The minimum number of iterations for a vectorized/unrolled version.
1570  unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
1571 
1572  // It's not vectorizable if the distance is smaller than the minimum distance
1573  // needed for a vectorized/unrolled version. Vectorizing one iteration ahead
1574  // needs TypeByteSize * Stride. Vectorizing the last iteration needs
1575  // TypeByteSize (no need to add the last gap distance).
1576  //
1577  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1578  // foo(int *A) {
1579  // int *B = (int *)((char *)A + 14);
1580  // for (i = 0 ; i < 1024 ; i += 2)
1581  // B[i] = A[i] + 1;
1582  // }
1583  //
1584  // Two accesses in memory (stride is 2):
1585  // | A[0] | | A[2] | | A[4] | | A[6] | |
1586  // | B[0] | | B[2] | | B[4] |
1587  //
1588  // Distance needed for vectorizing all iterations except the last:
1589  // 4 * 2 * (MinNumIter - 1). Distance needed for the last iteration: 4.
1590  // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
1591  //
1592  // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
1593  // 12, which is less than the distance of 14.
1594  //
1595  // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
1596  // the minimum distance needed is 28, which is greater than distance. It is
1597  // not safe to do vectorization.
1598  uint64_t MinDistanceNeeded =
1599  TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
1600  if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
1601  LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "
1602  << Distance << '\n');
1603  return Dependence::Backward;
1604  }
1605 
1606  // Unsafe if the minimum distance needed is greater than max safe distance.
1607  if (MinDistanceNeeded > MaxSafeDepDistBytes) {
1608  LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
1609  << MinDistanceNeeded << " size in bytes");
1610  return Dependence::Backward;
1611  }
1612 
1613  // Positive distance bigger than max vectorization factor.
1614  // FIXME: Should use the max factor instead of the max distance in bytes,
1615  // which cannot handle different types.
1616  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1617  // void foo (int *A, char *B) {
1618  // for (unsigned i = 0; i < 1024; i++) {
1619  // A[i+2] = A[i] + 1;
1620  // B[i+2] = B[i] + 1;
1621  // }
1622  // }
1623  //
1624  // This case is currently unsafe according to the max safe distance. If we
1625  // analyze the two accesses on array B, the max safe dependence distance
1626  // is 2. When we then analyze the accesses on array A, the minimum distance
1627  // needed is 8, which exceeds 2, so vectorization is forbidden. But actually
1628  // both A and B could be vectorized by 2 iterations.
1629  MaxSafeDepDistBytes =
1630  std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
1631 
1632  bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
1633  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1634  couldPreventStoreLoadForward(Distance, TypeByteSize))
1635  return Dependence::BackwardVectorizableButPreventsForwarding;
1636 
1637  uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
1638  LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
1639  << " with max VF = " << MaxVF << '\n');
1640  uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
1641  MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits);
1642  return Dependence::BackwardVectorizable;
1643 }
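// Illustrative sketch (not part of the LLVM sources): the backward-dependence
// rule above, restated standalone so the arithmetic from the foo(int *A)
// example in the comments can be checked. Names mirror the locals above.
#include <cassert>
#include <cstdint>

static bool backwardDistanceIsSafe(uint64_t Distance, uint64_t Stride,
                                   uint64_t TypeByteSize, unsigned MinNumIter) {
  // Same formula as above: every vectorized iteration but the last needs
  // TypeByteSize * Stride bytes of distance; the last needs TypeByteSize.
  uint64_t MinDistanceNeeded =
      TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
  return MinDistanceNeeded <= Distance;
}

int main() {
  assert(backwardDistanceIsSafe(14, 2, 4, 2));  // needs 12 <= 14: safe
  assert(!backwardDistanceIsSafe(14, 2, 4, 4)); // needs 28 > 14: unsafe
  return 0;
}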
1644 
1645 bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
1646  MemAccessInfoList &CheckDeps,
1647  const ValueToValueMap &Strides) {
1648 
1649  MaxSafeDepDistBytes = -1;
1650  SmallPtrSet<MemAccessInfo, 8> Visited;
1651  for (MemAccessInfo CurAccess : CheckDeps) {
1652  if (Visited.count(CurAccess))
1653  continue;
1654 
1655  // Get the relevant memory access set.
1656  EquivalenceClasses<MemAccessInfo>::iterator I =
1657  AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
1658 
1659  // Check accesses within this set.
1660  EquivalenceClasses<MemAccessInfo>::member_iterator AI =
1661  AccessSets.member_begin(I);
1662  EquivalenceClasses<MemAccessInfo>::member_iterator AE =
1663  AccessSets.member_end();
1664 
1665  // Check every access pair.
1666  while (AI != AE) {
1667  Visited.insert(*AI);
1668  bool AIIsWrite = AI->getInt();
1669  // Check loads only against the next members of the equivalence class, but
1670  // check stores also against other stores in the same equivalence class -
1671  // to the same address.
1671  EquivalenceClasses<MemAccessInfo>::member_iterator OI =
1672  (AIIsWrite ? AI : std::next(AI));
1673  while (OI != AE) {
1674  // Check every accessing instruction pair in program order.
1675  for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
1676  I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
1677  // Scan all accesses of another equivalence class, but only the next
1678  // accesses of the same equivalence class.
1679  for (std::vector<unsigned>::iterator
1680  I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),
1681  I2E = (OI == AI ? I1E : Accesses[*OI].end());
1682  I2 != I2E; ++I2) {
1683  auto A = std::make_pair(&*AI, *I1);
1684  auto B = std::make_pair(&*OI, *I2);
1685 
1686  assert(*I1 != *I2);
1687  if (*I1 > *I2)
1688  std::swap(A, B);
1689 
1690  Dependence::DepType Type =
1691  isDependent(*A.first, A.second, *B.first, B.second, Strides);
1692  mergeInStatus(Dependence::isSafeForVectorization(Type));
1693 
1694  // Gather dependences unless we accumulated MaxDependences
1695  // dependences. In that case return as soon as we find the first
1696  // unsafe dependence. This puts a limit on this quadratic
1697  // algorithm.
1698  if (RecordDependences) {
1699  if (Type != Dependence::NoDep)
1700  Dependences.push_back(Dependence(A.second, B.second, Type));
1701 
1702  if (Dependences.size() >= MaxDependences) {
1703  RecordDependences = false;
1704  Dependences.clear();
1705  LLVM_DEBUG(dbgs()
1706  << "Too many dependences, stopped recording\n");
1707  }
1708  }
1709  if (!RecordDependences && !isSafeForVectorization())
1710  return false;
1711  }
1712  ++OI;
1713  }
1714  AI++;
1715  }
1716  }
1717 
1718  LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
1719  return isSafeForVectorization();
1720 }
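// Illustrative standalone model of the pair enumeration above: members of an
// equivalence class are visited in order; a store member is also paired with
// itself (distinct instructions to the same address), while a load member is
// only paired with the members after it. Not LLVM code; names are invented.
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  struct Member { bool IsWrite; const char *Name; };
  std::vector<Member> Members = {{false, "load A"}, {true, "store A"}};
  for (std::size_t AI = 0; AI < Members.size(); ++AI)
    for (std::size_t OI = Members[AI].IsWrite ? AI : AI + 1;
         OI < Members.size(); ++OI)
      std::printf("check %s vs %s\n", Members[AI].Name, Members[OI].Name);
  return 0;
}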
1721 
1722 SmallVector<Instruction *, 4>
1723 MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
1724  MemAccessInfo Access(Ptr, isWrite);
1725  auto &IndexVector = Accesses.find(Access)->second;
1726 
1727  SmallVector<Instruction *, 4> Insts;
1728  transform(IndexVector,
1729  std::back_inserter(Insts),
1730  [&](unsigned Idx) { return this->InstMap[Idx]; });
1731  return Insts;
1732 }
1733 
1734 const char *MemoryDepChecker::Dependence::DepName[] = {
1735  "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
1736  "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
1737 
1738 void MemoryDepChecker::Dependence::print(
1739  raw_ostream &OS, unsigned Depth,
1740  const SmallVectorImpl<Instruction *> &Instrs) const {
1741  OS.indent(Depth) << DepName[Type] << ":\n";
1742  OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
1743  OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
1744 }
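// For reference, the format produced above looks like the following
// (instruction text is illustrative; indentation follows Depth and Depth + 2):
//   Backward:
//       %v = load i32, i32* %gepA, align 4 ->
//       store i32 %add, i32* %gepB, align 4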
1745 
1746 bool LoopAccessInfo::canAnalyzeLoop() {
1747  // We need to have a loop header.
1748  LLVM_DEBUG(dbgs() << "LAA: Found a loop in "
1749  << TheLoop->getHeader()->getParent()->getName() << ": "
1750  << TheLoop->getHeader()->getName() << '\n');
1751 
1752  // We can only analyze innermost loops.
1753  if (!TheLoop->isInnermost()) {
1754  LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
1755  recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
1756  return false;
1757  }
1758 
1759  // We must have a single backedge.
1760  if (TheLoop->getNumBackEdges() != 1) {
1761  LLVM_DEBUG(
1762  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1763  recordAnalysis("CFGNotUnderstood")
1764  << "loop control flow is not understood by analyzer";
1765  return false;
1766  }
1767 
1768  // ScalarEvolution needs to be able to find the exit count.
1769  const SCEV *ExitCount = PSE->getBackedgeTakenCount();
1770  if (isa<SCEVCouldNotCompute>(ExitCount)) {
1771  recordAnalysis("CantComputeNumberOfIterations")
1772  << "could not determine number of loop iterations";
1773  LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
1774  return false;
1775  }
1776 
1777  return true;
1778 }
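// Illustrative examples (assumed C; not from the LLVM sources) of loops that
// pass and fail the checks above when lowered in the usual way:
void analyzable(int *A, int n) {
  for (int i = 0; i < n; ++i) // innermost, a single backedge, and SCEV can
    A[i] += 1;                // compute the exit count: can be analyzed
}

void notAnalyzable(int *A, int n) {
  for (int i = 0; i < n; ++i)   // the outer loop is rejected: only the
    for (int j = 0; j < n; ++j) // innermost loop is analyzed
      A[i * n + j] += 1;
}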
1779 
1780 void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
1781  const TargetLibraryInfo *TLI,
1782  DominatorTree *DT) {
1783  typedef SmallPtrSet<Value*, 16> ValueSet;
1784 
1785  // Holds the Load and Store instructions.
1786  SmallVector<LoadInst *, 16> Loads;
1787  SmallVector<StoreInst *, 16> Stores;
1788 
1789  // Holds all the different accesses in the loop.
1790  unsigned NumReads = 0;
1791  unsigned NumReadWrites = 0;
1792 
1793  bool HasComplexMemInst = false;
1794 
1795  // A runtime check is only legal to insert if there are no convergent calls.
1796  HasConvergentOp = false;
1797 
1798  PtrRtChecking->Pointers.clear();
1799  PtrRtChecking->Need = false;
1800 
1801  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
1802 
1803  const bool EnableMemAccessVersioningOfLoop =
1804  EnableMemAccessVersioning &&
1805  !TheLoop->getHeader()->getParent()->hasOptSize();
1806 
1807  // For each block.
1808  for (BasicBlock *BB : TheLoop->blocks()) {
1809  // Scan the BB and collect legal loads and stores. Also detect any
1810  // convergent instructions.
1811  for (Instruction &I : *BB) {
1812  if (auto *Call = dyn_cast<CallBase>(&I)) {
1813  if (Call->isConvergent())
1814  HasConvergentOp = true;
1815  }
1816 
1817  // Once we have found both a non-vectorizable memory instruction and a
1818  // convergent operation in this loop, there is no reason to continue the search.
1819  if (HasComplexMemInst && HasConvergentOp) {
1820  CanVecMem = false;
1821  return;
1822  }
1823 
1824  // Avoid hitting recordAnalysis multiple times.
1825  if (HasComplexMemInst)
1826  continue;
1827 
1828  // If this is a load, save it. If this instruction can read from memory
1829  // but is not a load, then we quit. Notice that we don't handle function
1830  // calls that read or write.
1831  if (I.mayReadFromMemory()) {
1832  // Many math library functions read the rounding mode. We will only
1833  // vectorize a loop if it contains known function calls that don't set
1834  // the flag. Therefore, it is safe to ignore this read from memory.
1835  auto *Call = dyn_cast<CallInst>(&I);
1836  if (Call && getVectorIntrinsicIDForCall(Call, TLI))
1837  continue;
1838 
1839  // If the function has an explicit vectorized counterpart, we can safely
1840  // assume that it can be vectorized.
1841  if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
1842  !VFDatabase::getMappings(*Call).empty())
1843  continue;
1844 
1845  auto *Ld = dyn_cast<LoadInst>(&I);
1846  if (!Ld) {
1847  recordAnalysis("CantVectorizeInstruction", Ld)
1848  << "instruction cannot be vectorized";
1849  HasComplexMemInst = true;
1850  continue;
1851  }
1852  if (!Ld->isSimple() && !IsAnnotatedParallel) {
1853  recordAnalysis("NonSimpleLoad", Ld)
1854  << "read with atomic ordering or volatile read";
1855  LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
1856  HasComplexMemInst = true;
1857  continue;
1858  }
1859  NumLoads++;
1860  Loads.push_back(Ld);
1861  DepChecker->addAccess(Ld);
1862  if (EnableMemAccessVersioningOfLoop)
1863  collectStridedAccess(Ld);
1864  continue;
1865  }
1866 
1867  // Save 'store' instructions. Abort if other instructions write to memory.
1868  if (I.mayWriteToMemory()) {
1869  auto *St = dyn_cast<StoreInst>(&I);
1870  if (!St) {
1871  recordAnalysis("CantVectorizeInstruction", St)
1872  << "instruction cannot be vectorized";
1873  HasComplexMemInst = true;
1874  continue;
1875  }
1876  if (!St->isSimple() && !IsAnnotatedParallel) {
1877  recordAnalysis("NonSimpleStore", St)
1878  << "write with atomic ordering or volatile write";
1879  LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
1880  HasComplexMemInst = true;
1881  continue;
1882  }
1883  NumStores++;
1884  Stores.push_back(St);
1885  DepChecker->addAccess(St);
1886  if (EnableMemAccessVersioningOfLoop)
1887  collectStridedAccess(St);
1888  }
1889  } // Next instr.
1890  } // Next block.
1891 
1892  if (HasComplexMemInst) {
1893  CanVecMem = false;
1894  return;
1895  }
1896 
1897  // Now we have two lists that hold the loads and the stores.
1898  // Next, we find the pointers that they use.
1899 
1900  // Check if we see any stores. If there are no stores, then we don't
1901  // care if the pointers are *restrict*.
1902  if (!Stores.size()) {
1903  LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
1904  CanVecMem = true;
1905  return;
1906  }
1907 
1908  MemoryDepChecker::DepCandidates DependentAccesses;
1909  AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE);
1910 
1911  // Holds the analyzed pointers. We don't want to call getUnderlyingObjects
1912  // multiple times on the same object. If the ptr is accessed twice, once
1913  // for read and once for write, it will only appear once (on the write
1914  // list). This is okay, since we are going to check for conflicts between
1915  // writes and between reads and writes, but not between reads and reads.
1916  ValueSet Seen;
1917 
1918  // Record uniform store addresses to identify if we have multiple stores
1919  // to the same address.
1920  ValueSet UniformStores;
1921 
1922  for (StoreInst *ST : Stores) {
1923  Value *Ptr = ST->getPointerOperand();
1924 
1925  if (isUniform(Ptr))
1926  HasDependenceInvolvingLoopInvariantAddress |=
1927  !UniformStores.insert(Ptr).second;
1928 
1929  // If we did *not* see this pointer before, insert it to the read-write
1930  // list. At this phase it is only a 'write' list.
1931  if (Seen.insert(Ptr).second) {
1932  ++NumReadWrites;
1933 
1934  MemoryLocation Loc = MemoryLocation::get(ST);
1935  // The TBAA metadata could have a control dependency on the predication
1936  // condition, so we cannot rely on it when determining whether or not we
1937  // need runtime pointer checks.
1938  if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
1939  Loc.AATags.TBAA = nullptr;
1940 
1941  Accesses.addStore(Loc);
1942  }
1943  }
1944 
1945  if (IsAnnotatedParallel) {
1946  LLVM_DEBUG(
1947  dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
1948  << "checks.\n");
1949  CanVecMem = true;
1950  return;
1951  }
1952 
1953  for (LoadInst *LD : Loads) {
1954  Value *Ptr = LD->getPointerOperand();
1955  // If we did *not* see this pointer before, insert it into the
1956  // read list. If we *did* see it before, then it is already in
1957  // the read-write list. This allows us to vectorize expressions
1958  // such as A[i] += x, because the address of A[i] is a read-write
1959  // pointer. This only works if the index of A[i] is consecutive.
1960  // If the index is unknown (for example A[B[i]]) then we may
1961  // read a few words, modify, and write a few words, and some of the
1962  // words may be written to the same address.
1963  bool IsReadOnlyPtr = false;
1964  if (Seen.insert(Ptr).second ||
1965  !getPtrStride(*PSE, Ptr, TheLoop, SymbolicStrides)) {
1966  ++NumReads;
1967  IsReadOnlyPtr = true;
1968  }
1969 
1970  // See if there is an unsafe dependency between a load from a uniform address
1971  // and a store to the same uniform address.
1972  if (UniformStores.count(Ptr)) {
1973  LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
1974  "load and uniform store to the same address!\n");
1975  HasDependenceInvolvingLoopInvariantAddress = true;
1976  }
1977 
1978  MemoryLocation Loc = MemoryLocation::get(LD);
1979  // The TBAA metadata could have a control dependency on the predication
1980  // condition, so we cannot rely on it when determining whether or not we
1981  // need runtime pointer checks.
1982  if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
1983  Loc.AATags.TBAA = nullptr;
1984 
1985  Accesses.addLoad(Loc, IsReadOnlyPtr);
1986  }
1987 
1988  // If we write (or read-write) to a single destination and there are no
1989  // other reads in this loop then it is safe to vectorize.
1990  if (NumReadWrites == 1 && NumReads == 0) {
1991  LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
1992  CanVecMem = true;
1993  return;
1994  }
1995 
1996  // Build dependence sets and check whether we need a runtime pointer bounds
1997  // check.
1998  Accesses.buildDependenceSets();
1999 
2000  // Find pointers with computable bounds. We are going to use this information
2001  // to place a runtime bound check.
2002  bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
2003  TheLoop, SymbolicStrides);
2004  if (!CanDoRTIfNeeded) {
2005  recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds";
2006  LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
2007  << "the array bounds.\n");
2008  CanVecMem = false;
2009  return;
2010  }
2011 
2012  LLVM_DEBUG(
2013  dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");
2014 
2015  CanVecMem = true;
2016  if (Accesses.isDependencyCheckNeeded()) {
2017  LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
2018  CanVecMem = DepChecker->areDepsSafe(
2019  DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
2020  MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
2021 
2022  if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
2023  LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
2024 
2025  // Clear the dependency checks. We assume they are not needed.
2026  Accesses.resetDepChecks(*DepChecker);
2027 
2028  PtrRtChecking->reset();
2029  PtrRtChecking->Need = true;
2030 
2031  auto *SE = PSE->getSE();
2032  CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop,
2033  SymbolicStrides, true);
2034 
2035  // Check that we found the bounds for the pointer.
2036  if (!CanDoRTIfNeeded) {
2037  recordAnalysis("CantCheckMemDepsAtRunTime")
2038  << "cannot check memory dependencies at runtime";
2039  LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
2040  CanVecMem = false;
2041  return;
2042  }
2043 
2044  CanVecMem = true;
2045  }
2046  }
2047 
2048  if (HasConvergentOp) {
2049  recordAnalysis("CantInsertRuntimeCheckWithConvergent")
2050  << "cannot add control dependency to convergent operation";
2051  LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
2052  "would be needed with a convergent operation\n");
2053  CanVecMem = false;
2054  return;
2055  }
2056 
2057  if (CanVecMem)
2058  LLVM_DEBUG(
2059  dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
2060  << (PtrRtChecking->Need ? "" : " don't")
2061  << " need runtime memory checks.\n");
2062  else {
2063  recordAnalysis("UnsafeMemDep")
2064  << "unsafe dependent memory operations in loop. Use "
2065  "#pragma loop distribute(enable) to allow loop distribution "
2066  "to attempt to isolate the offending operations into a separate "
2067  "loop";
2068  LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
2069  }
2070 }
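// Illustrative example (assumed C; not from the LLVM sources) of the
// loop-invariant-address case tracked above: the loop both loads from and
// stores to the uniform address Sum, so the analysis records a dependence
// involving a loop-invariant address.
void sumInto(int *A, int *Sum, int n) {
  for (int i = 0; i < n; ++i)
    *Sum += A[i]; // load and store of the same loop-invariant address
}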
2071 
2072 bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
2073  DominatorTree *DT) {
2074  assert(TheLoop->contains(BB) && "Unknown block used");
2075 
2076  // Blocks that do not dominate the latch need predication.
2077  BasicBlock* Latch = TheLoop->getLoopLatch();
2078  return !DT->dominates(BB, Latch);
2079 }
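// Illustrative example (assumed C; not from the LLVM sources): the 'if' body
// below becomes a block that does not dominate the loop latch, so by the rule
// above it must be predicated when the loop is vectorized.
void clamp(int *A, int n) {
  for (int i = 0; i < n; ++i)
    if (A[i] > 255)
      A[i] = 255;
}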
2080 
2081 OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
2082  Instruction *I) {
2083  assert(!Report && "Multiple reports generated");
2084 
2085  Value *CodeRegion = TheLoop->getHeader();
2086  DebugLoc DL = TheLoop->getStartLoc();
2087 
2088  if (I) {
2089  CodeRegion = I->getParent();
2090  // If there is no debug location attached to the instruction, fall back to
2091  // using the loop's.
2092  if (I->getDebugLoc())
2093  DL = I->getDebugLoc();
2094  }
2095 
2096  Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
2097  CodeRegion);
2098  return *Report;
2099 }
2100 
2101 bool LoopAccessInfo::isUniform(Value *V) const {
2102  auto *SE = PSE->getSE();
2103  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
2104  // never considered uniform.
2105  // TODO: Is this really what we want? Even without FP SCEV, we may want some
2106  // trivially loop-invariant FP values to be considered uniform.
2107  if (!SE->isSCEVable(V->getType()))
2108  return false;
2109  return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
2110 }
2111 
2112 void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
2113  Value *Ptr = getLoadStorePointerOperand(MemAccess);
2114  if (!Ptr)
2115  return;
2116 
2117  Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
2118  if (!Stride)
2119  return;
2120 
2121  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
2122  "versioning:");
2123  LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
2124 
2125  // Avoid adding the "Stride == 1" predicate when we know that
2126  // Stride >= Trip-Count. Such a predicate will effectively optimize a single
2127  // or zero iteration loop, as Trip-Count <= Stride == 1.
2128  //
2129  // TODO: We are currently not making a very informed decision on when it is
2130  // beneficial to apply stride versioning. It might make more sense that the
2131  // users of this analysis (such as the vectorizer) will trigger it, based on
2132  // their specific cost considerations; For example, in cases where stride
2133  // versioning does not help resolving memory accesses/dependences, the
2134  // vectorizer should evaluate the cost of the runtime test, and the benefit
2135  // of various possible stride specializations, considering the alternatives
2136  // of using gather/scatters (if available).
2137 
2138  const SCEV *StrideExpr = PSE->getSCEV(Stride);
2139  const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
2140 
2141  // Match the types so we can compare the stride and the BETakenCount.
2142  // The Stride can be positive/negative, so we sign extend Stride;
2143  // The backedgeTakenCount is non-negative, so we zero extend BETakenCount.
2144  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2145  uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
2146  uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
2147  const SCEV *CastedStride = StrideExpr;
2148  const SCEV *CastedBECount = BETakenCount;
2149  ScalarEvolution *SE = PSE->getSE();
2150  if (BETypeSize >= StrideTypeSize)
2151  CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
2152  else
2153  CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
2154  const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
2155  // Since TripCount == BackEdgeTakenCount + 1, checking:
2156  // "Stride >= TripCount" is equivalent to checking:
2157  // Stride - BETakenCount > 0
2158  if (SE->isKnownPositive(StrideMinusBETaken)) {
2159  LLVM_DEBUG(
2160  dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
2161  "Stride==1 predicate will imply that the loop executes "
2162  "at most once.\n");
2163  return;
2164  }
2165  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.");
2166 
2167  SymbolicStrides[Ptr] = Stride;
2168  StrideSet.insert(Stride);
2169 }
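// Illustrative example (assumed C; not from the LLVM sources) of a
// symbolically strided access that collectStridedAccess records: versioning
// on the "Stride == 1" predicate lets the A[i] / B[i] form be vectorized,
// with a run-time guard selecting between the two versions.
void scale(int *A, int *B, int Stride, int n) {
  for (int i = 0; i < n; ++i)
    A[i * Stride] += B[i * Stride];
  // Conceptually versioned as:
  //   if (Stride == 1) { vectorized loop over A[i] and B[i] }
  //   else             { original scalar loop }
}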
2170 
2171 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
2172  const TargetLibraryInfo *TLI, AAResults *AA,
2173  DominatorTree *DT, LoopInfo *LI)
2174  : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
2175  PtrRtChecking(std::make_unique<RuntimePointerChecking>(SE)),
2176  DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
2177  NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
2178  HasConvergentOp(false),
2179  HasDependenceInvolvingLoopInvariantAddress(false) {
2180  if (canAnalyzeLoop())
2181  analyzeLoop(AA, LI, TLI, DT);
2182 }
2183 
2184 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
2185  if (CanVecMem) {
2186  OS.indent(Depth) << "Memory dependences are safe";
2187  if (MaxSafeDepDistBytes != -1ULL)
2188  OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
2189  << " bytes";
2190  if (PtrRtChecking->Need)
2191  OS << " with run-time checks";
2192  OS << "\n";
2193  }
2194 
2195  if (HasConvergentOp)
2196  OS.indent(Depth) << "Has convergent operation in loop\n";
2197 
2198  if (Report)
2199  OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
2200 
2201  if (auto *Dependences = DepChecker->getDependences()) {
2202  OS.indent(Depth) << "Dependences:\n";
2203  for (auto &Dep : *Dependences) {
2204  Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
2205  OS << "\n";
2206  }
2207  } else
2208  OS.indent(Depth) << "Too many dependences, not recorded\n";
2209 
2210  // List the pairs of accesses that need run-time checks to prove independence.
2211  PtrRtChecking->print(OS, Depth);
2212  OS << "\n";
2213 
2214  OS.indent(Depth) << "Non vectorizable stores to invariant address were "
2215  << (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
2216  << "found in loop.\n";
2217 
2218  OS.indent(Depth) << "SCEV assumptions:\n";
2219  PSE->getUnionPredicate().print(OS, Depth);
2220 
2221  OS << "\n";
2222 
2223  OS.indent(Depth) << "Expressions re-written:\n";
2224  PSE->print(OS, Depth);
2225 }
2226 
2227 LoopAccessLegacyAnalysis::LoopAccessLegacyAnalysis() : FunctionPass(ID) {
2228  initializeLoopAccessLegacyAnalysisPass(*PassRegistry::getPassRegistry());
2229 }
2230 
2231 const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
2232  auto &LAI = LoopAccessInfoMap[L];
2233 
2234  if (!LAI)
2235  LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
2236 
2237  return *LAI.get();
2238 }
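// Sketch of how a legacy-PM client consumes this analysis, modeled on in-tree
// users such as LoopDistribute (fragment only; it assumes a pass that declared
// LoopAccessLegacyAnalysis in its getAnalysisUsage). The result can also be
// printed from the command line, e.g. with the assumed invocation
// "opt -passes='print-access-info' -disable-output input.ll".
static void processLoop(LoopAccessLegacyAnalysis &LAA, Loop *L) {
  const LoopAccessInfo &LAI = LAA.getInfo(L);
  if (!LAI.canVectorizeMemory())
    return; // unsafe dependences; a transform would bail out here
  if (LAI.getRuntimePointerChecking()->Need) {
    // Run-time pointer checks must be emitted before transforming.
  }
}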
2239 
2240 void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
2241  LoopAccessLegacyAnalysis &LAA = *const_cast<LoopAccessLegacyAnalysis *>(this);
2242 
2243  for (Loop *TopLevelLoop : *LI)
2244  for (Loop *L : depth_first(TopLevelLoop)) {
2245  OS.indent(2) << L->getHeader()->getName() << ":\n";
2246  auto &LAI = LAA.getInfo(L);
2247  LAI.print(OS, 4);
2248  }
2249 }
2250 
2251 bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
2252  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2253  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2254  TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
2255  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2256  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2257  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2258 
2259  return false;
2260 }
2261 
2262 void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
2263  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
2264  AU.addRequiredTransitive<AAResultsWrapperPass>();
2265  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
2266  AU.addRequiredTransitive<LoopInfoWrapperPass>();
2267 
2268  AU.setPreservesAll();
2269 }
2270 
2271 char LoopAccessLegacyAnalysis::ID = 0;
2272 static const char laa_name[] = "Loop Access Analysis";
2273 #define LAA_NAME "loop-accesses"
2274 
2275 INITIALIZE_PASS_BEGIN(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2276 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2277 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
2278 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2279 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
2280 INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2281 
2282 AnalysisKey LoopAccessAnalysis::Key;
2283 
2284 LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM,
2285  LoopStandardAnalysisResults &AR) {
2286  return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
2287 }
2288 
2289 namespace llvm {
2290 
2291  Pass *createLAAPass() {
2292  return new LoopAccessLegacyAnalysis();
2293  }
2294 
2295 } // end namespace llvm
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::MemoryDepChecker::clearDependences
void clearDependences()
Definition: LoopAccessAnalysis.h:238
llvm::APInt::isStrictlyPositive
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:392
set
We currently generate a but we really shouldn eax ecx xorl edx divl ecx eax divl ecx movl eax ret A similar code sequence works for division We currently compile i32 v2 eax eax jo LBB1_2 atomic and others It is also currently not done for read modify write instructions It is also current not done if the OF or CF flags are needed The shift operators have the complication that when the shift count is EFLAGS is not set
Definition: README.txt:1277
llvm::LoopAccessLegacyAnalysis::ID
static char ID
Definition: LoopAccessAnalysis.h:719
llvm::MemoryDepChecker::Dependence::isBackward
bool isBackward() const
Lexically backward dependence.
Definition: LoopAccessAnalysis.cpp:1264
llvm::MemoryDepChecker
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
Definition: LoopAccessAnalysis.h:87
llvm::LoopAccessLegacyAnalysis
This analysis provides dependence information for the memory accesses of a loop.
Definition: LoopAccessAnalysis.h:717
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:100
llvm::MemoryLocation::get
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Definition: MemoryLocation.cpp:37
llvm::ScalarEvolution::getNegativeSCEV
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
Definition: ScalarEvolution.cpp:3932
llvm
Definition: AllocatorList.h:23
llvm::sys::path::const_iterator::end
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
getMinFromExprs
static const SCEV * getMinFromExprs(const SCEV *I, const SCEV *J, ScalarEvolution *SE)
Compare I and J and return the minimum.
Definition: LoopAccessAnalysis.cpp:270
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::EquivalenceClasses::end
iterator end() const
Definition: EquivalenceClasses.h:147
llvm::MemoryDepChecker::VectorizationSafetyStatus::Safe
@ Safe
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::M68kBeads::DA
@ DA
Definition: M68kBaseInfo.h:59
llvm::LoopAccessInfo::isUniform
bool isUniform(Value *V) const
Returns true if the value V is uniform within the loop.
Definition: LoopAccessAnalysis.cpp:2101
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:378
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:229
llvm::MemoryLocation::Ptr
const Value * Ptr
The address of the start of the location.
Definition: MemoryLocation.h:217
llvm::SCEVAddRecExpr::getStart
const SCEV * getStart() const
Definition: ScalarEvolutionExpressions.h:362
llvm::getVectorIntrinsicIDForCall
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
Definition: VectorUtils.cpp:120
llvm::RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup
RuntimeCheckingPtrGroup(unsigned Index, RuntimePointerChecking &RtCheck)
Create a new pointer checking group containing a single pointer, with index Index in RtCheck.
Definition: LoopAccessAnalysis.cpp:171
llvm::Function
Definition: Function.h:61
llvm::cl::location
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:456
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::getPointersDiff
Optional< int > getPointersDiff(Value *PtrA, Value *PtrB, const DataLayout &DL, ScalarEvolution &SE, bool StrictCheck=false, bool CheckType=true)
Returns the distance between the pointers PtrA and PtrB iff they are compatible and it is possible to...
Definition: LoopAccessAnalysis.cpp:1127
EnableMemAccessVersioning
static cl::opt< bool > EnableMemAccessVersioning("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symbolic stride memory access versioning"))
This enables versioning on the strides of symbolically striding memory accesses in code like the foll...
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
llvm::PredicatedScalarEvolution
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
Definition: ScalarEvolution.h:2138
High
uint64_t High
Definition: NVVMIntrRange.cpp:61
llvm::SmallVector< RuntimePointerCheck, 4 >
CheckType
static LLVM_ATTRIBUTE_ALWAYS_INLINE bool CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL)
Definition: SelectionDAGISel.cpp:2565
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1644
llvm::EquivalenceClasses
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
Definition: EquivalenceClasses.h:58
llvm::PredicatedScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount()
Get the (predicated) backedge count for the analyzed loop.
Definition: ScalarEvolution.cpp:13166
ErrorHandling.h
LoopAccessAnalysis.h
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:693
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:626
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:443
llvm::VectorizerParams::VectorizationInterleave
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.
Definition: LoopAccessAnalysis.h:44
llvm::LoopAccessLegacyAnalysis::getInfo
const LoopAccessInfo & getInfo(Loop *L)
Query the result of the loop access information for the loop L.
Definition: LoopAccessAnalysis.cpp:2231
MaxDependences
static cl::opt< unsigned > MaxDependences("max-dependences", cl::Hidden, cl::desc("Maximum number of dependences collected by " "loop-access analysis (default = 100)"), cl::init(100))
We collect dependences up to this threshold.
ValueTracking.h
OptimizationRemarkEmitter.h
llvm::SCEV::NoWrapMask
@ NoWrapMask
Definition: ScalarEvolution.h:120
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::MemoryDepChecker::Dependence::isPossiblyBackward
bool isPossiblyBackward() const
May be a lexically backward dependence type (includes Unknown).
Definition: LoopAccessAnalysis.cpp:1280
llvm::AliasSetTracker
Definition: AliasSetTracker.h:329
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::LoopAccessAnalysis
This analysis provides dependence information for the memory accesses of a loop.
Definition: LoopAccessAnalysis.h:759
APInt.h
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:34
llvm::DenseMapIterator
Definition: DenseMap.h:56
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
DenseMap.h
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1582
llvm::RuntimePointerChecking::needsChecking
bool needsChecking(const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
Definition: LoopAccessAnalysis.cpp:259
Groups
static const X86InstrFMA3Group Groups[]
Definition: X86InstrFMA3Info.cpp:65
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1258
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
llvm::copy
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1556
llvm::MemoryDepChecker::areDepsSafe
bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps, const ValueToValueMap &Strides)
Check whether the dependencies between the accesses are safe.
Definition: LoopAccessAnalysis.cpp:1645
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
llvm::isConsecutiveAccess
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
Definition: LoopAccessAnalysis.cpp:1235
llvm::Optional< int >
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::SmallPtrSet< Value *, 16 >
llvm::RuntimePointerChecking::getNumberOfChecks
unsigned getNumberOfChecks() const
Returns the number of run-time checks required according to needsChecking.
Definition: LoopAccessAnalysis.h:441
Operator.h
STLExtras.h
llvm::PredicatedScalarEvolution::getAsAddRec
const SCEVAddRecExpr * getAsAddRec(Value *V)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
Definition: ScalarEvolution.cpp:13228
INITIALIZE_PASS_END
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
Definition: RegBankSelect.cpp:69
llvm::MemoryDepChecker::Dependence::NoDep
@ NoDep
Definition: LoopAccessAnalysis.h:112
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
DepthFirstIterator.h
llvm::stripIntegerCast
Value * stripIntegerCast(Value *V)
Definition: LoopAccessAnalysis.cpp:136
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::MemoryDepChecker::Dependence::DepType
DepType
The type of the dependence.
Definition: LoopAccessAnalysis.h:110
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
LoopAnalysisManager.h
llvm::MemoryDepChecker::Dependence::isForward
bool isForward() const
Lexically forward dependence.
Definition: LoopAccessAnalysis.cpp:1284
AliasAnalysis.h
PointerIntPair.h
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::ScalarEvolution::getMulExpr
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2816
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:115
llvm::RuntimeCheckingPtrGroup::addPointer
bool addPointer(unsigned Index)
Tries to add the pointer recorded in RtCheck at index Index to this pointer checking group.
Definition: LoopAccessAnalysis.cpp:282
Instruction.h
CommandLine.h
llvm::MemoryLocation::AATags
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
Definition: MemoryLocation.h:230
llvm::LoopAccessLegacyAnalysis::print
void print(raw_ostream &OS, const Module *M=nullptr) const override
Print the result of the analysis when invoked with -analyze.
Definition: LoopAccessAnalysis.cpp:2240
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1505
llvm::APInt::isNegative
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:365
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::ScalarEvolution::getOne
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
Definition: ScalarEvolution.h:596
Constants.h
llvm::MemoryDepChecker::VectorizationSafetyStatus
VectorizationSafetyStatus
Type to keep track of the status of the dependence check.
Definition: LoopAccessAnalysis.h:97
llvm::AAResults
Definition: AliasAnalysis.h:456
isNoWrapAddRec
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, PredicatedScalarEvolution &PSE, const Loop *L)
Return true if an AddRec pointer Ptr is unsigned non-wrapping, i.e.
Definition: LoopAccessAnalysis.cpp:970
llvm::LoopAccessInfo::blockNeedsPredication
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition: LoopAccessAnalysis.cpp:2072
llvm::MemoryDepChecker::Dependence::BackwardVectorizable
@ BackwardVectorizable
Definition: LoopAccessAnalysis.h:132
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::Loop::isAnnotatedParallel
bool isAnnotatedParallel() const
Returns true if the loop is annotated parallel.
Definition: LoopInfo.cpp:559
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::getStrideFromPointer
Value * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
Definition: VectorUtils.cpp:194
llvm::ScalarEvolution::getUMaxExpr
const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
Definition: ScalarEvolution.cpp:3652
InstrTypes.h
SI
@ SI
Definition: SIInstrInfo.cpp:7342
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::getPtrStride
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of its element size.
Definition: LoopAccessAnalysis.cpp:1017
llvm::MemoryDepChecker::Dependence::print
void print(raw_ostream &OS, unsigned Depth, const SmallVectorImpl< Instruction * > &Instrs) const
Print the dependence.
Definition: LoopAccessAnalysis.cpp:1738
llvm::PredicatedScalarEvolution::hasNoOverflow
bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Returns true if we've proved that V doesn't wrap by means of a SCEV predicate.
Definition: ScalarEvolution.cpp:13212
llvm::RuntimeCheckingPtrGroup::RtCheck
RuntimePointerChecking & RtCheck
Constitutes the context of this pointer checking group.
Definition: LoopAccessAnalysis.h:354
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::AMDGPU::PALMD::Key
Key
PAL metadata keys.
Definition: AMDGPUMetadata.h:481
TargetLibraryInfo.h
llvm::RuntimePointerChecking::insert
void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides, PredicatedScalarEvolution &PSE)
Insert a pointer and calculate the start and end SCEVs.
Definition: LoopAccessAnalysis.cpp:191
false
Definition: StackSlotColoring.cpp:142
llvm::EquivalenceClasses::member_end
member_iterator member_end() const
Definition: EquivalenceClasses.h:157
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
First
into llvm powi allowing the code generator to produce balanced multiplication trees First
Definition: README.txt:54
llvm::Instruction
Definition: Instruction.h:45
isInBoundsGep
static bool isInBoundsGep(Value *Ptr)
Definition: LoopAccessAnalysis.cpp:962
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:281
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1632
llvm::MemoryDepChecker::Dependence::BackwardVectorizableButPreventsForwarding
@ BackwardVectorizableButPreventsForwarding
Definition: LoopAccessAnalysis.h:134
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:50
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:606
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2107
DebugLoc.h
SmallPtrSet.h
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:144
llvm::EquivalenceClasses::getLeaderValue
const ElemTy & getLeaderValue(const ElemTy &V) const
getLeaderValue - Return the leader for the specified value that is in the set.
Definition: EquivalenceClasses.h:170
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:154
llvm::ScalarEvolution::getEqualPredicate
const SCEVPredicate * getEqualPredicate(const SCEV *LHS, const SCEV *RHS)
Definition: ScalarEvolution.cpp:12830
VectorizationInterleave
static cl::opt< unsigned, true > VectorizationInterleave("force-vector-interleave", cl::Hidden, cl::desc("Sets the vectorization interleave count. " "Zero is autoselect."), cl::location(VectorizerParams::VectorizationInterleave))
llvm::RuntimePointerChecking::reset
void reset()
Reset the state of the pointer runtime information.
Definition: LoopAccessAnalysis.h:406
llvm::None
const NoneType None
Definition: None.h:23
Type.h
llvm::RuntimePointerChecking::PointerInfo::AliasSetId
unsigned AliasSetId
Holds the id of the disjoint alias set to which this pointer belongs.
Definition: LoopAccessAnalysis.h:391
LoopInfo.h
llvm::MemoryDepChecker::getInstructionsForAccess
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Find the set of instructions that read or write via Ptr.
Definition: LoopAccessAnalysis.cpp:1723
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:3885
isSafeDependenceDistance
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, const SCEV &BackedgeTakenCount, const SCEV &Dist, uint64_t Stride, uint64_t TypeByteSize)
Given a non-constant (unknown) dependence-distance Dist between two memory accesses,...
Definition: LoopAccessAnalysis.cpp:1361
Check
static bool Check(DecodeStatus &Out, DecodeStatus In)
Definition: AArch64Disassembler.cpp:230
llvm::RuntimePointerChecking::PointerInfo
Definition: LoopAccessAnalysis.h:376
VectorUtils.h
llvm::MemoryDepChecker::VectorizationSafetyStatus::Unsafe
@ Unsafe
llvm::initializeLoopAccessLegacyAnalysisPass
void initializeLoopAccessLegacyAnalysisPass(PassRegistry &)
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1419
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:303
llvm::getUnderlyingObjects
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
Definition: ValueTracking.cpp:4317
llvm::SmallSet::count
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
llvm::ArrayRef::drop_front
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:202
llvm::RuntimePointerChecking
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
Definition: LoopAccessAnalysis.h:372
llvm::VectorizerParams::VectorizationFactor
static unsigned VectorizationFactor
VF as overridden by the user.
Definition: LoopAccessAnalysis.h:42
llvm::RuntimePointerChecking::Need
bool Need
This flag indicates if we need to add the runtime check.
Definition: LoopAccessAnalysis.h:452
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MemoryDepChecker::Dependence
Dependece between memory access instructions.
Definition: LoopAccessAnalysis.h:108
llvm::VFDatabase::getMappings
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition: VectorUtils.h:257
llvm::PredicatedScalarEvolution::setNoOverflow
void setNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Proves that V doesn't overflow by adding SCEV predicate.
Definition: ScalarEvolution.cpp:13196
llvm::MemoryDepChecker::Dependence::Backward
@ Backward
Definition: LoopAccessAnalysis.h:129
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::RuntimePointerChecking::PointerInfo::DependencySetId
unsigned DependencySetId
Holds the id of the set of pointers that could be dependent because of a shared underlying object.
Definition: LoopAccessAnalysis.h:389
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::RuntimePointerChecking::CheckingGroups
SmallVector< RuntimeCheckingPtrGroup, 2 > CheckingGroups
Holds a partitioning of pointers into "check groups".
Definition: LoopAccessAnalysis.h:458
laa_name
static const char laa_name[]
Definition: LoopAccessAnalysis.cpp:2272
MemoryLocation.h
llvm::DenseMap< const Value *, Value * >
llvm::SCEV::FlagNSW
@ FlagNSW
Definition: ScalarEvolution.h:119
llvm::AnalysisKey
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: PassManager.h:72
llvm::RuntimeCheckingPtrGroup
A grouping of pointers.
Definition: LoopAccessAnalysis.h:339
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:905
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
llvm::LoopAccessInfo
Drive the analysis of memory accesses in the loop.
Definition: LoopAccessAnalysis.h:519
llvm::VectorizerParams::MaxVectorWidth
static const unsigned MaxVectorWidth
Maximum SIMD width.
Definition: LoopAccessAnalysis.h:39
llvm::SCEVWrapPredicate::IncrementNUSW
@ IncrementNUSW
Definition: ScalarEvolution.h:330
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:47
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::ScalarEvolution::getUMinExpr
const SCEV * getUMinExpr(const SCEV *LHS, const SCEV *RHS)
Definition: ScalarEvolution.cpp:3671
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::PredicatedScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
Definition: ScalarEvolution.cpp:13147
llvm::MemoryDepChecker::isSafeForVectorization
bool isSafeForVectorization() const
No memory dependence was encountered that would inhibit vectorization.
Definition: LoopAccessAnalysis.h:204
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::VectorizerParams::RuntimeMemoryCheckThreshold
static unsigned RuntimeMemoryCheckThreshold
\When performing memory disambiguation checks at runtime do not make more than this number of compari...
Definition: LoopAccessAnalysis.h:50
iterator_range.h
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
LAA_NAME
#define LAA_NAME
Definition: LoopAccessAnalysis.cpp:2273
llvm::ScalarEvolution::getNoopOrSignExtend
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
Definition: ScalarEvolution.cpp:4062
llvm::RuntimeCheckingPtrGroup::High
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
Definition: LoopAccessAnalysis.h:357
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::RuntimeCheckingPtrGroup::Low
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.
Definition: LoopAccessAnalysis.h:360
Status
Definition: SIModeRegister.cpp:28
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:71
Compare
QP Compare Ordered outs ins xscmpudp No builtin are required Or llvm fcmp order unorder compare DP QP Compare builtin are required DP Compare
Definition: README_P9.txt:309
llvm::SCEVNAryExpr::getNoWrapFlags
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
Definition: ScalarEvolutionExpressions.h:215
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:98
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:33
llvm::LoopInfo
Definition: LoopInfo.h:1080
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
DataLayout.h
EnableForwardingConflictDetection
static cl::opt< bool > EnableForwardingConflictDetection("store-to-load-forwarding-conflict-detection", cl::Hidden, cl::desc("Enable conflict detection in loop-access analysis"), cl::init(true))
Enable store-to-load forwarding conflict detection.
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::MemoryDepChecker::MemAccessInfo
PointerIntPair< Value *, 1, bool > MemAccessInfo
Definition: LoopAccessAnalysis.h:89
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:444
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::transform
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1602
llvm::cl::Sink
@ Sink
Definition: CommandLine.h:168
llvm::ScalarEvolution::getStoreSizeOfExpr
const SCEV * getStoreSizeOfExpr(Type *IntTy, Type *StoreTy)
Return an expression for the store size of StoreTy that is type IntTy.
Definition: ScalarEvolution.cpp:3702
isNoWrap
static bool isNoWrap(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, Loop *L)
Check whether a pointer address cannot wrap.
Definition: LoopAccessAnalysis.cpp:644
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
MemoryCheckMergeThreshold
static cl::opt< unsigned > MemoryCheckMergeThreshold("memory-check-merge-threshold", cl::Hidden, cl::desc("Maximum number of comparisons done when trying to merge " "runtime memory checks. (default = 100)"), cl::init(100))
The maximum iterations used to merge memory checks.
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopAccessAnalysis.cpp:70
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:770
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:298
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
ValueHandle.h
llvm::VectorizerParams::isInterleaveForced
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
Definition: LoopAccessAnalysis.cpp:132
llvm::sortPtrAccesses
bool sortPtrAccesses(ArrayRef< Value * > VL, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)
Attempt to sort the pointers in VL and return the sorted indices in SortedIndices,...
Definition: LoopAccessAnalysis.cpp:1188
llvm::RuntimePointerChecking::RuntimeCheckingPtrGroup
friend struct RuntimeCheckingPtrGroup
Definition: LoopAccessAnalysis.h:373
llvm::LoopAccessLegacyAnalysis::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: LoopAccessAnalysis.cpp:2262
llvm::Value::stripAndAccumulateInBoundsConstantOffsets
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition: Value.h:728
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:706
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:12477
RuntimeMemoryCheckThreshold
static cl::opt< unsigned, true > RuntimeMemoryCheckThreshold("runtime-memory-check-threshold", cl::Hidden, cl::desc("When performing memory disambiguation checks at runtime do not " "generate more than this number of comparisons (default = 8)."), cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8))
llvm::ScalarEvolution::isKnownPositive
bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
Definition: ScalarEvolution.cpp:9525
llvm::depth_first
iterator_range< df_iterator< T > > depth_first(const T &G)
Definition: DepthFirstIterator.h:229
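For context, depth_first adapts any type with a GraphTraits specialization; a minimal sketch over a function's CFG (the loop body is a placeholder):

#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"

// Walk all blocks reachable from the entry block in depth-first preorder.
static void visitBlocksDFS(llvm::Function &F) {
  for (llvm::BasicBlock *BB : llvm::depth_first(&F.getEntryBlock()))
    (void)BB; // process BB here
}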
llvm::MemoryDepChecker::Dependence::Unknown
@ Unknown
Definition: LoopAccessAnalysis.h:114
llvm::createLAAPass
Pass * createLAAPass()
Definition: LoopAccessAnalysis.cpp:2291
llvm::RuntimePointerChecking::Pointers
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
Definition: LoopAccessAnalysis.h:455
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
llvm::RuntimePointerChecking::arePointersInSamePartition
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
Definition: LoopAccessAnalysis.cpp:432
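For context, PtrToPartition maps each pointer index to a partition number (-1 when a pointer is used by multiple partitions); a minimal sketch, with the function name partitionExample purely illustrative:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"

static bool partitionExample() {
  llvm::SmallVector<int, 4> PtrToPartition = {0, 0, 1};
  // Pointers 0 and 1 share partition 0, so loop distribution can drop the
  // cross-partition runtime check between them; pointer 2 is elsewhere.
  return llvm::RuntimePointerChecking::arePointersInSamePartition(
      PtrToPartition, 0, 1); // true
}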
llvm::EquivalenceClasses::iterator
typename std::set< ECValue >::const_iterator iterator
iterator* - Provides a way to iterate over all values in the set.
Definition: EquivalenceClasses.h:144
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:363
llvm::LoopAccessLegacyAnalysis::LoopAccessLegacyAnalysis
LoopAccessLegacyAnalysis()
Definition: LoopAccessAnalysis.cpp:2227
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
Definition: ScalarEvolution.cpp:3986
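For context, a common use in dependence analysis is forming the symbolic distance between two pointers; a minimal sketch (the helper name pointerDistance is hypothetical, and both pointers are assumed SCEV-analyzable):

#include "llvm/Analysis/ScalarEvolution.h"

// Symbolic distance PtrB - PtrA, which folds to a constant when both
// pointers are fixed offsets into the same object.
static const llvm::SCEV *pointerDistance(llvm::ScalarEvolution &SE,
                                         llvm::Value *PtrA,
                                         llvm::Value *PtrB) {
  return SE.getMinusSCEV(SE.getSCEV(PtrB), SE.getSCEV(PtrA));
}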
llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition: PassAnalysisSupport.h:130
llvm::LocationSize::beforeOrAfterPointer
constexpr static LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Definition: MemoryLocation.h:129
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:83
VectorizationFactor
static cl::opt< unsigned, true > VectorizationFactor("force-vector-width", cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."), cl::location(VectorizerParams::VectorizationFactor))
llvm::replaceSymbolicStrideSCEV
const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr, Value *OrigPtr=nullptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
Definition: LoopAccessAnalysis.cpp:143
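For context, this is the entry point behind symbolic-stride versioning: when the stride map names Ptr's stride, the returned SCEV treats that stride as one and the matching equality predicate is recorded in PSE. A minimal sketch (the helper name strideOneSCEV is hypothetical):

#include "llvm/Analysis/LoopAccessAnalysis.h"

// Rewrites Ptr's SCEV assuming its symbolic stride is 1; the assumption is
// added to PSE so callers can emit a runtime "stride == 1" check.
static const llvm::SCEV *strideOneSCEV(llvm::PredicatedScalarEvolution &PSE,
                                       const llvm::ValueToValueMap &Strides,
                                       llvm::Value *Ptr) {
  return llvm::replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
}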
llvm::APInt::sextOrTrunc
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:956
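For context, a minimal sketch of the sign-preserving widening this performs (the function name is illustrative):

#include "llvm/ADT/APInt.h"

static bool sextOrTruncExample() {
  llvm::APInt Narrow(8, 0xFF);               // 8-bit pattern 11111111, i.e. -1
  llvm::APInt Wide = Narrow.sextOrTrunc(32); // sign-extends to 32-bit -1
  return Wide.isAllOnesValue();              // true
}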
llvm::RuntimePointerChecking::printChecks
void printChecks(raw_ostream &OS, const SmallVectorImpl< RuntimePointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
Definition: LoopAccessAnalysis.cpp:458
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:352
Casting.h
DiagnosticInfo.h
Function.h
llvm::LoopAccessInfo::LoopAccessInfo
LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT, LoopInfo *LI)
Definition: LoopAccessAnalysis.cpp:2171
llvm::MemoryDepChecker::Dependence::DepName
static const char * DepName[]
String version of the types.
Definition: LoopAccessAnalysis.h:138
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::RuntimePointerChecking::PointerInfo::IsWritePtr
bool IsWritePtr
Holds the information if this pointer is used for writing to memory.
Definition: LoopAccessAnalysis.h:386
llvm::ScalarEvolution::isSCEVable
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition: ScalarEvolution.cpp:3751
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:207
EquivalenceClasses.h
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::PredicatedScalarEvolution::addPredicate
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
Definition: ScalarEvolution.cpp:13175
llvm::LoopBase::getNumBackEdges
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:250
llvm::AAMDNodes::TBAA
MDNode * TBAA
The tag for type-based alias analysis.
Definition: Metadata.h:668
hasComputableBounds
static bool hasComputableBounds(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, Loop *L, bool Assume)
Check whether a pointer can participate in a runtime bounds check.
Definition: LoopAccessAnalysis.cpp:623
llvm::MemoryDepChecker::Dependence::Forward
@ Forward
Definition: LoopAccessAnalysis.h:124
areStridedAccessesIndependent
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, uint64_t TypeByteSize)
Check the dependence for two accesses with the same stride Stride.
Definition: LoopAccessAnalysis.cpp:1421
llvm::GlobalValue::getAddressSpace
unsigned getAddressSpace() const
Definition: Globals.cpp:112
llvm::APInt::abs
APInt abs() const
Get the absolute value.
Definition: APInt.h:1869
ScalarEvolutionExpressions.h
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
llvm::ScalarEvolution::getZeroExtendExpr
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
Definition: ScalarEvolution.cpp:1553
Instructions.h
llvm::PointerIntPair< Value *, 1, bool >
INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:51
SmallVector.h
llvm::raw_ostream::indent
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
Definition: raw_ostream.cpp:497
Dominators.h
llvm::Type::getPointerElementType
Type * getPointerElementType() const
Definition: Type.h:378
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1269
llvm::LoopAccessInfo::print
void print(raw_ostream &OS, unsigned Depth=0) const
Print the information about the memory accesses in the loop.
Definition: LoopAccessAnalysis.cpp:2184
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:350
llvm::SmallVectorImpl< int >
llvm::MemoryDepChecker::Dependence::isSafeForVectorization
static VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
Definition: LoopAccessAnalysis.cpp:1247
llvm::EquivalenceClasses::insert
iterator insert(const ElemTy &Data)
insert - Insert a new value into the union/find set, ignoring the request if the value already exists.
Definition: EquivalenceClasses.h:199
llvm::GlobalValue::getType
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:271
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:397
DerivedTypes.h
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:379
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:43
llvm::getLoadStorePointerOperand
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Definition: Instructions.h:5237
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2300
llvm::EquivalenceClasses::unionSets
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not already exist.
Definition: EquivalenceClasses.h:217
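For context, insert, findValue, member_begin and unionSets combine into the usual union-find pattern (the same shape this analysis uses to group pointers into dependence sets); a minimal sketch over ints, with the function name illustrative:

#include "llvm/ADT/EquivalenceClasses.h"

static unsigned classSizeOfOne() {
  llvm::EquivalenceClasses<int> EC;
  EC.unionSets(1, 2); // inserts 1 and 2, merges them into one class
  EC.unionSets(2, 3); // class is now {1, 2, 3}
  EC.insert(4);       // 4 stays in its own singleton class
  unsigned Size = 0;
  for (auto MI = EC.member_begin(EC.findValue(1)), ME = EC.member_end();
       MI != ME; ++MI)
    ++Size; // visits 1, 2 and 3
  return Size; // 3
}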
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
AliasSetTracker.h
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::cl::desc
Definition: CommandLine.h:411
raw_ostream.h
llvm::SI::KernelInputOffsets::Offsets
Offsets
Offsets in bytes from the start of the input buffer.
Definition: SIInstrInfo.h:1224
llvm::NullPointerIsDefined
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an address space.
Definition: Function.cpp:1851
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::PredicatedScalarEvolution::getSE
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
Definition: ScalarEvolution.h:2170
llvm::EquivalenceClasses::member_iterator
Definition: EquivalenceClasses.h:251
Value.h
llvm::RuntimePointerChecking::print
void print(raw_ostream &OS, unsigned Depth=0) const
Print the list of run-time memory checks necessary.
Definition: LoopAccessAnalysis.cpp:477
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1272
llvm::RuntimeCheckingPtrGroup::Members
SmallVector< unsigned, 2 > Members
Indices of all the pointers that constitute this grouping.
Definition: LoopAccessAnalysis.h:362
InitializePasses.h
llvm::EquivalenceClasses::findValue
iterator findValue(const ElemTy &V) const
findValue - Return an iterator to the specified value.
Definition: EquivalenceClasses.h:163
llvm::EquivalenceClasses::member_begin
member_iterator member_begin(iterator I) const
Definition: EquivalenceClasses.h:153
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::MemoryDepChecker::Dependence::ForwardButPreventsForwarding
@ ForwardButPreventsForwarding
Definition: LoopAccessAnalysis.h:127
llvm::RuntimePointerChecking::generateChecks
void generateChecks(MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies)
Generate the checks and store them.
Definition: LoopAccessAnalysis.cpp:252
llvm::MemoryLocation
Representation for a specific memory location.
Definition: MemoryLocation.h:209
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:369
llvm::LoopAccessLegacyAnalysis::runOnFunction
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
Definition: LoopAccessAnalysis.cpp:2251
SetVector.h
llvm::SCEVAddRecExpr::evaluateAtIteration
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
Definition: ScalarEvolution.cpp:1026
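For context: for an affine add-recurrence {Start,+,Step}, evaluating at iteration It folds to Start + It*Step, so {0,+,4} at iteration 10 yields the constant 40. A minimal sketch, with the helper name and parameters illustrative:

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"

// Value of the recurrence AR at iteration number It (a SCEV expression).
static const llvm::SCEV *valueAt(llvm::ScalarEvolution &SE,
                                 const llvm::SCEVAddRecExpr *AR,
                                 const llvm::SCEV *It) {
  return AR->evaluateAtIteration(It, SE);
}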
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::MemoryDepChecker::VectorizationSafetyStatus::PossiblySafeWithRtChecks
@ PossiblySafeWithRtChecks