//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The implementation for the loop memory dependence analysis that was
// originally developed for the loop vectorizer.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "loop-accesses"

static cl::opt<unsigned, true>
VectorizationFactor("force-vector-width", cl::Hidden,
                    cl::desc("Sets the SIMD width. Zero is autoselect."),
                    cl::location(VectorizerParams::VectorizationFactor));
unsigned VectorizerParams::VectorizationFactor;

static cl::opt<unsigned, true>
VectorizationInterleave("force-vector-interleave", cl::Hidden,
                        cl::desc("Sets the vectorization interleave count. "
                                 "Zero is autoselect."),
                        cl::location(
                            VectorizerParams::VectorizationInterleave));
unsigned VectorizerParams::VectorizationInterleave;

static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
    "runtime-memory-check-threshold", cl::Hidden,
    cl::desc("When performing memory disambiguation checks at runtime do not "
             "generate more than this number of comparisons (default = 8)."),
    cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
unsigned VectorizerParams::RuntimeMemoryCheckThreshold;

/// The maximum iterations used to merge memory checks
static cl::opt<unsigned> MemoryCheckMergeThreshold(
    "memory-check-merge-threshold", cl::Hidden,
    cl::desc("Maximum number of comparisons done when trying to merge "
             "runtime memory checks. (default = 100)"),
    cl::init(100));

/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;

/// We collect dependences up to this threshold.
static cl::opt<unsigned>
    MaxDependences("max-dependences", cl::Hidden,
                   cl::desc("Maximum number of dependences collected by "
                            "loop-access analysis (default = 100)"),
                   cl::init(100));

/// This enables versioning on the strides of symbolically striding memory
/// accesses in code like the following.
///   for (i = 0; i < N; ++i)
///     A[i * Stride1] += B[i * Stride2] ...
///
/// Will be roughly translated to
///    if (Stride1 == 1 && Stride2 == 1) {
///      for (i = 0; i < N; i += 4)
///        A[i:i+3] += ...
///    } else
///      ...
static cl::opt<bool> EnableMemAccessVersioning(
    "enable-mem-access-versioning", cl::init(true), cl::Hidden,
    cl::desc("Enable symbolic stride memory access versioning"));

/// Enable store-to-load forwarding conflict detection. This option can
/// be disabled for correctness testing.
static cl::opt<bool> EnableForwardingConflictDetection(
    "store-to-load-forwarding-conflict-detection", cl::Hidden,
    cl::desc("Enable conflict detection in loop-access analysis"),
    cl::init(true));

bool VectorizerParams::isInterleaveForced() {
  return ::VectorizationInterleave.getNumOccurrences() > 0;
}

Value *llvm::stripIntegerCast(Value *V) {
  if (auto *CI = dyn_cast<CastInst>(V))
    if (CI->getOperand(0)->getType()->isIntegerTy())
      return CI->getOperand(0);
  return V;
}

const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
                                            const ValueToValueMap &PtrToStride,
                                            Value *Ptr) {
  const SCEV *OrigSCEV = PSE.getSCEV(Ptr);

  // If there is an entry in the map return the SCEV of the pointer with the
  // symbolic stride replaced by one.
  ValueToValueMap::const_iterator SI = PtrToStride.find(Ptr);
  if (SI == PtrToStride.end())
    // For a non-symbolic stride, just return the original expression.
    return OrigSCEV;

  Value *StrideVal = stripIntegerCast(SI->second);

  ScalarEvolution *SE = PSE.getSE();
  const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
  const auto *CT =
      static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));

  PSE.addPredicate(*SE->getEqualPredicate(U, CT));
  auto *Expr = PSE.getSCEV(Ptr);

  LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
                    << " by: " << *Expr << "\n");
  return Expr;
}
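
// A concrete illustration (IR names assumed, not taken from this file): for a
// pointer &A[i * Stride] over 4-byte elements whose Stride value appears in
// PtrToStride, the SCEV {%A,+,(4 * %Stride)} is rewritten to {%A,+,4} under
// the newly added predicate "%Stride == 1", turning the symbolic stride into
// a unit stride the rest of the analysis can reason about.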

RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
    unsigned Index, RuntimePointerChecking &RtCheck)
    : High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start),
      AddressSpace(RtCheck.Pointers[Index]
                       .PointerValue->getType()
                       ->getPointerAddressSpace()) {
  Members.push_back(Index);
}

/// Calculate Start and End points of memory access.
/// Let's assume A is the first access and B is a memory access on N-th loop
/// iteration. Then B is calculated as:
///   B = A + Step*N .
/// Step value may be positive or negative.
/// N is a calculated back-edge taken count:
///     N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0
/// Start and End points are calculated in the following way:
/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
/// where SizeOfElt is the size of single memory access in bytes.
///
/// There is no conflict when the intervals are disjoint:
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
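///
/// A worked instance with assumed numbers: for 4-byte accesses starting at %a
/// with Step = 4 and back-edge taken count N = 99, B = %a + 396, so
/// Start = %a and End = %a + 396 + 4 = %a + 400; two such intervals conflict
/// exactly when their [Start, End) byte ranges overlap.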
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
                                    unsigned DepSetId, unsigned ASId,
                                    const ValueToValueMap &Strides,
                                    PredicatedScalarEvolution &PSE) {
  // Get the stride-replaced SCEV.
  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
  ScalarEvolution *SE = PSE.getSE();

  const SCEV *ScStart;
  const SCEV *ScEnd;

  if (SE->isLoopInvariant(Sc, Lp)) {
    ScStart = ScEnd = Sc;
  } else {
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
    assert(AR && "Invalid addrec expression");
    const SCEV *Ex = PSE.getBackedgeTakenCount();

    ScStart = AR->getStart();
    ScEnd = AR->evaluateAtIteration(Ex, *SE);
    const SCEV *Step = AR->getStepRecurrence(*SE);

    // For expressions with negative step, the upper bound is ScStart and the
    // lower bound is ScEnd.
    if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
      if (CStep->getValue()->isNegative())
        std::swap(ScStart, ScEnd);
    } else {
      // Fallback case: the step is not constant, but we can still
      // get the upper and lower bounds of the interval by using min/max
      // expressions.
      ScStart = SE->getUMinExpr(ScStart, ScEnd);
      ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
    }
  }
  // Add the size of the pointed element to ScEnd.
  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
  Type *IdxTy = DL.getIndexType(Ptr->getType());
  const SCEV *EltSizeSCEV =
      SE->getStoreSizeOfExpr(IdxTy, Ptr->getType()->getPointerElementType());
  ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);

  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
}

SmallVector<RuntimePointerCheck, 4>
RuntimePointerChecking::generateChecks() const {
  SmallVector<RuntimePointerCheck, 4> Checks;

  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
      const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I];
      const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J];

      if (needsChecking(CGI, CGJ))
        Checks.push_back(std::make_pair(&CGI, &CGJ));
    }
  }
  return Checks;
}

void RuntimePointerChecking::generateChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  assert(Checks.empty() && "Checks is not empty");
  groupChecks(DepCands, UseDependencies);
  Checks = generateChecks();
}

bool RuntimePointerChecking::needsChecking(
    const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const {
  for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
    for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
      if (needsChecking(M.Members[I], N.Members[J]))
        return true;
  return false;
}

/// Compare \p I and \p J and return the minimum.
/// Return nullptr in case we couldn't find an answer.
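/// For example (values assumed): with I = %base and J = %base + 16, the
/// difference is the constant 16, which is not negative, so I is returned;
/// when J - I is not a compile-time constant the result is nullptr.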
static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
                                   ScalarEvolution *SE) {
  const SCEV *Diff = SE->getMinusSCEV(J, I);
  const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);

  if (!C)
    return nullptr;
  if (C->getValue()->isNegative())
    return J;
  return I;
}

bool RuntimeCheckingPtrGroup::addPointer(unsigned Index,
                                         RuntimePointerChecking &RtCheck) {
  return addPointer(
      Index, RtCheck.Pointers[Index].Start, RtCheck.Pointers[Index].End,
      RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(),
      *RtCheck.SE);
}

bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
                                         const SCEV *End, unsigned AS,
                                         ScalarEvolution &SE) {
  assert(AddressSpace == AS &&
         "all pointers in a checking group must be in the same address space");

  // Compare the starts and ends with the known minimum and maximum
  // of this set. We need to know how we compare against the min/max
  // of the set in order to be able to emit memchecks.
  const SCEV *Min0 = getMinFromExprs(Start, Low, &SE);
  if (!Min0)
    return false;

  const SCEV *Min1 = getMinFromExprs(End, High, &SE);
  if (!Min1)
    return false;

  // Update the low bound expression if we've found a new min value.
  if (Min0 == Start)
    Low = Start;

  // Update the high bound expression if we've found a new max value.
  if (Min1 != End)
    High = End;

  Members.push_back(Index);
  return true;
}

void RuntimePointerChecking::groupChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  // We build the groups from dependency candidates equivalence classes
  // because:
  //    - We know that pointers in the same equivalence class share
  //      the same underlying object and therefore there is a chance
  //      that we can compare pointers
  //    - We wouldn't be able to merge two pointers for which we need
  //      to emit a memcheck. The classes in DepCands are already
  //      conveniently built such that no two pointers in the same
  //      class need checking against each other.

  // We use the following (greedy) algorithm to construct the groups
  // For every pointer in the equivalence class:
  //   For each existing group:
  //   - if the difference between this pointer and the min/max bounds
  //     of the group is a constant, then make the pointer part of the
  //     group and update the min/max bounds of that group as required.
  CheckingGroups.clear();

  // If we need to check two pointers to the same underlying object
  // with a non-constant difference, we shouldn't perform any pointer
  // grouping with those pointers. This is because we can easily get
  // into cases where the resulting check would return false, even when
  // the accesses are safe.
  //
  // The following example shows this:
  // for (i = 0; i < 1000; ++i)
  //   a[5000 + i * m] = a[i] + a[i + 9000]
  //
  // Here grouping gives a check of (5000, 5000 + 1000 * m) against
  // (0, 10000) which is always false. However, if m is 1, there is no
  // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
  // us to perform an accurate check in this case.
  //
  // The above case requires that we have an UnknownDependence between
  // accesses to the same underlying object. This cannot happen unless
  // FoundNonConstantDistanceDependence is set, and therefore UseDependencies
  // is also false. In this case we will use the fallback path and create
  // separate checking groups for all pointers.

  // If we don't have the dependency partitions, construct a new
  // checking pointer group for each pointer. This is also required
  // for correctness, because in this case we can have checking between
  // pointers to the same underlying object.
  if (!UseDependencies) {
    for (unsigned I = 0; I < Pointers.size(); ++I)
      CheckingGroups.push_back(RuntimeCheckingPtrGroup(I, *this));
    return;
  }

  unsigned TotalComparisons = 0;

  DenseMap<Value *, unsigned> PositionMap;
  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
    PositionMap[Pointers[Index].PointerValue] = Index;

  // We need to keep track of what pointers we've already seen so we
  // don't process them twice.
  SmallSet<unsigned, 2> Seen;

  // Go through all equivalence classes, get the "pointer check groups"
  // and add them to the overall solution. We use the order in which accesses
  // appear in 'Pointers' to enforce determinism.
  for (unsigned I = 0; I < Pointers.size(); ++I) {
    // We've seen this pointer before, and therefore already processed
    // its equivalence class.
    if (Seen.count(I))
      continue;

    MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
                                           Pointers[I].IsWritePtr);

    SmallVector<RuntimeCheckingPtrGroup, 2> Groups;
    auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));

    // Because DepCands is constructed by visiting accesses in the order in
    // which they appear in alias sets (which is deterministic) and the
    // iteration order within an equivalence class member is only dependent on
    // the order in which unions and insertions are performed on the
    // equivalence class, the iteration order is deterministic.
    for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
         MI != ME; ++MI) {
      auto PointerI = PositionMap.find(MI->getPointer());
      assert(PointerI != PositionMap.end() &&
             "pointer in equivalence class not found in PositionMap");
      unsigned Pointer = PointerI->second;
      bool Merged = false;
      // Mark this pointer as seen.
      Seen.insert(Pointer);

      // Go through all the existing sets and see if we can find one
      // which can include this pointer.
      for (RuntimeCheckingPtrGroup &Group : Groups) {
        // Don't perform more than a certain amount of comparisons.
        // This should limit the cost of grouping the pointers to something
        // reasonable. If we do end up hitting this threshold, the algorithm
        // will create separate groups for all remaining pointers.
        if (TotalComparisons > MemoryCheckMergeThreshold)
          break;

        TotalComparisons++;

        if (Group.addPointer(Pointer, *this)) {
          Merged = true;
          break;
        }
      }

      if (!Merged)
        // We couldn't add this pointer to any existing set or the threshold
        // for the number of comparisons has been reached. Create a new group
        // to hold the current pointer.
        Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this));
    }

    // We've computed the grouped checks for this partition.
    // Save the results and continue with the next one.
    llvm::copy(Groups, std::back_inserter(CheckingGroups));
  }
}

bool RuntimePointerChecking::arePointersInSamePartition(
    const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
    unsigned PtrIdx2) {
  return (PtrToPartition[PtrIdx1] != -1 &&
          PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
}

bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
  const PointerInfo &PointerI = Pointers[I];
  const PointerInfo &PointerJ = Pointers[J];

  // No need to check if two readonly pointers intersect.
  if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
    return false;

  // Only need to check pointers between two different dependency sets.
  if (PointerI.DependencySetId == PointerJ.DependencySetId)
    return false;

  // Only need to check pointers in the same alias set.
  if (PointerI.AliasSetId != PointerJ.AliasSetId)
    return false;

  return true;
}

void RuntimePointerChecking::printChecks(
    raw_ostream &OS, const SmallVectorImpl<RuntimePointerCheck> &Checks,
    unsigned Depth) const {
  unsigned N = 0;
  for (const auto &Check : Checks) {
    const auto &First = Check.first->Members, &Second = Check.second->Members;

    OS.indent(Depth) << "Check " << N++ << ":\n";

    OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
    for (unsigned K = 0; K < First.size(); ++K)
      OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";

    OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
    for (unsigned K = 0; K < Second.size(); ++K)
      OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
  }
}

void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {

  OS.indent(Depth) << "Run-time memory checks:\n";
  printChecks(OS, Checks, Depth);

  OS.indent(Depth) << "Grouped accesses:\n";
  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    const auto &CG = CheckingGroups[I];

    OS.indent(Depth + 2) << "Group " << &CG << ":\n";
    OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
                         << ")\n";
    for (unsigned J = 0; J < CG.Members.size(); ++J) {
      OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
                           << "\n";
    }
  }
}

namespace {

/// Analyses memory accesses in a loop.
///
/// Checks whether run time pointer checks are needed and builds sets for data
/// dependence checking.
class AccessAnalysis {
public:
  /// Read or write access location.
  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
  typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;

  AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI,
                 MemoryDepChecker::DepCandidates &DA,
                 PredicatedScalarEvolution &PSE)
      : TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA),
        IsRTCheckAnalysisNeeded(false), PSE(PSE) {}

  /// Register a load and whether it is only read from.
  void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
    Value *Ptr = const_cast<Value*>(Loc.Ptr);
    AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
    Accesses.insert(MemAccessInfo(Ptr, false));
    if (IsReadOnly)
      ReadOnlyPtr.insert(Ptr);
  }

  /// Register a store.
  void addStore(MemoryLocation &Loc) {
    Value *Ptr = const_cast<Value*>(Loc.Ptr);
    AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
    Accesses.insert(MemAccessInfo(Ptr, true));
  }

  /// Check if we can emit a run-time no-alias check for \p Access.
  ///
  /// Returns true if we can emit a run-time no alias check for \p Access.
  /// If we can check this access, this also adds it to a dependence set and
  /// adds a run-time check for it to \p RtCheck. If \p Assume is true,
  /// we will attempt to use additional run-time checks in order to get
  /// the bounds of the pointer.
  bool createCheckForAccess(RuntimePointerChecking &RtCheck,
                            MemAccessInfo Access,
                            const ValueToValueMap &Strides,
                            DenseMap<Value *, unsigned> &DepSetId,
                            Loop *TheLoop, unsigned &RunningDepId,
                            unsigned ASId, bool ShouldCheckStride,
                            bool Assume);

  /// Check whether we can check the pointers at runtime for
  /// non-intersection.
  ///
  /// Returns true if we need no check or if we do and we can generate them
  /// (i.e. the pointers have computable bounds).
  bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
                       Loop *TheLoop, const ValueToValueMap &Strides,
                       bool ShouldCheckWrap = false);

  /// Goes over all memory accesses, checks whether a RT check is needed
  /// and builds sets of dependent accesses.
  void buildDependenceSets() {
    processMemAccesses();
  }

  /// Initial processing of memory accesses determined that we need to
  /// perform dependency checking.
  ///
  /// Note that this can later be cleared if we retry memcheck analysis without
  /// dependency checking (i.e. FoundNonConstantDistanceDependence).
  bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }

  /// We decided that no dependence analysis would be used. Reset the state.
  void resetDepChecks(MemoryDepChecker &DepChecker) {
    CheckDeps.clear();
    DepChecker.clearDependences();
  }

  MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }

private:
  typedef SetVector<MemAccessInfo> PtrAccessSet;

  /// Go over all memory access and check whether runtime pointer checks
  /// are needed and build sets of dependency check candidates.
  void processMemAccesses();

  /// Set of all accesses.
  PtrAccessSet Accesses;

  /// The loop being checked.
  const Loop *TheLoop;

  /// List of accesses that need a further dependence check.
  MemAccessInfoList CheckDeps;

  /// Set of pointers that are read only.
  SmallPtrSet<Value*, 16> ReadOnlyPtr;

  /// An alias set tracker to partition the access set by underlying object and
  /// intrinsic property (such as TBAA metadata).
  AliasSetTracker AST;

  LoopInfo *LI;

  /// Sets of potentially dependent accesses - members of one set share an
  /// underlying pointer. The set "CheckDeps" identifies which sets really need
  /// a dependence check.
  MemoryDepChecker::DepCandidates &DepCands;

  /// Initial processing of memory accesses determined that we may need
  /// to add memchecks. Perform the analysis to determine the necessary checks.
  ///
  /// Note that, this is different from isDependencyCheckNeeded. When we retry
  /// memcheck analysis without dependency checking
  /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is
  /// cleared while this remains set if we have potentially dependent accesses.
  bool IsRTCheckAnalysisNeeded;

  /// The SCEV predicate containing all the SCEV-related assumptions.
  PredicatedScalarEvolution &PSE;
};

} // end anonymous namespace

/// Check whether a pointer can participate in a runtime bounds check.
/// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
/// by adding run-time checks (overflow checks) if necessary.
static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
                                const ValueToValueMap &Strides, Value *Ptr,
                                Loop *L, bool Assume) {
  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);

  // The bounds for loop-invariant pointer is trivial.
  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
    return true;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);

  if (!AR && Assume)
    AR = PSE.getAsAddRec(Ptr);

  if (!AR)
    return false;

  return AR->isAffine();
}

/// Check whether a pointer address cannot wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE,
                     const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
  const SCEV *PtrScev = PSE.getSCEV(Ptr);
  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
    return true;

  Type *AccessTy = Ptr->getType()->getPointerElementType();
  int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides);
  if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
    return true;

  return false;
}

static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
                          function_ref<void(Value *)> AddPointer) {
  SmallPtrSet<Value *, 8> Visited;
  SmallVector<Value *> WorkList;
  WorkList.push_back(StartPtr);

  while (!WorkList.empty()) {
    Value *Ptr = WorkList.pop_back_val();
    if (!Visited.insert(Ptr).second)
      continue;
    auto *PN = dyn_cast<PHINode>(Ptr);
    // SCEV does not look through non-header PHIs inside the loop. Such phis
    // can be analyzed by adding separate accesses for each incoming pointer
    // value.
    if (PN && InnermostLoop.contains(PN->getParent()) &&
        PN->getParent() != InnermostLoop.getHeader()) {
      for (const Use &Inc : PN->incoming_values())
        WorkList.push_back(Inc);
    } else
      AddPointer(Ptr);
  }
}
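
// As an illustration (IR names assumed): for a store through
//   %p = phi ptr [ %a, %then ], [ %b, %else ]
// in the loop body, SCEV cannot analyze %p directly, so the callback is
// invoked once for %a and once for %b, registering two separate accesses.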

bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
                                          MemAccessInfo Access,
                                          const ValueToValueMap &StridesMap,
                                          DenseMap<Value *, unsigned> &DepSetId,
                                          Loop *TheLoop, unsigned &RunningDepId,
                                          unsigned ASId, bool ShouldCheckWrap,
                                          bool Assume) {
  Value *Ptr = Access.getPointer();

  if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
    return false;

  // When we run after a failing dependency check we have to make sure
  // we don't have wrapping pointers.
  if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
    auto *Expr = PSE.getSCEV(Ptr);
    if (!Assume || !isa<SCEVAddRecExpr>(Expr))
      return false;
    PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
  }

  // The id of the dependence set.
  unsigned DepId;

  if (isDependencyCheckNeeded()) {
    Value *Leader = DepCands.getLeaderValue(Access).getPointer();
    unsigned &LeaderId = DepSetId[Leader];
    if (!LeaderId)
      LeaderId = RunningDepId++;
    DepId = LeaderId;
  } else
    // Each access has its own dependence set.
    DepId = RunningDepId++;

  bool IsWrite = Access.getInt();
  RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
  LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');

  return true;
}

bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
                                     ScalarEvolution *SE, Loop *TheLoop,
                                     const ValueToValueMap &StridesMap,
                                     bool ShouldCheckWrap) {
  // Find pointers with computable bounds. We are going to use this information
  // to place a runtime bound check.
  bool CanDoRT = true;

  bool MayNeedRTCheck = false;
  if (!IsRTCheckAnalysisNeeded) return true;

  bool IsDepCheckNeeded = isDependencyCheckNeeded();

  // We assign a consecutive id to accesses from different alias sets.
  // Accesses between different groups don't need to be checked.
  unsigned ASId = 0;
  for (auto &AS : AST) {
    int NumReadPtrChecks = 0;
    int NumWritePtrChecks = 0;
    bool CanDoAliasSetRT = true;
    ++ASId;

    // We assign consecutive ids to accesses from different dependence sets.
    // Accesses within the same set don't need a runtime check.
    unsigned RunningDepId = 1;
    DenseMap<Value *, unsigned> DepSetId;

    SmallVector<MemAccessInfo, 4> Retries;

    // First, count how many write and read accesses are in the alias set. Also
    // collect MemAccessInfos for later.
    SmallVector<MemAccessInfo, 4> AccessInfos;
    for (const auto &A : AS) {
      Value *Ptr = A.getValue();
      bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));

      if (IsWrite)
        ++NumWritePtrChecks;
      else
        ++NumReadPtrChecks;
      AccessInfos.emplace_back(Ptr, IsWrite);
    }

    // We do not need runtime checks for this alias set, if there are no writes
    // or a single write and no reads.
    if (NumWritePtrChecks == 0 ||
        (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) {
      assert((AS.size() <= 1 ||
              all_of(AS,
                     [this](auto AC) {
                       MemAccessInfo AccessWrite(AC.getValue(), true);
                       return DepCands.findValue(AccessWrite) == DepCands.end();
                     })) &&
             "Can only skip updating CanDoRT below, if all entries in AS "
             "are reads or there is at most 1 entry");
      continue;
    }

    for (auto &Access : AccessInfos) {
      if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
                                RunningDepId, ASId, ShouldCheckWrap, false)) {
        LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
                          << *Access.getPointer() << '\n');
        Retries.push_back(Access);
        CanDoAliasSetRT = false;
      }
    }

    // Note that this function computes CanDoRT and MayNeedRTCheck
    // independently. For example CanDoRT=false, MayNeedRTCheck=false means that
    // we have a pointer for which we couldn't find the bounds but we don't
    // actually need to emit any checks so it does not matter.
    //
    // We need runtime checks for this alias set, if there are at least 2
    // dependence sets (in which case RunningDepId > 2) or if we need to re-try
    // any bound checks (because in that case the number of dependence sets is
    // incomplete).
    bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty();

    // We need to perform run-time alias checks, but some pointers had bounds
    // that couldn't be checked.
    if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
      // Reset the CanDoSetRt flag and retry all accesses that have failed.
      // We know that we need these checks, so we can now be more aggressive
      // and add further checks if required (overflow checks).
      CanDoAliasSetRT = true;
      for (auto Access : Retries)
        if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
                                  TheLoop, RunningDepId, ASId,
                                  ShouldCheckWrap, /*Assume=*/true)) {
          CanDoAliasSetRT = false;
          break;
        }
    }

    CanDoRT &= CanDoAliasSetRT;
    MayNeedRTCheck |= NeedsAliasSetRTCheck;
    ++ASId;
  }

  // If the pointers that we would use for the bounds comparison have different
  // address spaces, assume the values aren't directly comparable, so we can't
  // use them for the runtime check. We also have to assume they could
  // overlap. In the future there should be metadata for whether address spaces
  // are disjoint.
  unsigned NumPointers = RtCheck.Pointers.size();
  for (unsigned i = 0; i < NumPointers; ++i) {
    for (unsigned j = i + 1; j < NumPointers; ++j) {
      // Only need to check pointers between two different dependency sets.
      if (RtCheck.Pointers[i].DependencySetId ==
          RtCheck.Pointers[j].DependencySetId)
        continue;
      // Only need to check pointers in the same alias set.
      if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
        continue;

      Value *PtrI = RtCheck.Pointers[i].PointerValue;
      Value *PtrJ = RtCheck.Pointers[j].PointerValue;

      unsigned ASi = PtrI->getType()->getPointerAddressSpace();
      unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
      if (ASi != ASj) {
        LLVM_DEBUG(
            dbgs() << "LAA: Runtime check would require comparison between"
                      " different address spaces\n");
        return false;
      }
    }
  }

  if (MayNeedRTCheck && CanDoRT)
    RtCheck.generateChecks(DepCands, IsDepCheckNeeded);

  LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
                    << " pointer comparisons.\n");

  // If we can do run-time checks, but there are no checks, no runtime checks
  // are needed. This can happen when all pointers point to the same underlying
  // object for example.
  RtCheck.Need = CanDoRT ? RtCheck.getNumberOfChecks() != 0 : MayNeedRTCheck;

  bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT;
  if (!CanDoRTIfNeeded)
    RtCheck.reset();
  return CanDoRTIfNeeded;
}

void AccessAnalysis::processMemAccesses() {
  // We process the set twice: first we process read-write pointers, last we
  // process read-only pointers. This allows us to skip dependence tests for
  // read-only pointers.

  LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
  LLVM_DEBUG(dbgs() << "  AST: "; AST.dump());
  LLVM_DEBUG(dbgs() << "LAA:   Accesses(" << Accesses.size() << "):\n");
  LLVM_DEBUG({
    for (auto A : Accesses)
      dbgs() << "\t" << *A.getPointer() << " (" <<
        (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
         "read-only" : "read")) << ")\n";
  });

  // The AliasSetTracker has nicely partitioned our pointers by metadata
  // compatibility and potential for underlying-object overlap. As a result, we
  // only need to check for potential pointer dependencies within each alias
  // set.
  for (const auto &AS : AST) {
    // Note that both the alias-set tracker and the alias sets themselves use
    // linked lists internally and so the iteration order here is deterministic
    // (matching the original instruction order within each set).

    bool SetHasWrite = false;

    // Map of pointers to last access encountered.
    typedef DenseMap<const Value*, MemAccessInfo> UnderlyingObjToAccessMap;
    UnderlyingObjToAccessMap ObjToLastAccess;

    // Set of accesses to check after all writes have been processed.
    PtrAccessSet DeferredAccesses;

    // Iterate over each alias set twice, once to process read/write pointers,
    // and then to process read-only pointers.
    for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
      bool UseDeferred = SetIteration > 0;
      PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;

      for (const auto &AV : AS) {
        Value *Ptr = AV.getValue();

        // For a single memory access in AliasSetTracker, Accesses may contain
        // both read and write, and they both need to be handled for CheckDeps.
        for (const auto &AC : S) {
          if (AC.getPointer() != Ptr)
            continue;

          bool IsWrite = AC.getInt();

          // If we're using the deferred access set, then it contains only
          // reads.
          bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
          if (UseDeferred && !IsReadOnlyPtr)
            continue;
          // Otherwise, the pointer must be in the PtrAccessSet, either as a
          // read or a write.
          assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
                  S.count(MemAccessInfo(Ptr, false))) &&
                 "Alias-set pointer not in the access set?");

          MemAccessInfo Access(Ptr, IsWrite);
          DepCands.insert(Access);

          // Memorize read-only pointers for later processing and skip them in
          // the first round (they need to be checked after we have seen all
          // write pointers). Note: we also mark pointers that are not
          // consecutive as "read-only" pointers (so that we check
          // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
          if (!UseDeferred && IsReadOnlyPtr) {
            DeferredAccesses.insert(Access);
            continue;
          }

          // If this is a write - check other reads and writes for conflicts. If
          // this is a read only check other writes for conflicts (but only if
          // there is no other write to the ptr - this is an optimization to
          // catch "a[i] = a[i] + " without having to do a dependence check).
          if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
            CheckDeps.push_back(Access);
            IsRTCheckAnalysisNeeded = true;
          }

          if (IsWrite)
            SetHasWrite = true;

          // Create sets of pointers connected by a shared alias set and
          // underlying object.
          typedef SmallVector<const Value *, 16> ValueVector;
          ValueVector TempObjects;

          getUnderlyingObjects(Ptr, TempObjects, LI);
          LLVM_DEBUG(dbgs()
                     << "Underlying objects for pointer " << *Ptr << "\n");
          for (const Value *UnderlyingObj : TempObjects) {
            // nullptr never aliases; don't join sets for pointers that have
            // "null" in their UnderlyingObjects list.
            if (isa<ConstantPointerNull>(UnderlyingObj) &&
                !NullPointerIsDefined(
                    TheLoop->getHeader()->getParent(),
                    UnderlyingObj->getType()->getPointerAddressSpace()))
              continue;

            UnderlyingObjToAccessMap::iterator Prev =
                ObjToLastAccess.find(UnderlyingObj);
            if (Prev != ObjToLastAccess.end())
              DepCands.unionSets(Access, Prev->second);

            ObjToLastAccess[UnderlyingObj] = Access;
            LLVM_DEBUG(dbgs() << "  " << *UnderlyingObj << "\n");
          }
        }
      }
    }
  }
}

static bool isInBoundsGep(Value *Ptr) {
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
    return GEP->isInBounds();
  return false;
}

/// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
/// i.e. monotonically increasing/decreasing.
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
                           PredicatedScalarEvolution &PSE, const Loop *L) {
  // FIXME: This should probably only return true for NUW.
  if (AR->getNoWrapFlags(SCEV::NoWrapMask))
    return true;

  // Scalar evolution does not propagate the non-wrapping flags to values that
  // are derived from a non-wrapping induction variable because non-wrapping
  // could be flow-sensitive.
  //
  // Look through the potentially overflowing instruction to try to prove
  // non-wrapping for the *specific* value of Ptr.

  // The arithmetic implied by an inbounds GEP can't overflow.
  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
  if (!GEP || !GEP->isInBounds())
    return false;

  // Make sure there is only one non-const index and analyze that.
  Value *NonConstIndex = nullptr;
  for (Value *Index : GEP->indices())
    if (!isa<ConstantInt>(Index)) {
      if (NonConstIndex)
        return false;
      NonConstIndex = Index;
    }
  if (!NonConstIndex)
    // The recurrence is on the pointer, ignore for now.
    return false;

  // The index in GEP is signed. It is non-wrapping if it's derived from a NSW
  // AddRec using a NSW operation.
  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
    if (OBO->hasNoSignedWrap() &&
        // Assume a constant for the other operand so that the AddRec can be
        // easily found.
        isa<ConstantInt>(OBO->getOperand(1))) {
      auto *OpScev = PSE.getSCEV(OBO->getOperand(0));

      if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
        return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
    }

  return false;
}

/// Check whether the access through \p Ptr has a constant stride.
int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy,
                           Value *Ptr, const Loop *Lp,
                           const ValueToValueMap &StridesMap, bool Assume,
                           bool ShouldCheckWrap) {
  Type *Ty = Ptr->getType();
  assert(Ty->isPointerTy() && "Unexpected non-ptr");

  if (isa<ScalableVectorType>(AccessTy)) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
                      << "\n");
    return 0;
  }

  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
  if (Assume && !AR)
    AR = PSE.getAsAddRec(Ptr);

  if (!AR) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
                      << " SCEV: " << *PtrScev << "\n");
    return 0;
  }

  // The access function must stride over the innermost loop.
  if (Lp != AR->getLoop()) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
                      << *Ptr << " SCEV: " << *AR << "\n");
    return 0;
  }

  // The address calculation must not wrap. Otherwise, a dependence could be
  // inverted.
  // An inbounds getelementptr that is an AddRec with a unit stride
  // cannot wrap per definition. The unit stride requirement is checked later.
  // A getelementptr without an inbounds attribute and unit stride would have
  // to access the pointer value "0" which is undefined behavior in address
  // space 0, therefore we can also vectorize this case.
  unsigned AddrSpace = Ty->getPointerAddressSpace();
  bool IsInBoundsGEP = isInBoundsGep(Ptr);
  bool IsNoWrapAddRec = !ShouldCheckWrap ||
                        PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
                        isNoWrapAddRec(Ptr, AR, PSE, Lp);
  if (!IsNoWrapAddRec && !IsInBoundsGEP &&
      NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace)) {
    if (Assume) {
      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
      IsNoWrapAddRec = true;
      LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
                        << "LAA:   Pointer: " << *Ptr << "\n"
                        << "LAA:   SCEV: " << *AR << "\n"
                        << "LAA:   Added an overflow assumption\n");
    } else {
      LLVM_DEBUG(
          dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
                 << *Ptr << " SCEV: " << *AR << "\n");
      return 0;
    }
  }

  // Check the step is constant.
  const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());

  // Calculate the pointer stride and check if it is constant.
  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
  if (!C) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
                      << " SCEV: " << *AR << "\n");
    return 0;
  }

  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
  TypeSize AllocSize = DL.getTypeAllocSize(AccessTy);
  int64_t Size = AllocSize.getFixedSize();
  const APInt &APStepVal = C->getAPInt();

  // Huge step value - give up.
  if (APStepVal.getBitWidth() > 64)
    return 0;

  int64_t StepVal = APStepVal.getSExtValue();

  // Strided access.
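  // For instance, with i32 elements (Size == 4) a step of 8 bytes per
  // iteration yields Stride == 2, while a step of 6 bytes leaves a non-zero
  // remainder and is rejected below.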
  int64_t Stride = StepVal / Size;
  int64_t Rem = StepVal % Size;
  if (Rem)
    return 0;

  // If the SCEV could wrap but we have an inbounds gep with a unit stride we
  // know we can't "wrap around the address space". In case of address space
  // zero we know that this won't happen without triggering undefined behavior.
  if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
      (IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
                                              AddrSpace))) {
    if (Assume) {
      // We can avoid this case by adding a run-time check.
      LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
                        << "inbounds or in address space 0 may wrap:\n"
                        << "LAA:   Pointer: " << *Ptr << "\n"
                        << "LAA:   SCEV: " << *AR << "\n"
                        << "LAA:   Added an overflow assumption\n");
      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
    } else
      return 0;
  }

  return Stride;
}

Optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
                                    Value *PtrB, const DataLayout &DL,
                                    ScalarEvolution &SE, bool StrictCheck,
                                    bool CheckType) {
  assert(PtrA && PtrB && "Expected non-nullptr pointers.");
  assert(cast<PointerType>(PtrA->getType())
             ->isOpaqueOrPointeeTypeMatches(ElemTyA) && "Wrong PtrA type");
  assert(cast<PointerType>(PtrB->getType())
             ->isOpaqueOrPointeeTypeMatches(ElemTyB) && "Wrong PtrB type");

  // Make sure that A and B are different pointers.
  if (PtrA == PtrB)
    return 0;

  // Make sure that the element types are the same if required.
  if (CheckType && ElemTyA != ElemTyB)
    return None;

  unsigned ASA = PtrA->getType()->getPointerAddressSpace();
  unsigned ASB = PtrB->getType()->getPointerAddressSpace();

  // Check that the address spaces match.
  if (ASA != ASB)
    return None;
  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);

  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
  Value *PtrA1 = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
  Value *PtrB1 = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);

  int Val;
  if (PtrA1 == PtrB1) {
    // Retrieve the address space again as pointer stripping now tracks through
    // `addrspacecast`.
    ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace();
    ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace();
    // Check that the address spaces match and that the pointers are valid.
    if (ASA != ASB)
      return None;

    IdxWidth = DL.getIndexSizeInBits(ASA);
    OffsetA = OffsetA.sextOrTrunc(IdxWidth);
    OffsetB = OffsetB.sextOrTrunc(IdxWidth);

    OffsetB -= OffsetA;
    Val = OffsetB.getSExtValue();
  } else {
    // Otherwise compute the distance with SCEV between the base pointers.
    const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
    const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
    const auto *Diff =
        dyn_cast<SCEVConstant>(SE.getMinusSCEV(PtrSCEVB, PtrSCEVA));
    if (!Diff)
      return None;
    Val = Diff->getAPInt().getSExtValue();
  }
  int Size = DL.getTypeStoreSize(ElemTyA);
  int Dist = Val / Size;

  // Ensure that the calculated distance matches the type-based one after all
  // the bitcasts removal in the provided pointers.
  if (!StrictCheck || Dist * Size == Val)
    return Dist;
  return None;
}
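
// A quick illustration (values assumed): for i32 pointers %A and %A + 8
// bytes, both strip to the same base, so Val = 8 and Size = 4, giving an
// element distance of 2; with StrictCheck the result is kept because
// 2 * 4 == 8 exactly.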

bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
                           const DataLayout &DL, ScalarEvolution &SE,
                           SmallVectorImpl<unsigned> &SortedIndices) {
  assert(llvm::all_of(
             VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
         "Expected list of pointer operands.");
  // Walk over the pointers, and map each of them to an offset relative to
  // first pointer in the array.
  Value *Ptr0 = VL[0];

  using DistOrdPair = std::pair<int64_t, int>;
  auto Compare = [](const DistOrdPair &L, const DistOrdPair &R) {
    return L.first < R.first;
  };
  std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
  Offsets.emplace(0, 0);
  int Cnt = 1;
  bool IsConsecutive = true;
  for (auto *Ptr : VL.drop_front()) {
    Optional<int> Diff = getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
                                         /*StrictCheck=*/true);
    if (!Diff)
      return false;

    // Check if the pointer with the same offset is found.
    int64_t Offset = *Diff;
    auto Res = Offsets.emplace(Offset, Cnt);
    if (!Res.second)
      return false;
    // Consecutive order if the inserted element is the last one.
    IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end();
    ++Cnt;
  }
  SortedIndices.clear();
  if (!IsConsecutive) {
    // Fill SortedIndices array only if it is non-consecutive.
    SortedIndices.resize(VL.size());
    Cnt = 0;
    for (const std::pair<int64_t, int> &Pair : Offsets) {
      SortedIndices[Cnt] = Pair.second;
      ++Cnt;
    }
  }
  return true;
}
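
// For example (pointers assumed): VL = {%p+8, %p, %p+4} with i32 elements
// yields offsets {0, -2, -1} relative to VL[0]; the list is not already in
// increasing address order, so SortedIndices becomes {1, 2, 0}, i.e. the
// accesses sorted by address are VL[1], VL[2], VL[0].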

/// Returns true if the memory operations \p A and \p B are consecutive.
bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
                               ScalarEvolution &SE, bool CheckType) {
  Value *PtrA = getLoadStorePointerOperand(A);
  Value *PtrB = getLoadStorePointerOperand(B);
  if (!PtrA || !PtrB)
    return false;
  Type *ElemTyA = getLoadStoreType(A);
  Type *ElemTyB = getLoadStoreType(B);
  Optional<int> Diff = getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
                                       /*StrictCheck=*/true, CheckType);
  return Diff && *Diff == 1;
}
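
// E.g. two i32 loads from %p and %p + 4 bytes have an element distance of 1
// and are reported as consecutive; %p and %p + 8 (distance 2) are not.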

void MemoryDepChecker::addAccess(StoreInst *SI) {
  visitPointers(SI->getPointerOperand(), *InnermostLoop,
                [this, SI](Value *Ptr) {
                  Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
                  InstMap.push_back(SI);
                  ++AccessIdx;
                });
}

void MemoryDepChecker::addAccess(LoadInst *LI) {
  visitPointers(LI->getPointerOperand(), *InnermostLoop,
                [this, LI](Value *Ptr) {
                  Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
                  InstMap.push_back(LI);
                  ++AccessIdx;
                });
}

MemoryDepChecker::VectorizationSafetyStatus
MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
  switch (Type) {
  case NoDep:
  case Forward:
  case BackwardVectorizable:
    return VectorizationSafetyStatus::Safe;

  case Unknown:
    return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
  case ForwardButPreventsForwarding:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return VectorizationSafetyStatus::Unsafe;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isBackward() const {
  switch (Type) {
  case NoDep:
  case Forward:
  case ForwardButPreventsForwarding:
  case Unknown:
    return false;

  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return true;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
  return isBackward() || Type == Unknown;
}

bool MemoryDepChecker::Dependence::isForward() const {
  switch (Type) {
  case Forward:
  case ForwardButPreventsForwarding:
    return true;

  case NoDep:
  case Unknown:
  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return false;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
                                                    uint64_t TypeByteSize) {
  // If loads occur at a distance that is not a multiple of a feasible vector
  // factor store-load forwarding does not take place.
  // Positive dependences might cause troubles because vectorizing them might
  // prevent store-load forwarding making vectorized code run a lot slower.
  //   a[i] = a[i-3] ^ a[i-8];
  //   The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
  //   hence on your typical architecture store-load forwarding does not take
  //   place. Vectorizing in such cases does not make sense.
  // Store-load forwarding distance.

  // After this many iterations store-to-load forwarding conflicts should not
  // cause any slowdowns.
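  //
  // A worked instance (numbers assumed): for i32 accesses, TypeByteSize = 4,
  // so the threshold below is 32 iterations. A distance of 12 bytes fails the
  // check at VF = 8 (12 % 8 != 0 and 12 / 8 < 32), capping the safe VF at 4
  // bytes, which is below 2 * TypeByteSize, so the function reports a
  // store-to-load forwarding conflict.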
  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
  // Maximum vector factor.
  uint64_t MaxVFWithoutSLForwardIssues = std::min(
      VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);

  // Compute the smallest VF at which the store and load would be misaligned.
  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
       VF *= 2) {
    // If the number of vector iteration between the store and the load are
    // small we could incur conflicts.
    if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
      MaxVFWithoutSLForwardIssues = (VF >> 1);
      break;
    }
  }

  if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
    LLVM_DEBUG(
        dbgs() << "LAA: Distance " << Distance
               << " that could cause a store-load forwarding conflict\n");
    return true;
  }

  if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
      MaxVFWithoutSLForwardIssues !=
          VectorizerParams::MaxVectorWidth * TypeByteSize)
    MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
  return false;
}

void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
  if (Status < S)
    Status = S;
}

/// Given a non-constant (unknown) dependence-distance \p Dist between two
/// memory accesses, that have the same stride whose absolute value is given
/// in \p Stride, and that have the same type size \p TypeByteSize,
/// in a loop whose takenCount is \p BackedgeTakenCount, check if it is
/// possible to prove statically that the dependence distance is larger
/// than the range that the accesses will travel through the execution of
/// the loop. If so, return true; false otherwise. This is useful for
/// example in loops such as the following (PR31098):
///     for (i = 0; i < D; ++i) {
///                = out[i];
///       out[i+D] =
///     }
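///
/// A sketch of the proof for this loop with i32 elements (assuming D > 0):
/// the dependence distance is 4*D bytes while BackedgeTakenCount * Step is
/// 4*(D-1) bytes, and 4*D - 4*(D-1) = 4 > 0 regardless of D, so the two
/// accesses can never overlap within one execution of the loop.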
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
                                     const SCEV &BackedgeTakenCount,
                                     const SCEV &Dist, uint64_t Stride,
                                     uint64_t TypeByteSize) {

  // If we can prove that
  //      (**) |Dist| > BackedgeTakenCount * Step
  // where Step is the absolute stride of the memory accesses in bytes,
  // then there is no dependence.
  //
  // Rationale:
  // We basically want to check if the absolute distance (|Dist/Step|)
  // is >= the loop iteration count (or > BackedgeTakenCount).
  // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
  // Section 4.2.1); Note, that for vectorization it is sufficient to prove
  // that the dependence distance is >= VF; This is checked elsewhere.
  // But in some cases we can prune unknown dependence distances early, and
  // even before selecting the VF, and without a runtime test, by comparing
  // the distance against the loop iteration count. Since the vectorized code
  // will be executed only if LoopCount >= VF, proving distance >= LoopCount
  // also guarantees that distance >= VF.
  //
  const uint64_t ByteStride = Stride * TypeByteSize;
  const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
  const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);

  const SCEV *CastedDist = &Dist;
  const SCEV *CastedProduct = Product;
  uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
  uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());

  // The dependence distance can be positive/negative, so we sign extend Dist;
  // The multiplication of the absolute stride in bytes and the
  // backedgeTakenCount is non-negative, so we zero extend Product.
  if (DistTypeSize > ProductTypeSize)
    CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
  else
    CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());

  // Is Dist - (BackedgeTakenCount * Step) > 0 ?
  // (If so, then we have proven (**) because |Dist| >= Dist)
  const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
  if (SE.isKnownPositive(Minus))
    return true;

  // Second try: Is -Dist - (BackedgeTakenCount * Step) > 0 ?
  // (If so, then we have proven (**) because |Dist| >= -1*Dist)
  const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
  Minus = SE.getMinusSCEV(NegDist, CastedProduct);
  if (SE.isKnownPositive(Minus))
    return true;

  return false;
}

/// Check the dependence for two accesses with the same stride \p Stride.
/// \p Distance is the positive distance and \p TypeByteSize is type size in
/// bytes.
///
/// \returns true if they are independent.
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
                                          uint64_t TypeByteSize) {
  assert(Stride > 1 && "The stride must be greater than 1");
  assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
  assert(Distance > 0 && "The distance must be non-zero");

  // Skip if the distance is not multiple of type byte size.
  if (Distance % TypeByteSize)
    return false;

  uint64_t ScaledDist = Distance / TypeByteSize;

  // No dependence if the scaled distance is not multiple of the stride.
  // E.g.
  //      for (i = 0; i < 1024 ; i += 4)
  //        A[i+2] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 2, stride is 4):
  //     | A[0] |      |      |      | A[4] |      |      |      |
  //     |      |      | A[2] |      |      |      | A[6] |      |
  //
  // E.g.
  //      for (i = 0; i < 1024 ; i += 3)
  //        A[i+4] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 4, stride is 3):
  //     | A[0] |      |      | A[3] |      |      | A[6] |      |      |
  //     |      |      |      |      | A[4] |      |      | A[7] |      |
  return ScaledDist % Stride;
}

MemoryDepChecker::Dependence::DepType
MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
                              const MemAccessInfo &B, unsigned BIdx,
                              const ValueToValueMap &Strides) {
  assert(AIdx < BIdx && "Must pass arguments in program order");

  Value *APtr = A.getPointer();
  Value *BPtr = B.getPointer();
  bool AIsWrite = A.getInt();
  bool BIsWrite = B.getInt();
  Type *ATy = APtr->getType()->getPointerElementType();
  Type *BTy = BPtr->getType()->getPointerElementType();

  // Two reads are independent.
  if (!AIsWrite && !BIsWrite)
    return Dependence::NoDep;

  // We cannot check pointers in different address spaces.
  if (APtr->getType()->getPointerAddressSpace() !=
      BPtr->getType()->getPointerAddressSpace())
    return Dependence::Unknown;

  int64_t StrideAPtr =
      getPtrStride(PSE, ATy, APtr, InnermostLoop, Strides, true);
  int64_t StrideBPtr =
      getPtrStride(PSE, BTy, BPtr, InnermostLoop, Strides, true);

  const SCEV *Src = PSE.getSCEV(APtr);
  const SCEV *Sink = PSE.getSCEV(BPtr);

  // If the induction step is negative we have to invert source and sink of the
  // dependence.
  if (StrideAPtr < 0) {
    std::swap(APtr, BPtr);
    std::swap(ATy, BTy);
    std::swap(Src, Sink);
    std::swap(AIsWrite, BIsWrite);
    std::swap(AIdx, BIdx);
    std::swap(StrideAPtr, StrideBPtr);
  }

  const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);

  LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
                    << "(Induction step: " << StrideAPtr << ")\n");
  LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
                    << *InstMap[BIdx] << ": " << *Dist << "\n");

  // Need accesses with constant stride. We don't want to vectorize
  // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
  // the address space.
  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
    LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
    return Dependence::Unknown;
  }

  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
  uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
  bool HasSameSize =
      DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
  uint64_t Stride = std::abs(StrideAPtr);
  const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
  if (!C) {
    if (!isa<SCEVCouldNotCompute>(Dist) && HasSameSize &&
        isSafeDependenceDistance(DL, *(PSE.getSE()),
                                 *(PSE.getBackedgeTakenCount()), *Dist, Stride,
                                 TypeByteSize))
      return Dependence::NoDep;

    LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
    FoundNonConstantDistanceDependence = true;
    return Dependence::Unknown;
  }

1586  const APInt &Val = C->getAPInt();
1587  int64_t Distance = Val.getSExtValue();
1588 
1589  // Attempt to prove strided accesses independent.
1590  if (std::abs(Distance) > 0 && Stride > 1 && HasSameSize &&
1591  areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
1592  LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
1593  return Dependence::NoDep;
1594  }
1595 
1596  // Negative distances are not plausible dependencies.
1597  if (Val.isNegative()) {
1598  bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
1599  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1600  (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
1601  !HasSameSize)) {
1602  LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
1603  return Dependence::ForwardButPreventsForwarding;
1604  }
1605 
1606  LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
1607  return Dependence::Forward;
1608  }
1609 
1610  // Write to the same location with the same size.
1611  if (Val == 0) {
1612  if (HasSameSize)
1613  return Dependence::Forward;
1614  LLVM_DEBUG(
1615  dbgs() << "LAA: Zero dependence difference but different type sizes\n");
1616  return Dependence::Unknown;
1617  }
1618 
1619  assert(Val.isStrictlyPositive() && "Expect a positive value");
1620 
1621  if (!HasSameSize) {
1622  LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
1623  "different type sizes\n");
1624  return Dependence::Unknown;
1625  }
1626 
1627  // Bail out early if passed-in parameters make vectorization not feasible.
1628  unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
1629  VectorizerParams::VectorizationFactor : 1);
1630  unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
1631  VectorizerParams::VectorizationInterleave : 1);
1632  // The minimum number of iterations for a vectorized/unrolled version.
1633  unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
1634 
1635  // It's not vectorizable if the distance is smaller than the minimum distance
1636  // needed for a vectorized/unrolled version. Vectorizing one iteration in
1637  // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
1638  // TypeByteSize (no need to add the last gap distance).
1639  //
1640  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1641  // foo(int *A) {
1642  // int *B = (int *)((char *)A + 14);
1643  // for (i = 0 ; i < 1024 ; i += 2)
1644  // B[i] = A[i] + 1;
1645  // }
1646  //
1647  // Two accesses in memory (stride is 2):
1648  // | A[0] | | A[2] | | A[4] | | A[6] | |
1649  // | B[0] | | B[2] | | B[4] |
1650  //
1651  // Distance needed for vectorizing iterations except the last iteration:
1652  // 4 * 2 * (MinNumIter - 1). Distance needed for the last iteration: 4.
1653  // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
1654  //
1655  // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
1656  // 12, which is less than distance.
1657  //
1658  // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
1659  // the minimum distance needed is 28, which is greater than distance. It is
1660  // not safe to do vectorization.
1661  uint64_t MinDistanceNeeded =
1662  TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
1663  if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
1664  LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "
1665  << Distance << '\n');
1666  return Dependence::Backward;
1667  }
1668 
1669  // Unsafe if the minimum distance needed is greater than max safe distance.
1670  if (MinDistanceNeeded > MaxSafeDepDistBytes) {
1671  LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
1672  << MinDistanceNeeded << " size in bytes");
1673  return Dependence::Backward;
1674  }
1675 
1676  // Positive distance bigger than max vectorization factor.
1677  // FIXME: Should use max factor instead of max distance in bytes, which
1678  // cannot handle different types.
1679  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1680  // void foo (int *A, char *B) {
1681  // for (unsigned i = 0; i < 1024; i++) {
1682  // A[i+2] = A[i] + 1;
1683  // B[i+2] = B[i] + 1;
1684  // }
1685  // }
1686  //
1687  // This case is currently unsafe according to the max safe distance. If we
1688  // analyze the two accesses on array B, the max safe dependence distance
1689  // is 2. Then we analyze the accesses on array A, where the minimum distance
1690  // needed is 8, which exceeds 2, so vectorization is forbidden. But actually
1691  // both A and B could be vectorized by 2 iterations.
1692  MaxSafeDepDistBytes =
1693  std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
1694 
1695  bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
1696  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1697  couldPreventStoreLoadForward(Distance, TypeByteSize))
1698  return Dependence::BackwardVectorizableButPreventsForwarding;
1699 
1700  uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
1701  LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
1702  << " with max VF = " << MaxVF << '\n');
1703  uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
1704  MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits);
1705  return Dependence::BackwardVectorizable;
1706 }
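A hedged sketch of the minimum-distance formula used above (an illustrative helper, not an LLVM API), plugging in the values from the worked comment: with 4-byte ints and stride 2, a minimum of 2 iterations needs 12 bytes and fits the 14-byte distance, while a user-forced minimum of 4 iterations needs 28 bytes and does not.

    #include <cstdint>
    #include <cstdio>

    // Sketch of: TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize
    static uint64_t minDistanceNeeded(uint64_t TypeByteSize, uint64_t Stride,
                                      unsigned MinNumIter) {
      return TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
    }

    int main() {
      std::printf("%llu\n",
                  (unsigned long long)minDistanceNeeded(4, 2, 2)); // 12 <= 14: safe
      std::printf("%llu\n",
                  (unsigned long long)minDistanceNeeded(4, 2, 4)); // 28 > 14: unsafe
    }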
1707 
1708 bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
1709  MemAccessInfoList &CheckDeps,
1710  const ValueToValueMap &Strides) {
1711 
1712  MaxSafeDepDistBytes = -1;
1713  SmallPtrSet<MemAccessInfo, 8> Visited;
1714  for (MemAccessInfo CurAccess : CheckDeps) {
1715  if (Visited.count(CurAccess))
1716  continue;
1717 
1718  // Get the relevant memory access set.
1719  EquivalenceClasses<MemAccessInfo>::iterator I =
1720  AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
1721 
1722  // Check accesses within this set.
1723  EquivalenceClasses<MemAccessInfo>::member_iterator AI =
1724  AccessSets.member_begin(I);
1725  EquivalenceClasses<MemAccessInfo>::member_iterator AE =
1726  AccessSets.member_end();
1727 
1728  // Check every access pair.
1729  while (AI != AE) {
1730  Visited.insert(*AI);
1731  bool AIIsWrite = AI->getInt();
1732  // Check loads only against the next equivalence classes, but stores also
1733  // against other stores in the same equivalence class - to the same address.
1734  EquivalenceClasses<MemAccessInfo>::member_iterator OI =
1735  (AIIsWrite ? AI : std::next(AI));
1736  while (OI != AE) {
1737  // Check every accessing instruction pair in program order.
1738  for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
1739  I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
1740  // Scan all accesses of another equivalence class, but only the next
1741  // accesses of the same equivalence class.
1742  for (std::vector<unsigned>::iterator
1743  I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),
1744  I2E = (OI == AI ? I1E : Accesses[*OI].end());
1745  I2 != I2E; ++I2) {
1746  auto A = std::make_pair(&*AI, *I1);
1747  auto B = std::make_pair(&*OI, *I2);
1748 
1749  assert(*I1 != *I2);
1750  if (*I1 > *I2)
1751  std::swap(A, B);
1752 
1753  Dependence::DepType Type =
1754  isDependent(*A.first, A.second, *B.first, B.second, Strides);
1755  mergeInStatus(Dependence::isSafeForVectorization(Type));
1756 
1757  // Gather dependences unless we accumulated MaxDependences
1758  // dependences. In that case return as soon as we find the first
1759  // unsafe dependence. This puts a limit on this quadratic
1760  // algorithm.
1761  if (RecordDependences) {
1762  if (Type != Dependence::NoDep)
1763  Dependences.push_back(Dependence(A.second, B.second, Type));
1764 
1765  if (Dependences.size() >= MaxDependences) {
1766  RecordDependences = false;
1767  Dependences.clear();
1768  LLVM_DEBUG(dbgs()
1769  << "Too many dependences, stopped recording\n");
1770  }
1771  }
1772  if (!RecordDependences && !isSafeForVectorization())
1773  return false;
1774  }
1775  ++OI;
1776  }
1777  AI++;
1778  }
1779  }
1780 
1781  LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
1782  return isSafeForVectorization();
1783 }
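The nested loops above visit every relevant access pair exactly once and normalize each pair to program order before the dependence test. A minimal standalone sketch of that normalization step, using hypothetical instruction indices rather than the LLVM data structures:

    #include <cstdio>
    #include <utility>
    #include <vector>

    int main() {
      std::vector<unsigned> Class = {4, 1, 7}; // hypothetical access indices
      for (std::size_t I = 0; I < Class.size(); ++I)
        for (std::size_t J = I + 1; J < Class.size(); ++J) {
          unsigned A = Class[I], B = Class[J];
          if (A > B)
            std::swap(A, B); // isDependent requires AIdx < BIdx (program order)
          std::printf("check pair (%u, %u)\n", A, B);
        }
    }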
1784 
1785 SmallVector<Instruction *, 4>
1786 MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
1787  MemAccessInfo Access(Ptr, isWrite);
1788  auto &IndexVector = Accesses.find(Access)->second;
1789 
1790  SmallVector<Instruction *, 4> Insts;
1791  transform(IndexVector,
1792  std::back_inserter(Insts),
1793  [&](unsigned Idx) { return this->InstMap[Idx]; });
1794  return Insts;
1795 }
1796 
1797 const char *MemoryDepChecker::Dependence::DepName[] = {
1798  "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
1799  "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
1800 
1801 void MemoryDepChecker::Dependence::print(
1802  raw_ostream &OS, unsigned Depth,
1803  const SmallVectorImpl<Instruction *> &Instrs) const {
1804  OS.indent(Depth) << DepName[Type] << ":\n";
1805  OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
1806  OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
1807 }
1808 
1809 bool LoopAccessInfo::canAnalyzeLoop() {
1810  // We need to have a loop header.
1811  LLVM_DEBUG(dbgs() << "LAA: Found a loop in "
1812  << TheLoop->getHeader()->getParent()->getName() << ": "
1813  << TheLoop->getHeader()->getName() << '\n');
1814 
1815  // We can only analyze innermost loops.
1816  if (!TheLoop->isInnermost()) {
1817  LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
1818  recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
1819  return false;
1820  }
1821 
1822  // We must have a single backedge.
1823  if (TheLoop->getNumBackEdges() != 1) {
1824  LLVM_DEBUG(
1825  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1826  recordAnalysis("CFGNotUnderstood")
1827  << "loop control flow is not understood by analyzer";
1828  return false;
1829  }
1830 
1831  // ScalarEvolution needs to be able to find the exit count.
1832  const SCEV *ExitCount = PSE->getBackedgeTakenCount();
1833  if (isa<SCEVCouldNotCompute>(ExitCount)) {
1834  recordAnalysis("CantComputeNumberOfIterations")
1835  << "could not determine number of loop iterations";
1836  LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
1837  return false;
1838  }
1839 
1840  return true;
1841 }
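For illustration, here are hedged examples (written for this note, not taken from the LLVM tests) of loop shapes the guards above reject: an outer loop fails the innermost check, and a pointer-chasing loop has no backedge-taken count that SCEV can compute.

    struct Node { Node *Next; };

    void rejectedShapes(int **A, Node *P, int N) {
      for (int I = 0; I < N; ++I)   // outer loop: not innermost, rejected
        for (int J = 0; J < N; ++J)
          A[I][J] = 0;

      while (P)                     // trip count unknown to SCEV, rejected
        P = P->Next;
    }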
1842 
1843 void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
1844  const TargetLibraryInfo *TLI,
1845  DominatorTree *DT) {
1846  typedef SmallPtrSet<Value*, 16> ValueSet;
1847 
1848  // Holds the Load and Store instructions.
1849  SmallVector<LoadInst *, 16> Loads;
1850  SmallVector<StoreInst *, 16> Stores;
1851 
1852  // Holds all the different accesses in the loop.
1853  unsigned NumReads = 0;
1854  unsigned NumReadWrites = 0;
1855 
1856  bool HasComplexMemInst = false;
1857 
1858  // A runtime check is only legal to insert if there are no convergent calls.
1859  HasConvergentOp = false;
1860 
1861  PtrRtChecking->Pointers.clear();
1862  PtrRtChecking->Need = false;
1863 
1864  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
1865 
1866  const bool EnableMemAccessVersioningOfLoop =
1867  EnableMemAccessVersioning &&
1868  !TheLoop->getHeader()->getParent()->hasOptSize();
1869 
1870  // For each block.
1871  for (BasicBlock *BB : TheLoop->blocks()) {
1872  // Scan the BB and collect legal loads and stores. Also detect any
1873  // convergent instructions.
1874  for (Instruction &I : *BB) {
1875  if (auto *Call = dyn_cast<CallBase>(&I)) {
1876  if (Call->isConvergent())
1877  HasConvergentOp = true;
1878  }
1879 
1880  // With both a non-vectorizable memory instruction and a convergent
1881  // operation found in this loop, there is no reason to continue the search.
1882  if (HasComplexMemInst && HasConvergentOp) {
1883  CanVecMem = false;
1884  return;
1885  }
1886 
1887  // Avoid hitting recordAnalysis multiple times.
1888  if (HasComplexMemInst)
1889  continue;
1890 
1891  // If this is a load, save it. If this instruction can read from memory
1892  // but is not a load, then we quit. Notice that we don't handle function
1893  // calls that read or write.
1894  if (I.mayReadFromMemory()) {
1895  // Many math library functions read the rounding mode. We will only
1896  // vectorize a loop if it contains known function calls that don't set
1897  // the flag. Therefore, it is safe to ignore this read from memory.
1898  auto *Call = dyn_cast<CallInst>(&I);
1899  if (Call && getVectorIntrinsicIDForCall(Call, TLI))
1900  continue;
1901 
1902  // If the function has an explicit vectorized counterpart, we can safely
1903  // assume that it can be vectorized.
1904  if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
1905  !VFDatabase::getMappings(*Call).empty())
1906  continue;
1907 
1908  auto *Ld = dyn_cast<LoadInst>(&I);
1909  if (!Ld) {
1910  recordAnalysis("CantVectorizeInstruction", Ld)
1911  << "instruction cannot be vectorized";
1912  HasComplexMemInst = true;
1913  continue;
1914  }
1915  if (!Ld->isSimple() && !IsAnnotatedParallel) {
1916  recordAnalysis("NonSimpleLoad", Ld)
1917  << "read with atomic ordering or volatile read";
1918  LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
1919  HasComplexMemInst = true;
1920  continue;
1921  }
1922  NumLoads++;
1923  Loads.push_back(Ld);
1924  DepChecker->addAccess(Ld);
1925  if (EnableMemAccessVersioningOfLoop)
1926  collectStridedAccess(Ld);
1927  continue;
1928  }
1929 
1930  // Save 'store' instructions. Abort if other instructions write to memory.
1931  if (I.mayWriteToMemory()) {
1932  auto *St = dyn_cast<StoreInst>(&I);
1933  if (!St) {
1934  recordAnalysis("CantVectorizeInstruction", St)
1935  << "instruction cannot be vectorized";
1936  HasComplexMemInst = true;
1937  continue;
1938  }
1939  if (!St->isSimple() && !IsAnnotatedParallel) {
1940  recordAnalysis("NonSimpleStore", St)
1941  << "write with atomic ordering or volatile write";
1942  LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
1943  HasComplexMemInst = true;
1944  continue;
1945  }
1946  NumStores++;
1947  Stores.push_back(St);
1948  DepChecker->addAccess(St);
1949  if (EnableMemAccessVersioningOfLoop)
1950  collectStridedAccess(St);
1951  }
1952  } // Next instr.
1953  } // Next block.
1954 
1955  if (HasComplexMemInst) {
1956  CanVecMem = false;
1957  return;
1958  }
1959 
1960  // Now we have two lists that hold the loads and the stores.
1961  // Next, we find the pointers that they use.
1962 
1963  // Check if we see any stores. If there are no stores, then we don't
1964  // care if the pointers are *restrict*.
1965  if (!Stores.size()) {
1966  LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
1967  CanVecMem = true;
1968  return;
1969  }
1970 
1971  MemoryDepChecker::DepCandidates DependentAccesses;
1972  AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE);
1973 
1974  // Holds the analyzed pointers. We don't want to call getUnderlyingObjects
1975  // multiple times on the same object. If the ptr is accessed twice, once
1976  // for read and once for write, it will only appear once (on the write
1977  // list). This is okay, since we are going to check for conflicts between
1978  // writes and between reads and writes, but not between reads and reads.
1979  ValueSet Seen;
1980 
1981  // Record uniform store addresses to identify if we have multiple stores
1982  // to the same address.
1983  ValueSet UniformStores;
1984 
1985  for (StoreInst *ST : Stores) {
1986  Value *Ptr = ST->getPointerOperand();
1987 
1988  if (isUniform(Ptr))
1989  HasDependenceInvolvingLoopInvariantAddress |=
1990  !UniformStores.insert(Ptr).second;
1991 
1992  // If we did *not* see this pointer before, insert it to the read-write
1993  // list. At this phase it is only a 'write' list.
1994  if (Seen.insert(Ptr).second) {
1995  ++NumReadWrites;
1996 
1997  MemoryLocation Loc = MemoryLocation::get(ST);
1998  // The TBAA metadata could have a control dependency on the predication
1999  // condition, so we cannot rely on it when determining whether or not we
2000  // need runtime pointer checks.
2001  if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
2002  Loc.AATags.TBAA = nullptr;
2003 
2004  visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2005  [&Accesses, Loc](Value *Ptr) {
2006  MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2007  Accesses.addStore(NewLoc);
2008  });
2009  }
2010  }
2011 
2012  if (IsAnnotatedParallel) {
2013  LLVM_DEBUG(
2014  dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
2015  << "checks.\n");
2016  CanVecMem = true;
2017  return;
2018  }
2019 
2020  for (LoadInst *LD : Loads) {
2021  Value *Ptr = LD->getPointerOperand();
2022  // If we did *not* see this pointer before, insert it to the
2023  // read list. If we *did* see it before, then it is already in
2024  // the read-write list. This allows us to vectorize expressions
2025  // such as A[i] += x; Because the address of A[i] is a read-write
2026  // pointer. This only works if the index of A[i] is consecutive.
2027  // If the address of i is unknown (for example A[B[i]]) then we may
2028  // read a few words, modify, and write a few words, and some of the
2029  // words may be written to the same address.
2030  bool IsReadOnlyPtr = false;
2031  if (Seen.insert(Ptr).second ||
2032  !getPtrStride(*PSE, LD->getType(), Ptr, TheLoop, SymbolicStrides)) {
2033  ++NumReads;
2034  IsReadOnlyPtr = true;
2035  }
2036 
2037  // See if there is an unsafe dependency between a load to a uniform address and
2038  // store to the same uniform address.
2039  if (UniformStores.count(Ptr)) {
2040  LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
2041  "load and uniform store to the same address!\n");
2042  HasDependenceInvolvingLoopInvariantAddress = true;
2043  }
2044 
2045  MemoryLocation Loc = MemoryLocation::get(LD);
2046  // The TBAA metadata could have a control dependency on the predication
2047  // condition, so we cannot rely on it when determining whether or not we
2048  // need runtime pointer checks.
2049  if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
2050  Loc.AATags.TBAA = nullptr;
2051 
2052  visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2053  [&Accesses, Loc, IsReadOnlyPtr](Value *Ptr) {
2054  MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2055  Accesses.addLoad(NewLoc, IsReadOnlyPtr);
2056  });
2057  }
2058 
2059  // If we write (or read-write) to a single destination and there are no
2060  // other reads in this loop then it is safe to vectorize.
2061  if (NumReadWrites == 1 && NumReads == 0) {
2062  LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
2063  CanVecMem = true;
2064  return;
2065  }
2066 
2067  // Build dependence sets and check whether we need a runtime pointer bounds
2068  // check.
2069  Accesses.buildDependenceSets();
2070 
2071  // Find pointers with computable bounds. We are going to use this information
2072  // to place a runtime bound check.
2073  bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
2074  TheLoop, SymbolicStrides);
2075  if (!CanDoRTIfNeeded) {
2076  recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds";
2077  LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
2078  << "the array bounds.\n");
2079  CanVecMem = false;
2080  return;
2081  }
2082 
2083  LLVM_DEBUG(
2084  dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");
2085 
2086  CanVecMem = true;
2087  if (Accesses.isDependencyCheckNeeded()) {
2088  LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
2089  CanVecMem = DepChecker->areDepsSafe(
2090  DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
2091  MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
2092 
2093  if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
2094  LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
2095 
2096  // Clear the dependency checks. We assume they are not needed.
2097  Accesses.resetDepChecks(*DepChecker);
2098 
2099  PtrRtChecking->reset();
2100  PtrRtChecking->Need = true;
2101 
2102  auto *SE = PSE->getSE();
2103  CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop,
2104  SymbolicStrides, true);
2105 
2106  // Check that we found the bounds for the pointer.
2107  if (!CanDoRTIfNeeded) {
2108  recordAnalysis("CantCheckMemDepsAtRunTime")
2109  << "cannot check memory dependencies at runtime";
2110  LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
2111  CanVecMem = false;
2112  return;
2113  }
2114 
2115  CanVecMem = true;
2116  }
2117  }
2118 
2119  if (HasConvergentOp) {
2120  recordAnalysis("CantInsertRuntimeCheckWithConvergent")
2121  << "cannot add control dependency to convergent operation";
2122  LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
2123  "would be needed with a convergent operation\n");
2124  CanVecMem = false;
2125  return;
2126  }
2127 
2128  if (CanVecMem)
2129  LLVM_DEBUG(
2130  dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
2131  << (PtrRtChecking->Need ? "" : " don't")
2132  << " need runtime memory checks.\n");
2133  else {
2134  recordAnalysis("UnsafeMemDep")
2135  << "unsafe dependent memory operations in loop. Use "
2136  "#pragma loop distribute(enable) to allow loop distribution "
2137  "to attempt to isolate the offending operations into a separate "
2138  "loop";
2139  LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
2140  }
2141 }
2142 
2143 bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
2144  DominatorTree *DT) {
2145  assert(TheLoop->contains(BB) && "Unknown block used");
2146 
2147  // Blocks that do not dominate the latch need predication.
2148  BasicBlock* Latch = TheLoop->getLoopLatch();
2149  return !DT->dominates(BB, Latch);
2150 }
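As a hedged illustration (an example written for this note): in the loop below, the block holding the guarded store does not dominate the latch, so this predicate returns true for it and the store must be predicated before vectorization — the same condition analyzeLoop uses above to distrust TBAA metadata.

    void clampNegatives(int *A, int N) {
      for (int I = 0; I < N; ++I)
        if (A[I] < 0) // the conditional block does not dominate the latch
          A[I] = 0;   // -> needs predication for vectorization
    }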
2151 
2152 OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
2153  Instruction *I) {
2154  assert(!Report && "Multiple reports generated");
2155 
2156  Value *CodeRegion = TheLoop->getHeader();
2157  DebugLoc DL = TheLoop->getStartLoc();
2158 
2159  if (I) {
2160  CodeRegion = I->getParent();
2161  // If there is no debug location attached to the instruction, fall back to
2162  // using the loop's.
2163  if (I->getDebugLoc())
2164  DL = I->getDebugLoc();
2165  }
2166 
2167  Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
2168  CodeRegion);
2169  return *Report;
2170 }
2171 
2172 bool LoopAccessInfo::isUniform(Value *V) const {
2173  auto *SE = PSE->getSE();
2174  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
2175  // never considered uniform.
2176  // TODO: Is this really what we want? Even without FP SCEV, we may want some
2177  // trivially loop-invariant FP values to be considered uniform.
2178  if (!SE->isSCEVable(V->getType()))
2179  return false;
2180  return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
2181 }
2182 
2183 void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
2184  Value *Ptr = getLoadStorePointerOperand(MemAccess);
2185  if (!Ptr)
2186  return;
2187 
2188  Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
2189  if (!Stride)
2190  return;
2191 
2192  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
2193  "versioning:");
2194  LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
2195 
2196  // Avoid adding the "Stride == 1" predicate when we know that
2197  // Stride >= Trip-Count. Such a predicate will effectively optimize a single
2198  // or zero iteration loop, as Trip-Count <= Stride == 1.
2199  //
2200  // TODO: We are currently not making a very informed decision on when it is
2201  // beneficial to apply stride versioning. It might make more sense that the
2202  // users of this analysis (such as the vectorizer) will trigger it, based on
2203  // their specific cost considerations; For example, in cases where stride
2204  // versioning does not help resolving memory accesses/dependences, the
2205  // vectorizer should evaluate the cost of the runtime test, and the benefit
2206  // of various possible stride specializations, considering the alternatives
2207  // of using gather/scatters (if available).
2208 
2209  const SCEV *StrideExpr = PSE->getSCEV(Stride);
2210  const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
2211 
2212  // Match the types so we can compare the stride and the BETakenCount.
2213  // The Stride can be positive/negative, so we sign extend Stride;
2214  // The backedgeTakenCount is non-negative, so we zero extend BETakenCount.
2215  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2216  uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
2217  uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
2218  const SCEV *CastedStride = StrideExpr;
2219  const SCEV *CastedBECount = BETakenCount;
2220  ScalarEvolution *SE = PSE->getSE();
2221  if (BETypeSize >= StrideTypeSize)
2222  CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
2223  else
2224  CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
2225  const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
2226  // Since TripCount == BackEdgeTakenCount + 1, checking:
2227  // "Stride >= TripCount" is equivalent to checking:
2228  // Stride - BETakenCount > 0
2229  if (SE->isKnownPositive(StrideMinusBETaken)) {
2230  LLVM_DEBUG(
2231  dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
2232  "Stride==1 predicate will imply that the loop executes "
2233  "at most once.\n");
2234  return;
2235  }
2236  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.");
2237 
2238  SymbolicStrides[Ptr] = Stride;
2239  StrideSet.insert(Stride);
2240 }
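A worked instance of the bail-out above, with assumed values rather than anything computed by SCEV: if the backedge-taken count is 7 (trip count 8) and the symbolic stride is known to be at least 8, then Stride - BETakenCount = 1 > 0, so a "Stride == 1" predicate could only describe a loop that executes at most once, and versioning is skipped.

    #include <cstdint>
    #include <cstdio>

    int main() {
      int64_t Stride = 8, BETakenCount = 7;              // assumed example values
      bool SkipVersioning = (Stride - BETakenCount) > 0; // Stride >= TripCount
      std::printf("skip versioning: %d\n", SkipVersioning); // prints 1
    }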
2241 
2242 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
2243  const TargetLibraryInfo *TLI, AAResults *AA,
2244  DominatorTree *DT, LoopInfo *LI)
2245  : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
2246  PtrRtChecking(std::make_unique<RuntimePointerChecking>(SE)),
2247  DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
2248  NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
2249  HasConvergentOp(false),
2250  HasDependenceInvolvingLoopInvariantAddress(false) {
2251  if (canAnalyzeLoop())
2252  analyzeLoop(AA, LI, TLI, DT);
2253 }
2254 
2255 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
2256  if (CanVecMem) {
2257  OS.indent(Depth) << "Memory dependences are safe";
2258  if (MaxSafeDepDistBytes != -1ULL)
2259  OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
2260  << " bytes";
2261  if (PtrRtChecking->Need)
2262  OS << " with run-time checks";
2263  OS << "\n";
2264  }
2265 
2266  if (HasConvergentOp)
2267  OS.indent(Depth) << "Has convergent operation in loop\n";
2268 
2269  if (Report)
2270  OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
2271 
2272  if (auto *Dependences = DepChecker->getDependences()) {
2273  OS.indent(Depth) << "Dependences:\n";
2274  for (auto &Dep : *Dependences) {
2275  Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
2276  OS << "\n";
2277  }
2278  } else
2279  OS.indent(Depth) << "Too many dependences, not recorded\n";
2280 
2281  // List the pairs of accesses that need run-time checks to prove independence.
2282  PtrRtChecking->print(OS, Depth);
2283  OS << "\n";
2284 
2285  OS.indent(Depth) << "Non vectorizable stores to invariant address were "
2286  << (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
2287  << "found in loop.\n";
2288 
2289  OS.indent(Depth) << "SCEV assumptions:\n";
2290  PSE->getUnionPredicate().print(OS, Depth);
2291 
2292  OS << "\n";
2293 
2294  OS.indent(Depth) << "Expressions re-written:\n";
2295  PSE->print(OS, Depth);
2296 }
2297 
2298 LoopAccessLegacyAnalysis::LoopAccessLegacyAnalysis() : FunctionPass(ID) {
2299  initializeLoopAccessLegacyAnalysisPass(*PassRegistry::getPassRegistry());
2300 }
2301 
2302 const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
2303  auto &LAI = LoopAccessInfoMap[L];
2304 
2305  if (!LAI)
2306  LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
2307 
2308  return *LAI.get();
2309 }
2310 
2311 void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
2312  LoopAccessLegacyAnalysis &LAA = *const_cast<LoopAccessLegacyAnalysis *>(this);
2313 
2314  for (Loop *TopLevelLoop : *LI)
2315  for (Loop *L : depth_first(TopLevelLoop)) {
2316  OS.indent(2) << L->getHeader()->getName() << ":\n";
2317  auto &LAI = LAA.getInfo(L);
2318  LAI.print(OS, 4);
2319  }
2320 }
2321 
2322 bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
2323  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2324  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2325  TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
2326  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2327  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2328  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2329 
2330  return false;
2331 }
2332 
2333 void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
2334  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
2335  AU.addRequiredTransitive<AAResultsWrapperPass>();
2336  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
2337  AU.addRequiredTransitive<LoopInfoWrapperPass>();
2338 
2339  AU.setPreservesAll();
2340 }
2341 
2342 char LoopAccessLegacyAnalysis::ID = 0;
2343 static const char laa_name[] = "Loop Access Analysis";
2344 #define LAA_NAME "loop-accesses"
2345 
2346 INITIALIZE_PASS_BEGIN(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2347 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
2348 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2349 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2350 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
2351 INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2352 
2353 AnalysisKey LoopAccessAnalysis::Key;
2354 
2355 LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM,
2356  LoopStandardAnalysisResults &AR) {
2357  return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
2358 }
2359 
2360 namespace llvm {
2361 
2362 Pass *createLAAPass() {
2363  return new LoopAccessLegacyAnalysis();
2364  }
2365 
2366 } // end namespace llvm
llvm::sortPtrAccesses
bool sortPtrAccesses(ArrayRef< Value * > VL, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)
Attempt to sort the pointers in VL and return the sorted indices in SortedIndices,...
Definition: LoopAccessAnalysis.cpp:1228
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::MemoryDepChecker::clearDependences
void clearDependences()
Definition: LoopAccessAnalysis.h:227
llvm::APInt::isStrictlyPositive
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:339
set
We currently generate a but we really shouldn eax ecx xorl edx divl ecx eax divl ecx movl eax ret A similar code sequence works for division We currently compile i32 v2 eax eax jo LBB1_2 atomic and others It is also currently not done for read modify write instructions It is also current not done if the OF or CF flags are needed The shift operators have the complication that when the shift count is EFLAGS is not set
Definition: README.txt:1277
llvm::LoopAccessLegacyAnalysis::ID
static char ID
Definition: LoopAccessAnalysis.h:715
llvm::MemoryDepChecker::Dependence::isBackward
bool isBackward() const
Lexically backward dependence.
Definition: LoopAccessAnalysis.cpp:1324
llvm::MemoryDepChecker
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
Definition: LoopAccessAnalysis.h:86
llvm::LoopAccessLegacyAnalysis
This analysis provides dependence information for the memory accesses of a loop.
Definition: LoopAccessAnalysis.h:713
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
llvm::MemoryLocation::get
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Definition: MemoryLocation.cpp:37
llvm::ScalarEvolution::getNegativeSCEV
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
Definition: ScalarEvolution.cpp:4385
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::sys::path::const_iterator::end
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
getMinFromExprs
static const SCEV * getMinFromExprs(const SCEV *I, const SCEV *J, ScalarEvolution *SE)
Compare I and J and return the minimum.
Definition: LoopAccessAnalysis.cpp:271
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::MemoryDepChecker::VectorizationSafetyStatus::Safe
@ Safe
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::LoopAccessInfo::isUniform
bool isUniform(Value *V) const
Returns true if the value V is uniform within the loop.
Definition: LoopAccessAnalysis.cpp:2172
llvm::EquivalenceClasses::getLeaderValue
const ElemTy & getLeaderValue(const ElemTy &V) const
getLeaderValue - Return the leader for the specified value that is in the set.
Definition: EquivalenceClasses.h:189
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:370
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:217
llvm::MemoryLocation::Ptr
const Value * Ptr
The address of the start of the location.
Definition: MemoryLocation.h:217
llvm::SCEVAddRecExpr::getStart
const SCEV * getStart() const
Definition: ScalarEvolutionExpressions.h:353
llvm::getVectorIntrinsicIDForCall
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
Definition: VectorUtils.cpp:130
llvm::TypeSize::getFixedSize
ScalarTy getFixedSize() const
Definition: TypeSize.h:425
llvm::RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup
RuntimeCheckingPtrGroup(unsigned Index, RuntimePointerChecking &RtCheck)
Create a new pointer checking group containing a single pointer, with index Index in RtCheck.
Definition: LoopAccessAnalysis.cpp:170
llvm::Function
Definition: Function.h:62
llvm::cl::location
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:457
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
EnableMemAccessVersioning
static cl::opt< bool > EnableMemAccessVersioning("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symbolic stride memory access versioning"))
This enables versioning on the strides of symbolically striding memory accesses in code like the foll...
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
llvm::PredicatedScalarEvolution
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
Definition: ScalarEvolution.h:2170
High
uint64_t High
Definition: NVVMIntrRange.cpp:61
llvm::SmallVector< RuntimePointerCheck, 4 >
llvm::EquivalenceClasses::member_end
member_iterator member_end() const
Definition: EquivalenceClasses.h:176
CheckType
static LLVM_ATTRIBUTE_ALWAYS_INLINE bool CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL)
Definition: SelectionDAGISel.cpp:2477
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1479
llvm::EquivalenceClasses
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
Definition: EquivalenceClasses.h:59
llvm::PredicatedScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount()
Get the (predicated) backedge count for the analyzed loop.
Definition: ScalarEvolution.cpp:13835
ErrorHandling.h
LoopAccessAnalysis.h
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:734
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:634
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::VectorizerParams::VectorizationInterleave
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.
Definition: LoopAccessAnalysis.h:43
llvm::LoopAccessLegacyAnalysis::getInfo
const LoopAccessInfo & getInfo(Loop *L)
Query the result of the loop access information for the loop L.
Definition: LoopAccessAnalysis.cpp:2302
MaxDependences
static cl::opt< unsigned > MaxDependences("max-dependences", cl::Hidden, cl::desc("Maximum number of dependences collected by " "loop-access analysis (default = 100)"), cl::init(100))
We collect dependences up to this threshold.
ValueTracking.h
OptimizationRemarkEmitter.h
llvm::SCEV::NoWrapMask
@ NoWrapMask
Definition: ScalarEvolution.h:137
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::MemoryDepChecker::Dependence::isPossiblyBackward
bool isPossiblyBackward() const
May be a lexically backward dependence type (includes Unknown).
Definition: LoopAccessAnalysis.cpp:1340
llvm::AliasSetTracker
Definition: AliasSetTracker.h:329
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::LoopAccessAnalysis
This analysis provides dependence information for the memory accesses of a loop.
Definition: LoopAccessAnalysis.h:755
APInt.h
llvm::getLoadStoreType
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
Definition: Instructions.h:5364
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
true
basic Basic Alias true
Definition: BasicAliasAnalysis.cpp:1886
llvm::DenseMapIterator
Definition: DenseMap.h:56
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::getPtrStride
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
Definition: LoopAccessAnalysis.cpp:1052
DenseMap.h
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1412
llvm::RuntimePointerChecking::needsChecking
bool needsChecking(const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
Definition: LoopAccessAnalysis.cpp:260
Groups
static const X86InstrFMA3Group Groups[]
Definition: X86InstrFMA3Info.cpp:73
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:227
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1271
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
llvm::copy
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1700
llvm::MemoryDepChecker::areDepsSafe
bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps, const ValueToValueMap &Strides)
Check whether the dependencies between the accesses are safe.
Definition: LoopAccessAnalysis.cpp:1708
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
llvm::isConsecutiveAccess
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
Definition: LoopAccessAnalysis.cpp:1275
llvm::Optional< int >
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:80
llvm::SmallPtrSet< Value *, 16 >
llvm::RuntimePointerChecking::getNumberOfChecks
unsigned getNumberOfChecks() const
Returns the number of run-time checks required according to needsChecking.
Definition: LoopAccessAnalysis.h:436
Operator.h
llvm::EquivalenceClasses::insert
iterator insert(const ElemTy &Data)
insert - Insert a new value into the union/find set, ignoring the request if the value already exists...
Definition: EquivalenceClasses.h:218
STLExtras.h
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:642
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:272
llvm::PredicatedScalarEvolution::getAsAddRec
const SCEVAddRecExpr * getAsAddRec(Value *V)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
Definition: ScalarEvolution.cpp:13897
llvm::MemoryDepChecker::Dependence::NoDep
@ NoDep
Definition: LoopAccessAnalysis.h:111
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
DepthFirstIterator.h
llvm::stripIntegerCast
Value * stripIntegerCast(Value *V)
Definition: LoopAccessAnalysis.cpp:136
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::MemoryDepChecker::Dependence::DepType
DepType
The type of the dependence.
Definition: LoopAccessAnalysis.h:109
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
LoopAnalysisManager.h
llvm::MemoryDepChecker::Dependence::isForward
bool isForward() const
Lexically forward dependence.
Definition: LoopAccessAnalysis.cpp:1344
AliasAnalysis.h
PointerIntPair.h
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ScalarEvolution::getMulExpr
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:3037
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:115
Instruction.h
CommandLine.h
llvm::MemoryLocation::AATags
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
Definition: MemoryLocation.h:230
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1355
llvm::LoopAccessLegacyAnalysis::print
void print(raw_ostream &OS, const Module *M=nullptr) const override
Print the result of the analysis when invoked with -analyze.
Definition: LoopAccessAnalysis.cpp:2311
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1649
llvm::APInt::isNegative
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:312
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::ScalarEvolution::getOne
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
Definition: ScalarEvolution.h:652
Constants.h
llvm::MemoryDepChecker::VectorizationSafetyStatus
VectorizationSafetyStatus
Type to keep track of the status of the dependence check.
Definition: LoopAccessAnalysis.h:96
llvm::AAResults
Definition: AliasAnalysis.h:507
isNoWrapAddRec
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, PredicatedScalarEvolution &PSE, const Loop *L)
Return true if an AddRec pointer Ptr is unsigned non-wrapping, i.e.
Definition: LoopAccessAnalysis.cpp:1005
llvm::LoopAccessInfo::blockNeedsPredication
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition: LoopAccessAnalysis.cpp:2143
llvm::MemoryDepChecker::Dependence::BackwardVectorizable
@ BackwardVectorizable
Definition: LoopAccessAnalysis.h:131
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::Loop::isAnnotatedParallel
bool isAnnotatedParallel() const
Returns true if the loop is annotated parallel.
Definition: LoopInfo.cpp:567
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::getStrideFromPointer
Value * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
Definition: VectorUtils.cpp:204
llvm::ScalarEvolution::getUMaxExpr
const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
Definition: ScalarEvolution.cpp:4096
InstrTypes.h
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::MemoryDepChecker::Dependence::print
void print(raw_ostream &OS, unsigned Depth, const SmallVectorImpl< Instruction * > &Instrs) const
Print the dependence.
Definition: LoopAccessAnalysis.cpp:1801
llvm::PredicatedScalarEvolution::hasNoOverflow
bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Returns true if we've proved that V doesn't wrap by means of a SCEV predicate.
Definition: ScalarEvolution.cpp:13881
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::AMDGPU::PALMD::Key
Key
PAL metadata keys.
Definition: AMDGPUMetadata.h:481
TargetLibraryInfo.h
llvm::RuntimePointerChecking::insert
void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides, PredicatedScalarEvolution &PSE)
Insert a pointer and calculate the start and end SCEVs.
Definition: LoopAccessAnalysis.cpp:192
false
Definition: StackSlotColoring.cpp:142
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
First
into llvm powi allowing the code generator to produce balanced multiplication trees First
Definition: README.txt:54
llvm::EquivalenceClasses::end
iterator end() const
Definition: EquivalenceClasses.h:166
llvm::Instruction
Definition: Instruction.h:45
isInBoundsGep
static bool isInBoundsGep(Value *Ptr)
Definition: LoopAccessAnalysis.cpp:997
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::MemoryDepChecker::addAccess
void addAccess(StoreInst *SI)
Register the location (instructions are given increasing numbers) of a write access.
Definition: LoopAccessAnalysis.cpp:1288
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1467
llvm::MemoryDepChecker::Dependence::BackwardVectorizableButPreventsForwarding
@ BackwardVectorizableButPreventsForwarding
Definition: LoopAccessAnalysis.h:133
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:607
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2139
DebugLoc.h
SmallPtrSet.h
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:148
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:153
llvm::getPointersDiff
Optional< int > getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB, Value *PtrB, const DataLayout &DL, ScalarEvolution &SE, bool StrictCheck=false, bool CheckType=true)
Returns the distance between the pointers PtrA and PtrB iff they are compatible and it is possible to...
Definition: LoopAccessAnalysis.cpp:1162
llvm::ScalarEvolution::getEqualPredicate
const SCEVPredicate * getEqualPredicate(const SCEV *LHS, const SCEV *RHS)
Definition: ScalarEvolution.cpp:13489
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
VectorizationInterleave
static cl::opt< unsigned, true > VectorizationInterleave("force-vector-interleave", cl::Hidden, cl::desc("Sets the vectorization interleave count. " "Zero is autoselect."), cl::location(VectorizerParams::VectorizationInterleave))
llvm::RuntimePointerChecking::reset
void reset()
Reset the state of the pointer runtime information.
Definition: LoopAccessAnalysis.h:401
llvm::None
const NoneType None
Definition: None.h:23
Type.h
llvm::RuntimePointerChecking::PointerInfo::AliasSetId
unsigned AliasSetId
Holds the id of the disjoint alias set to which this pointer belongs.
Definition: LoopAccessAnalysis.h:386
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
llvm::MemoryDepChecker::getInstructionsForAccess
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Find the set of instructions that read or write via Ptr.
Definition: LoopAccessAnalysis.cpp:1786
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4339
isSafeDependenceDistance
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, const SCEV &BackedgeTakenCount, const SCEV &Dist, uint64_t Stride, uint64_t TypeByteSize)
Given a non-constant (unknown) dependence-distance Dist between two memory accesses,...
Definition: LoopAccessAnalysis.cpp:1421
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:223
Check
static bool Check(DecodeStatus &Out, DecodeStatus In)
Definition: AArch64Disassembler.cpp:248
llvm::RuntimePointerChecking::PointerInfo
Definition: LoopAccessAnalysis.h:371
VectorUtils.h
llvm::MemoryDepChecker::VectorizationSafetyStatus::Unsafe
@ Unsafe
llvm::initializeLoopAccessLegacyAnalysisPass
void initializeLoopAccessLegacyAnalysisPass(PassRegistry &)
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1432
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:77
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:309
llvm::getUnderlyingObjects
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
Definition: ValueTracking.cpp:4325
llvm::SmallSet::count
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
llvm::ArrayRef::drop_front
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:202
llvm::RuntimePointerChecking
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
Definition: LoopAccessAnalysis.h:367
llvm::VectorizerParams::VectorizationFactor
static unsigned VectorizationFactor
VF as overridden by the user.
Definition: LoopAccessAnalysis.h:41
llvm::RuntimePointerChecking::Need
bool Need
This flag indicates if we need to add the runtime check.
Definition: LoopAccessAnalysis.h:447
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
uint64_t
llvm::MemoryDepChecker::Dependence
Dependece between memory access instructions.
Definition: LoopAccessAnalysis.h:107
llvm::VFDatabase::getMappings
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition: VectorUtils.h:249
llvm::PredicatedScalarEvolution::setNoOverflow
void setNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Proves that V doesn't overflow by adding SCEV predicate.
Definition: ScalarEvolution.cpp:13865
llvm::MemoryDepChecker::Dependence::Backward
@ Backward
Definition: LoopAccessAnalysis.h:128
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::RuntimePointerChecking::PointerInfo::DependencySetId
unsigned DependencySetId
Holds the id of the set of pointers that could be dependent because of a shared underlying object.
Definition: LoopAccessAnalysis.h:384
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::RuntimePointerChecking::CheckingGroups
SmallVector< RuntimeCheckingPtrGroup, 2 > CheckingGroups
Holds a partitioning of pointers into "check groups".
Definition: LoopAccessAnalysis.h:453
laa_name
static const char laa_name[]
Definition: LoopAccessAnalysis.cpp:2343
MemoryLocation.h
llvm::DenseMap< const Value *, Value * >
llvm::SCEV::FlagNSW
@ FlagNSW
Definition: ScalarEvolution.h:136
llvm::AnalysisKey
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: PassManager.h:72
llvm::RuntimeCheckingPtrGroup
A grouping of pointers.
Definition: LoopAccessAnalysis.h:328
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::LoopAccessInfo
Drive the analysis of memory accesses in the loop.
Definition: LoopAccessAnalysis.h:514
llvm::VectorizerParams::MaxVectorWidth
static const unsigned MaxVectorWidth
Maximum SIMD width.
Definition: LoopAccessAnalysis.h:38
llvm::SCEVWrapPredicate::IncrementNUSW
@ IncrementNUSW
Definition: ScalarEvolution.h:347
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:60
visitPointers
static void visitPointers(Value *StartPtr, const Loop &InnermostLoop, function_ref< void(Value *)> AddPointer)
Definition: LoopAccessAnalysis.cpp:669
llvm::EquivalenceClasses::iterator
typename std::set< ECValue >::const_iterator iterator
iterator* - Provides a way to iterate over all values in the set.
Definition: EquivalenceClasses.h:163
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::PredicatedScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
Definition: ScalarEvolution.cpp:13816
llvm::MemoryDepChecker::isSafeForVectorization
bool isSafeForVectorization() const
No memory dependence was encountered that would inhibit vectorization.
Definition: LoopAccessAnalysis.h:193
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:852
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::VectorizerParams::RuntimeMemoryCheckThreshold
static unsigned RuntimeMemoryCheckThreshold
\When performing memory disambiguation checks at runtime do not make more than this number of compari...
Definition: LoopAccessAnalysis.h:49
iterator_range.h
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
LAA_NAME
#define LAA_NAME
Definition: LoopAccessAnalysis.cpp:2344
llvm::ScalarEvolution::getNoopOrSignExtend
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
Definition: ScalarEvolution.cpp:4558
llvm::RuntimeCheckingPtrGroup::High
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
Definition: LoopAccessAnalysis.h:350
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::RuntimeCheckingPtrGroup::Low
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.
Definition: LoopAccessAnalysis.h:353
Status
Definition: SIModeRegister.cpp:28
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
Compare
QP Compare Ordered outs ins xscmpudp No builtin are required Or llvm fcmp order unorder compare DP QP Compare builtin are required DP Compare
Definition: README_P9.txt:309
llvm::SCEVNAryExpr::getNoWrapFlags
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
Definition: ScalarEvolutionExpressions.h:213
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:99
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::replaceSymbolicStrideSCEV
const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
Definition: LoopAccessAnalysis.cpp:143
DataLayout.h
EnableForwardingConflictDetection
static cl::opt< bool > EnableForwardingConflictDetection("store-to-load-forwarding-conflict-detection", cl::Hidden, cl::desc("Enable conflict detection in loop-access analysis"), cl::init(true))
Enable store-to-load forwarding conflict detection.
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::EquivalenceClasses::member_begin
member_iterator member_begin(iterator I) const
Definition: EquivalenceClasses.h:172
llvm::MemoryDepChecker::MemAccessInfo
PointerIntPair< Value *, 1, bool > MemAccessInfo
Definition: LoopAccessAnalysis.h:88
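MemAccessInfo packs the accessed pointer together with an is-write bit into a single pointer-sized value. A small self-contained illustration (the helper names are mine):
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/IR/Value.h"
using MemAccessInfo = llvm::PointerIntPair<llvm::Value *, 1, bool>;
// The bool tag distinguishes reads from writes at no extra storage cost;
// getPointer()/getInt() unpack the two halves.
static llvm::Value *accessedPointer(MemAccessInfo A) { return A.getPointer(); }
static bool isWriteAccess(MemAccessInfo A) { return A.getInt(); }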
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:450
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::transform
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1746
llvm::cl::Sink
@ Sink
Definition: CommandLine.h:171
llvm::ScalarEvolution::getStoreSizeOfExpr
const SCEV * getStoreSizeOfExpr(Type *IntTy, Type *StoreTy)
Return an expression for the store size of StoreTy that is type IntTy.
Definition: ScalarEvolution.cpp:4148
isNoWrap
static bool isNoWrap(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, Loop *L)
Check whether a pointer address cannot wrap.
Definition: LoopAccessAnalysis.cpp:655
MemoryCheckMergeThreshold
static cl::opt< unsigned > MemoryCheckMergeThreshold("memory-check-merge-threshold", cl::Hidden, cl::desc("Maximum number of comparisons done when trying to merge " "runtime memory checks. (default = 100)"), cl::init(100))
The maximum iterations used to merge memory checks.
llvm::RuntimeCheckingPtrGroup::addPointer
bool addPointer(unsigned Index, RuntimePointerChecking &RtCheck)
Tries to add the pointer recorded in RtCheck at index Index to this pointer checking group.
Definition: LoopAccessAnalysis.cpp:283
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopAccessAnalysis.cpp:70
llvm::ScalarEvolution::getUMinExpr
const SCEV * getUMinExpr(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)
Definition: ScalarEvolution.cpp:4115
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:776
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:180
ValueHandle.h
llvm::VectorizerParams::isInterleaveForced
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
Definition: LoopAccessAnalysis.cpp:132
llvm::RuntimePointerChecking::RuntimeCheckingPtrGroup
friend struct RuntimeCheckingPtrGroup
Definition: LoopAccessAnalysis.h:368
llvm::LoopAccessLegacyAnalysis::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition: LoopAccessAnalysis.cpp:2333
llvm::Value::stripAndAccumulateInBoundsConstantOffsets
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition: Value.h:727
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:661
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:12996
RuntimeMemoryCheckThreshold
static cl::opt< unsigned, true > RuntimeMemoryCheckThreshold("runtime-memory-check-threshold", cl::Hidden, cl::desc("When performing memory disambiguation checks at runtime do not " "generate more than this number of comparisons (default = 8)."), cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8))
llvm::ScalarEvolution::isKnownPositive
bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
Definition: ScalarEvolution.cpp:10147
llvm::depth_first
iterator_range< df_iterator< T > > depth_first(const T &G)
Definition: DepthFirstIterator.h:229
llvm::MemoryDepChecker::Dependence::Unknown
@ Unknown
Definition: LoopAccessAnalysis.h:113
llvm::createLAAPass
Pass * createLAAPass()
Definition: LoopAccessAnalysis.cpp:2362
llvm::RuntimePointerChecking::Pointers
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
Definition: LoopAccessAnalysis.h:450
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
llvm::RuntimePointerChecking::arePointersInSamePartition
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
Definition: LoopAccessAnalysis.cpp:443
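The test is small enough to restate standalone; a sketch under the assumption, consistent with its use in loop distribution, that -1 marks a pointer used by more than one partition:
#include "llvm/ADT/SmallVector.h"
static bool samePartition(const llvm::SmallVectorImpl<int> &PtrToPartition,
                          unsigned PtrIdx1, unsigned PtrIdx2) {
  // Only a pointer confined to a single known partition can match.
  return PtrToPartition[PtrIdx1] != -1 &&
         PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2];
}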
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:354
llvm::LoopAccessLegacyAnalysis::LoopAccessLegacyAnalysis
LoopAccessLegacyAnalysis()
Definition: LoopAccessAnalysis.cpp:2298
std
Definition: BitVector.h:850
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
Definition: ScalarEvolution.cpp:4471
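This is the workhorse behind dependence distances: the checker forms Sink - Src and hopes for a compile-time constant. A hedged sketch, where constantDistance is an illustrative name:
#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
// None stands for "distance not known at compile time".
static llvm::Optional<int64_t> constantDistance(llvm::ScalarEvolution &SE,
                                                const llvm::SCEV *Src,
                                                const llvm::SCEV *Sink) {
  const llvm::SCEV *Dist = SE.getMinusSCEV(Sink, Src);
  if (const auto *C = llvm::dyn_cast<llvm::SCEVConstant>(Dist))
    return C->getAPInt().getSExtValue();
  return llvm::None;
}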
llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition: PassAnalysisSupport.h:130
llvm::LocationSize::beforeOrAfterPointer
constexpr static LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Definition: MemoryLocation.h:129
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:83
VectorizationFactor
static cl::opt< unsigned, true > VectorizationFactor("force-vector-width", cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."), cl::location(VectorizerParams::VectorizationFactor))
llvm::EquivalenceClasses::findValue
iterator findValue(const ElemTy &V) const
findValue - Return an iterator to the specified value.
Definition: EquivalenceClasses.h:182
llvm::APInt::sextOrTrunc
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:976
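A quick worked illustration of the width adjustment (values arbitrary):
#include "llvm/ADT/APInt.h"
// -4 held in 8 bits, sign-extended to 64 bits, is still -4; truncating
// back down keeps only the low 8 bits, recovering -4 again.
llvm::APInt Narrow(/*numBits=*/8, static_cast<uint64_t>(-4), /*isSigned=*/true);
llvm::APInt Wide = Narrow.sextOrTrunc(64);
llvm::APInt Back = Wide.sextOrTrunc(8);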
llvm::RuntimePointerChecking::printChecks
void printChecks(raw_ostream &OS, const SmallVectorImpl< RuntimePointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
Definition: LoopAccessAnalysis.cpp:469
llvm::TypeSize
Definition: TypeSize.h:416
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:342
Casting.h
DiagnosticInfo.h
Function.h
llvm::LoopAccessInfo::LoopAccessInfo
LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT, LoopInfo *LI)
Definition: LoopAccessAnalysis.cpp:2242
llvm::MemoryDepChecker::Dependence::DepName
static const char * DepName[]
String version of the types.
Definition: LoopAccessAnalysis.h:137
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::RuntimePointerChecking::PointerInfo::IsWritePtr
bool IsWritePtr
Holds whether this pointer is used for writing to memory.
Definition: LoopAccessAnalysis.h:381
llvm::ScalarEvolution::isSCEVable
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition: ScalarEvolution.cpp:4197
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:221
EquivalenceClasses.h
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:579
llvm::PredicatedScalarEvolution::addPredicate
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
Definition: ScalarEvolution.cpp:13844
llvm::LoopBase::getNumBackEdges
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:250
llvm::AAMDNodes::TBAA
MDNode * TBAA
The tag for type-based alias analysis.
Definition: Metadata.h:668
hasComputableBounds
static bool hasComputableBounds(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, Loop *L, bool Assume)
Check whether a pointer can participate in a runtime bounds check.
Definition: LoopAccessAnalysis.cpp:634
llvm::MemoryDepChecker::Dependence::Forward
@ Forward
Definition: LoopAccessAnalysis.h:123
areStridedAccessesIndependent
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, uint64_t TypeByteSize)
Check the dependence between two accesses that share the same stride Stride.
Definition: LoopAccessAnalysis.cpp:1481
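The arithmetic is simple enough to restate standalone. The sketch below reflects my reading of the check, valid only for Stride > 1: e.g. for (i = 0; i < 1024; i += 4) A[i+2] = A[i] + 1; has element distance 2 against stride 4, and 2 % 4 != 0, so the two accesses never touch the same element:
#include <cstdint>
static bool stridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
                                       uint64_t TypeByteSize) {
  // A byte distance that is not a whole number of elements may overlap.
  if (Distance % TypeByteSize)
    return false;
  uint64_t ScaledDist = Distance / TypeByteSize;
  // Independent exactly when the element distance never lands back on
  // the stride grid.
  return ScaledDist % Stride != 0;
}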
llvm::APInt::abs
APInt abs() const
Get the absolute value.
Definition: APInt.h:1682
ScalarEvolutionExpressions.h
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
llvm::ScalarEvolution::getZeroExtendExpr
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
Definition: ScalarEvolution.cpp:1582
Instructions.h
llvm::PointerIntPair< Value *, 1, bool >
INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:51
SmallVector.h
llvm::raw_ostream::indent
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
Definition: raw_ostream.cpp:496
Dominators.h
llvm::Type::getPointerElementType
Type * getPointerElementType() const
Definition: Type.h:369
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1335
llvm::LoopAccessInfo::print
void print(raw_ostream &OS, unsigned Depth=0) const
Print the information about the memory accesses in the loop.
Definition: LoopAccessAnalysis.cpp:2255
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::SmallVectorImpl< int >
llvm::MemoryDepChecker::Dependence::isSafeForVectorization
static VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
Definition: LoopAccessAnalysis.cpp:1307
llvm::EquivalenceClasses::unionSets
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
Definition: EquivalenceClasses.h:236
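Several EquivalenceClasses members show up in this index (iterator, findValue, member_begin, unionSets); a short self-contained sketch of how they compose, in the spirit of the dependence-candidate grouping done by this analysis:
#include "llvm/ADT/EquivalenceClasses.h"
#include <cstdio>
int main() {
  llvm::EquivalenceClasses<int> EC;
  EC.unionSets(1, 2); // class {1, 2}
  EC.unionSets(2, 3); // class {1, 2, 3}
  EC.insert(4);       // singleton class {4}
  // findValue + member_begin walk every member of 3's class.
  auto I = EC.findValue(3);
  for (auto MI = EC.member_begin(I), ME = EC.member_end(); MI != ME; ++MI)
    std::printf("%d ", *MI); // prints the members of {1, 2, 3}
  return 0;
}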
llvm::AnalysisUsage::addRequiredTransitive
AnalysisUsage & addRequiredTransitive()
Definition: PassAnalysisSupport.h:81
llvm::GlobalValue::getType
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:271
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:401
DerivedTypes.h
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:381
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::getLoadStorePointerOperand
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Definition: Instructions.h:5319
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2440
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
AliasSetTracker.h
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::cl::desc
Definition: CommandLine.h:412
raw_ostream.h
llvm::NullPointerIsDefined
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:1985
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::PredicatedScalarEvolution::getSE
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
Definition: ScalarEvolution.h:2202
llvm::EquivalenceClasses::member_iterator
Definition: EquivalenceClasses.h:270
Value.h
llvm::RuntimePointerChecking::print
void print(raw_ostream &OS, unsigned Depth=0) const
Print the list of run-time memory checks necessary.
Definition: LoopAccessAnalysis.cpp:488
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1282
llvm::RuntimeCheckingPtrGroup::Members
SmallVector< unsigned, 2 > Members
Indices of all the pointers that constitute this grouping.
Definition: LoopAccessAnalysis.h:355
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::MemoryDepChecker::Dependence::ForwardButPreventsForwarding
@ ForwardButPreventsForwarding
Definition: LoopAccessAnalysis.h:126
llvm::RuntimePointerChecking::generateChecks
void generateChecks(MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies)
Generate the checks and store them.
Definition: LoopAccessAnalysis.cpp:253
llvm::MemoryLocation
Representation for a specific memory location.
Definition: MemoryLocation.h:209
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:360
llvm::LoopAccessLegacyAnalysis::runOnFunction
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
Definition: LoopAccessAnalysis.cpp:2322
SetVector.h
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SCEVAddRecExpr::evaluateAtIteration
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
Definition: ScalarEvolution.cpp:1030
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:915
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::MemoryDepChecker::VectorizationSafetyStatus::PossiblySafeWithRtChecks
@ PossiblySafeWithRtChecks