//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The implementation of the loop memory dependence analysis that was
// originally developed for the loop vectorizer.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <optional>
#include <set>
#include <utility>
#include <vector>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "loop-accesses"

static cl::opt<unsigned, true>
VectorizationFactor("force-vector-width", cl::Hidden,
                    cl::desc("Sets the SIMD width. Zero is autoselect."),
                    cl::location(VectorizerParams::VectorizationFactor));
unsigned VectorizerParams::VectorizationFactor;

static cl::opt<unsigned, true>
VectorizationInterleave("force-vector-interleave", cl::Hidden,
                        cl::desc("Sets the vectorization interleave count. "
                                 "Zero is autoselect."),
                        cl::location(VectorizerParams::VectorizationInterleave));
unsigned VectorizerParams::VectorizationInterleave;

static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
    "runtime-memory-check-threshold", cl::Hidden,
    cl::desc("When performing memory disambiguation checks at runtime do not "
             "generate more than this number of comparisons (default = 8)."),
    cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
unsigned VectorizerParams::RuntimeMemoryCheckThreshold;

/// The maximum iterations used to merge memory checks.
static cl::opt<unsigned> MemoryCheckMergeThreshold(
    "memory-check-merge-threshold", cl::Hidden,
    cl::desc("Maximum number of comparisons done when trying to merge "
             "runtime memory checks. (default = 100)"),
    cl::init(100));

/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;

/// We collect dependences up to this threshold.
static cl::opt<unsigned>
    MaxDependences("max-dependences", cl::Hidden,
                   cl::desc("Maximum number of dependences collected by "
                            "loop-access analysis (default = 100)"),
                   cl::init(100));

/// This enables versioning on the strides of symbolically striding memory
/// accesses in code like the following.
///   for (i = 0; i < N; ++i)
///     A[i * Stride1] += B[i * Stride2] ...
///
/// Will be roughly translated to
///    if (Stride1 == 1 && Stride2 == 1) {
///      for (i = 0; i < N; i+=4)
///       A[i:i+3] += ...
///    } else
///      ...
static cl::opt<bool> EnableMemAccessVersioning(
    "enable-mem-access-versioning", cl::init(true), cl::Hidden,
    cl::desc("Enable symbolic stride memory access versioning"));

/// Enable store-to-load forwarding conflict detection. This option can
/// be disabled for correctness testing.
static cl::opt<bool> EnableForwardingConflictDetection(
    "store-to-load-forwarding-conflict-detection", cl::Hidden,
    cl::desc("Enable conflict detection in loop-access analysis"),
    cl::init(true));

static cl::opt<unsigned> MaxForkedSCEVDepth(
    "max-forked-scev-depth", cl::Hidden,
    cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"),
    cl::init(5));

bool VectorizerParams::isInterleaveForced() {
  return ::VectorizationInterleave.getNumOccurrences() > 0;
}

Value *llvm::stripIntegerCast(Value *V) {
  if (auto *CI = dyn_cast<CastInst>(V))
    if (CI->getOperand(0)->getType()->isIntegerTy())
      return CI->getOperand(0);
  return V;
}
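
// For illustration (assumed IR, not from this file): given
//   %stride.ext = sext i32 %stride to i64
// stripIntegerCast(%stride.ext) returns %stride, so the versioning logic
// below keys its stride predicate on the original scalar value.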

const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
                                            const ValueToValueMap &PtrToStride,
                                            Value *Ptr) {
  const SCEV *OrigSCEV = PSE.getSCEV(Ptr);

  // If there is an entry in the map return the SCEV of the pointer with the
  // symbolic stride replaced by one.
  ValueToValueMap::const_iterator SI = PtrToStride.find(Ptr);
  if (SI == PtrToStride.end())
    // For a non-symbolic stride, just return the original expression.
    return OrigSCEV;

  Value *StrideVal = stripIntegerCast(SI->second);

  ScalarEvolution *SE = PSE.getSE();
  const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
  const auto *CT =
      static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));

  PSE.addPredicate(*SE->getEqualPredicate(U, CT));
  auto *Expr = PSE.getSCEV(Ptr);

  LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
                    << " by: " << *Expr << "\n");
  return Expr;
}
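
// For illustration (assumed example): with PtrToStride = {%gep -> %stride}
// and OrigSCEV = {%A,+,(4 * %stride)}<%loop>, the predicate "%stride == 1"
// is added to PSE and the returned expression simplifies to {%A,+,4}<%loop>.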

RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
    unsigned Index, RuntimePointerChecking &RtCheck)
    : High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start),
      AddressSpace(RtCheck.Pointers[Index]
                       .PointerValue->getType()
                       ->getPointerAddressSpace()),
      NeedsFreeze(RtCheck.Pointers[Index].NeedsFreeze) {
  Members.push_back(Index);
}

/// Calculate Start and End points of memory access.
/// Let's assume A is the first access and B is a memory access on N-th loop
/// iteration. Then B is calculated as:
///   B = A + Step*N .
/// Step value may be positive or negative.
/// N is a calculated back-edge taken count:
///     N = (TripCount > 0) ? RoundDown(TripCount - 1, VF) : 0
/// Start and End points are calculated in the following way:
/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
/// where SizeOfElt is the size of single memory access in bytes.
///
/// There is no conflict when the intervals are disjoint:
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
                                    Type *AccessTy, bool WritePtr,
                                    unsigned DepSetId, unsigned ASId,
                                    PredicatedScalarEvolution &PSE,
                                    bool NeedsFreeze) {
  ScalarEvolution *SE = PSE.getSE();

  const SCEV *ScStart;
  const SCEV *ScEnd;

  if (SE->isLoopInvariant(PtrExpr, Lp)) {
    ScStart = ScEnd = PtrExpr;
  } else {
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr);
    assert(AR && "Invalid addrec expression");
    const SCEV *Ex = PSE.getBackedgeTakenCount();

    ScStart = AR->getStart();
    ScEnd = AR->evaluateAtIteration(Ex, *SE);
    const SCEV *Step = AR->getStepRecurrence(*SE);

    // For expressions with negative step, the upper bound is ScStart and the
    // lower bound is ScEnd.
    if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
      if (CStep->getValue()->isNegative())
        std::swap(ScStart, ScEnd);
    } else {
      // Fallback case: the step is not constant, but we can still
      // get the upper and lower bounds of the interval by using min/max
      // expressions.
      ScStart = SE->getUMinExpr(ScStart, ScEnd);
      ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
    }
  }
  // Add the size of the pointed element to ScEnd.
  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
  Type *IdxTy = DL.getIndexType(Ptr->getType());
  const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
  ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);

  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, PtrExpr,
                        NeedsFreeze);
}
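
// Worked example for the bounds above (assumed values): for the pointer SCEV
// {%A,+,4}<%loop> with a backedge-taken count of 99 and an i32 access,
//   ScStart = %A  and  ScEnd = (%A + 396) + 4,
// i.e. the access interval is the byte range [%A, %A + 400).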

void RuntimePointerChecking::tryToCreateDiffCheck(
    const RuntimeCheckingPtrGroup &CGI, const RuntimeCheckingPtrGroup &CGJ) {
  if (!CanUseDiffCheck)
    return;

  // If either group contains multiple different pointers, bail out.
  // TODO: Support multiple pointers by using the minimum or maximum pointer,
  // depending on src & sink.
  if (CGI.Members.size() != 1 || CGJ.Members.size() != 1) {
    CanUseDiffCheck = false;
    return;
  }

  PointerInfo *Src = &Pointers[CGI.Members[0]];
  PointerInfo *Sink = &Pointers[CGJ.Members[0]];

  // If either pointer is read and written, multiple checks may be needed. Bail
  // out.
  if (!DC.getOrderForAccess(Src->PointerValue, !Src->IsWritePtr).empty() ||
      !DC.getOrderForAccess(Sink->PointerValue, !Sink->IsWritePtr).empty()) {
    CanUseDiffCheck = false;
    return;
  }

  ArrayRef<unsigned> AccSrc =
      DC.getOrderForAccess(Src->PointerValue, Src->IsWritePtr);
  ArrayRef<unsigned> AccSink =
      DC.getOrderForAccess(Sink->PointerValue, Sink->IsWritePtr);
  // If either pointer is accessed multiple times, there may not be a clear
  // src/sink relation. Bail out for now.
  if (AccSrc.size() != 1 || AccSink.size() != 1) {
    CanUseDiffCheck = false;
    return;
  }
  // If the sink is accessed before src, swap src/sink.
  if (AccSink[0] < AccSrc[0])
    std::swap(Src, Sink);

  auto *SrcAR = dyn_cast<SCEVAddRecExpr>(Src->Expr);
  auto *SinkAR = dyn_cast<SCEVAddRecExpr>(Sink->Expr);
  if (!SrcAR || !SinkAR || SrcAR->getLoop() != DC.getInnermostLoop() ||
      SinkAR->getLoop() != DC.getInnermostLoop()) {
    CanUseDiffCheck = false;
    return;
  }

  SmallVector<Instruction *, 4> SrcInsts =
      DC.getInstructionsForAccess(Src->PointerValue, Src->IsWritePtr);
  SmallVector<Instruction *, 4> SinkInsts =
      DC.getInstructionsForAccess(Sink->PointerValue, Sink->IsWritePtr);
  Type *SrcTy = getLoadStoreType(SrcInsts[0]);
  Type *DstTy = getLoadStoreType(SinkInsts[0]);
  if (isa<ScalableVectorType>(SrcTy) || isa<ScalableVectorType>(DstTy)) {
    CanUseDiffCheck = false;
    return;
  }
  const DataLayout &DL =
      SinkAR->getLoop()->getHeader()->getModule()->getDataLayout();
  unsigned AllocSize =
      std::max(DL.getTypeAllocSize(SrcTy), DL.getTypeAllocSize(DstTy));

  // Only constant steps matching the AllocSize are supported at the moment.
  // This simplifies the difference computation. Can be extended in the
  // future.
  auto *Step = dyn_cast<SCEVConstant>(SinkAR->getStepRecurrence(*SE));
  if (!Step || Step != SrcAR->getStepRecurrence(*SE) ||
      Step->getAPInt().abs() != AllocSize) {
    CanUseDiffCheck = false;
    return;
  }

  IntegerType *IntTy =
      IntegerType::get(Src->PointerValue->getContext(),
                       DL.getPointerSizeInBits(CGI.AddressSpace));

  // When counting down, the dependence distance needs to be swapped.
  if (Step->getValue()->isNegative())
    std::swap(SinkAR, SrcAR);

  const SCEV *SinkStartInt = SE->getPtrToIntExpr(SinkAR->getStart(), IntTy);
  const SCEV *SrcStartInt = SE->getPtrToIntExpr(SrcAR->getStart(), IntTy);
  if (isa<SCEVCouldNotCompute>(SinkStartInt) ||
      isa<SCEVCouldNotCompute>(SrcStartInt)) {
    CanUseDiffCheck = false;
    return;
  }
  DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize,
                          Src->NeedsFreeze || Sink->NeedsFreeze);
}
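
// For illustration (assumed expansion, performed by clients of this
// analysis, not in this file): a recorded diff check
// (SrcStart, SinkStart, AllocSize) is typically materialized as something
// like "SinkStart - SrcStart >= VF * AllocSize", i.e. the sink must start at
// least a full vector's worth of bytes after the source.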

SmallVector<RuntimePointerCheck, 4> RuntimePointerChecking::generateChecks() {
  SmallVector<RuntimePointerCheck, 4> Checks;

  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
      const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I];
      const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J];

      if (needsChecking(CGI, CGJ)) {
        tryToCreateDiffCheck(CGI, CGJ);
        Checks.push_back(std::make_pair(&CGI, &CGJ));
      }
    }
  }
  return Checks;
}

void RuntimePointerChecking::generateChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  assert(Checks.empty() && "Checks is not empty");
  groupChecks(DepCands, UseDependencies);
  Checks = generateChecks();
}

bool RuntimePointerChecking::needsChecking(
    const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const {
  for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
    for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
      if (needsChecking(M.Members[I], N.Members[J]))
        return true;
  return false;
}

/// Compare \p I and \p J and return the minimum.
/// Return nullptr in case we couldn't find an answer.
static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
                                   ScalarEvolution *SE) {
  const SCEV *Diff = SE->getMinusSCEV(J, I);
  const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);

  if (!C)
    return nullptr;
  if (C->getValue()->isNegative())
    return J;
  return I;
}
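
// For illustration (assumed SCEVs): with I = %A and J = (%A + 8), the
// difference J - I is the constant 8, which is non-negative, so I is
// returned; a non-constant difference yields nullptr.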

bool RuntimeCheckingPtrGroup::addPointer(unsigned Index,
                                         RuntimePointerChecking &RtCheck) {
  return addPointer(
      Index, RtCheck.Pointers[Index].Start, RtCheck.Pointers[Index].End,
      RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(),
      RtCheck.Pointers[Index].NeedsFreeze, *RtCheck.SE);
}

bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
                                         const SCEV *End, unsigned AS,
                                         bool NeedsFreeze,
                                         ScalarEvolution &SE) {
  assert(AddressSpace == AS &&
         "all pointers in a checking group must be in the same address space");

  // Compare the starts and ends with the known minimum and maximum
  // of this set. We need to know how we compare against the min/max
  // of the set in order to be able to emit memchecks.
  const SCEV *Min0 = getMinFromExprs(Start, Low, &SE);
  if (!Min0)
    return false;

  const SCEV *Min1 = getMinFromExprs(End, High, &SE);
  if (!Min1)
    return false;

  // Update the low bound expression if we've found a new min value.
  if (Min0 == Start)
    Low = Start;

  // Update the high bound expression if we've found a new max value.
  if (Min1 != End)
    High = End;

  Members.push_back(Index);
  this->NeedsFreeze |= NeedsFreeze;
  return true;
}

void RuntimePointerChecking::groupChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  // We build the groups from dependency candidates equivalence classes
  // because:
  //    - We know that pointers in the same equivalence class share
  //      the same underlying object and therefore there is a chance
  //      that we can compare pointers
  //    - We wouldn't be able to merge two pointers for which we need
  //      to emit a memcheck. The classes in DepCands are already
  //      conveniently built such that no two pointers in the same
  //      class need checking against each other.

  // We use the following (greedy) algorithm to construct the groups
  // For every pointer in the equivalence class:
  //   For each existing group:
  //   - if the difference between this pointer and the min/max bounds
  //     of the group is a constant, then make the pointer part of the
  //     group and update the min/max bounds of that group as required.

  CheckingGroups.clear();

  // If we need to check two pointers to the same underlying object
  // with a non-constant difference, we shouldn't perform any pointer
  // grouping with those pointers. This is because we can easily get
  // into cases where the resulting check would return false, even when
  // the accesses are safe.
  //
  // The following example shows this:
  // for (i = 0; i < 1000; ++i)
  //   a[5000 + i * m] = a[i] + a[i + 9000]
  //
  // Here grouping gives a check of (5000, 5000 + 1000 * m) against
  // (0, 10000) which is always false. However, if m is 1, there is no
  // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
  // us to perform an accurate check in this case.
  //
  // The above case requires that we have an UnknownDependence between
  // accesses to the same underlying object. This cannot happen unless
  // FoundNonConstantDistanceDependence is set, and therefore UseDependencies
  // is also false. In this case we will use the fallback path and create
  // separate checking groups for all pointers.

  // If we don't have the dependency partitions, construct a new
  // checking pointer group for each pointer. This is also required
  // for correctness, because in this case we can have checking between
  // pointers to the same underlying object.
  if (!UseDependencies) {
    for (unsigned I = 0; I < Pointers.size(); ++I)
      CheckingGroups.push_back(RuntimeCheckingPtrGroup(I, *this));
    return;
  }

  unsigned TotalComparisons = 0;

  DenseMap<Value *, SmallVector<unsigned>> PositionMap;
  for (unsigned Index = 0; Index < Pointers.size(); ++Index) {
    auto Iter = PositionMap.insert({Pointers[Index].PointerValue, {}});
    Iter.first->second.push_back(Index);
  }

  // We need to keep track of what pointers we've already seen so we
  // don't process them twice.
  SmallSet<unsigned, 2> Seen;

  // Go through all equivalence classes, get the "pointer check groups"
  // and add them to the overall solution. We use the order in which accesses
  // appear in 'Pointers' to enforce determinism.
  for (unsigned I = 0; I < Pointers.size(); ++I) {
    // We've seen this pointer before, and therefore already processed
    // its equivalence class.
    if (Seen.count(I))
      continue;

    MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
                                           Pointers[I].IsWritePtr);

    SmallVector<RuntimeCheckingPtrGroup, 2> Groups;
    auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));

    // Because DepCands is constructed by visiting accesses in the order in
    // which they appear in alias sets (which is deterministic) and the
    // iteration order within an equivalence class member is only dependent on
    // the order in which unions and insertions are performed on the
    // equivalence class, the iteration order is deterministic.
    for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
         MI != ME; ++MI) {
      auto PointerI = PositionMap.find(MI->getPointer());
      assert(PointerI != PositionMap.end() &&
             "pointer in equivalence class not found in PositionMap");
      for (unsigned Pointer : PointerI->second) {
        bool Merged = false;
        // Mark this pointer as seen.
        Seen.insert(Pointer);

        // Go through all the existing sets and see if we can find one
        // which can include this pointer.
        for (RuntimeCheckingPtrGroup &Group : Groups) {
          // Don't perform more than a certain amount of comparisons.
          // This should limit the cost of grouping the pointers to something
          // reasonable. If we do end up hitting this threshold, the algorithm
          // will create separate groups for all remaining pointers.
          if (TotalComparisons > MemoryCheckMergeThreshold)
            break;

          TotalComparisons++;

          if (Group.addPointer(Pointer, *this)) {
            Merged = true;
            break;
          }
        }

        if (!Merged)
          // We couldn't add this pointer to any existing set or the threshold
          // for the number of comparisons has been reached. Create a new group
          // to hold the current pointer.
          Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this));
      }
    }

    // We've computed the grouped checks for this partition.
    // Save the results and continue with the next one.
    llvm::copy(Groups, std::back_inserter(CheckingGroups));
  }
}
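
// For illustration (assumed accesses): if an equivalence class contains the
// pointers %a, %a + 4 and %a + 8 (constant offsets from one base), the
// greedy loop above merges them into a single group whose bounds stretch
// from %a to %a + 8 plus the element size, so only one interval is compared
// at runtime instead of three.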

bool RuntimePointerChecking::arePointersInSamePartition(
    const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
    unsigned PtrIdx2) {
  return (PtrToPartition[PtrIdx1] != -1 &&
          PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
}

bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
  const PointerInfo &PointerI = Pointers[I];
  const PointerInfo &PointerJ = Pointers[J];

  // No need to check if two readonly pointers intersect.
  if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
    return false;

  // Only need to check pointers between two different dependency sets.
  if (PointerI.DependencySetId == PointerJ.DependencySetId)
    return false;

  // Only need to check pointers in the same alias set.
  if (PointerI.AliasSetId != PointerJ.AliasSetId)
    return false;

  return true;
}

void RuntimePointerChecking::printChecks(
    raw_ostream &OS, const SmallVectorImpl<RuntimePointerCheck> &Checks,
    unsigned Depth) const {
  unsigned N = 0;
  for (const auto &Check : Checks) {
    const auto &First = Check.first->Members, &Second = Check.second->Members;

    OS.indent(Depth) << "Check " << N++ << ":\n";

    OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
    for (unsigned K = 0; K < First.size(); ++K)
      OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";

    OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
    for (unsigned K = 0; K < Second.size(); ++K)
      OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
  }
}

void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {

  OS.indent(Depth) << "Run-time memory checks:\n";
  printChecks(OS, Checks, Depth);

  OS.indent(Depth) << "Grouped accesses:\n";
  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    const auto &CG = CheckingGroups[I];

    OS.indent(Depth + 2) << "Group " << &CG << ":\n";
    OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
                         << ")\n";
    for (unsigned J = 0; J < CG.Members.size(); ++J) {
      OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
                           << "\n";
    }
  }
}

namespace {

/// Analyses memory accesses in a loop.
///
/// Checks whether run time pointer checks are needed and builds sets for data
/// dependence checking.
class AccessAnalysis {
public:
  /// Read or write access location.
  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
  typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;

  AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI,
                 MemoryDepChecker::DepCandidates &DA,
                 PredicatedScalarEvolution &PSE)
      : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE) {
    // We're analyzing dependences across loop iterations.
    BAA.enableCrossIterationMode();
  }

  /// Register a load and whether it is only read from.
  void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
    Value *Ptr = const_cast<Value *>(Loc.Ptr);
    AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
    Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
    if (IsReadOnly)
      ReadOnlyPtr.insert(Ptr);
  }

  /// Register a store.
  void addStore(MemoryLocation &Loc, Type *AccessTy) {
    Value *Ptr = const_cast<Value *>(Loc.Ptr);
    AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
    Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
  }

  /// Check if we can emit a run-time no-alias check for \p Access.
  ///
  /// Returns true if we can emit a run-time no alias check for \p Access.
  /// If we can check this access, this also adds it to a dependence set and
  /// adds a run-time check for it to \p RtCheck. If \p Assume is true,
  /// we will attempt to use additional run-time checks in order to get
  /// the bounds of the pointer.
  bool createCheckForAccess(RuntimePointerChecking &RtCheck,
                            MemAccessInfo Access, Type *AccessTy,
                            const ValueToValueMap &Strides,
                            DenseMap<Value *, unsigned> &DepSetId,
                            Loop *TheLoop, unsigned &RunningDepId,
                            unsigned ASId, bool ShouldCheckStride, bool Assume);

  /// Check whether we can check the pointers at runtime for
  /// non-intersection.
  ///
  /// Returns true if we need no check or if we do and we can generate them
  /// (i.e. the pointers have computable bounds).
  bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
                       Loop *TheLoop, const ValueToValueMap &Strides,
                       Value *&UncomputablePtr, bool ShouldCheckWrap = false);

  /// Goes over all memory accesses, checks whether a RT check is needed
  /// and builds sets of dependent accesses.
  void buildDependenceSets() {
    processMemAccesses();
  }

  /// Initial processing of memory accesses determined that we need to
  /// perform dependency checking.
  ///
  /// Note that this can later be cleared if we retry memcheck analysis without
  /// dependency checking (i.e. FoundNonConstantDistanceDependence).
  bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }

  /// We decided that no dependence analysis would be used. Reset the state.
  void resetDepChecks(MemoryDepChecker &DepChecker) {
    CheckDeps.clear();
    DepChecker.clearDependences();
  }

  MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }

private:
  typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;

  /// Go over all memory access and check whether runtime pointer checks
  /// are needed and build sets of dependency check candidates.
  void processMemAccesses();

  /// Map of all accesses. Values are the types used to access memory pointed
  /// to by the pointer.
  PtrAccessMap Accesses;

  /// The loop being checked.
  const Loop *TheLoop;

  /// List of accesses that need a further dependence check.
  MemAccessInfoList CheckDeps;

  /// Set of pointers that are read only.
  SmallPtrSet<Value*, 16> ReadOnlyPtr;

  /// Batched alias analysis results.
  BatchAAResults BAA;

  /// An alias set tracker to partition the access set by underlying object
  /// and intrinsic property (such as TBAA metadata).
  AliasSetTracker AST;

  LoopInfo *LI;

  /// Sets of potentially dependent accesses - members of one set share an
  /// underlying pointer. The set "CheckDeps" identifies which sets really need
  /// a dependence check.
  MemoryDepChecker::DepCandidates &DepCands;

  /// Initial processing of memory accesses determined that we may need
  /// to add memchecks. Perform the analysis to determine the necessary checks.
  ///
  /// Note that, this is different from isDependencyCheckNeeded. When we retry
  /// memcheck analysis without dependency checking
  /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is
  /// cleared while this remains set if we have potentially dependent accesses.
  bool IsRTCheckAnalysisNeeded = false;

  /// The SCEV predicate containing all the SCEV-related assumptions.
  PredicatedScalarEvolution &PSE;
};

} // end anonymous namespace

/// Check whether a pointer can participate in a runtime bounds check.
/// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
/// by adding run-time checks (overflow checks) if necessary.
static bool hasComputableBounds(PredicatedScalarEvolution &PSE, Value *Ptr,
                                const SCEV *PtrScev, Loop *L, bool Assume) {
  // The bounds for a loop-invariant pointer are trivial.
  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
    return true;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);

  if (!AR && Assume)
    AR = PSE.getAsAddRec(Ptr);

  if (!AR)
    return false;

  return AR->isAffine();
}

/// Check whether a pointer address cannot wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE,
                     const ValueToValueMap &Strides, Value *Ptr, Type *AccessTy,
                     Loop *L) {
  const SCEV *PtrScev = PSE.getSCEV(Ptr);
  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
    return true;

  int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides).value_or(0);
  if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
    return true;

  return false;
}

static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
                          function_ref<void(Value *)> AddPointer) {
  SmallPtrSet<Value *, 8> Visited;
  SmallVector<Value *> WorkList;
  WorkList.push_back(StartPtr);

  while (!WorkList.empty()) {
    Value *Ptr = WorkList.pop_back_val();
    if (!Visited.insert(Ptr).second)
      continue;
    auto *PN = dyn_cast<PHINode>(Ptr);
    // SCEV does not look through non-header PHIs inside the loop. Such phis
    // can be analyzed by adding separate accesses for each incoming pointer
    // value.
    if (PN && InnermostLoop.contains(PN->getParent()) &&
        PN->getParent() != InnermostLoop.getHeader()) {
      for (const Use &Inc : PN->incoming_values())
        WorkList.push_back(Inc);
    } else
      AddPointer(Ptr);
  }
}

// Walk back through the IR for a pointer, looking for a select like the
// following:
//
//  %offset = select i1 %cmp, i64 %a, i64 %b
//  %addr = getelementptr double, double* %base, i64 %offset
//  %ld = load double, double* %addr, align 8
//
// We won't be able to form a single SCEVAddRecExpr from this since the
// address for each loop iteration depends on %cmp. We could potentially
// produce multiple valid SCEVAddRecExprs, though, and check all of them for
// memory safety/aliasing if needed.
//
// If we encounter some IR we don't yet handle, or something obviously fine
// like a constant, then we just add the SCEV for that term to the list passed
// in by the caller. If we have a node that may potentially yield a valid
// SCEVAddRecExpr then we decompose it into parts and build the SCEV terms
// ourselves before adding to the list.
static void findForkedSCEVs(
    ScalarEvolution *SE, const Loop *L, Value *Ptr,
    SmallVectorImpl<PointerIntPair<const SCEV *, 1, bool>> &ScevList,
    unsigned Depth) {
  // If our Value is a SCEVAddRecExpr, loop invariant, not an instruction, or
  // we've exceeded our limit on recursion, just return whatever we have
  // regardless of whether it can be used for a forked pointer or not, along
  // with an indication of whether it might be a poison or undef value.
  const SCEV *Scev = SE->getSCEV(Ptr);
  if (isa<SCEVAddRecExpr>(Scev) || L->isLoopInvariant(Ptr) ||
      !isa<Instruction>(Ptr) || Depth == 0) {
    ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
    return;
  }

  Depth--;

  auto UndefPoisonCheck = [](PointerIntPair<const SCEV *, 1, bool> S) {
    return get<1>(S);
  };

  auto GetBinOpExpr = [&SE](unsigned Opcode, const SCEV *L, const SCEV *R) {
    switch (Opcode) {
    case Instruction::Add:
      return SE->getAddExpr(L, R);
    case Instruction::Sub:
      return SE->getMinusSCEV(L, R);
    default:
      llvm_unreachable("Unexpected binary operator when walking ForkedPtrs");
    }
  };

  Instruction *I = cast<Instruction>(Ptr);
  unsigned Opcode = I->getOpcode();
  switch (Opcode) {
  case Instruction::GetElementPtr: {
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
    Type *SourceTy = GEP->getSourceElementType();
    // We only handle base + single offset GEPs here for now.
    // Not dealing with preexisting gathers yet, so no vectors.
    if (I->getNumOperands() != 2 || SourceTy->isVectorTy()) {
      ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(GEP));
      break;
    }
    SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> BaseScevs;
    SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> OffsetScevs;
    findForkedSCEVs(SE, L, I->getOperand(0), BaseScevs, Depth);
    findForkedSCEVs(SE, L, I->getOperand(1), OffsetScevs, Depth);

    // See if we need to freeze our fork...
    bool NeedsFreeze = any_of(BaseScevs, UndefPoisonCheck) ||
                       any_of(OffsetScevs, UndefPoisonCheck);

    // Check that we only have a single fork, on either the base or the offset.
    // Copy the SCEV across for the one without a fork in order to generate
    // the full SCEV for both sides of the GEP.
    if (OffsetScevs.size() == 2 && BaseScevs.size() == 1)
      BaseScevs.push_back(BaseScevs[0]);
    else if (BaseScevs.size() == 2 && OffsetScevs.size() == 1)
      OffsetScevs.push_back(OffsetScevs[0]);
    else {
      ScevList.emplace_back(Scev, NeedsFreeze);
      break;
    }

    // Find the pointer type we need to extend to.
    Type *IntPtrTy = SE->getEffectiveSCEVType(
        SE->getSCEV(GEP->getPointerOperand())->getType());

    // Find the size of the type being pointed to. We only have a single
    // index term (guarded above) so we don't need to index into arrays or
    // structures, just get the size of the scalar value.
    const SCEV *Size = SE->getSizeOfExpr(IntPtrTy, SourceTy);

    // Scale up the offsets by the size of the type, then add to the bases.
    const SCEV *Scaled1 = SE->getMulExpr(
        Size, SE->getTruncateOrSignExtend(get<0>(OffsetScevs[0]), IntPtrTy));
    const SCEV *Scaled2 = SE->getMulExpr(
        Size, SE->getTruncateOrSignExtend(get<0>(OffsetScevs[1]), IntPtrTy));
    ScevList.emplace_back(SE->getAddExpr(get<0>(BaseScevs[0]), Scaled1),
                          NeedsFreeze);
    ScevList.emplace_back(SE->getAddExpr(get<0>(BaseScevs[1]), Scaled2),
                          NeedsFreeze);
    break;
  }
  case Instruction::Select: {
    SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> ChildScevs;
    // A select means we've found a forked pointer, but we currently only
    // support a single select per pointer so if there's another behind this
    // then we just bail out and return the generic SCEV.
    findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
    findForkedSCEVs(SE, L, I->getOperand(2), ChildScevs, Depth);
    if (ChildScevs.size() == 2) {
      ScevList.push_back(ChildScevs[0]);
      ScevList.push_back(ChildScevs[1]);
    } else
      ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
    break;
  }
  case Instruction::Add:
  case Instruction::Sub: {
    SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> LScevs;
    SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> RScevs;
    findForkedSCEVs(SE, L, I->getOperand(0), LScevs, Depth);
    findForkedSCEVs(SE, L, I->getOperand(1), RScevs, Depth);

    // See if we need to freeze our fork...
    bool NeedsFreeze =
        any_of(LScevs, UndefPoisonCheck) || any_of(RScevs, UndefPoisonCheck);

    // Check that we only have a single fork, on either the left or right side.
    // Copy the SCEV across for the one without a fork in order to generate
    // the full SCEV for both sides of the BinOp.
    if (LScevs.size() == 2 && RScevs.size() == 1)
      RScevs.push_back(RScevs[0]);
    else if (RScevs.size() == 2 && LScevs.size() == 1)
      LScevs.push_back(LScevs[0]);
    else {
      ScevList.emplace_back(Scev, NeedsFreeze);
      break;
    }

    ScevList.emplace_back(
        GetBinOpExpr(Opcode, get<0>(LScevs[0]), get<0>(RScevs[0])),
        NeedsFreeze);
    ScevList.emplace_back(
        GetBinOpExpr(Opcode, get<0>(LScevs[1]), get<0>(RScevs[1])),
        NeedsFreeze);
    break;
  }
  default:
    // Just return the current SCEV if we haven't handled the instruction yet.
    LLVM_DEBUG(dbgs() << "ForkedPtr unhandled instruction: " << *I << "\n");
    ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
    break;
  }
}
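
// For illustration, applied to the select example in the comment above
// (assumed types): the decomposition produces two SCEVs for %addr, roughly
//   (%base + 8 * %a) and (%base + 8 * %b),
// each of which can then be bounds-checked as an ordinary pointer expression.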

static SmallVector<PointerIntPair<const SCEV *, 1, bool>>
findForkedPointer(PredicatedScalarEvolution &PSE,
                  const ValueToValueMap &StridesMap, Value *Ptr,
                  const Loop *L) {
  ScalarEvolution *SE = PSE.getSE();
  assert(SE->isSCEVable(Ptr->getType()) && "Value is not SCEVable!");
  SmallVector<PointerIntPair<const SCEV *, 1, bool>> Scevs;
  findForkedSCEVs(SE, L, Ptr, Scevs, MaxForkedSCEVDepth);

  // For now, we will only accept a forked pointer with two possible SCEVs
  // that are either SCEVAddRecExprs or loop invariant.
  if (Scevs.size() == 2 &&
      (isa<SCEVAddRecExpr>(get<0>(Scevs[0])) ||
       SE->isLoopInvariant(get<0>(Scevs[0]), L)) &&
      (isa<SCEVAddRecExpr>(get<0>(Scevs[1])) ||
       SE->isLoopInvariant(get<0>(Scevs[1]), L))) {
    LLVM_DEBUG(dbgs() << "LAA: Found forked pointer: " << *Ptr << "\n");
    LLVM_DEBUG(dbgs() << "\t(1) " << *get<0>(Scevs[0]) << "\n");
    LLVM_DEBUG(dbgs() << "\t(2) " << *get<0>(Scevs[1]) << "\n");
    return Scevs;
  }

  return {{replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false}};
}

bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
                                          MemAccessInfo Access, Type *AccessTy,
                                          const ValueToValueMap &StridesMap,
                                          DenseMap<Value *, unsigned> &DepSetId,
                                          Loop *TheLoop, unsigned &RunningDepId,
                                          unsigned ASId, bool ShouldCheckWrap,
                                          bool Assume) {
  Value *Ptr = Access.getPointer();

  SmallVector<PointerIntPair<const SCEV *, 1, bool>> TranslatedPtrs =
      findForkedPointer(PSE, StridesMap, Ptr, TheLoop);

  for (auto &P : TranslatedPtrs) {
    const SCEV *PtrExpr = get<0>(P);
    if (!hasComputableBounds(PSE, Ptr, PtrExpr, TheLoop, Assume))
      return false;

    // When we run after a failing dependency check we have to make sure
    // we don't have wrapping pointers.
    if (ShouldCheckWrap) {
      // Skip wrap checking when translating pointers.
      if (TranslatedPtrs.size() > 1)
        return false;

      if (!isNoWrap(PSE, StridesMap, Ptr, AccessTy, TheLoop)) {
        auto *Expr = PSE.getSCEV(Ptr);
        if (!Assume || !isa<SCEVAddRecExpr>(Expr))
          return false;
        PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
      }
    }
    // If there's only one option for Ptr, look it up after bounds and wrap
    // checking, because assumptions might have been added to PSE.
    if (TranslatedPtrs.size() == 1)
      TranslatedPtrs[0] = {replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr),
                           false};
  }

  for (auto [PtrExpr, NeedsFreeze] : TranslatedPtrs) {
    // The id of the dependence set.
    unsigned DepId;

    if (isDependencyCheckNeeded()) {
      Value *Leader = DepCands.getLeaderValue(Access).getPointer();
      unsigned &LeaderId = DepSetId[Leader];
      if (!LeaderId)
        LeaderId = RunningDepId++;
      DepId = LeaderId;
    } else
      // Each access has its own dependence set.
      DepId = RunningDepId++;

    bool IsWrite = Access.getInt();
    RtCheck.insert(TheLoop, Ptr, PtrExpr, AccessTy, IsWrite, DepId, ASId, PSE,
                   NeedsFreeze);
    LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
  }

  return true;
}

bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
                                     ScalarEvolution *SE, Loop *TheLoop,
                                     const ValueToValueMap &StridesMap,
                                     Value *&UncomputablePtr,
                                     bool ShouldCheckWrap) {
  // Find pointers with computable bounds. We are going to use this information
  // to place a runtime bound check.
  bool CanDoRT = true;

  bool MayNeedRTCheck = false;
  if (!IsRTCheckAnalysisNeeded) return true;

  bool IsDepCheckNeeded = isDependencyCheckNeeded();

  // We assign a consecutive id to accesses from different alias sets.
  // Accesses between different groups don't need to be checked.
  unsigned ASId = 0;
  for (auto &AS : AST) {
    int NumReadPtrChecks = 0;
    int NumWritePtrChecks = 0;
    bool CanDoAliasSetRT = true;
    ++ASId;

    // We assign consecutive ids to accesses from different dependence sets.
    // Accesses within the same set don't need a runtime check.
    unsigned RunningDepId = 1;
    DenseMap<Value *, unsigned> DepSetId;

    SmallVector<std::pair<MemAccessInfo, Type *>, 4> Retries;

    // First, count how many write and read accesses are in the alias set. Also
    // collect MemAccessInfos for later.
    SmallVector<MemAccessInfo, 4> AccessInfos;
    for (const auto &A : AS) {
      Value *Ptr = A.getValue();
      bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));

      if (IsWrite)
        ++NumWritePtrChecks;
      else
        ++NumReadPtrChecks;
      AccessInfos.emplace_back(Ptr, IsWrite);
    }

    // We do not need runtime checks for this alias set, if there are no writes
    // or a single write and no reads.
    if (NumWritePtrChecks == 0 ||
        (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) {
      assert((AS.size() <= 1 ||
              all_of(AS,
                     [this](auto AC) {
                       MemAccessInfo AccessWrite(AC.getValue(), true);
                       return DepCands.findValue(AccessWrite) == DepCands.end();
                     })) &&
             "Can only skip updating CanDoRT below, if all entries in AS "
             "are reads or there is at most 1 entry");
      continue;
    }

    for (auto &Access : AccessInfos) {
      for (const auto &AccessTy : Accesses[Access]) {
        if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
                                  DepSetId, TheLoop, RunningDepId, ASId,
                                  ShouldCheckWrap, false)) {
          LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
                            << *Access.getPointer() << '\n');
          Retries.push_back({Access, AccessTy});
          CanDoAliasSetRT = false;
        }
      }
    }

    // Note that this function computes CanDoRT and MayNeedRTCheck
    // independently. For example CanDoRT=false, MayNeedRTCheck=false means that
    // we have a pointer for which we couldn't find the bounds but we don't
    // actually need to emit any checks so it does not matter.
    //
    // We need runtime checks for this alias set, if there are at least 2
    // dependence sets (in which case RunningDepId > 2) or if we need to re-try
    // any bound checks (because in that case the number of dependence sets is
    // incomplete).
    bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty();

    // We need to perform run-time alias checks, but some pointers had bounds
    // that couldn't be checked.
    if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
      // Reset the CanDoSetRt flag and retry all accesses that have failed.
      // We know that we need these checks, so we can now be more aggressive
      // and add further checks if required (overflow checks).
      CanDoAliasSetRT = true;
      for (auto Retry : Retries) {
        MemAccessInfo Access = Retry.first;
        Type *AccessTy = Retry.second;
        if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
                                  DepSetId, TheLoop, RunningDepId, ASId,
                                  ShouldCheckWrap, /*Assume=*/true)) {
          CanDoAliasSetRT = false;
          UncomputablePtr = Access.getPointer();
          break;
        }
      }
    }

    CanDoRT &= CanDoAliasSetRT;
    MayNeedRTCheck |= NeedsAliasSetRTCheck;
    ++ASId;
  }

  // If the pointers that we would use for the bounds comparison have different
  // address spaces, assume the values aren't directly comparable, so we can't
  // use them for the runtime check. We also have to assume they could
  // overlap. In the future there should be metadata for whether address spaces
  // are disjoint.
  unsigned NumPointers = RtCheck.Pointers.size();
  for (unsigned i = 0; i < NumPointers; ++i) {
    for (unsigned j = i + 1; j < NumPointers; ++j) {
      // Only need to check pointers between two different dependency sets.
      if (RtCheck.Pointers[i].DependencySetId ==
          RtCheck.Pointers[j].DependencySetId)
        continue;
      // Only need to check pointers in the same alias set.
      if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
        continue;

      Value *PtrI = RtCheck.Pointers[i].PointerValue;
      Value *PtrJ = RtCheck.Pointers[j].PointerValue;

      unsigned ASi = PtrI->getType()->getPointerAddressSpace();
      unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
      if (ASi != ASj) {
        LLVM_DEBUG(
            dbgs() << "LAA: Runtime check would require comparison between"
                      " different address spaces\n");
        return false;
      }
    }
  }

  if (MayNeedRTCheck && CanDoRT)
    RtCheck.generateChecks(DepCands, IsDepCheckNeeded);

  LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
                    << " pointer comparisons.\n");

  // If we can do run-time checks, but there are no checks, no runtime checks
  // are needed. This can happen when all pointers point to the same underlying
  // object for example.
  RtCheck.Need = CanDoRT ? RtCheck.getNumberOfChecks() != 0 : MayNeedRTCheck;

  bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT;
  if (!CanDoRTIfNeeded)
    RtCheck.reset();
  return CanDoRTIfNeeded;
}

void AccessAnalysis::processMemAccesses() {
  // We process the set twice: first we process read-write pointers, last we
  // process read-only pointers. This allows us to skip dependence tests for
  // read-only pointers.

  LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
  LLVM_DEBUG(dbgs() << "  AST: "; AST.dump());
  LLVM_DEBUG(dbgs() << "LAA:   Accesses(" << Accesses.size() << "):\n");
  LLVM_DEBUG({
    for (auto A : Accesses)
      dbgs() << "\t" << *A.first.getPointer() << " ("
             << (A.first.getInt()
                     ? "write"
                     : (ReadOnlyPtr.count(A.first.getPointer()) ? "read-only"
                                                                : "read"))
             << ")\n";
  });

  // The AliasSetTracker has nicely partitioned our pointers by metadata
  // compatibility and potential for underlying-object overlap. As a result, we
  // only need to check for potential pointer dependencies within each alias
  // set.
  for (const auto &AS : AST) {
    // Note that both the alias-set tracker and the alias sets themselves use
    // linked lists internally and so the iteration order here is deterministic
    // (matching the original instruction order within each set).

    bool SetHasWrite = false;

    // Map of pointers to last access encountered.
    typedef DenseMap<const Value*, MemAccessInfo> UnderlyingObjToAccessMap;
    UnderlyingObjToAccessMap ObjToLastAccess;

    // Set of accesses to check after all writes have been processed.
    PtrAccessMap DeferredAccesses;

    // Iterate over each alias set twice, once to process read/write pointers,
    // and then to process read-only pointers.
    for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
      bool UseDeferred = SetIteration > 0;
      PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;

      for (const auto &AV : AS) {
        Value *Ptr = AV.getValue();

        // For a single memory access in AliasSetTracker, Accesses may contain
        // both read and write, and they both need to be handled for CheckDeps.
        for (const auto &AC : S) {
          if (AC.first.getPointer() != Ptr)
            continue;

          bool IsWrite = AC.first.getInt();

          // If we're using the deferred access set, then it contains only
          // reads.
          bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
          if (UseDeferred && !IsReadOnlyPtr)
            continue;
          // Otherwise, the pointer must be in the PtrAccessSet, either as a
          // read or a write.
          assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
                  S.count(MemAccessInfo(Ptr, false))) &&
                 "Alias-set pointer not in the access set?");

          MemAccessInfo Access(Ptr, IsWrite);
          DepCands.insert(Access);

          // Memorize read-only pointers for later processing and skip them in
          // the first round (they need to be checked after we have seen all
          // write pointers). Note: we also mark pointers that are not
          // consecutive as "read-only" pointers (so that we check
          // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
          if (!UseDeferred && IsReadOnlyPtr) {
            // We only use the pointer keys, the types vector values don't
            // matter.
            DeferredAccesses.insert({Access, {}});
            continue;
          }

          // If this is a write, check other reads and writes for conflicts. If
          // this is a read, only check other writes for conflicts (but only if
          // there is no other write to the ptr - this is an optimization to
          // catch "a[i] = a[i] + " without having to do a dependence check).
          if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
            CheckDeps.push_back(Access);
            IsRTCheckAnalysisNeeded = true;
          }

          if (IsWrite)
            SetHasWrite = true;

          // Create sets of pointers connected by a shared alias set and
          // underlying object.
          typedef SmallVector<const Value *, 16> ValueVector;
          ValueVector TempObjects;

          getUnderlyingObjects(Ptr, TempObjects, LI);
          LLVM_DEBUG(dbgs()
                     << "Underlying objects for pointer " << *Ptr << "\n");
          for (const Value *UnderlyingObj : TempObjects) {
            // nullptr never aliases, so don't join sets for pointers that have
            // "null" in their UnderlyingObjects list.
            if (isa<ConstantPointerNull>(UnderlyingObj) &&
                !NullPointerIsDefined(
                    TheLoop->getHeader()->getParent(),
                    UnderlyingObj->getType()->getPointerAddressSpace()))
              continue;

            UnderlyingObjToAccessMap::iterator Prev =
                ObjToLastAccess.find(UnderlyingObj);
            if (Prev != ObjToLastAccess.end())
              DepCands.unionSets(Access, Prev->second);

            ObjToLastAccess[UnderlyingObj] = Access;
            LLVM_DEBUG(dbgs() << "  " << *UnderlyingObj << "\n");
          }
        }
      }
    }
  }
}

static bool isInBoundsGep(Value *Ptr) {
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
    return GEP->isInBounds();
  return false;
}

/// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
/// i.e. monotonically increasing/decreasing.
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
                           PredicatedScalarEvolution &PSE, const Loop *L) {
  // FIXME: This should probably only return true for NUW.
  if (AR->getNoWrapFlags(SCEV::NoWrapMask))
    return true;

  // Scalar evolution does not propagate the non-wrapping flags to values that
  // are derived from a non-wrapping induction variable because non-wrapping
  // could be flow-sensitive.
  //
  // Look through the potentially overflowing instruction to try to prove
  // non-wrapping for the *specific* value of Ptr.

  // The arithmetic implied by an inbounds GEP can't overflow.
  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
  if (!GEP || !GEP->isInBounds())
    return false;

  // Make sure there is only one non-const index and analyze that.
  Value *NonConstIndex = nullptr;
  for (Value *Index : GEP->indices())
    if (!isa<ConstantInt>(Index)) {
      if (NonConstIndex)
        return false;
      NonConstIndex = Index;
    }
  if (!NonConstIndex)
    // The recurrence is on the pointer, ignore for now.
    return false;

  // The index in GEP is signed. It is non-wrapping if it's derived from a NSW
  // AddRec using a NSW operation.
  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
    if (OBO->hasNoSignedWrap() &&
        // Assume constant for the other operand so that the AddRec can be
        // easily found.
        isa<ConstantInt>(OBO->getOperand(1))) {
      auto *OpScev = PSE.getSCEV(OBO->getOperand(0));

      if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
        return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
    }

  return false;
}

/// Check whether the access through \p Ptr has a constant stride.
std::optional<int64_t> llvm::getPtrStride(PredicatedScalarEvolution &PSE,
                                          Type *AccessTy, Value *Ptr,
                                          const Loop *Lp,
                                          const ValueToValueMap &StridesMap,
                                          bool Assume, bool ShouldCheckWrap) {
  Type *Ty = Ptr->getType();
  assert(Ty->isPointerTy() && "Unexpected non-ptr");

  if (isa<ScalableVectorType>(AccessTy)) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
                      << "\n");
    return std::nullopt;
  }

  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
  if (Assume && !AR)
    AR = PSE.getAsAddRec(Ptr);

  if (!AR) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
                      << " SCEV: " << *PtrScev << "\n");
    return std::nullopt;
  }

  // The access function must stride over the innermost loop.
  if (Lp != AR->getLoop()) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
                      << *Ptr << " SCEV: " << *AR << "\n");
    return std::nullopt;
  }

  // The address calculation must not wrap. Otherwise, a dependence could be
  // inverted.
  // An inbounds getelementptr that is an AddRec with a unit stride
  // cannot wrap per definition. The unit stride requirement is checked later.
  // A getelementptr without an inbounds attribute and unit stride would have
  // to access the pointer value "0" which is undefined behavior in address
  // space 0, therefore we can also vectorize this case.
  unsigned AddrSpace = Ty->getPointerAddressSpace();
  bool IsInBoundsGEP = isInBoundsGep(Ptr);
  bool IsNoWrapAddRec =
      !ShouldCheckWrap ||
      PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
      isNoWrapAddRec(Ptr, AR, PSE, Lp);
  if (!IsNoWrapAddRec && !IsInBoundsGEP &&
      NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace)) {
    if (Assume) {
      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
      IsNoWrapAddRec = true;
      LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
                        << "LAA:   Pointer: " << *Ptr << "\n"
                        << "LAA:   SCEV: " << *AR << "\n"
                        << "LAA:   Added an overflow assumption\n");
    } else {
      LLVM_DEBUG(
          dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
                 << *Ptr << " SCEV: " << *AR << "\n");
      return std::nullopt;
    }
  }

  // Check the step is constant.
  const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());

  // Calculate the pointer stride and check if it is constant.
  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
  if (!C) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
                      << " SCEV: " << *AR << "\n");
    return std::nullopt;
  }

  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
  TypeSize AllocSize = DL.getTypeAllocSize(AccessTy);
  int64_t Size = AllocSize.getFixedSize();
  const APInt &APStepVal = C->getAPInt();

  // Huge step value - give up.
  if (APStepVal.getBitWidth() > 64)
    return std::nullopt;

  int64_t StepVal = APStepVal.getSExtValue();

  // Strided access.
  int64_t Stride = StepVal / Size;
  int64_t Rem = StepVal % Size;
  if (Rem)
    return std::nullopt;

  // If the SCEV could wrap but we have an inbounds gep with a unit stride we
  // know we can't "wrap around the address space". In case of address space
  // zero we know that this won't happen without triggering undefined behavior.
  if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
      (IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
                                              AddrSpace))) {
    if (Assume) {
      // We can avoid this case by adding a run-time check.
      LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
                        << "inbounds or in address space 0 may wrap:\n"
                        << "LAA:   Pointer: " << *Ptr << "\n"
                        << "LAA:   SCEV: " << *AR << "\n"
                        << "LAA:   Added an overflow assumption\n");
      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
    } else
      return std::nullopt;
  }

  return Stride;
}
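
// Worked example (assumed values): for an i32 access whose pointer SCEV is
// {%A,+,8}<%loop>, StepVal = 8 and Size = 4, so the returned stride is 2
// elements per iteration; a step of 6 would leave a non-zero remainder and
// yield std::nullopt.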
1479 
1481  Value *PtrB, const DataLayout &DL,
1482  ScalarEvolution &SE, bool StrictCheck,
1483  bool CheckType) {
1484  assert(PtrA && PtrB && "Expected non-nullptr pointers.");
1485  assert(cast<PointerType>(PtrA->getType())
1486  ->isOpaqueOrPointeeTypeMatches(ElemTyA) && "Wrong PtrA type");
1487  assert(cast<PointerType>(PtrB->getType())
1488  ->isOpaqueOrPointeeTypeMatches(ElemTyB) && "Wrong PtrB type");
1489 
1490  // Make sure that A and B are different pointers.
1491  if (PtrA == PtrB)
1492  return 0;
1493 
1494  // Make sure that the element types are the same if required.
1495  if (CheckType && ElemTyA != ElemTyB)
1496  return std::nullopt;
1497 
1498  unsigned ASA = PtrA->getType()->getPointerAddressSpace();
1499  unsigned ASB = PtrB->getType()->getPointerAddressSpace();
1500 
1501  // Check that the address spaces match.
1502  if (ASA != ASB)
1503  return std::nullopt;
1504  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
1505 
1506  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
1507  Value *PtrA1 = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
1508  Value *PtrB1 = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
1509 
1510  int Val;
1511  if (PtrA1 == PtrB1) {
1512  // Retrieve the address space again as pointer stripping now tracks through
1513  // `addrspacecast`.
1514  ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace();
1515  ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace();
1516  // Check that the address spaces match and that the pointers are valid.
1517  if (ASA != ASB)
1518  return std::nullopt;
1519 
1520  IdxWidth = DL.getIndexSizeInBits(ASA);
1521  OffsetA = OffsetA.sextOrTrunc(IdxWidth);
1522  OffsetB = OffsetB.sextOrTrunc(IdxWidth);
1523 
1524  OffsetB -= OffsetA;
1525  Val = OffsetB.getSExtValue();
1526  } else {
1527  // Otherwise compute the distance with SCEV between the base pointers.
1528  const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
1529  const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
1530  const auto *Diff =
1531  dyn_cast<SCEVConstant>(SE.getMinusSCEV(PtrSCEVB, PtrSCEVA));
1532  if (!Diff)
1533  return std::nullopt;
1534  Val = Diff->getAPInt().getSExtValue();
1535  }
1536  int Size = DL.getTypeStoreSize(ElemTyA);
1537  int Dist = Val / Size;
1538 
1539  // Ensure that the calculated distance matches the type-based one after all
1540  // the bitcasts removal in the provided pointers.
1541  if (!StrictCheck || Dist * Size == Val)
1542  return Dist;
1543  return std::nullopt;
1544 }
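A usage sketch for the function above; the pointer operands and types are hypothetical:

  // Assuming PtrB is two i32 elements past PtrA in the same address space:
  //   Optional<int> D = getPointersDiff(Int32Ty, PtrA, Int32Ty, PtrB, DL, SE,
  //                                     /*StrictCheck=*/true);
  // D holds 2. With StrictCheck the byte distance (8) must also be an exact
  // multiple of the element store size (4), which it is here.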
1545 
1546 bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
1547  const DataLayout &DL, ScalarEvolution &SE,
1548  SmallVectorImpl<unsigned> &SortedIndices) {
1549  assert(llvm::all_of(
1550  VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
1551  "Expected list of pointer operands.");
1552  // Walk over the pointers, and map each of them to an offset relative to
1553  // the first pointer in the array.
1554  Value *Ptr0 = VL[0];
1555 
1556  using DistOrdPair = std::pair<int64_t, int>;
1557  auto Compare = llvm::less_first();
1558  std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
1559  Offsets.emplace(0, 0);
1560  int Cnt = 1;
1561  bool IsConsecutive = true;
1562  for (auto *Ptr : VL.drop_front()) {
1563  Optional<int> Diff = getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
1564  /*StrictCheck=*/true);
1565  if (!Diff)
1566  return false;
1567 
1568  // Check if a pointer with the same offset has already been seen.
1569  int64_t Offset = *Diff;
1570  auto Res = Offsets.emplace(Offset, Cnt);
1571  if (!Res.second)
1572  return false;
1573  // Consecutive order if the inserted element is the last one.
1574  IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end();
1575  ++Cnt;
1576  }
1577  SortedIndices.clear();
1578  if (!IsConsecutive) {
1579  // Fill the SortedIndices array only if the order is not consecutive.
1580  SortedIndices.resize(VL.size());
1581  Cnt = 0;
1582  for (const std::pair<int64_t, int> &Pair : Offsets) {
1583  SortedIndices[Cnt] = Pair.second;
1584  ++Cnt;
1585  }
1586  }
1587  return true;
1588 }
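Concretely, an illustrative case (not from the source):

  // VL == {A, A+2, A+1} over i32 elements gives offsets {0, 2, 1} relative to
  // VL[0]. The order is not consecutive, so SortedIndices becomes {0, 2, 1}:
  // visit VL[0], then VL[2], then VL[1]. A duplicated offset would make
  // sortPtrAccesses return false instead.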
1589 
1590 /// Returns true if the memory operations \p A and \p B are consecutive.
1591 bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
1592  ScalarEvolution &SE, bool CheckType) {
1593  Value *PtrA = getLoadStorePointerOperand(A);
1594  Value *PtrB = getLoadStorePointerOperand(B);
1595  if (!PtrA || !PtrB)
1596  return false;
1597  Type *ElemTyA = getLoadStoreType(A);
1598  Type *ElemTyB = getLoadStoreType(B);
1599  Optional<int> Diff = getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
1600  /*StrictCheck=*/true, CheckType);
1601  return Diff && *Diff == 1;
1602 }
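A matching sketch for isConsecutiveAccess, again with hypothetical operands:

  // For two i32 loads L0 = A[i] and L1 = A[i+1]:
  //   isConsecutiveAccess(L0, L1, DL, SE);  // true: getPointersDiff == 1
  //   isConsecutiveAccess(L1, L0, DL, SE);  // false: the diff is -1, not 1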
1603 
1604 void MemoryDepChecker::addAccess(StoreInst *SI) {
1605  visitPointers(SI->getPointerOperand(), *InnermostLoop,
1606  [this, SI](Value *Ptr) {
1607  Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
1608  InstMap.push_back(SI);
1609  ++AccessIdx;
1610  });
1611 }
1612 
1613 void MemoryDepChecker::addAccess(LoadInst *LI) {
1614  visitPointers(LI->getPointerOperand(), *InnermostLoop,
1615  [this, LI](Value *Ptr) {
1616  Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
1617  InstMap.push_back(LI);
1618  ++AccessIdx;
1619  });
1620 }
1621 
1622 MemoryDepChecker::VectorizationSafetyStatus
1623 MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
1624  switch (Type) {
1625  case NoDep:
1626  case Forward:
1627  case BackwardVectorizable:
1628  return VectorizationSafetyStatus::Safe;
1629 
1630  case Unknown:
1631  return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
1632  case ForwardButPreventsForwarding:
1633  case Backward:
1634  case BackwardVectorizableButPreventsForwarding:
1635  return VectorizationSafetyStatus::Unsafe;
1636  }
1637  llvm_unreachable("unexpected DepType!");
1638 }
1639 
1640 bool MemoryDepChecker::Dependence::isBackward() const {
1641  switch (Type) {
1642  case NoDep:
1643  case Forward:
1644  case ForwardButPreventsForwarding:
1645  case Unknown:
1646  return false;
1647 
1648  case BackwardVectorizable:
1649  case Backward:
1650  case BackwardVectorizableButPreventsForwarding:
1651  return true;
1652  }
1653  llvm_unreachable("unexpected DepType!");
1654 }
1655 
1656 bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
1657  return isBackward() || Type == Unknown;
1658 }
1659 
1660 bool MemoryDepChecker::Dependence::isForward() const {
1661  switch (Type) {
1662  case Forward:
1663  case ForwardButPreventsForwarding:
1664  return true;
1665 
1666  case NoDep:
1667  case Unknown:
1668  case BackwardVectorizable:
1669  case Backward:
1670  case BackwardVectorizableButPreventsForwarding:
1671  return false;
1672  }
1673  llvm_unreachable("unexpected DepType!");
1674 }
1675 
1676 bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
1677  uint64_t TypeByteSize) {
1678  // If loads occur at a distance that is not a multiple of a feasible vector
1679  // factor, store-load forwarding does not take place.
1680  // Positive dependences might cause trouble because vectorizing them might
1681  // prevent store-load forwarding, making vectorized code run a lot slower.
1682  // a[i] = a[i-3] ^ a[i-8];
1683  // The stores to a[i:i+1] don't align with the loads from a[i-3:i-2] and
1684  // hence on your typical architecture store-load forwarding does not take
1685  // place. Vectorizing in such cases does not make sense.
1686  // Store-load forwarding distance.
1687 
1688  // After this many iterations store-to-load forwarding conflicts should not
1689  // cause any slowdowns.
1690  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
1691  // Maximum vector factor.
1692  uint64_t MaxVFWithoutSLForwardIssues = std::min(
1693  VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);
1694 
1695  // Compute the smallest VF at which the store and load would be misaligned.
1696  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
1697  VF *= 2) {
1698  // If the number of vector iterations between the store and the load is
1699  // small we could incur conflicts.
1700  if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
1701  MaxVFWithoutSLForwardIssues = (VF >> 1);
1702  break;
1703  }
1704  }
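  // Worked example (illustrative): Distance == 16 with TypeByteSize == 4 gives
  // NumItersForStoreLoadThroughMemory == 32. The loop above first trips at
  // VF == 32 (16 % 32 != 0 and 16 / 32 == 0 < 32), so
  // MaxVFWithoutSLForwardIssues is clamped to 16 bytes, i.e. four elements.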
1705 
1706  if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
1707  LLVM_DEBUG(
1708  dbgs() << "LAA: Distance " << Distance
1709  << " that could cause a store-load forwarding conflict\n");
1710  return true;
1711  }
1712 
1713  if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
1714  MaxVFWithoutSLForwardIssues !=
1715  VectorizerParams::MaxVectorWidth * TypeByteSize)
1716  MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
1717  return false;
1718 }
1719 
1720 void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
1721  if (Status < S)
1722  Status = S;
1723 }
1724 
1725 /// Given a dependence-distance \p Dist between two
1726  /// memory accesses that have the same stride, whose absolute value is given
1727 /// in \p Stride, and that have the same type size \p TypeByteSize,
1728  /// in a loop whose backedge-taken count is \p BackedgeTakenCount, check if it is
1729 /// possible to prove statically that the dependence distance is larger
1730 /// than the range that the accesses will travel through the execution of
1731 /// the loop. If so, return true; false otherwise. This is useful for
1732 /// example in loops such as the following (PR31098):
1733 /// for (i = 0; i < D; ++i) {
1734 /// = out[i];
1735 /// out[i+D] =
1736 /// }
1737 static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
1738  const SCEV &BackedgeTakenCount,
1739  const SCEV &Dist, uint64_t Stride,
1740  uint64_t TypeByteSize) {
1741 
1742  // If we can prove that
1743  // (**) |Dist| > BackedgeTakenCount * Step
1744  // where Step is the absolute stride of the memory accesses in bytes,
1745  // then there is no dependence.
1746  //
1747  // Rationale:
1748  // We basically want to check if the absolute distance (|Dist/Step|)
1749  // is >= the loop iteration count (or > BackedgeTakenCount).
1750  // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
1751  // Section 4.2.1); Note, that for vectorization it is sufficient to prove
1752  // that the dependence distance is >= VF; This is checked elsewhere.
1753  // But in some cases we can prune dependence distances early, and
1754  // even before selecting the VF, and without a runtime test, by comparing
1755  // the distance against the loop iteration count. Since the vectorized code
1756  // will be executed only if LoopCount >= VF, proving distance >= LoopCount
1757  // also guarantees that distance >= VF.
1758  //
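  // For instance (illustrative numbers): with BackedgeTakenCount == 1023,
  // Stride == 1 and TypeByteSize == 4, Product below is 1023 * 4 == 4092
  // bytes, so a dependence distance of +/-4096 bytes is proven safe because
  // |Dist| > BackedgeTakenCount * Step.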
1759  const uint64_t ByteStride = Stride * TypeByteSize;
1760  const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
1761  const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);
1762 
1763  const SCEV *CastedDist = &Dist;
1764  const SCEV *CastedProduct = Product;
1765  uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Dist.getType());
1766  uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType());
1767 
1768  // The dependence distance can be positive/negative, so we sign extend Dist;
1769  // The multiplication of the absolute stride in bytes and the
1770  // backedgeTakenCount is non-negative, so we zero extend Product.
1771  if (DistTypeSizeBits > ProductTypeSizeBits)
1772  CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
1773  else
1774  CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
1775 
1776  // Is Dist - (BackedgeTakenCount * Step) > 0 ?
1777  // (If so, then we have proven (**) because |Dist| >= Dist)
1778  const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
1779  if (SE.isKnownPositive(Minus))
1780  return true;
1781 
1782  // Second try: Is -Dist - (BackedgeTakenCount * Step) > 0 ?
1783  // (If so, then we have proven (**) because |Dist| >= -1*Dist)
1784  const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
1785  Minus = SE.getMinusSCEV(NegDist, CastedProduct);
1786  if (SE.isKnownPositive(Minus))
1787  return true;
1788 
1789  return false;
1790 }
1791 
1792 /// Check the dependence for two accesses with the same stride \p Stride.
1793 /// \p Distance is the positive distance and \p TypeByteSize is type size in
1794 /// bytes.
1795 ///
1796 /// \returns true if they are independent.
1797 static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
1798  uint64_t TypeByteSize) {
1799  assert(Stride > 1 && "The stride must be greater than 1");
1800  assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
1801  assert(Distance > 0 && "The distance must be non-zero");
1802 
1803  // Skip if the distance is not a multiple of the type byte size.
1804  if (Distance % TypeByteSize)
1805  return false;
1806 
1807  uint64_t ScaledDist = Distance / TypeByteSize;
1808 
1809  // No dependence if the scaled distance is not a multiple of the stride.
1810  // E.g.
1811  // for (i = 0; i < 1024 ; i += 4)
1812  // A[i+2] = A[i] + 1;
1813  //
1814  // Two accesses in memory (scaled distance is 2, stride is 4):
1815  // | A[0] | | | | A[4] | | | |
1816  // | | | A[2] | | | | A[6] | |
1817  //
1818  // E.g.
1819  // for (i = 0; i < 1024 ; i += 3)
1820  // A[i+4] = A[i] + 1;
1821  //
1822  // Two accesses in memory (scaled distance is 4, stride is 3):
1823  // | A[0] | | | A[3] | | | A[6] | | |
1824  // | | | | | A[4] | | | A[7] | |
1825  return ScaledDist % Stride;
1826 }
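Plugging the first example above into the function, with illustrative values:

  // A[i+2] = A[i] with i += 4 over i32 elements: the byte distance is 8, so
  // areStridedAccessesIndependent(/*Distance=*/8, /*Stride=*/4,
  //                               /*TypeByteSize=*/4) computes ScaledDist == 2
  // and returns 2 % 4 != 0, i.e. true: the access streams never collide.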
1827 
1828 MemoryDepChecker::Dependence::DepType
1829 MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
1830  const MemAccessInfo &B, unsigned BIdx,
1831  const ValueToValueMap &Strides) {
1832  assert (AIdx < BIdx && "Must pass arguments in program order");
1833 
1834  auto [APtr, AIsWrite] = A;
1835  auto [BPtr, BIsWrite] = B;
1836  Type *ATy = getLoadStoreType(InstMap[AIdx]);
1837  Type *BTy = getLoadStoreType(InstMap[BIdx]);
1838 
1839  // Two reads are independent.
1840  if (!AIsWrite && !BIsWrite)
1841  return Dependence::NoDep;
1842 
1843  // We cannot check pointers in different address spaces.
1844  if (APtr->getType()->getPointerAddressSpace() !=
1845  BPtr->getType()->getPointerAddressSpace())
1846  return Dependence::Unknown;
1847 
1848  int64_t StrideAPtr =
1849  getPtrStride(PSE, ATy, APtr, InnermostLoop, Strides, true).value_or(0);
1850  int64_t StrideBPtr =
1851  getPtrStride(PSE, BTy, BPtr, InnermostLoop, Strides, true).value_or(0);
1852 
1853  const SCEV *Src = PSE.getSCEV(APtr);
1854  const SCEV *Sink = PSE.getSCEV(BPtr);
1855 
1856  // If the induction step is negative we have to invert source and sink of the
1857  // dependence.
1858  if (StrideAPtr < 0) {
1859  std::swap(APtr, BPtr);
1860  std::swap(ATy, BTy);
1861  std::swap(Src, Sink);
1862  std::swap(AIsWrite, BIsWrite);
1863  std::swap(AIdx, BIdx);
1864  std::swap(StrideAPtr, StrideBPtr);
1865  }
1866 
1867  ScalarEvolution &SE = *PSE.getSE();
1868  const SCEV *Dist = SE.getMinusSCEV(Sink, Src);
1869 
1870  LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
1871  << "(Induction step: " << StrideAPtr << ")\n");
1872  LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
1873  << *InstMap[BIdx] << ": " << *Dist << "\n");
1874 
1875  // Need accesses with constant stride. We don't want to vectorize
1876  // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
1877  // the address space.
1878  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
1879  LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
1880  return Dependence::Unknown;
1881  }
1882 
1883  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
1884  uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
1885  bool HasSameSize =
1886  DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
1887  uint64_t Stride = std::abs(StrideAPtr);
1888 
1889  if (!isa<SCEVCouldNotCompute>(Dist) && HasSameSize &&
1890  isSafeDependenceDistance(DL, SE, *(PSE.getBackedgeTakenCount()), *Dist,
1891  Stride, TypeByteSize))
1892  return Dependence::NoDep;
1893 
1894  const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
1895  if (!C) {
1896  LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
1897  FoundNonConstantDistanceDependence = true;
1898  return Dependence::Unknown;
1899  }
1900 
1901  const APInt &Val = C->getAPInt();
1902  int64_t Distance = Val.getSExtValue();
1903 
1904  // Attempt to prove strided accesses independent.
1905  if (std::abs(Distance) > 0 && Stride > 1 && HasSameSize &&
1906  areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
1907  LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
1908  return Dependence::NoDep;
1909  }
1910 
1911  // Negative distances are not plausible dependencies.
1912  if (Val.isNegative()) {
1913  bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
1914  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1915  (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
1916  !HasSameSize)) {
1917  LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
1918  return Dependence::ForwardButPreventsForwarding;
1919  }
1920 
1921  LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
1922  return Dependence::Forward;
1923  }
1924 
1925  // Write to the same location with the same size.
1926  if (Val == 0) {
1927  if (HasSameSize)
1928  return Dependence::Forward;
1929  LLVM_DEBUG(
1930  dbgs() << "LAA: Zero dependence difference but different type sizes\n");
1931  return Dependence::Unknown;
1932  }
1933 
1934  assert(Val.isStrictlyPositive() && "Expect a positive value");
1935 
1936  if (!HasSameSize) {
1937  LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
1938  "different type sizes\n");
1939  return Dependence::Unknown;
1940  }
1941 
1942  // Bail out early if passed-in parameters make vectorization not feasible.
1943  unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
1944  VectorizerParams::VectorizationFactor : 1);
1945  unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
1946  VectorizerParams::VectorizationInterleave : 1);
1947  // The minimum number of iterations for a vectorized/unrolled version.
1948  unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
1949 
1950  // It's not vectorizable if the distance is smaller than the minimum distance
1951  // needed for a vectorized/unrolled version. Vectorizing one iteration ahead
1952  // needs TypeByteSize * Stride. Vectorizing the last iteration needs
1953  // TypeByteSize (no need to add the trailing gap distance).
1954  //
1955  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1956  // foo(int *A) {
1957  // int *B = (int *)((char *)A + 14);
1958  // for (i = 0 ; i < 1024 ; i += 2)
1959  // B[i] = A[i] + 1;
1960  // }
1961  //
1962  // Two accesses in memory (stride is 2):
1963  // | A[0] | | A[2] | | A[4] | | A[6] | |
1964  // | B[0] | | B[2] | | B[4] |
1965  //
1966  // Distance needs for vectorizing iterations except the last iteration:
1967  // 4 * 2 * (MinNumIter - 1). Distance needs for the last iteration: 4.
1968  // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
1969  //
1970  // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
1971  // 12, which is less than distance.
1972  //
1973  // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
1974  // the minimum distance needed is 28, which is greater than distance. It is
1975  // not safe to do vectorization.
1976  uint64_t MinDistanceNeeded =
1977  TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
1978  if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
1979  LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "
1980  << Distance << '\n');
1981  return Dependence::Backward;
1982  }
1983 
1984  // Unsafe if the minimum distance needed is greater than max safe distance.
1985  if (MinDistanceNeeded > MaxSafeDepDistBytes) {
1986  LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
1987  << MinDistanceNeeded << " size in bytes\n");
1988  return Dependence::Backward;
1989  }
1990 
1991  // Positive distance bigger than max vectorization factor.
1992  // FIXME: Should use max factor instead of max distance in bytes, which
1993  // cannot handle different types.
1994  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1995  // void foo (int *A, char *B) {
1996  // for (unsigned i = 0; i < 1024; i++) {
1997  // A[i+2] = A[i] + 1;
1998  // B[i+2] = B[i] + 1;
1999  // }
2000  // }
2001  //
2002  // This case is currently unsafe according to the max safe distance. If we
2003  // analyze the two accesses on array B, the max safe dependence distance
2004  // is 2. Then we analyze the accesses on array A, the minimum distance needed
2005  // is 8, which is greater than 2, so vectorization is forbidden. But actually
2006  // both A and B could be vectorized by 2 iterations.
2007  MaxSafeDepDistBytes =
2008  std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
2009 
2010  bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
2011  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
2012  couldPreventStoreLoadForward(Distance, TypeByteSize))
2013  return Dependence::BackwardVectorizableButPreventsForwarding;
2014 
2015  uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
2016  LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
2017  << " with max VF = " << MaxVF << '\n');
2018  uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
2019  MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits);
2020  return Dependence::BackwardVectorizable;
2021 }
2022 
2023 bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
2024  MemAccessInfoList &CheckDeps,
2025  const ValueToValueMap &Strides) {
2026 
2027  MaxSafeDepDistBytes = -1;
2028  SmallPtrSet<MemAccessInfo, 8> Visited;
2029  for (MemAccessInfo CurAccess : CheckDeps) {
2030  if (Visited.count(CurAccess))
2031  continue;
2032 
2033  // Get the relevant memory access set.
2034  EquivalenceClasses<MemAccessInfo>::iterator I =
2035  AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
2036 
2037  // Check accesses within this set.
2038  EquivalenceClasses<MemAccessInfo>::member_iterator AI =
2039  AccessSets.member_begin(I);
2040  EquivalenceClasses<MemAccessInfo>::member_iterator AE =
2041  AccessSets.member_end();
2042 
2043  // Check every access pair.
2044  while (AI != AE) {
2045  Visited.insert(*AI);
2046  bool AIIsWrite = AI->getInt();
2047  // Check loads only against next equivalent class, but stores also against
2048  // other stores in the same equivalence class - to the same address.
2049  EquivalenceClasses<MemAccessInfo>::member_iterator OI =
2050  (AIIsWrite ? AI : std::next(AI));
2051  while (OI != AE) {
2052  // Check every accessing instruction pair in program order.
2053  for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
2054  I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
2055  // Scan all accesses of another equivalence class, but only the next
2056  // accesses of the same equivalent class.
2057  for (std::vector<unsigned>::iterator
2058  I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),
2059  I2E = (OI == AI ? I1E : Accesses[*OI].end());
2060  I2 != I2E; ++I2) {
2061  auto A = std::make_pair(&*AI, *I1);
2062  auto B = std::make_pair(&*OI, *I2);
2063 
2064  assert(*I1 != *I2);
2065  if (*I1 > *I2)
2066  std::swap(A, B);
2067 
2068  Dependence::DepType Type =
2069  isDependent(*A.first, A.second, *B.first, B.second, Strides);
2070  mergeInStatus(Dependence::isSafeForVectorization(Type));
2071 
2072  // Gather dependences unless we accumulated MaxDependences
2073  // dependences. In that case return as soon as we find the first
2074  // unsafe dependence. This puts a limit on this quadratic
2075  // algorithm.
2076  if (RecordDependences) {
2077  if (Type != Dependence::NoDep)
2078  Dependences.push_back(Dependence(A.second, B.second, Type));
2079 
2080  if (Dependences.size() >= MaxDependences) {
2081  RecordDependences = false;
2082  Dependences.clear();
2083  LLVM_DEBUG(dbgs()
2084  << "Too many dependences, stopped recording\n");
2085  }
2086  }
2087  if (!RecordDependences && !isSafeForVectorization())
2088  return false;
2089  }
2090  ++OI;
2091  }
2092  AI++;
2093  }
2094  }
2095 
2096  LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
2097  return isSafeForVectorization();
2098 }
2099 
2100 SmallVector<Instruction *, 4>
2101 MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
2102  MemAccessInfo Access(Ptr, isWrite);
2103  auto &IndexVector = Accesses.find(Access)->second;
2104 
2105  SmallVector<Instruction *, 4> Insts;
2106  transform(IndexVector,
2107  std::back_inserter(Insts),
2108  [&](unsigned Idx) { return this->InstMap[Idx]; });
2109  return Insts;
2110 }
2111 
2112 const char *MemoryDepChecker::Dependence::DepName[] = {
2113  "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
2114  "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
2115 
2116 void MemoryDepChecker::Dependence::print(
2117  raw_ostream &OS, unsigned Depth,
2118  const SmallVectorImpl<Instruction *> &Instrs) const {
2119  OS.indent(Depth) << DepName[Type] << ":\n";
2120  OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
2121  OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
2122 }
2123 
2124 bool LoopAccessInfo::canAnalyzeLoop() {
2125  // We need to have a loop header.
2126  LLVM_DEBUG(dbgs() << "LAA: Found a loop in "
2127  << TheLoop->getHeader()->getParent()->getName() << ": "
2128  << TheLoop->getHeader()->getName() << '\n');
2129 
2130  // We can only analyze innermost loops.
2131  if (!TheLoop->isInnermost()) {
2132  LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
2133  recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
2134  return false;
2135  }
2136 
2137  // We must have a single backedge.
2138  if (TheLoop->getNumBackEdges() != 1) {
2139  LLVM_DEBUG(
2140  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
2141  recordAnalysis("CFGNotUnderstood")
2142  << "loop control flow is not understood by analyzer";
2143  return false;
2144  }
2145 
2146  // ScalarEvolution needs to be able to find the exit count.
2147  const SCEV *ExitCount = PSE->getBackedgeTakenCount();
2148  if (isa<SCEVCouldNotCompute>(ExitCount)) {
2149  recordAnalysis("CantComputeNumberOfIterations")
2150  << "could not determine number of loop iterations";
2151  LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
2152  return false;
2153  }
2154 
2155  return true;
2156 }
2157 
2158 void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
2159  const TargetLibraryInfo *TLI,
2160  DominatorTree *DT) {
2161  // Holds the Load and Store instructions.
2162  SmallVector<LoadInst *, 16> Loads;
2163  SmallVector<StoreInst *, 16> Stores;
2164 
2165  // Holds all the different accesses in the loop.
2166  unsigned NumReads = 0;
2167  unsigned NumReadWrites = 0;
2168 
2169  bool HasComplexMemInst = false;
2170 
2171  // A runtime check is only legal to insert if there are no convergent calls.
2172  HasConvergentOp = false;
2173 
2174  PtrRtChecking->Pointers.clear();
2175  PtrRtChecking->Need = false;
2176 
2177  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
2178 
2179  const bool EnableMemAccessVersioningOfLoop =
2180  EnableMemAccessVersioning &&
2181  !TheLoop->getHeader()->getParent()->hasOptSize();
2182 
2183  // Traverse blocks in fixed RPOT order, regardless of their storage in the
2184  // loop info, as it may be arbitrary.
2185  LoopBlocksRPO RPOT(TheLoop);
2186  RPOT.perform(LI);
2187  for (BasicBlock *BB : RPOT) {
2188  // Scan the BB and collect legal loads and stores. Also detect any
2189  // convergent instructions.
2190  for (Instruction &I : *BB) {
2191  if (auto *Call = dyn_cast<CallBase>(&I)) {
2192  if (Call->isConvergent())
2193  HasConvergentOp = true;
2194  }
2195 
2196  // With both a non-vectorizable memory instruction and a convergent
2197  // operation found in this loop, there is no reason to continue the search.
2198  if (HasComplexMemInst && HasConvergentOp) {
2199  CanVecMem = false;
2200  return;
2201  }
2202 
2203  // Avoid hitting recordAnalysis multiple times.
2204  if (HasComplexMemInst)
2205  continue;
2206 
2207  // If this is a load, save it. If this instruction can read from memory
2208  // but is not a load, then we quit. Notice that we don't handle function
2209  // calls that read or write.
2210  if (I.mayReadFromMemory()) {
2211  // Many math library functions read the rounding mode. We will only
2212  // vectorize a loop if it contains known function calls that don't set
2213  // the flag. Therefore, it is safe to ignore this read from memory.
2214  auto *Call = dyn_cast<CallInst>(&I);
2215  if (Call && getVectorIntrinsicIDForCall(Call, TLI))
2216  continue;
2217 
2218  // If the function has an explicit vectorized counterpart, we can safely
2219  // assume that it can be vectorized.
2220  if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
2221  !VFDatabase::getMappings(*Call).empty())
2222  continue;
2223 
2224  auto *Ld = dyn_cast<LoadInst>(&I);
2225  if (!Ld) {
2226  recordAnalysis("CantVectorizeInstruction", Ld)
2227  << "instruction cannot be vectorized";
2228  HasComplexMemInst = true;
2229  continue;
2230  }
2231  if (!Ld->isSimple() && !IsAnnotatedParallel) {
2232  recordAnalysis("NonSimpleLoad", Ld)
2233  << "read with atomic ordering or volatile read";
2234  LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
2235  HasComplexMemInst = true;
2236  continue;
2237  }
2238  NumLoads++;
2239  Loads.push_back(Ld);
2240  DepChecker->addAccess(Ld);
2241  if (EnableMemAccessVersioningOfLoop)
2242  collectStridedAccess(Ld);
2243  continue;
2244  }
2245 
2246  // Save 'store' instructions. Abort if other instructions write to memory.
2247  if (I.mayWriteToMemory()) {
2248  auto *St = dyn_cast<StoreInst>(&I);
2249  if (!St) {
2250  recordAnalysis("CantVectorizeInstruction", St)
2251  << "instruction cannot be vectorized";
2252  HasComplexMemInst = true;
2253  continue;
2254  }
2255  if (!St->isSimple() && !IsAnnotatedParallel) {
2256  recordAnalysis("NonSimpleStore", St)
2257  << "write with atomic ordering or volatile write";
2258  LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
2259  HasComplexMemInst = true;
2260  continue;
2261  }
2262  NumStores++;
2263  Stores.push_back(St);
2264  DepChecker->addAccess(St);
2265  if (EnableMemAccessVersioningOfLoop)
2266  collectStridedAccess(St);
2267  }
2268  } // Next instr.
2269  } // Next block.
2270 
2271  if (HasComplexMemInst) {
2272  CanVecMem = false;
2273  return;
2274  }
2275 
2276  // Now we have two lists that hold the loads and the stores.
2277  // Next, we find the pointers that they use.
2278 
2279  // Check if we see any stores. If there are no stores, then we don't
2280  // care if the pointers are *restrict*.
2281  if (!Stores.size()) {
2282  LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
2283  CanVecMem = true;
2284  return;
2285  }
2286 
2287  MemoryDepChecker::DepCandidates DependentAccesses;
2288  AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE);
2289 
2290  // Holds the analyzed pointers. We don't want to call getUnderlyingObjects
2291  // multiple times on the same object. If the ptr is accessed twice, once
2292  // for read and once for write, it will only appear once (on the write
2293  // list). This is okay, since we are going to check for conflicts between
2294  // writes and between reads and writes, but not between reads and reads.
2295  SmallSet<std::pair<Value *, Type *>, 16> Seen;
2296 
2297  // Record uniform store addresses to identify if we have multiple stores
2298  // to the same address.
2299  SmallPtrSet<Value *, 16> UniformStores;
2300 
2301  for (StoreInst *ST : Stores) {
2302  Value *Ptr = ST->getPointerOperand();
2303 
2304  if (isUniform(Ptr)) {
2305  // Record store instructions to loop invariant addresses
2306  StoresToInvariantAddresses.push_back(ST);
2307  HasDependenceInvolvingLoopInvariantAddress |=
2308  !UniformStores.insert(Ptr).second;
2309  }
2310 
2311  // If we did *not* see this pointer before, insert it into the read-write
2312  // list. At this phase it is only a 'write' list.
2313  Type *AccessTy = getLoadStoreType(ST);
2314  if (Seen.insert({Ptr, AccessTy}).second) {
2315  ++NumReadWrites;
2316 
2317  MemoryLocation Loc = MemoryLocation::get(ST);
2318  // The TBAA metadata could have a control dependency on the predication
2319  // condition, so we cannot rely on it when determining whether or not we
2320  // need runtime pointer checks.
2321  if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
2322  Loc.AATags.TBAA = nullptr;
2323 
2324  visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2325  [&Accesses, AccessTy, Loc](Value *Ptr) {
2326  MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2327  Accesses.addStore(NewLoc, AccessTy);
2328  });
2329  }
2330  }
2331 
2332  if (IsAnnotatedParallel) {
2333  LLVM_DEBUG(
2334  dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
2335  << "checks.\n");
2336  CanVecMem = true;
2337  return;
2338  }
2339 
2340  for (LoadInst *LD : Loads) {
2341  Value *Ptr = LD->getPointerOperand();
2342  // If we did *not* see this pointer before, insert it into the
2343  // read list. If we *did* see it before, then it is already in
2344  // the read-write list. This allows us to vectorize expressions
2345  // such as A[i] += x, because the address of A[i] is a read-write
2346  // pointer. This only works if the index of A[i] is consecutive.
2347  // If the address of i is unknown (for example A[B[i]]) then we may
2348  // read a few words, modify, and write a few words, and some of the
2349  // words may be written to the same address.
2350  bool IsReadOnlyPtr = false;
2351  Type *AccessTy = getLoadStoreType(LD);
2352  if (Seen.insert({Ptr, AccessTy}).second ||
2353  !getPtrStride(*PSE, LD->getType(), Ptr, TheLoop, SymbolicStrides).value_or(0)) {
2354  ++NumReads;
2355  IsReadOnlyPtr = true;
2356  }
2357 
2358  // See if there is an unsafe dependency between a load to a uniform address and
2359  // store to the same uniform address.
2360  if (UniformStores.count(Ptr)) {
2361  LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
2362  "load and uniform store to the same address!\n");
2363  HasDependenceInvolvingLoopInvariantAddress = true;
2364  }
2365 
2366  MemoryLocation Loc = MemoryLocation::get(LD);
2367  // The TBAA metadata could have a control dependency on the predication
2368  // condition, so we cannot rely on it when determining whether or not we
2369  // need runtime pointer checks.
2370  if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
2371  Loc.AATags.TBAA = nullptr;
2372 
2373  visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2374  [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
2375  MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2376  Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
2377  });
2378  }
2379 
2380  // If we write (or read-write) to a single destination and there are no
2381  // other reads in this loop, then it is safe to vectorize.
2382  if (NumReadWrites == 1 && NumReads == 0) {
2383  LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
2384  CanVecMem = true;
2385  return;
2386  }
2387 
2388  // Build dependence sets and check whether we need a runtime pointer bounds
2389  // check.
2390  Accesses.buildDependenceSets();
2391 
2392  // Find pointers with computable bounds. We are going to use this information
2393  // to place a runtime bound check.
2394  Value *UncomputablePtr = nullptr;
2395  bool CanDoRTIfNeeded =
2396  Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), TheLoop,
2397  SymbolicStrides, UncomputablePtr, false);
2398  if (!CanDoRTIfNeeded) {
2399  auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
2400  recordAnalysis("CantIdentifyArrayBounds", I)
2401  << "cannot identify array bounds";
2402  LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
2403  << "the array bounds.\n");
2404  CanVecMem = false;
2405  return;
2406  }
2407 
2408  LLVM_DEBUG(
2409  dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");
2410 
2411  CanVecMem = true;
2412  if (Accesses.isDependencyCheckNeeded()) {
2413  LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
2414  CanVecMem = DepChecker->areDepsSafe(
2415  DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
2416  MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
2417 
2418  if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
2419  LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
2420 
2421  // Clear the dependency checks. We assume they are not needed.
2422  Accesses.resetDepChecks(*DepChecker);
2423 
2424  PtrRtChecking->reset();
2425  PtrRtChecking->Need = true;
2426 
2427  auto *SE = PSE->getSE();
2428  UncomputablePtr = nullptr;
2429  CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(
2430  *PtrRtChecking, SE, TheLoop, SymbolicStrides, UncomputablePtr, true);
2431 
2432  // Check that we found the bounds for the pointer.
2433  if (!CanDoRTIfNeeded) {
2434  auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
2435  recordAnalysis("CantCheckMemDepsAtRunTime", I)
2436  << "cannot check memory dependencies at runtime";
2437  LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
2438  CanVecMem = false;
2439  return;
2440  }
2441 
2442  CanVecMem = true;
2443  }
2444  }
2445 
2446  if (HasConvergentOp) {
2447  recordAnalysis("CantInsertRuntimeCheckWithConvergent")
2448  << "cannot add control dependency to convergent operation";
2449  LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
2450  "would be needed with a convergent operation\n");
2451  CanVecMem = false;
2452  return;
2453  }
2454 
2455  if (CanVecMem)
2456  LLVM_DEBUG(
2457  dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
2458  << (PtrRtChecking->Need ? "" : " don't")
2459  << " need runtime memory checks.\n");
2460  else
2461  emitUnsafeDependenceRemark();
2462 }
2463 
2464 void LoopAccessInfo::emitUnsafeDependenceRemark() {
2465  auto Deps = getDepChecker().getDependences();
2466  if (!Deps)
2467  return;
2468  auto Found = llvm::find_if(*Deps, [](const MemoryDepChecker::Dependence &D) {
2469  return MemoryDepChecker::Dependence::isSafeForVectorization(D.Type) !=
2470  MemoryDepChecker::VectorizationSafetyStatus::Safe;
2471  });
2472  if (Found == Deps->end())
2473  return;
2474  MemoryDepChecker::Dependence Dep = *Found;
2475 
2476  LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
2477 
2478  // Emit remark for first unsafe dependence
2479  OptimizationRemarkAnalysis &R =
2480  recordAnalysis("UnsafeDep", Dep.getDestination(*this))
2481  << "unsafe dependent memory operations in loop. Use "
2482  "#pragma loop distribute(enable) to allow loop distribution "
2483  "to attempt to isolate the offending operations into a separate "
2484  "loop";
2485 
2486  switch (Dep.Type) {
2487  case MemoryDepChecker::Dependence::NoDep:
2488  case MemoryDepChecker::Dependence::Forward:
2489  case MemoryDepChecker::Dependence::BackwardVectorizable:
2490  llvm_unreachable("Unexpected dependence");
2491  case MemoryDepChecker::Dependence::Backward:
2492  R << "\nBackward loop carried data dependence.";
2493  break;
2494  case MemoryDepChecker::Dependence::ForwardButPreventsForwarding:
2495  R << "\nForward loop carried data dependence that prevents "
2496  "store-to-load forwarding.";
2497  break;
2498  case MemoryDepChecker::Dependence::BackwardVectorizableButPreventsForwarding:
2499  R << "\nBackward loop carried data dependence that prevents "
2500  "store-to-load forwarding.";
2501  break;
2502  case MemoryDepChecker::Dependence::Unknown:
2503  R << "\nUnknown data dependence.";
2504  break;
2505  }
2506 
2507  if (Instruction *I = Dep.getSource(*this)) {
2508  DebugLoc SourceLoc = I->getDebugLoc();
2509  if (auto *DD = dyn_cast_or_null<Instruction>(getPointerOperand(I)))
2510  SourceLoc = DD->getDebugLoc();
2511  if (SourceLoc)
2512  R << " Memory location is the same as accessed at "
2513  << ore::NV("Location", SourceLoc);
2514  }
2515 }
2516 
2517 bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
2518  DominatorTree *DT) {
2519  assert(TheLoop->contains(BB) && "Unknown block used");
2520 
2521  // Blocks that do not dominate the latch need predication.
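  // (Illustrative: the "then" block of an if-statement in the loop body does
  // not dominate the latch and needs predication; the header always dominates
  // the latch and needs none.)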
2522  BasicBlock* Latch = TheLoop->getLoopLatch();
2523  return !DT->dominates(BB, Latch);
2524 }
2525 
2526 OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
2527  Instruction *I) {
2528  assert(!Report && "Multiple reports generated");
2529 
2530  Value *CodeRegion = TheLoop->getHeader();
2531  DebugLoc DL = TheLoop->getStartLoc();
2532 
2533  if (I) {
2534  CodeRegion = I->getParent();
2535  // If there is no debug location attached to the instruction, fall back to
2536  // using the loop's.
2537  if (I->getDebugLoc())
2538  DL = I->getDebugLoc();
2539  }
2540 
2541  Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
2542  CodeRegion);
2543  return *Report;
2544 }
2545 
2546 bool LoopAccessInfo::isUniform(Value *V) const {
2547  auto *SE = PSE->getSE();
2548  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
2549  // never considered uniform.
2550  // TODO: Is this really what we want? Even without FP SCEV, we may want some
2551  // trivially loop-invariant FP values to be considered uniform.
2552  if (!SE->isSCEVable(V->getType()))
2553  return false;
2554  return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
2555 }
2556 
2557 void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
2558  Value *Ptr = getLoadStorePointerOperand(MemAccess);
2559  if (!Ptr)
2560  return;
2561 
2562  Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
2563  if (!Stride)
2564  return;
2565 
2566  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
2567  "versioning:");
2568  LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
2569 
2570  // Avoid adding the "Stride == 1" predicate when we know that
2571  // Stride >= Trip-Count. Such a predicate will effectively optimize a single
2572  // or zero iteration loop, as Trip-Count <= Stride == 1.
2573  //
2574  // TODO: We are currently not making a very informed decision on when it is
2575  // beneficial to apply stride versioning. It might make more sense that the
2576  // users of this analysis (such as the vectorizer) will trigger it, based on
2577  // their specific cost considerations; For example, in cases where stride
2578  // versioning does not help resolving memory accesses/dependences, the
2579  // vectorizer should evaluate the cost of the runtime test, and the benefit
2580  // of various possible stride specializations, considering the alternatives
2581  // of using gather/scatters (if available).
2582 
2583  const SCEV *StrideExpr = PSE->getSCEV(Stride);
2584  const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
2585 
2586  // Match the types so we can compare the stride and the BETakenCount.
2587  // The Stride can be positive/negative, so we sign extend Stride;
2588  // The backedgeTakenCount is non-negative, so we zero extend BETakenCount.
2589  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2590  uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(StrideExpr->getType());
2591  uint64_t BETypeSizeBits = DL.getTypeSizeInBits(BETakenCount->getType());
2592  const SCEV *CastedStride = StrideExpr;
2593  const SCEV *CastedBECount = BETakenCount;
2594  ScalarEvolution *SE = PSE->getSE();
2595  if (BETypeSizeBits >= StrideTypeSizeBits)
2596  CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
2597  else
2598  CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
2599  const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
2600  // Since TripCount == BackEdgeTakenCount + 1, checking:
2601  // "Stride >= TripCount" is equivalent to checking:
2602  // Stride - BETakenCount > 0
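  // E.g. (illustrative): if SCEV knows Stride >= 2 while BETakenCount <= 1,
  // the difference is known positive and a "Stride == 1" predicate could only
  // be satisfied by a loop that runs at most once, so versioning is skipped.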
2603  if (SE->isKnownPositive(StrideMinusBETaken)) {
2604  LLVM_DEBUG(
2605  dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
2606  "Stride==1 predicate will imply that the loop executes "
2607  "at most once.\n");
2608  return;
2609  }
2610  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");
2611 
2612  SymbolicStrides[Ptr] = Stride;
2613  StrideSet.insert(Stride);
2614 }
2615 
2616 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
2617  const TargetLibraryInfo *TLI, AAResults *AA,
2618  DominatorTree *DT, LoopInfo *LI)
2619  : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
2620  PtrRtChecking(nullptr),
2621  DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L) {
2622  PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
2623  if (canAnalyzeLoop()) {
2624  analyzeLoop(AA, LI, TLI, DT);
2625  }
2626 }
2627 
2628 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
2629  if (CanVecMem) {
2630  OS.indent(Depth) << "Memory dependences are safe";
2631  if (MaxSafeDepDistBytes != -1ULL)
2632  OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
2633  << " bytes";
2634  if (PtrRtChecking->Need)
2635  OS << " with run-time checks";
2636  OS << "\n";
2637  }
2638 
2639  if (HasConvergentOp)
2640  OS.indent(Depth) << "Has convergent operation in loop\n";
2641 
2642  if (Report)
2643  OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
2644 
2645  if (auto *Dependences = DepChecker->getDependences()) {
2646  OS.indent(Depth) << "Dependences:\n";
2647  for (const auto &Dep : *Dependences) {
2648  Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
2649  OS << "\n";
2650  }
2651  } else
2652  OS.indent(Depth) << "Too many dependences, not recorded\n";
2653 
2654  // List the pairs of accesses that need run-time checks to prove independence.
2655  PtrRtChecking->print(OS, Depth);
2656  OS << "\n";
2657 
2658  OS.indent(Depth) << "Non vectorizable stores to invariant address were "
2659  << (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
2660  << "found in loop.\n";
2661 
2662  OS.indent(Depth) << "SCEV assumptions:\n";
2663  PSE->getPredicate().print(OS, Depth);
2664 
2665  OS << "\n";
2666 
2667  OS.indent(Depth) << "Expressions re-written:\n";
2668  PSE->print(OS, Depth);
2669 }
2670 
2671 const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L) {
2672  auto I = LoopAccessInfoMap.insert({&L, nullptr});
2673 
2674  if (I.second)
2675  I.first->second =
2676  std::make_unique<LoopAccessInfo>(&L, &SE, TLI, &AA, &DT, &LI);
2677 
2678  return *I.first->second;
2679 }
2680 
2681 LoopAccessLegacyAnalysis::LoopAccessLegacyAnalysis() : FunctionPass(ID) {
2682  initializeLoopAccessLegacyAnalysisPass(*PassRegistry::getPassRegistry());
2683 }
2684 
2685 bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
2686  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2687  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2688  auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
2689  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2690  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2691  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2692  LAIs = std::make_unique<LoopAccessInfoManager>(SE, AA, DT, LI, TLI);
2693  return false;
2694 }
2695 
2696 void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
2697  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
2698  AU.addRequiredTransitive<AAResultsWrapperPass>();
2699  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
2700  AU.addRequiredTransitive<LoopInfoWrapperPass>();
2701 
2702  AU.setPreservesAll();
2703 }
2704 
2705 LoopAccessInfoManager LoopAccessAnalysis::run(Function &F,
2706  FunctionAnalysisManager &FAM) {
2707  return LoopAccessInfoManager(
2708  FAM.getResult<ScalarEvolutionAnalysis>(F), FAM.getResult<AAManager>(F),
2709  FAM.getResult<DominatorTreeAnalysis>(F), FAM.getResult<LoopAnalysis>(F),
2710  &FAM.getResult<TargetLibraryAnalysis>(F));
2711 }
2712 
2713 char LoopAccessLegacyAnalysis::ID = 0;
2714 static const char laa_name[] = "Loop Access Analysis";
2715 #define LAA_NAME "loop-accesses"
2716 
2717 INITIALIZE_PASS_BEGIN(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false,
2718  true)
2719 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2720 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
2721 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2722 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
2723 INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2724 AnalysisKey LoopAccessAnalysis::Key;
2725 
2726 namespace llvm {
2727 
2728  Pass *createLAAPass() {
2729  return new LoopAccessLegacyAnalysis();
2730  }
2731 
2732 } // end namespace llvm
llvm::sortPtrAccesses
bool sortPtrAccesses(ArrayRef< Value * > VL, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)
Attempt to sort the pointers in VL and return the sorted indices in SortedIndices,...
Definition: LoopAccessAnalysis.cpp:1546
i
i
Definition: README.txt:29
llvm::MemoryDepChecker::clearDependences
void clearDependences()
Definition: LoopAccessAnalysis.h:225
llvm::APInt::isStrictlyPositive
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:339
llvm::BatchAAResults
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
Definition: AliasAnalysis.h:609
set
We currently generate a but we really shouldn eax ecx xorl edx divl ecx eax divl ecx movl eax ret A similar code sequence works for division We currently compile i32 v2 eax eax jo LBB1_2 atomic and others It is also currently not done for read modify write instructions It is also current not done if the OF or CF flags are needed The shift operators have the complication that when the shift count is EFLAGS is not set
Definition: README.txt:1277
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:881
llvm::LoopAccessLegacyAnalysis::ID
static char ID
Definition: LoopAccessAnalysis.h:798
llvm::MemoryDepChecker::Dependence::isBackward
bool isBackward() const
Lexically backward dependence.
Definition: LoopAccessAnalysis.cpp:1640
llvm::MemoryDepChecker
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
Definition: LoopAccessAnalysis.h:87
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2161
llvm::LoopAccessLegacyAnalysis
This analysis provides dependence information for the memory accesses of a loop.
Definition: LoopAccessAnalysis.h:796
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:109
llvm::MemoryLocation::get
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Definition: MemoryLocation.cpp:36
llvm::ScalarEvolution::getNegativeSCEV
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
Definition: ScalarEvolution.cpp:4464
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::sys::path::const_iterator::end
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
getMinFromExprs
static const SCEV * getMinFromExprs(const SCEV *I, const SCEV *J, ScalarEvolution *SE)
Compare I and J and return the minimum.
Definition: LoopAccessAnalysis.cpp:369
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::MemoryDepChecker::VectorizationSafetyStatus::Safe
@ Safe
llvm::ScalarEvolution::getEffectiveSCEVType
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
Definition: ScalarEvolution.cpp:4345
llvm::PredicatedScalarEvolution::getPredicate
const SCEVPredicate & getPredicate() const
Definition: ScalarEvolution.cpp:14620
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::LoopAccessInfoManager
Definition: LoopAccessAnalysis.h:768
llvm::LoopAccessInfo::isUniform
bool isUniform(Value *V) const
Returns true if the value V is uniform within the loop.
Definition: LoopAccessAnalysis.cpp:2546
llvm::EquivalenceClasses::getLeaderValue
const ElemTy & getLeaderValue(const ElemTy &V) const
getLeaderValue - Return the leader for the specified value that is in the set.
Definition: EquivalenceClasses.h:191
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:370
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:237
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
llvm::MemoryLocation::Ptr
const Value * Ptr
The address of the start of the location.
Definition: MemoryLocation.h:219
llvm::SCEVPredicate::print
virtual void print(raw_ostream &OS, unsigned Depth=0) const =0
Prints a textual representation of this predicate with an indentation of Depth.
llvm::SCEVAddRecExpr::getStart
const SCEV * getStart() const
Definition: ScalarEvolutionExpressions.h:353
llvm::getVectorIntrinsicIDForCall
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
Definition: VectorUtils.cpp:135
llvm::TypeSize::getFixedSize
ScalarTy getFixedSize() const
Definition: TypeSize.h:444
llvm::RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup
RuntimeCheckingPtrGroup(unsigned Index, RuntimePointerChecking &RtCheck)
Create a new pointer checking group containing a single pointer, with index Index in RtCheck.
Definition: LoopAccessAnalysis.cpp:177
llvm::Function
Definition: Function.h:60
llvm::cl::location
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:466
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:139
EnableMemAccessVersioning
static cl::opt< bool > EnableMemAccessVersioning("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symbolic stride memory access versioning"))
This enables versioning on the strides of symbolically striding memory accesses in code like the foll...
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
llvm::isGuaranteedNotToBeUndefOrPoison
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
Definition: ValueTracking.cpp:5464
llvm::PredicatedScalarEvolution
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
Definition: ScalarEvolution.h:2222
High
uint64_t High
Definition: NVVMIntrRange.cpp:61
llvm::MemoryDepChecker::Dependence::getSource
Instruction * getSource(const LoopAccessInfo &LAI) const
Return the source instruction of the dependence.
Definition: LoopAccessAnalysis.h:839
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::EquivalenceClasses::member_end
member_iterator member_end() const
Definition: EquivalenceClasses.h:178
CheckType
static LLVM_ATTRIBUTE_ALWAYS_INLINE bool CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL)
Definition: SelectionDAGISel.cpp:2552
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1498
llvm::EquivalenceClasses
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
Definition: EquivalenceClasses.h:60
llvm::PredicatedScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount()
Get the (predicated) backedge count for the analyzed loop.
Definition: ScalarEvolution.cpp:14599
ErrorHandling.h
LoopAccessAnalysis.h
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:729
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:631
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::VectorizerParams::VectorizationInterleave
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.
Definition: LoopAccessAnalysis.h:44
MaxDependences
static cl::opt< unsigned > MaxDependences("max-dependences", cl::Hidden, cl::desc("Maximum number of dependences collected by " "loop-access analysis (default = 100)"), cl::init(100))
We collect dependences up to this threshold.
ValueTracking.h
OptimizationRemarkEmitter.h
llvm::SCEV::NoWrapMask
@ NoWrapMask
Definition: ScalarEvolution.h:135
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::MemoryDepChecker::Dependence::isPossiblyBackward
bool isPossiblyBackward() const
May be a lexically backward dependence type (includes Unknown).
Definition: LoopAccessAnalysis.cpp:1656
llvm::AliasSetTracker
Definition: AliasSetTracker.h:304
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::LoopAccessAnalysis
This analysis provides dependence information for the memory accesses of a loop.
Definition: LoopAccessAnalysis.h:828
APInt.h
llvm::getLoadStoreType
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
Definition: Instructions.h:5403
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
true
basic Basic Alias true
Definition: BasicAliasAnalysis.cpp:1794
llvm::DenseMapIterator
Definition: DenseMap.h:57
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
DenseMap.h
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1431
llvm::RuntimePointerChecking::needsChecking
bool needsChecking(const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
Definition: LoopAccessAnalysis.cpp:358
Groups
static const X86InstrFMA3Group Groups[]
Definition: X86InstrFMA3Info.cpp:73
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1293
llvm::copy
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1836
llvm::MemoryDepChecker::areDepsSafe
bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps, const ValueToValueMap &Strides)
Check whether the dependencies between the accesses are safe.
Definition: LoopAccessAnalysis.cpp:2023
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:136
llvm::isConsecutiveAccess
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
Definition: LoopAccessAnalysis.cpp:1591
llvm::Optional< int64_t >
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
llvm::SmallPtrSet< Value *, 16 >
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::RuntimePointerChecking::getNumberOfChecks
unsigned getNumberOfChecks() const
Returns the number of run-time checks required according to needsChecking.
Definition: LoopAccessAnalysis.h:469
Operator.h
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::EquivalenceClasses::insert
iterator insert(const ElemTy &Data)
insert - Insert a new value into the union/find set, ignoring the request if the value already exists...
Definition: EquivalenceClasses.h:220
llvm::ScalarEvolution::getTruncateOrSignExtend
const SCEV * getTruncateOrSignExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
Definition: ScalarEvolution.cpp:4612
STLExtras.h
llvm::MemoryDepChecker::getInnermostLoop
const Loop * getInnermostLoop() const
Definition: LoopAccessAnalysis.h:257
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:261
llvm::PredicatedScalarEvolution::getAsAddRec
const SCEVAddRecExpr * getAsAddRec(Value *V)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
Definition: ScalarEvolution.cpp:14666
llvm::MemoryDepChecker::Dependence::NoDep
@ NoDep
Definition: LoopAccessAnalysis.h:112
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:159
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
DepthFirstIterator.h
llvm::stripIntegerCast
Value * stripIntegerCast(Value *V)
Definition: LoopAccessAnalysis.cpp:143
llvm::MemoryDepChecker::Dependence::DepType
DepType
The type of the dependence.
Definition: LoopAccessAnalysis.h:110
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::ScalarEvolution::getSizeOfExpr
const SCEV * getSizeOfExpr(Type *IntTy, Type *AllocTy)
Return an expression for the alloc size of AllocTy that is type IntTy.
Definition: ScalarEvolution.cpp:4270
LoopAnalysisManager.h
llvm::MemoryDepChecker::Dependence::isForward
bool isForward() const
Lexically forward dependence.
Definition: LoopAccessAnalysis.cpp:1660
AliasAnalysis.h
llvm::LoopBlocksRPO
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
PointerIntPair.h
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ScalarEvolution::getMulExpr
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:3090
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
Instruction.h
CommandLine.h
llvm::MemoryLocation::AATags
AAMDNodes AATags
The metadata nodes which describe the aliasing of the location (each member is null if that kind of ...
Definition: MemoryLocation.h:232
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1734
llvm::APInt::isNegative
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:312
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
llvm::ScalarEvolution::getOne
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
Definition: ScalarEvolution.h:649
Constants.h
llvm::MemoryDepChecker::VectorizationSafetyStatus
VectorizationSafetyStatus
Type to keep track of the status of the dependence check.
Definition: LoopAccessAnalysis.h:97
llvm::AAResults
Definition: AliasAnalysis.h:294
isNoWrapAddRec
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, PredicatedScalarEvolution &PSE, const Loop *L)
Return true if an AddRec pointer Ptr is unsigned non-wrapping, i.e.
Definition: LoopAccessAnalysis.cpp:1322
llvm::LoopAccessInfo::blockNeedsPredication
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition: LoopAccessAnalysis.cpp:2517
llvm::MemoryDepChecker::Dependence::BackwardVectorizable
@ BackwardVectorizable
Definition: LoopAccessAnalysis.h:132
llvm::less_first
Function object to check whether the first component of a std::pair compares less than the first comp...
Definition: STLExtras.h:1476
llvm::Loop::isAnnotatedParallel
bool isAnnotatedParallel() const
Returns true if the loop is annotated parallel.
Definition: LoopInfo.cpp:564
llvm::getStrideFromPointer
Value * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
Definition: VectorUtils.cpp:209
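A hedged usage sketch (Ptr, SE and Lp are assumed to be in scope): a non-null result is the loop-invariant symbolic stride of an access such as A[i * Stride], a natural candidate for stride versioning.
  if (llvm::Value *StrideVal = llvm::getStrideFromPointer(Ptr, SE, Lp)) {
    // The loop can be versioned on a runtime guard such as Stride == 1,
    // with the guarded copy specialized for unit-stride accesses.
  }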
llvm::ScalarEvolution::getUMaxExpr
const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
Definition: ScalarEvolution.cpp:4227
InstrTypes.h
llvm::MemoryDepChecker::getOrderForAccess
ArrayRef< unsigned > getOrderForAccess(Value *Ptr, bool IsWrite) const
Return the program order indices for the access location (Ptr, IsWrite).
Definition: LoopAccessAnalysis.h:250
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::MemoryDepChecker::Dependence::print
void print(raw_ostream &OS, unsigned Depth, const SmallVectorImpl< Instruction * > &Instrs) const
Print the dependence.
Definition: LoopAccessAnalysis.cpp:2116
llvm::PredicatedScalarEvolution::hasNoOverflow
bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Returns true if we've proved that V doesn't wrap by means of a SCEV predicate.
Definition: ScalarEvolution.cpp:14650
MaxForkedSCEVDepth
static cl::opt< unsigned > MaxForkedSCEVDepth("max-forked-scev-depth", cl::Hidden, cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"), cl::init(5))
TargetLibraryInfo.h
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:246
llvm::RuntimeCheckingPtrGroup::AddressSpace
unsigned AddressSpace
Address space of the involved pointers.
Definition: LoopAccessAnalysis.h:360
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::EquivalenceClasses::end
iterator end() const
Definition: EquivalenceClasses.h:168
llvm::Instruction
Definition: Instruction.h:42
isInBoundsGep
static bool isInBoundsGep(Value *Ptr)
Definition: LoopAccessAnalysis.cpp:1314
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:306
llvm::MemoryDepChecker::addAccess
void addAccess(StoreInst *SI)
Register the location (instructions are given increasing numbers) of a write access.
Definition: LoopAccessAnalysis.cpp:1604
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1486
llvm::MemoryDepChecker::Dependence::BackwardVectorizableButPreventsForwarding
@ BackwardVectorizableButPreventsForwarding
Definition: LoopAccessAnalysis.h:134
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:642
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2191
DebugLoc.h
SmallPtrSet.h
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:147
PatternMatch.h
llvm::getPointersDiff
Optional< int > getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB, Value *PtrB, const DataLayout &DL, ScalarEvolution &SE, bool StrictCheck=false, bool CheckType=true)
Returns the distance between the pointers PtrA and PtrB iff they are compatible and it is possible to...
Definition: LoopAccessAnalysis.cpp:1480
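A sketch of a typical query, assuming both pointers address i32 elements and that Ctx, PtrA, PtrB, DL and SE are in scope:
  llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx);
  if (llvm::Optional<int> Diff =
          llvm::getPointersDiff(Int32Ty, PtrA, Int32Ty, PtrB, DL, SE)) {
    // *Diff is the signed distance in i32 elements; a value of 1 means
    // PtrB points at the element immediately after PtrA.
  }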
llvm::ScalarEvolution::getEqualPredicate
const SCEVPredicate * getEqualPredicate(const SCEV *LHS, const SCEV *RHS)
Definition: ScalarEvolution.cpp:14249
VectorizationInterleave
static cl::opt< unsigned, true > VectorizationInterleave("force-vector-interleave", cl::Hidden, cl::desc("Sets the vectorization interleave count. " "Zero is autoselect."), cl::location(VectorizerParams::VectorizationInterleave))
llvm::RuntimePointerChecking::reset
void reset()
Reset the state of the pointer runtime information.
Definition: LoopAccessAnalysis.h:422
LoopIterator.h
Type.h
llvm::RuntimePointerChecking::PointerInfo::AliasSetId
unsigned AliasSetId
Holds the id of the disjoint alias set to which this pointer belongs.
Definition: LoopAccessAnalysis.h:404
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
isNoWrap
static bool isNoWrap(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, Type *AccessTy, Loop *L)
Check whether a pointer address cannot wrap.
Definition: LoopAccessAnalysis.cpp:760
llvm::MemoryDepChecker::getInstructionsForAccess
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Find the set of instructions that read or write via Ptr.
Definition: LoopAccessAnalysis.cpp:2101
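For example, a client holding a MemoryDepChecker (DepChecker below is an assumed local) can recover the stores behind a reported dependence:
  llvm::SmallVector<llvm::Instruction *, 4> Writes =
      DepChecker.getInstructionsForAccess(Ptr, /*isWrite=*/true);
  for (llvm::Instruction *I : Writes)
    I->print(llvm::dbgs()); // every store in the loop writing through Ptr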
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4442
isSafeDependenceDistance
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, const SCEV &BackedgeTakenCount, const SCEV &Dist, uint64_t Stride, uint64_t TypeByteSize)
Given a dependence-distance Dist between two memory accesses that have the same stride whose absolut...
Definition: LoopAccessAnalysis.cpp:1737
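An intuition sketch with illustrative numbers (not taken from the implementation):
  // With a backedge-taken count of 100, Stride = 1 and TypeByteSize = 4,
  // each access stream spans at most about 100 * 1 * 4 = 400 bytes across
  // the whole loop.  A dependence distance larger than that span can never
  // be realized by any iteration pair, so it is safe for any vectorization
  // factor.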
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLFunctionalExtras.h:36
llvm::RuntimePointerChecking::PointerInfo
Definition: LoopAccessAnalysis.h:389
VectorUtils.h
llvm::MemoryDepChecker::VectorizationSafetyStatus::Unsafe
@ Unsafe
llvm::initializeLoopAccessLegacyAnalysisPass
void initializeLoopAccessLegacyAnalysisPass(PassRegistry &)
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1411
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::EquivalenceClasses::iterator
typename std::set< ECValue, ECValueComparator >::const_iterator iterator
iterator* - Provides a way to iterate over all values in the set.
Definition: EquivalenceClasses.h:165
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:298
llvm::getUnderlyingObjects
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
Definition: ValueTracking.cpp:4551
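A small usage sketch (Ptr and LI are assumed to be in scope):
  llvm::SmallVector<const llvm::Value *, 4> Objects;
  llvm::getUnderlyingObjects(Ptr, Objects, LI);
  // Objects now lists every base object (alloca, argument, global, ...)
  // that Ptr may be derived from, looking through PHIs and selects.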
llvm::SmallSet::count
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:165
llvm::ArrayRef::drop_front
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:203
llvm::RuntimePointerChecking
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
Definition: LoopAccessAnalysis.h:385
llvm::VectorizerParams::VectorizationFactor
static unsigned VectorizationFactor
VF as overridden by the user.
Definition: LoopAccessAnalysis.h:42
llvm::RuntimePointerChecking::Need
bool Need
This flag indicates if we need to add the runtime check.
Definition: LoopAccessAnalysis.h:480
llvm::getPointerOperand
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
Definition: Instructions.h:5372
llvm::MemoryDepChecker::Dependence
Dependence between memory access instructions.
Definition: LoopAccessAnalysis.h:108
llvm::VFDatabase::getMappings
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition: VectorUtils.h:249
llvm::PredicatedScalarEvolution::setNoOverflow
void setNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Proves that V doesn't overflow by adding SCEV predicate.
Definition: ScalarEvolution.cpp:14634
llvm::MemoryDepChecker::Dependence::Backward
@ Backward
Definition: LoopAccessAnalysis.h:129
llvm::RuntimePointerChecking::PointerInfo::DependencySetId
unsigned DependencySetId
Holds the id of the set of pointers that could be dependent because of a shared underlying object.
Definition: LoopAccessAnalysis.h:402
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::RuntimePointerChecking::CheckingGroups
SmallVector< RuntimeCheckingPtrGroup, 2 > CheckingGroups
Holds a partitioning of pointers into "check groups".
Definition: LoopAccessAnalysis.h:486
findForkedPointer
static SmallVector< PointerIntPair< const SCEV *, 1, bool > > findForkedPointer(PredicatedScalarEvolution &PSE, const ValueToValueMap &StridesMap, Value *Ptr, const Loop *L)
Definition: LoopAccessAnalysis.cpp:953
llvm::getPtrStride
Optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
Definition: LoopAccessAnalysis.cpp:1370
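A sketch of the common query pattern (PSE, AccessTy, Ptr and Lp assumed in scope; the defaulted StridesMap is left empty):
  llvm::Optional<int64_t> Stride = llvm::getPtrStride(PSE, AccessTy, Ptr, Lp);
  // +1 means a consecutive forward access in units of AccessTy, -1 a
  // consecutive reverse access; an empty Optional means no constant
  // stride could be established.
  bool IsConsecutiveForward = Stride && *Stride == 1;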
laa_name
static const char laa_name[]
Definition: LoopAccessAnalysis.cpp:2714
MemoryLocation.h
llvm::DenseMap< const Value *, Value * >
llvm::SCEV::FlagNSW
@ FlagNSW
Definition: ScalarEvolution.h:134
llvm::AnalysisKey
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: PassManager.h:69
llvm::RuntimeCheckingPtrGroup
A grouping of pointers.
Definition: LoopAccessAnalysis.h:337
llvm::ScalarEvolution::getPtrToIntExpr
const SCEV * getPtrToIntExpr(const SCEV *Op, Type *Ty)
Definition: ScalarEvolution.cpp:1204
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:929
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:446
llvm::LoopAccessInfo
Drive the analysis of memory accesses in the loop.
Definition: LoopAccessAnalysis.h:562
llvm::VectorizerParams::MaxVectorWidth
static const unsigned MaxVectorWidth
Maximum SIMD width.
Definition: LoopAccessAnalysis.h:39
llvm::SCEVWrapPredicate::IncrementNUSW
@ IncrementNUSW
Definition: ScalarEvolution.h:344
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:60
visitPointers
static void visitPointers(Value *StartPtr, const Loop &InnermostLoop, function_ref< void(Value *)> AddPointer)
Definition: LoopAccessAnalysis.cpp:774
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:232
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::PredicatedScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
Definition: ScalarEvolution.cpp:14580
llvm::MemoryDepChecker::isSafeForVectorization
bool isSafeForVectorization() const
No memory dependence was encountered that would inhibit vectorization.
Definition: LoopAccessAnalysis.h:191
llvm::VectorizerParams::RuntimeMemoryCheckThreshold
static unsigned RuntimeMemoryCheckThreshold
When performing memory disambiguation checks at runtime do not make more than this number of compari...
Definition: LoopAccessAnalysis.h:50
iterator_range.h
LAA_NAME
#define LAA_NAME
Definition: LoopAccessAnalysis.cpp:2715
llvm::ScalarEvolution::getNoopOrSignExtend
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
Definition: ScalarEvolution.cpp:4637
llvm::RuntimeCheckingPtrGroup::High
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
Definition: LoopAccessAnalysis.h:353
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
llvm::RuntimeCheckingPtrGroup::Low
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.
Definition: LoopAccessAnalysis.h:356
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::SCEVNAryExpr::getNoWrapFlags
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
Definition: ScalarEvolutionExpressions.h:213
llvm::ArrayRef< unsigned >
llvm::LoopInfo
Definition: LoopInfo.h:1108
llvm::replaceSymbolicStrideSCEV
const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
Definition: LoopAccessAnalysis.cpp:150
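A hedged sketch (SymbolicStrides names the assumed pointer-to-stride map):
  const llvm::SCEV *PtrScev =
      llvm::replaceSymbolicStrideSCEV(PSE, SymbolicStrides, Ptr);
  // With the symbolic stride assumed to be 1 (a runtime predicate added
  // elsewhere enforces this), PtrScev typically becomes a plain AddRec
  // such as {%base,+,4}<%loop> that distance and bounds checks can handle.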
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1741
DataLayout.h
EnableForwardingConflictDetection
static cl::opt< bool > EnableForwardingConflictDetection("store-to-load-forwarding-conflict-detection", cl::Hidden, cl::desc("Enable conflict detection in loop-access analysis"), cl::init(true))
Enable store-to-load forwarding conflict detection.
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::EquivalenceClasses::member_begin
member_iterator member_begin(iterator I) const
Definition: EquivalenceClasses.h:174
llvm::MemoryDepChecker::MemAccessInfo
PointerIntPair< Value *, 1, bool > MemAccessInfo
Definition: LoopAccessAnalysis.h:89
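The pair packs the accessed pointer together with an is-write bit; a tiny sketch:
  llvm::MemoryDepChecker::MemAccessInfo Acc(Ptr, /*IsWrite=*/true);
  llvm::Value *P = Acc.getPointer(); // the accessed pointer
  bool IsWrite = Acc.getInt();       // true for writes, false for reads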
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:472
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::transform
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1909
llvm::cl::Sink
@ Sink
Definition: CommandLine.h:167
llvm::ScalarEvolution::getStoreSizeOfExpr
const SCEV * getStoreSizeOfExpr(Type *IntTy, Type *StoreTy)
Return an expression for the store size of StoreTy that is type IntTy.
Definition: ScalarEvolution.cpp:4279
MemoryCheckMergeThreshold
static cl::opt< unsigned > MemoryCheckMergeThreshold("memory-check-merge-threshold", cl::Hidden, cl::desc("Maximum number of comparisons done when trying to merge " "runtime memory checks. (default = 100)"), cl::init(100))
The maximum iterations used to merge memory checks.
llvm::RuntimeCheckingPtrGroup::addPointer
bool addPointer(unsigned Index, RuntimePointerChecking &RtCheck)
Tries to add the pointer recorded in RtCheck at index Index to this pointer checking group.
Definition: LoopAccessAnalysis.cpp:381
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopAccessAnalysis.cpp:72
llvm::ScalarEvolution::getUMinExpr
const SCEV * getUMinExpr(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)
Definition: ScalarEvolution.cpp:4246
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:780
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
ValueHandle.h
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::VectorizerParams::isInterleaveForced
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
Definition: LoopAccessAnalysis.cpp:139
llvm::RuntimePointerChecking::RuntimeCheckingPtrGroup
friend struct RuntimeCheckingPtrGroup
Definition: LoopAccessAnalysis.h:386
llvm::LoopAccessLegacyAnalysis::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition: LoopAccessAnalysis.cpp:2696
llvm::Value::stripAndAccumulateInBoundsConstantOffsets
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition: Value.h:724
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:645
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:13662
llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1761
RuntimeMemoryCheckThreshold
static cl::opt< unsigned, true > RuntimeMemoryCheckThreshold("runtime-memory-check-threshold", cl::Hidden, cl::desc("When performing memory disambiguation checks at runtime do not " "generate more than this number of comparisons (default = 8)."), cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8))
llvm::ScalarEvolution::isKnownPositive
bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
Definition: ScalarEvolution.cpp:10696
llvm::MemoryDepChecker::Dependence::Unknown
@ Unknown
Definition: LoopAccessAnalysis.h:114
llvm::createLAAPass
Pass * createLAAPass()
Definition: LoopAccessAnalysis.cpp:2728
llvm::RuntimePointerChecking::Pointers
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
Definition: LoopAccessAnalysis.h:483
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:182
llvm::RuntimePointerChecking::arePointersInSamePartition
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
Definition: LoopAccessAnalysis.cpp:546
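A sketch with a hypothetical partitioning; indices refer to positions in the Pointers list:
  // Pointers 0 and 1 were placed in partition 0, pointer 2 in partition 1.
  llvm::SmallVector<int, 4> PtrToPartition = {0, 0, 1};
  bool Same01 = llvm::RuntimePointerChecking::arePointersInSamePartition(
      PtrToPartition, 0, 1); // true
  bool Same02 = llvm::RuntimePointerChecking::arePointersInSamePartition(
      PtrToPartition, 0, 2); // false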
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:354
llvm::LoopAccessLegacyAnalysis::LoopAccessLegacyAnalysis
LoopAccessLegacyAnalysis()
Definition: LoopAccessAnalysis.cpp:2681
hasComputableBounds
static bool hasComputableBounds(PredicatedScalarEvolution &PSE, Value *Ptr, const SCEV *PtrScev, Loop *L, bool Assume)
Check whether a pointer can participate in a runtime bounds check.
Definition: LoopAccessAnalysis.cpp:742
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
Definition: ScalarEvolution.cpp:4550
llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition: PassAnalysisSupport.h:130
llvm::LocationSize::beforeOrAfterPointer
constexpr static LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Definition: MemoryLocation.h:131
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:84
VectorizationFactor
static cl::opt< unsigned, true > VectorizationFactor("force-vector-width", cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."), cl::location(VectorizerParams::VectorizationFactor))
llvm::EquivalenceClasses::findValue
iterator findValue(const ElemTy &V) const
findValue - Return an iterator to the specified value.
Definition: EquivalenceClasses.h:184
llvm::APInt::sextOrTrunc
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:1002
llvm::RuntimePointerChecking::printChecks
void printChecks(raw_ostream &OS, const SmallVectorImpl< RuntimePointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
Definition: LoopAccessAnalysis.cpp:572
llvm::TypeSize
Definition: TypeSize.h:435
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:342
Casting.h
DiagnosticInfo.h
Function.h
llvm::LoopAccessInfo::LoopAccessInfo
LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT, LoopInfo *LI)
Definition: LoopAccessAnalysis.cpp:2616
llvm::MemoryDepChecker::Dependence::DepName
static const char * DepName[]
String version of the types.
Definition: LoopAccessAnalysis.h:138
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:105
llvm::RuntimePointerChecking::PointerInfo::IsWritePtr
bool IsWritePtr
Holds the information if this pointer is used for writing to memory.
Definition: LoopAccessAnalysis.h:399
llvm::ScalarEvolution::isSCEVable
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition: ScalarEvolution.cpp:4328
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:226
EquivalenceClasses.h
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:614
llvm::PredicatedScalarEvolution::addPredicate
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
Definition: ScalarEvolution.cpp:14609
llvm::LoopBase::getNumBackEdges
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:267
llvm::SmallSet::insert
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:178
llvm::AAMDNodes::TBAA
MDNode * TBAA
The tag for type-based alias analysis.
Definition: Metadata.h:669
llvm::MemoryDepChecker::Dependence::Forward
@ Forward
Definition: LoopAccessAnalysis.h:124
areStridedAccessesIndependent
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, uint64_t TypeByteSize)
Check the dependence for two accesses with the same stride Stride.
Definition: LoopAccessAnalysis.cpp:1797
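A worked example of the idea (illustrative numbers, not lifted from the implementation):
  // Stride = 3 elements, TypeByteSize = 4, Distance = 8 bytes.
  // 8 bytes is exactly 2 elements, and 2 is not a multiple of 3, so the
  // element-index sequences {0, 3, 6, ...} and {2, 5, 8, ...} never
  // coincide: the two strided accesses are independent.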
findForkedSCEVs
static void findForkedSCEVs(ScalarEvolution *SE, const Loop *L, Value *Ptr, SmallVectorImpl< PointerIntPair< const SCEV *, 1, bool >> &ScevList, unsigned Depth)
Definition: LoopAccessAnalysis.cpp:814
llvm::APInt::abs
APInt abs() const
Get the absolute value.
Definition: APInt.h:1706
ScalarEvolutionExpressions.h
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:271
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
llvm::ScalarEvolution::getZeroExtendExpr
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
Definition: ScalarEvolution.cpp:1605
Instructions.h
llvm::PointerIntPair< Value *, 1, bool >
INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:51
llvm::RuntimePointerChecking::insert
void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy, bool WritePtr, unsigned DepSetId, unsigned ASId, PredicatedScalarEvolution &PSE, bool NeedsFreeze)
Insert a pointer and calculate the start and end SCEVs.
Definition: LoopAccessAnalysis.cpp:200
SmallVector.h
llvm::raw_ostream::indent
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
Definition: raw_ostream.cpp:494
Dominators.h
llvm::MemoryDepChecker::Dependence::Type
DepType Type
The type of the dependence.
Definition: LoopAccessAnalysis.h:145
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:929
llvm::LoopAccessInfo::print
void print(raw_ostream &OS, unsigned Depth=0) const
Print the information about the memory accesses in the loop.
Definition: LoopAccessAnalysis.cpp:2628
llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition: SmallVector.h:677
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:164
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::SmallVectorImpl< int >
llvm::MemoryDepChecker::Dependence::isSafeForVectorization
static VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
Definition: LoopAccessAnalysis.cpp:1623
llvm::EquivalenceClasses::unionSets
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
Definition: EquivalenceClasses.h:238
llvm::AnalysisUsage::addRequiredTransitive
AnalysisUsage & addRequiredTransitive()
Definition: PassAnalysisSupport.h:81
llvm::GlobalValue::getType
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:290
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
DerivedTypes.h
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:403
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::getLoadStorePointerOperand
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Definition: Instructions.h:5358
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2493
llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:310
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
AliasSetTracker.h
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::PredicatedScalarEvolution::print
void print(raw_ostream &OS, unsigned Depth) const
Print the SCEV mappings done by the Predicated Scalar Evolution.
Definition: ScalarEvolution.cpp:14690
llvm::cl::desc
Definition: CommandLine.h:412
raw_ostream.h
llvm::NullPointerIsDefined
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:2118
llvm::PredicatedScalarEvolution::getSE
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
Definition: ScalarEvolution.h:2254
llvm::EquivalenceClasses::member_iterator
Definition: EquivalenceClasses.h:272
llvm::LoopAccessInfoManager::getInfo
const LoopAccessInfo & getInfo(Loop &L)
Definition: LoopAccessAnalysis.cpp:2671
llvm::LoopAccessAnalysis::run
Result run(Function &F, FunctionAnalysisManager &AM)
Definition: LoopAccessAnalysis.cpp:2705
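A sketch of querying the analysis from a new-pass-manager function pass (MyPass is hypothetical):
  #include "llvm/Analysis/LoopAccessAnalysis.h"
  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/IR/PassManager.h"
  #include "llvm/Support/Debug.h"
  using namespace llvm;

  PreservedAnalyses MyPass::run(Function &F, FunctionAnalysisManager &AM) {
    LoopAccessInfoManager &LAIs = AM.getResult<LoopAccessAnalysis>(F);
    for (Loop *L : AM.getResult<LoopAnalysis>(F))
      LAIs.getInfo(*L).print(dbgs(), /*Depth=*/2); // dump per-loop results
    return PreservedAnalyses::all();
  }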
Value.h
llvm::RuntimePointerChecking::print
void print(raw_ostream &OS, unsigned Depth=0) const
Print the list of run-time memory checks necessary.
Definition: LoopAccessAnalysis.cpp:591
llvm::RuntimeCheckingPtrGroup::Members
SmallVector< unsigned, 2 > Members
Indices of all the pointers that constitute this grouping.
Definition: LoopAccessAnalysis.h:358
llvm::Optional::value_or
constexpr T value_or(U &&alt) const &
Definition: Optional.h:289
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:450
llvm::MemoryDepChecker::Dependence::ForwardButPreventsForwarding
@ ForwardButPreventsForwarding
Definition: LoopAccessAnalysis.h:127
llvm::RuntimeCheckingPtrGroup::NeedsFreeze
bool NeedsFreeze
Whether the pointer needs to be frozen after expansion, e.g.
Definition: LoopAccessAnalysis.h:363
llvm::RuntimePointerChecking::generateChecks
void generateChecks(MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies)
Generate the checks and store it.
Definition: LoopAccessAnalysis.cpp:351
llvm::MemoryLocation
Representation for a specific memory location.
Definition: MemoryLocation.h:211
llvm::MemoryDepChecker::Dependence::getDestination
Instruction * getDestination(const LoopAccessInfo &LAI) const
Return the destination instruction of the dependence.
Definition: LoopAccessAnalysis.h:844
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:360
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1268
llvm::LoopAccessLegacyAnalysis::runOnFunction
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
Definition: LoopAccessAnalysis.cpp:2685
SetVector.h
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::SCEVAddRecExpr::evaluateAtIteration
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
Definition: ScalarEvolution.cpp:1053
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::MemoryDepChecker::VectorizationSafetyStatus::PossiblySafeWithRtChecks
@ PossiblySafeWithRtChecks