LLVM  9.0.0svn
LoopLoadElimination.cpp
Go to the documentation of this file.
1 //===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implement a loop-aware load elimination pass.
10 //
11 // It uses LoopAccessAnalysis to identify loop-carried dependences with a
12 // distance of one between stores and loads. These form the candidates for the
13 // transformation. The source value of each store then propagated to the user
14 // of the corresponding load. This makes the load dead.
15 //
16 // The pass can also version the loop and add memchecks in order to prove that
17 // may-aliasing stores can't change the value in memory before it's read by the
18 // load.
19 //
20 //===----------------------------------------------------------------------===//
21 
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/DenseMap.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/ADT/SmallPtrSet.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/Statistic.h"
35 #include "llvm/Analysis/LoopInfo.h"
41 #include "llvm/IR/DataLayout.h"
42 #include "llvm/IR/Dominators.h"
43 #include "llvm/IR/Instructions.h"
44 #include "llvm/IR/Module.h"
45 #include "llvm/IR/PassManager.h"
46 #include "llvm/IR/Type.h"
47 #include "llvm/IR/Value.h"
48 #include "llvm/Pass.h"
49 #include "llvm/Support/Casting.h"
51 #include "llvm/Support/Debug.h"
53 #include "llvm/Transforms/Scalar.h"
54 #include "llvm/Transforms/Utils.h"
56 #include <algorithm>
57 #include <cassert>
58 #include <forward_list>
59 #include <set>
60 #include <tuple>
61 #include <utility>
62 
63 using namespace llvm;
64 
65 #define LLE_OPTION "loop-load-elim"
66 #define DEBUG_TYPE LLE_OPTION
67 
69  "runtime-check-per-loop-load-elim", cl::Hidden,
70  cl::desc("Max number of memchecks allowed per eliminated load on average"),
71  cl::init(1));
72 
74  "loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden,
75  cl::desc("The maximum number of SCEV checks allowed for Loop "
76  "Load Elimination"));
77 
78 STATISTIC(NumLoopLoadEliminted, "Number of loads eliminated by LLE");
79 
80 namespace {
81 
82 /// Represent a store-to-forwarding candidate.
83 struct StoreToLoadForwardingCandidate {
84  LoadInst *Load;
86 
87  StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store)
88  : Load(Load), Store(Store) {}
89 
90  /// Return true if the dependence from the store to the load has a
91  /// distance of one. E.g. A[i+1] = A[i]
92  bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
93  Loop *L) const {
94  Value *LoadPtr = Load->getPointerOperand();
95  Value *StorePtr = Store->getPointerOperand();
96  Type *LoadPtrType = LoadPtr->getType();
97  Type *LoadType = LoadPtrType->getPointerElementType();
98 
99  assert(LoadPtrType->getPointerAddressSpace() ==
100  StorePtr->getType()->getPointerAddressSpace() &&
101  LoadType == StorePtr->getType()->getPointerElementType() &&
102  "Should be a known dependence");
103 
104  // Currently we only support accesses with unit stride. FIXME: we should be
105  // able to handle non unit stirde as well as long as the stride is equal to
106  // the dependence distance.
107  if (getPtrStride(PSE, LoadPtr, L) != 1 ||
108  getPtrStride(PSE, StorePtr, L) != 1)
109  return false;
110 
111  auto &DL = Load->getParent()->getModule()->getDataLayout();
112  unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));
113 
114  auto *LoadPtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(LoadPtr));
115  auto *StorePtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(StorePtr));
116 
117  // We don't need to check non-wrapping here because forward/backward
118  // dependence wouldn't be valid if these weren't monotonic accesses.
119  auto *Dist = cast<SCEVConstant>(
120  PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
121  const APInt &Val = Dist->getAPInt();
122  return Val == TypeByteSize;
123  }
124 
125  Value *getLoadPtr() const { return Load->getPointerOperand(); }
126 
127 #ifndef NDEBUG
128  friend raw_ostream &operator<<(raw_ostream &OS,
129  const StoreToLoadForwardingCandidate &Cand) {
130  OS << *Cand.Store << " -->\n";
131  OS.indent(2) << *Cand.Load << "\n";
132  return OS;
133  }
134 #endif
135 };
136 
137 } // end anonymous namespace
138 
139 /// Check if the store dominates all latches, so as long as there is no
140 /// intervening store this value will be loaded in the next iteration.
141 static bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L,
142  DominatorTree *DT) {
144  L->getLoopLatches(Latches);
145  return llvm::all_of(Latches, [&](const BasicBlock *Latch) {
146  return DT->dominates(StoreBlock, Latch);
147  });
148 }
149 
150 /// Return true if the load is not executed on all paths in the loop.
151 static bool isLoadConditional(LoadInst *Load, Loop *L) {
152  return Load->getParent() != L->getHeader();
153 }
154 
155 namespace {
156 
157 /// The per-loop class that does most of the work.
158 class LoadEliminationForLoop {
159 public:
160  LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
161  DominatorTree *DT)
162  : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.getPSE()) {}
163 
164  /// Look through the loop-carried and loop-independent dependences in
165  /// this loop and find store->load dependences.
166  ///
167  /// Note that no candidate is returned if LAA has failed to analyze the loop
168  /// (e.g. if it's not bottom-tested, contains volatile memops, etc.)
169  std::forward_list<StoreToLoadForwardingCandidate>
170  findStoreToLoadDependences(const LoopAccessInfo &LAI) {
171  std::forward_list<StoreToLoadForwardingCandidate> Candidates;
172 
173  const auto *Deps = LAI.getDepChecker().getDependences();
174  if (!Deps)
175  return Candidates;
176 
177  // Find store->load dependences (consequently true dep). Both lexically
178  // forward and backward dependences qualify. Disqualify loads that have
179  // other unknown dependences.
180 
181  SmallPtrSet<Instruction *, 4> LoadsWithUnknownDepedence;
182 
183  for (const auto &Dep : *Deps) {
184  Instruction *Source = Dep.getSource(LAI);
185  Instruction *Destination = Dep.getDestination(LAI);
186 
187  if (Dep.Type == MemoryDepChecker::Dependence::Unknown) {
188  if (isa<LoadInst>(Source))
189  LoadsWithUnknownDepedence.insert(Source);
190  if (isa<LoadInst>(Destination))
191  LoadsWithUnknownDepedence.insert(Destination);
192  continue;
193  }
194 
195  if (Dep.isBackward())
196  // Note that the designations source and destination follow the program
197  // order, i.e. source is always first. (The direction is given by the
198  // DepType.)
199  std::swap(Source, Destination);
200  else
201  assert(Dep.isForward() && "Needs to be a forward dependence");
202 
203  auto *Store = dyn_cast<StoreInst>(Source);
204  if (!Store)
205  continue;
206  auto *Load = dyn_cast<LoadInst>(Destination);
207  if (!Load)
208  continue;
209 
210  // Only progagate the value if they are of the same type.
211  if (Store->getPointerOperandType() != Load->getPointerOperandType())
212  continue;
213 
214  Candidates.emplace_front(Load, Store);
215  }
216 
217  if (!LoadsWithUnknownDepedence.empty())
218  Candidates.remove_if([&](const StoreToLoadForwardingCandidate &C) {
219  return LoadsWithUnknownDepedence.count(C.Load);
220  });
221 
222  return Candidates;
223  }
224 
225  /// Return the index of the instruction according to program order.
226  unsigned getInstrIndex(Instruction *Inst) {
227  auto I = InstOrder.find(Inst);
228  assert(I != InstOrder.end() && "No index for instruction");
229  return I->second;
230  }
231 
232  /// If a load has multiple candidates associated (i.e. different
233  /// stores), it means that it could be forwarding from multiple stores
234  /// depending on control flow. Remove these candidates.
235  ///
236  /// Here, we rely on LAA to include the relevant loop-independent dependences.
237  /// LAA is known to omit these in the very simple case when the read and the
238  /// write within an alias set always takes place using the *same* pointer.
239  ///
240  /// However, we know that this is not the case here, i.e. we can rely on LAA
241  /// to provide us with loop-independent dependences for the cases we're
242  /// interested. Consider the case for example where a loop-independent
243  /// dependece S1->S2 invalidates the forwarding S3->S2.
244  ///
245  /// A[i] = ... (S1)
246  /// ... = A[i] (S2)
247  /// A[i+1] = ... (S3)
248  ///
249  /// LAA will perform dependence analysis here because there are two
250  /// *different* pointers involved in the same alias set (&A[i] and &A[i+1]).
251  void removeDependencesFromMultipleStores(
252  std::forward_list<StoreToLoadForwardingCandidate> &Candidates) {
253  // If Store is nullptr it means that we have multiple stores forwarding to
254  // this store.
255  using LoadToSingleCandT =
257  LoadToSingleCandT LoadToSingleCand;
258 
259  for (const auto &Cand : Candidates) {
260  bool NewElt;
261  LoadToSingleCandT::iterator Iter;
262 
263  std::tie(Iter, NewElt) =
264  LoadToSingleCand.insert(std::make_pair(Cand.Load, &Cand));
265  if (!NewElt) {
266  const StoreToLoadForwardingCandidate *&OtherCand = Iter->second;
267  // Already multiple stores forward to this load.
268  if (OtherCand == nullptr)
269  continue;
270 
271  // Handle the very basic case when the two stores are in the same block
272  // so deciding which one forwards is easy. The later one forwards as
273  // long as they both have a dependence distance of one to the load.
274  if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
275  Cand.isDependenceDistanceOfOne(PSE, L) &&
276  OtherCand->isDependenceDistanceOfOne(PSE, L)) {
277  // They are in the same block, the later one will forward to the load.
278  if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store))
279  OtherCand = &Cand;
280  } else
281  OtherCand = nullptr;
282  }
283  }
284 
285  Candidates.remove_if([&](const StoreToLoadForwardingCandidate &Cand) {
286  if (LoadToSingleCand[Cand.Load] != &Cand) {
287  LLVM_DEBUG(
288  dbgs() << "Removing from candidates: \n"
289  << Cand
290  << " The load may have multiple stores forwarding to "
291  << "it\n");
292  return true;
293  }
294  return false;
295  });
296  }
297 
298  /// Given two pointers operations by their RuntimePointerChecking
299  /// indices, return true if they require an alias check.
300  ///
301  /// We need a check if one is a pointer for a candidate load and the other is
302  /// a pointer for a possibly intervening store.
303  bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2,
304  const SmallPtrSet<Value *, 4> &PtrsWrittenOnFwdingPath,
305  const std::set<Value *> &CandLoadPtrs) {
306  Value *Ptr1 =
308  Value *Ptr2 =
310  return ((PtrsWrittenOnFwdingPath.count(Ptr1) && CandLoadPtrs.count(Ptr2)) ||
311  (PtrsWrittenOnFwdingPath.count(Ptr2) && CandLoadPtrs.count(Ptr1)));
312  }
313 
314  /// Return pointers that are possibly written to on the path from a
315  /// forwarding store to a load.
316  ///
317  /// These pointers need to be alias-checked against the forwarding candidates.
318  SmallPtrSet<Value *, 4> findPointersWrittenOnForwardingPath(
320  // From FirstStore to LastLoad neither of the elimination candidate loads
321  // should overlap with any of the stores.
322  //
323  // E.g.:
324  //
325  // st1 C[i]
326  // ld1 B[i] <-------,
327  // ld0 A[i] <----, | * LastLoad
328  // ... | |
329  // st2 E[i] | |
330  // st3 B[i+1] -- | -' * FirstStore
331  // st0 A[i+1] ---'
332  // st4 D[i]
333  //
334  // st0 forwards to ld0 if the accesses in st4 and st1 don't overlap with
335  // ld0.
336 
337  LoadInst *LastLoad =
338  std::max_element(Candidates.begin(), Candidates.end(),
339  [&](const StoreToLoadForwardingCandidate &A,
340  const StoreToLoadForwardingCandidate &B) {
341  return getInstrIndex(A.Load) < getInstrIndex(B.Load);
342  })
343  ->Load;
344  StoreInst *FirstStore =
345  std::min_element(Candidates.begin(), Candidates.end(),
346  [&](const StoreToLoadForwardingCandidate &A,
347  const StoreToLoadForwardingCandidate &B) {
348  return getInstrIndex(A.Store) <
349  getInstrIndex(B.Store);
350  })
351  ->Store;
352 
353  // We're looking for stores after the first forwarding store until the end
354  // of the loop, then from the beginning of the loop until the last
355  // forwarded-to load. Collect the pointer for the stores.
356  SmallPtrSet<Value *, 4> PtrsWrittenOnFwdingPath;
357 
358  auto InsertStorePtr = [&](Instruction *I) {
359  if (auto *S = dyn_cast<StoreInst>(I))
360  PtrsWrittenOnFwdingPath.insert(S->getPointerOperand());
361  };
362  const auto &MemInstrs = LAI.getDepChecker().getMemoryInstructions();
363  std::for_each(MemInstrs.begin() + getInstrIndex(FirstStore) + 1,
364  MemInstrs.end(), InsertStorePtr);
365  std::for_each(MemInstrs.begin(), &MemInstrs[getInstrIndex(LastLoad)],
366  InsertStorePtr);
367 
368  return PtrsWrittenOnFwdingPath;
369  }
370 
371  /// Determine the pointer alias checks to prove that there are no
372  /// intervening stores.
375 
376  SmallPtrSet<Value *, 4> PtrsWrittenOnFwdingPath =
377  findPointersWrittenOnForwardingPath(Candidates);
378 
379  // Collect the pointers of the candidate loads.
380  // FIXME: SmallPtrSet does not work with std::inserter.
381  std::set<Value *> CandLoadPtrs;
382  transform(Candidates,
383  std::inserter(CandLoadPtrs, CandLoadPtrs.begin()),
384  std::mem_fn(&StoreToLoadForwardingCandidate::getLoadPtr));
385 
386  const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
388 
389  copy_if(AllChecks, std::back_inserter(Checks),
391  for (auto PtrIdx1 : Check.first->Members)
392  for (auto PtrIdx2 : Check.second->Members)
393  if (needsChecking(PtrIdx1, PtrIdx2, PtrsWrittenOnFwdingPath,
394  CandLoadPtrs))
395  return true;
396  return false;
397  });
398 
399  LLVM_DEBUG(dbgs() << "\nPointer Checks (count: " << Checks.size()
400  << "):\n");
402 
403  return Checks;
404  }
405 
406  /// Perform the transformation for a candidate.
407  void
408  propagateStoredValueToLoadUsers(const StoreToLoadForwardingCandidate &Cand,
409  SCEVExpander &SEE) {
410  // loop:
411  // %x = load %gep_i
412  // = ... %x
413  // store %y, %gep_i_plus_1
414  //
415  // =>
416  //
417  // ph:
418  // %x.initial = load %gep_0
419  // loop:
420  // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
421  // %x = load %gep_i <---- now dead
422  // = ... %x.storeforward
423  // store %y, %gep_i_plus_1
424 
425  Value *Ptr = Cand.Load->getPointerOperand();
426  auto *PtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(Ptr));
427  auto *PH = L->getLoopPreheader();
428  Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
429  PH->getTerminator());
430  Value *Initial =
431  new LoadInst(InitialPtr, "load_initial", /* isVolatile */ false,
432  Cand.Load->getAlignment(), PH->getTerminator());
433 
434  PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded",
435  &L->getHeader()->front());
436  PHI->addIncoming(Initial, PH);
437  PHI->addIncoming(Cand.Store->getOperand(0), L->getLoopLatch());
438 
439  Cand.Load->replaceAllUsesWith(PHI);
440  }
441 
442  /// Top-level driver for each loop: find store->load forwarding
443  /// candidates, add run-time checks and perform transformation.
444  bool processLoop() {
445  LLVM_DEBUG(dbgs() << "\nIn \"" << L->getHeader()->getParent()->getName()
446  << "\" checking " << *L << "\n");
447 
448  // Look for store-to-load forwarding cases across the
449  // backedge. E.g.:
450  //
451  // loop:
452  // %x = load %gep_i
453  // = ... %x
454  // store %y, %gep_i_plus_1
455  //
456  // =>
457  //
458  // ph:
459  // %x.initial = load %gep_0
460  // loop:
461  // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
462  // %x = load %gep_i <---- now dead
463  // = ... %x.storeforward
464  // store %y, %gep_i_plus_1
465 
466  // First start with store->load dependences.
467  auto StoreToLoadDependences = findStoreToLoadDependences(LAI);
468  if (StoreToLoadDependences.empty())
469  return false;
470 
471  // Generate an index for each load and store according to the original
472  // program order. This will be used later.
473  InstOrder = LAI.getDepChecker().generateInstructionOrderMap();
474 
475  // To keep things simple for now, remove those where the load is potentially
476  // fed by multiple stores.
477  removeDependencesFromMultipleStores(StoreToLoadDependences);
478  if (StoreToLoadDependences.empty())
479  return false;
480 
481  // Filter the candidates further.
483  unsigned NumForwarding = 0;
484  for (const StoreToLoadForwardingCandidate Cand : StoreToLoadDependences) {
485  LLVM_DEBUG(dbgs() << "Candidate " << Cand);
486 
487  // Make sure that the stored values is available everywhere in the loop in
488  // the next iteration.
489  if (!doesStoreDominatesAllLatches(Cand.Store->getParent(), L, DT))
490  continue;
491 
492  // If the load is conditional we can't hoist its 0-iteration instance to
493  // the preheader because that would make it unconditional. Thus we would
494  // access a memory location that the original loop did not access.
495  if (isLoadConditional(Cand.Load, L))
496  continue;
497 
498  // Check whether the SCEV difference is the same as the induction step,
499  // thus we load the value in the next iteration.
500  if (!Cand.isDependenceDistanceOfOne(PSE, L))
501  continue;
502 
503  ++NumForwarding;
504  LLVM_DEBUG(
505  dbgs()
506  << NumForwarding
507  << ". Valid store-to-load forwarding across the loop backedge\n");
508  Candidates.push_back(Cand);
509  }
510  if (Candidates.empty())
511  return false;
512 
513  // Check intervening may-alias stores. These need runtime checks for alias
514  // disambiguation.
516  collectMemchecks(Candidates);
517 
518  // Too many checks are likely to outweigh the benefits of forwarding.
519  if (Checks.size() > Candidates.size() * CheckPerElim) {
520  LLVM_DEBUG(dbgs() << "Too many run-time checks needed.\n");
521  return false;
522  }
523 
524  if (LAI.getPSE().getUnionPredicate().getComplexity() >
526  LLVM_DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
527  return false;
528  }
529 
530  if (!Checks.empty() || !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) {
531  if (L->getHeader()->getParent()->optForSize()) {
532  LLVM_DEBUG(
533  dbgs() << "Versioning is needed but not allowed when optimizing "
534  "for size.\n");
535  return false;
536  }
537 
538  if (!L->isLoopSimplifyForm()) {
539  LLVM_DEBUG(dbgs() << "Loop is not is loop-simplify form");
540  return false;
541  }
542 
543  // Point of no-return, start the transformation. First, version the loop
544  // if necessary.
545 
546  LoopVersioning LV(LAI, L, LI, DT, PSE.getSE(), false);
547  LV.setAliasChecks(std::move(Checks));
548  LV.setSCEVChecks(LAI.getPSE().getUnionPredicate());
549  LV.versionLoop();
550  }
551 
552  // Next, propagate the value stored by the store to the users of the load.
553  // Also for the first iteration, generate the initial value of the load.
555  "storeforward");
556  for (const auto &Cand : Candidates)
557  propagateStoredValueToLoadUsers(Cand, SEE);
558  NumLoopLoadEliminted += NumForwarding;
559 
560  return true;
561  }
562 
563 private:
564  Loop *L;
565 
566  /// Maps the load/store instructions to their index according to
567  /// program order.
569 
570  // Analyses used.
571  LoopInfo *LI;
572  const LoopAccessInfo &LAI;
573  DominatorTree *DT;
575 };
576 
577 } // end anonymous namespace
578 
579 static bool
581  function_ref<const LoopAccessInfo &(Loop &)> GetLAI) {
582  // Build up a worklist of inner-loops to transform to avoid iterator
583  // invalidation.
584  // FIXME: This logic comes from other passes that actually change the loop
585  // nest structure. It isn't clear this is necessary (or useful) for a pass
586  // which merely optimizes the use of loads in a loop.
587  SmallVector<Loop *, 8> Worklist;
588 
589  for (Loop *TopLevelLoop : LI)
590  for (Loop *L : depth_first(TopLevelLoop))
591  // We only handle inner-most loops.
592  if (L->empty())
593  Worklist.push_back(L);
594 
595  // Now walk the identified inner loops.
596  bool Changed = false;
597  for (Loop *L : Worklist) {
598  // The actual work is performed by LoadEliminationForLoop.
599  LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT);
600  Changed |= LEL.processLoop();
601  }
602  return Changed;
603 }
604 
605 namespace {
606 
607 /// The pass. Most of the work is delegated to the per-loop
608 /// LoadEliminationForLoop class.
609 class LoopLoadElimination : public FunctionPass {
610 public:
611  static char ID;
612 
613  LoopLoadElimination() : FunctionPass(ID) {
615  }
616 
617  bool runOnFunction(Function &F) override {
618  if (skipFunction(F))
619  return false;
620 
621  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
622  auto &LAA = getAnalysis<LoopAccessLegacyAnalysis>();
623  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
624 
625  // Process each loop nest in the function.
627  F, LI, DT,
628  [&LAA](Loop &L) -> const LoopAccessInfo & { return LAA.getInfo(&L); });
629  }
630 
631  void getAnalysisUsage(AnalysisUsage &AU) const override {
640  }
641 };
642 
643 } // end anonymous namespace
644 
646 
647 static const char LLE_name[] = "Loop Load Elimination";
648 
649 INITIALIZE_PASS_BEGIN(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
654 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
655 INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
656 
658  return new LoopLoadElimination();
659 }
660 
663  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
664  auto &LI = AM.getResult<LoopAnalysis>(F);
665  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
666  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
667  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
668  auto &AA = AM.getResult<AAManager>(F);
669  auto &AC = AM.getResult<AssumptionAnalysis>(F);
670 
671  auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
672  bool Changed = eliminateLoadsAcrossLoops(
673  F, LI, DT, [&](Loop &L) -> const LoopAccessInfo & {
674  LoopStandardAnalysisResults AR = {AA, AC, DT, LI,
675  SE, TLI, TTI, nullptr};
676  return LAM.getResult<LoopAccessAnalysis>(L, AR);
677  });
678 
679  if (!Changed)
680  return PreservedAnalyses::all();
681 
683  return PA;
684 }
Legacy wrapper pass to provide the GlobalsAAResult object.
static bool Check(DecodeStatus &Out, DecodeStatus In)
uint64_t CallInst * C
TrackingVH< Value > PointerValue
Holds the pointer value that we need to check.
void printChecks(raw_ostream &OS, const SmallVectorImpl< PointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
static bool eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT, function_ref< const LoopAccessInfo &(Loop &)> GetLAI)
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:224
DenseMap< Instruction *, unsigned > generateInstructionOrderMap() const
Generate a mapping between the memory instructions and their indices according to program order...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:769
This class represents lattice values for constants.
Definition: AllocatorList.h:23
This is the interface for a simple mod/ref and alias analysis over globals.
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1232
static cl::opt< unsigned > LoadElimSCEVCheckThreshold("loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Load Elimination"))
const RuntimePointerChecking * getRuntimePointerChecking() const
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of its element size.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:173
raw_ostream & indent(unsigned NumSpaces)
indent - Insert &#39;NumSpaces&#39; spaces.
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:116
Analysis pass providing the TargetTransformInfo.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1185
STATISTIC(NumFunctions, "Total number of functions")
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:230
F(f)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:502
An instruction for reading from memory.
Definition: Instructions.h:167
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:133
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
Type * getPointerElementType() const
Definition: Type.h:375
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:370
static bool isLoadConditional(LoadInst *Load, Loop *L)
Return true if the load is not executed on all paths in the loop.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
static const char LLE_name[]
void getLoopLatches(SmallVectorImpl< BlockT *> &LoopLatches) const
Return all loop latch blocks of this loop.
Definition: LoopInfo.h:303
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:944
This file implements a class to represent arbitrary precision integral constant values and operations...
BlockT * getHeader() const
Definition: LoopInfo.h:99
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
This header provides classes for managing per-loop analyses.
void initializeLoopLoadEliminationPass(PassRegistry &)
An instruction for storing to memory.
Definition: Instructions.h:320
#define LLE_OPTION
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:422
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This analysis provides dependence information for the memory accesses of a loop.
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:128
const Instruction & front() const
Definition: BasicBlock.h:280
A manager for alias analyses.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
Represent the analysis usage information of a pass.
bool optForSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:597
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
Value * getPointerOperand()
Definition: Instructions.h:284
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
void setAliasChecks(SmallVector< RuntimePointerChecking::PointerCheck, 4 > Checks)
Sets the runtime alias checks for versioning the loop.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
std::pair< const CheckingPtrGroup *, const CheckingPtrGroup * > PointerCheck
A memcheck which made up of a pair of grouped pointers.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
size_t size() const
Definition: SmallVector.h:52
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
char & LoopSimplifyID
A function analysis which provides an AssumptionCache.
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
This header defines the LoopLoadEliminationPass object.
bool isAlwaysTrue() const override
Implementation of the SCEVPredicate interface.
const SCEVUnionPredicate & getUnionPredicate() const
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:298
const PointerInfo & getPointerInfo(unsigned PtrIdx) const
Return PointerInfo for pointer at index PtrIdx.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:845
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:248
Module.h This file contains the declarations for the Module class.
FunctionPass * createLoopLoadEliminationPass()
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:47
Drive the analysis of memory accesses in the loop.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
This class emits a version of the loop where run-time checks ensure that may-alias pointers can&#39;t ove...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
Class for arbitrary precision integers.
Definition: APInt.h:69
#define SEE(c)
Definition: regcomp.c:253
This class uses information about analyze scalars to rewrite expressions in canonical form...
Analysis pass that exposes the ScalarEvolution for a function.
unsigned getComplexity() const override
We estimate the complexity of a union predicate as the size number of predicates in the union...
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:435
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:192
This analysis provides dependence information for the memory accesses of a loop.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:132
Type * getPointerOperandType() const
Definition: Instructions.h:287
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:464
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:213
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:322
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2038
OutputIt transform(R &&Range, OutputIt d_first, UnaryPredicate P)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere...
Definition: STLExtras.h:1267
bool empty() const
Definition: LoopInfo.h:145
Analysis pass providing the TargetLibraryInfo.
iterator_range< df_iterator< T > > depth_first(const T &G)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static cl::opt< unsigned > CheckPerElim("runtime-check-per-loop-load-elim", cl::Hidden, cl::desc("Max number of memchecks allowed per eliminated load on average"), cl::init(1))
LLVM Value Representation.
Definition: Value.h:72
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:969
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:259
This pass exposes codegen information to IR-level passes.
UnaryPredicate for_each(R &&Range, UnaryPredicate P)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1178
This header defines various interfaces for pass management in LLVM.
const SmallVector< PointerCheck, 4 > & getChecks() const
Returns the checks that generateChecks created.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
static bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L, DominatorTree *DT)
Check if the store dominates all latches, so as long as there is no intervening store this value will...
for(unsigned i=Desc.getNumOperands(), e=OldMI.getNumOperands();i !=e;++i)
Value * getPointerOperand()
Definition: Instructions.h:412
const BasicBlock * getParent() const
Definition: Instruction.h:66
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1037