LLVM  10.0.0svn
LoopLoadElimination.cpp
Go to the documentation of this file.
1 //===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implement a loop-aware load elimination pass.
10 //
11 // It uses LoopAccessAnalysis to identify loop-carried dependences with a
12 // distance of one between stores and loads. These form the candidates for the
13 // transformation. The source value of each store then propagated to the user
14 // of the corresponding load. This makes the load dead.
15 //
16 // The pass can also version the loop and add memchecks in order to prove that
17 // may-aliasing stores can't change the value in memory before it's read by the
18 // load.
19 //
20 //===----------------------------------------------------------------------===//
21 
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/DenseMap.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/ADT/SmallPtrSet.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/Statistic.h"
37 #include "llvm/Analysis/LoopInfo.h"
45 #include "llvm/IR/DataLayout.h"
46 #include "llvm/IR/Dominators.h"
47 #include "llvm/IR/Instructions.h"
48 #include "llvm/IR/Module.h"
49 #include "llvm/IR/PassManager.h"
50 #include "llvm/IR/Type.h"
51 #include "llvm/IR/Value.h"
52 #include "llvm/Pass.h"
53 #include "llvm/Support/Casting.h"
55 #include "llvm/Support/Debug.h"
57 #include "llvm/Transforms/Scalar.h"
58 #include "llvm/Transforms/Utils.h"
61 #include <algorithm>
62 #include <cassert>
63 #include <forward_list>
64 #include <set>
65 #include <tuple>
66 #include <utility>
67 
68 using namespace llvm;
69 
70 #define LLE_OPTION "loop-load-elim"
71 #define DEBUG_TYPE LLE_OPTION
72 
74  "runtime-check-per-loop-load-elim", cl::Hidden,
75  cl::desc("Max number of memchecks allowed per eliminated load on average"),
76  cl::init(1));
77 
79  "loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden,
80  cl::desc("The maximum number of SCEV checks allowed for Loop "
81  "Load Elimination"));
82 
83 STATISTIC(NumLoopLoadEliminted, "Number of loads eliminated by LLE");
84 
85 namespace {
86 
87 /// Represent a store-to-forwarding candidate.
88 struct StoreToLoadForwardingCandidate {
89  LoadInst *Load;
91 
92  StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store)
93  : Load(Load), Store(Store) {}
94 
95  /// Return true if the dependence from the store to the load has a
96  /// distance of one. E.g. A[i+1] = A[i]
97  bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
98  Loop *L) const {
99  Value *LoadPtr = Load->getPointerOperand();
100  Value *StorePtr = Store->getPointerOperand();
101  Type *LoadPtrType = LoadPtr->getType();
102  Type *LoadType = LoadPtrType->getPointerElementType();
103 
104  assert(LoadPtrType->getPointerAddressSpace() ==
105  StorePtr->getType()->getPointerAddressSpace() &&
106  LoadType == StorePtr->getType()->getPointerElementType() &&
107  "Should be a known dependence");
108 
109  // Currently we only support accesses with unit stride. FIXME: we should be
110  // able to handle non unit stirde as well as long as the stride is equal to
111  // the dependence distance.
112  if (getPtrStride(PSE, LoadPtr, L) != 1 ||
113  getPtrStride(PSE, StorePtr, L) != 1)
114  return false;
115 
116  auto &DL = Load->getParent()->getModule()->getDataLayout();
117  unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));
118 
119  auto *LoadPtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(LoadPtr));
120  auto *StorePtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(StorePtr));
121 
122  // We don't need to check non-wrapping here because forward/backward
123  // dependence wouldn't be valid if these weren't monotonic accesses.
124  auto *Dist = cast<SCEVConstant>(
125  PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
126  const APInt &Val = Dist->getAPInt();
127  return Val == TypeByteSize;
128  }
129 
130  Value *getLoadPtr() const { return Load->getPointerOperand(); }
131 
132 #ifndef NDEBUG
133  friend raw_ostream &operator<<(raw_ostream &OS,
134  const StoreToLoadForwardingCandidate &Cand) {
135  OS << *Cand.Store << " -->\n";
136  OS.indent(2) << *Cand.Load << "\n";
137  return OS;
138  }
139 #endif
140 };
141 
142 } // end anonymous namespace
143 
144 /// Check if the store dominates all latches, so as long as there is no
145 /// intervening store this value will be loaded in the next iteration.
146 static bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L,
147  DominatorTree *DT) {
149  L->getLoopLatches(Latches);
150  return llvm::all_of(Latches, [&](const BasicBlock *Latch) {
151  return DT->dominates(StoreBlock, Latch);
152  });
153 }
154 
155 /// Return true if the load is not executed on all paths in the loop.
156 static bool isLoadConditional(LoadInst *Load, Loop *L) {
157  return Load->getParent() != L->getHeader();
158 }
159 
160 namespace {
161 
162 /// The per-loop class that does most of the work.
163 class LoadEliminationForLoop {
164 public:
165  LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
167  ProfileSummaryInfo* PSI)
168  : L(L), LI(LI), LAI(LAI), DT(DT), BFI(BFI), PSI(PSI), PSE(LAI.getPSE()) {}
169 
170  /// Look through the loop-carried and loop-independent dependences in
171  /// this loop and find store->load dependences.
172  ///
173  /// Note that no candidate is returned if LAA has failed to analyze the loop
174  /// (e.g. if it's not bottom-tested, contains volatile memops, etc.)
175  std::forward_list<StoreToLoadForwardingCandidate>
176  findStoreToLoadDependences(const LoopAccessInfo &LAI) {
177  std::forward_list<StoreToLoadForwardingCandidate> Candidates;
178 
179  const auto *Deps = LAI.getDepChecker().getDependences();
180  if (!Deps)
181  return Candidates;
182 
183  // Find store->load dependences (consequently true dep). Both lexically
184  // forward and backward dependences qualify. Disqualify loads that have
185  // other unknown dependences.
186 
187  SmallPtrSet<Instruction *, 4> LoadsWithUnknownDepedence;
188 
189  for (const auto &Dep : *Deps) {
190  Instruction *Source = Dep.getSource(LAI);
191  Instruction *Destination = Dep.getDestination(LAI);
192 
193  if (Dep.Type == MemoryDepChecker::Dependence::Unknown) {
194  if (isa<LoadInst>(Source))
195  LoadsWithUnknownDepedence.insert(Source);
196  if (isa<LoadInst>(Destination))
197  LoadsWithUnknownDepedence.insert(Destination);
198  continue;
199  }
200 
201  if (Dep.isBackward())
202  // Note that the designations source and destination follow the program
203  // order, i.e. source is always first. (The direction is given by the
204  // DepType.)
205  std::swap(Source, Destination);
206  else
207  assert(Dep.isForward() && "Needs to be a forward dependence");
208 
209  auto *Store = dyn_cast<StoreInst>(Source);
210  if (!Store)
211  continue;
212  auto *Load = dyn_cast<LoadInst>(Destination);
213  if (!Load)
214  continue;
215 
216  // Only progagate the value if they are of the same type.
217  if (Store->getPointerOperandType() != Load->getPointerOperandType())
218  continue;
219 
220  Candidates.emplace_front(Load, Store);
221  }
222 
223  if (!LoadsWithUnknownDepedence.empty())
224  Candidates.remove_if([&](const StoreToLoadForwardingCandidate &C) {
225  return LoadsWithUnknownDepedence.count(C.Load);
226  });
227 
228  return Candidates;
229  }
230 
231  /// Return the index of the instruction according to program order.
232  unsigned getInstrIndex(Instruction *Inst) {
233  auto I = InstOrder.find(Inst);
234  assert(I != InstOrder.end() && "No index for instruction");
235  return I->second;
236  }
237 
238  /// If a load has multiple candidates associated (i.e. different
239  /// stores), it means that it could be forwarding from multiple stores
240  /// depending on control flow. Remove these candidates.
241  ///
242  /// Here, we rely on LAA to include the relevant loop-independent dependences.
243  /// LAA is known to omit these in the very simple case when the read and the
244  /// write within an alias set always takes place using the *same* pointer.
245  ///
246  /// However, we know that this is not the case here, i.e. we can rely on LAA
247  /// to provide us with loop-independent dependences for the cases we're
248  /// interested. Consider the case for example where a loop-independent
249  /// dependece S1->S2 invalidates the forwarding S3->S2.
250  ///
251  /// A[i] = ... (S1)
252  /// ... = A[i] (S2)
253  /// A[i+1] = ... (S3)
254  ///
255  /// LAA will perform dependence analysis here because there are two
256  /// *different* pointers involved in the same alias set (&A[i] and &A[i+1]).
257  void removeDependencesFromMultipleStores(
258  std::forward_list<StoreToLoadForwardingCandidate> &Candidates) {
259  // If Store is nullptr it means that we have multiple stores forwarding to
260  // this store.
261  using LoadToSingleCandT =
263  LoadToSingleCandT LoadToSingleCand;
264 
265  for (const auto &Cand : Candidates) {
266  bool NewElt;
267  LoadToSingleCandT::iterator Iter;
268 
269  std::tie(Iter, NewElt) =
270  LoadToSingleCand.insert(std::make_pair(Cand.Load, &Cand));
271  if (!NewElt) {
272  const StoreToLoadForwardingCandidate *&OtherCand = Iter->second;
273  // Already multiple stores forward to this load.
274  if (OtherCand == nullptr)
275  continue;
276 
277  // Handle the very basic case when the two stores are in the same block
278  // so deciding which one forwards is easy. The later one forwards as
279  // long as they both have a dependence distance of one to the load.
280  if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
281  Cand.isDependenceDistanceOfOne(PSE, L) &&
282  OtherCand->isDependenceDistanceOfOne(PSE, L)) {
283  // They are in the same block, the later one will forward to the load.
284  if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store))
285  OtherCand = &Cand;
286  } else
287  OtherCand = nullptr;
288  }
289  }
290 
291  Candidates.remove_if([&](const StoreToLoadForwardingCandidate &Cand) {
292  if (LoadToSingleCand[Cand.Load] != &Cand) {
293  LLVM_DEBUG(
294  dbgs() << "Removing from candidates: \n"
295  << Cand
296  << " The load may have multiple stores forwarding to "
297  << "it\n");
298  return true;
299  }
300  return false;
301  });
302  }
303 
304  /// Given two pointers operations by their RuntimePointerChecking
305  /// indices, return true if they require an alias check.
306  ///
307  /// We need a check if one is a pointer for a candidate load and the other is
308  /// a pointer for a possibly intervening store.
309  bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2,
310  const SmallPtrSet<Value *, 4> &PtrsWrittenOnFwdingPath,
311  const std::set<Value *> &CandLoadPtrs) {
312  Value *Ptr1 =
314  Value *Ptr2 =
316  return ((PtrsWrittenOnFwdingPath.count(Ptr1) && CandLoadPtrs.count(Ptr2)) ||
317  (PtrsWrittenOnFwdingPath.count(Ptr2) && CandLoadPtrs.count(Ptr1)));
318  }
319 
320  /// Return pointers that are possibly written to on the path from a
321  /// forwarding store to a load.
322  ///
323  /// These pointers need to be alias-checked against the forwarding candidates.
324  SmallPtrSet<Value *, 4> findPointersWrittenOnForwardingPath(
326  // From FirstStore to LastLoad neither of the elimination candidate loads
327  // should overlap with any of the stores.
328  //
329  // E.g.:
330  //
331  // st1 C[i]
332  // ld1 B[i] <-------,
333  // ld0 A[i] <----, | * LastLoad
334  // ... | |
335  // st2 E[i] | |
336  // st3 B[i+1] -- | -' * FirstStore
337  // st0 A[i+1] ---'
338  // st4 D[i]
339  //
340  // st0 forwards to ld0 if the accesses in st4 and st1 don't overlap with
341  // ld0.
342 
343  LoadInst *LastLoad =
344  std::max_element(Candidates.begin(), Candidates.end(),
345  [&](const StoreToLoadForwardingCandidate &A,
346  const StoreToLoadForwardingCandidate &B) {
347  return getInstrIndex(A.Load) < getInstrIndex(B.Load);
348  })
349  ->Load;
350  StoreInst *FirstStore =
351  std::min_element(Candidates.begin(), Candidates.end(),
352  [&](const StoreToLoadForwardingCandidate &A,
353  const StoreToLoadForwardingCandidate &B) {
354  return getInstrIndex(A.Store) <
355  getInstrIndex(B.Store);
356  })
357  ->Store;
358 
359  // We're looking for stores after the first forwarding store until the end
360  // of the loop, then from the beginning of the loop until the last
361  // forwarded-to load. Collect the pointer for the stores.
362  SmallPtrSet<Value *, 4> PtrsWrittenOnFwdingPath;
363 
364  auto InsertStorePtr = [&](Instruction *I) {
365  if (auto *S = dyn_cast<StoreInst>(I))
366  PtrsWrittenOnFwdingPath.insert(S->getPointerOperand());
367  };
368  const auto &MemInstrs = LAI.getDepChecker().getMemoryInstructions();
369  std::for_each(MemInstrs.begin() + getInstrIndex(FirstStore) + 1,
370  MemInstrs.end(), InsertStorePtr);
371  std::for_each(MemInstrs.begin(), &MemInstrs[getInstrIndex(LastLoad)],
372  InsertStorePtr);
373 
374  return PtrsWrittenOnFwdingPath;
375  }
376 
377  /// Determine the pointer alias checks to prove that there are no
378  /// intervening stores.
381 
382  SmallPtrSet<Value *, 4> PtrsWrittenOnFwdingPath =
383  findPointersWrittenOnForwardingPath(Candidates);
384 
385  // Collect the pointers of the candidate loads.
386  // FIXME: SmallPtrSet does not work with std::inserter.
387  std::set<Value *> CandLoadPtrs;
388  transform(Candidates,
389  std::inserter(CandLoadPtrs, CandLoadPtrs.begin()),
390  std::mem_fn(&StoreToLoadForwardingCandidate::getLoadPtr));
391 
392  const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
394 
395  copy_if(AllChecks, std::back_inserter(Checks),
397  for (auto PtrIdx1 : Check.first->Members)
398  for (auto PtrIdx2 : Check.second->Members)
399  if (needsChecking(PtrIdx1, PtrIdx2, PtrsWrittenOnFwdingPath,
400  CandLoadPtrs))
401  return true;
402  return false;
403  });
404 
405  LLVM_DEBUG(dbgs() << "\nPointer Checks (count: " << Checks.size()
406  << "):\n");
408 
409  return Checks;
410  }
411 
412  /// Perform the transformation for a candidate.
413  void
414  propagateStoredValueToLoadUsers(const StoreToLoadForwardingCandidate &Cand,
415  SCEVExpander &SEE) {
416  // loop:
417  // %x = load %gep_i
418  // = ... %x
419  // store %y, %gep_i_plus_1
420  //
421  // =>
422  //
423  // ph:
424  // %x.initial = load %gep_0
425  // loop:
426  // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
427  // %x = load %gep_i <---- now dead
428  // = ... %x.storeforward
429  // store %y, %gep_i_plus_1
430 
431  Value *Ptr = Cand.Load->getPointerOperand();
432  auto *PtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(Ptr));
433  auto *PH = L->getLoopPreheader();
434  Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
435  PH->getTerminator());
436  Value *Initial = new LoadInst(
437  Cand.Load->getType(), InitialPtr, "load_initial",
438  /* isVolatile */ false, MaybeAlign(Cand.Load->getAlignment()),
439  PH->getTerminator());
440 
441  PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded",
442  &L->getHeader()->front());
443  PHI->addIncoming(Initial, PH);
444  PHI->addIncoming(Cand.Store->getOperand(0), L->getLoopLatch());
445 
446  Cand.Load->replaceAllUsesWith(PHI);
447  }
448 
449  /// Top-level driver for each loop: find store->load forwarding
450  /// candidates, add run-time checks and perform transformation.
451  bool processLoop() {
452  LLVM_DEBUG(dbgs() << "\nIn \"" << L->getHeader()->getParent()->getName()
453  << "\" checking " << *L << "\n");
454 
455  // Look for store-to-load forwarding cases across the
456  // backedge. E.g.:
457  //
458  // loop:
459  // %x = load %gep_i
460  // = ... %x
461  // store %y, %gep_i_plus_1
462  //
463  // =>
464  //
465  // ph:
466  // %x.initial = load %gep_0
467  // loop:
468  // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
469  // %x = load %gep_i <---- now dead
470  // = ... %x.storeforward
471  // store %y, %gep_i_plus_1
472 
473  // First start with store->load dependences.
474  auto StoreToLoadDependences = findStoreToLoadDependences(LAI);
475  if (StoreToLoadDependences.empty())
476  return false;
477 
478  // Generate an index for each load and store according to the original
479  // program order. This will be used later.
480  InstOrder = LAI.getDepChecker().generateInstructionOrderMap();
481 
482  // To keep things simple for now, remove those where the load is potentially
483  // fed by multiple stores.
484  removeDependencesFromMultipleStores(StoreToLoadDependences);
485  if (StoreToLoadDependences.empty())
486  return false;
487 
488  // Filter the candidates further.
490  unsigned NumForwarding = 0;
491  for (const StoreToLoadForwardingCandidate Cand : StoreToLoadDependences) {
492  LLVM_DEBUG(dbgs() << "Candidate " << Cand);
493 
494  // Make sure that the stored values is available everywhere in the loop in
495  // the next iteration.
496  if (!doesStoreDominatesAllLatches(Cand.Store->getParent(), L, DT))
497  continue;
498 
499  // If the load is conditional we can't hoist its 0-iteration instance to
500  // the preheader because that would make it unconditional. Thus we would
501  // access a memory location that the original loop did not access.
502  if (isLoadConditional(Cand.Load, L))
503  continue;
504 
505  // Check whether the SCEV difference is the same as the induction step,
506  // thus we load the value in the next iteration.
507  if (!Cand.isDependenceDistanceOfOne(PSE, L))
508  continue;
509 
510  ++NumForwarding;
511  LLVM_DEBUG(
512  dbgs()
513  << NumForwarding
514  << ". Valid store-to-load forwarding across the loop backedge\n");
515  Candidates.push_back(Cand);
516  }
517  if (Candidates.empty())
518  return false;
519 
520  // Check intervening may-alias stores. These need runtime checks for alias
521  // disambiguation.
523  collectMemchecks(Candidates);
524 
525  // Too many checks are likely to outweigh the benefits of forwarding.
526  if (Checks.size() > Candidates.size() * CheckPerElim) {
527  LLVM_DEBUG(dbgs() << "Too many run-time checks needed.\n");
528  return false;
529  }
530 
531  if (LAI.getPSE().getUnionPredicate().getComplexity() >
533  LLVM_DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
534  return false;
535  }
536 
537  if (!Checks.empty() || !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) {
538  if (LAI.hasConvergentOp()) {
539  LLVM_DEBUG(dbgs() << "Versioning is needed but not allowed with "
540  "convergent calls\n");
541  return false;
542  }
543 
544  auto *HeaderBB = L->getHeader();
545  auto *F = HeaderBB->getParent();
546  bool OptForSize = F->hasOptSize() ||
547  llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI);
548  if (OptForSize) {
549  LLVM_DEBUG(
550  dbgs() << "Versioning is needed but not allowed when optimizing "
551  "for size.\n");
552  return false;
553  }
554 
555  if (!L->isLoopSimplifyForm()) {
556  LLVM_DEBUG(dbgs() << "Loop is not is loop-simplify form");
557  return false;
558  }
559 
560  // Point of no-return, start the transformation. First, version the loop
561  // if necessary.
562 
563  LoopVersioning LV(LAI, L, LI, DT, PSE.getSE(), false);
564  LV.setAliasChecks(std::move(Checks));
565  LV.setSCEVChecks(LAI.getPSE().getUnionPredicate());
566  LV.versionLoop();
567  }
568 
569  // Next, propagate the value stored by the store to the users of the load.
570  // Also for the first iteration, generate the initial value of the load.
572  "storeforward");
573  for (const auto &Cand : Candidates)
574  propagateStoredValueToLoadUsers(Cand, SEE);
575  NumLoopLoadEliminted += NumForwarding;
576 
577  return true;
578  }
579 
580 private:
581  Loop *L;
582 
583  /// Maps the load/store instructions to their index according to
584  /// program order.
586 
587  // Analyses used.
588  LoopInfo *LI;
589  const LoopAccessInfo &LAI;
590  DominatorTree *DT;
592  ProfileSummaryInfo *PSI;
594 };
595 
596 } // end anonymous namespace
597 
598 static bool
601  function_ref<const LoopAccessInfo &(Loop &)> GetLAI) {
602  // Build up a worklist of inner-loops to transform to avoid iterator
603  // invalidation.
604  // FIXME: This logic comes from other passes that actually change the loop
605  // nest structure. It isn't clear this is necessary (or useful) for a pass
606  // which merely optimizes the use of loads in a loop.
607  SmallVector<Loop *, 8> Worklist;
608 
609  for (Loop *TopLevelLoop : LI)
610  for (Loop *L : depth_first(TopLevelLoop))
611  // We only handle inner-most loops.
612  if (L->empty())
613  Worklist.push_back(L);
614 
615  // Now walk the identified inner loops.
616  bool Changed = false;
617  for (Loop *L : Worklist) {
618  // The actual work is performed by LoadEliminationForLoop.
619  LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT, BFI, PSI);
620  Changed |= LEL.processLoop();
621  }
622  return Changed;
623 }
624 
625 namespace {
626 
627 /// The pass. Most of the work is delegated to the per-loop
628 /// LoadEliminationForLoop class.
629 class LoopLoadElimination : public FunctionPass {
630 public:
631  static char ID;
632 
633  LoopLoadElimination() : FunctionPass(ID) {
635  }
636 
637  bool runOnFunction(Function &F) override {
638  if (skipFunction(F))
639  return false;
640 
641  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
642  auto &LAA = getAnalysis<LoopAccessLegacyAnalysis>();
643  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
644  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
645  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
646  &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
647  nullptr;
648 
649  // Process each loop nest in the function.
651  F, LI, DT, BFI, PSI,
652  [&LAA](Loop &L) -> const LoopAccessInfo & { return LAA.getInfo(&L); });
653  }
654 
655  void getAnalysisUsage(AnalysisUsage &AU) const override {
666  }
667 };
668 
669 } // end anonymous namespace
670 
672 
673 static const char LLE_name[] = "Loop Load Elimination";
674 
675 INITIALIZE_PASS_BEGIN(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
680 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
683 INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
684 
686  return new LoopLoadElimination();
687 }
688 
691  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
692  auto &LI = AM.getResult<LoopAnalysis>(F);
693  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
694  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
695  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
696  auto &AA = AM.getResult<AAManager>(F);
697  auto &AC = AM.getResult<AssumptionAnalysis>(F);
698  auto &MAM = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
699  auto *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
700  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
701  &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
703  ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA()
704  : nullptr;
705 
706  auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
707  bool Changed = eliminateLoadsAcrossLoops(
708  F, LI, DT, BFI, PSI, [&](Loop &L) -> const LoopAccessInfo & {
709  LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
710  return LAM.getResult<LoopAccessAnalysis>(L, AR);
711  });
712 
713  if (!Changed)
714  return PreservedAnalyses::all();
715 
717  return PA;
718 }
Legacy wrapper pass to provide the GlobalsAAResult object.
static bool Check(DecodeStatus &Out, DecodeStatus In)
uint64_t CallInst * C
TrackingVH< Value > PointerValue
Holds the pointer value that we need to check.
void printChecks(raw_ostream &OS, const SmallVectorImpl< PointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:209
DenseMap< Instruction *, unsigned > generateInstructionOrderMap() const
Generate a mapping between the memory instructions and their indices according to program order...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:777
This class represents lattice values for constants.
Definition: AllocatorList.h:23
This is the interface for a simple mod/ref and alias analysis over globals.
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1212
static cl::opt< unsigned > LoadElimSCEVCheckThreshold("loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Load Elimination"))
const RuntimePointerChecking * getRuntimePointerChecking() const
Analysis providing profile information.
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of its element size.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:160
raw_ostream & indent(unsigned NumSpaces)
indent - Insert &#39;NumSpaces&#39; spaces.
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:104
Analysis pass providing the TargetTransformInfo.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1165
STATISTIC(NumFunctions, "Total number of functions")
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:230
F(f)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:635
An instruction for reading from memory.
Definition: Instructions.h:169
bool shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Returns true if function F is suggested to be size-optimized base on the profile. ...
Definition: SizeOpts.cpp:23
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:195
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:140
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
Type * getPointerElementType() const
Definition: Type.h:381
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
static bool isLoadConditional(LoadInst *Load, Loop *L)
Return true if the load is not executed on all paths in the loop.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:703
static const char LLE_name[]
void getLoopLatches(SmallVectorImpl< BlockT *> &LoopLatches) const
Return all loop latch blocks of this loop.
Definition: LoopInfo.h:314
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1183
This file implements a class to represent arbitrary precision integral constant values and operations...
BlockT * getHeader() const
Definition: LoopInfo.h:105
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:246
This header provides classes for managing per-loop analyses.
void initializeLoopLoadEliminationPass(PassRegistry &)
An instruction for storing to memory.
Definition: Instructions.h:325
#define LLE_OPTION
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
bool hasProfileSummary()
Returns true if profile summary is available.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
This analysis provides dependence information for the memory accesses of a loop.
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
const Instruction & front() const
Definition: BasicBlock.h:285
A manager for alias analyses.
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:486
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
Represent the analysis usage information of a pass.
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
Value * getPointerOperand()
Definition: Instructions.h:289
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
void setAliasChecks(SmallVector< RuntimePointerChecking::PointerCheck, 4 > Checks)
Sets the runtime alias checks for versioning the loop.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
std::pair< const CheckingPtrGroup *, const CheckingPtrGroup * > PointerCheck
A memcheck which made up of a pair of grouped pointers.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
size_t size() const
Definition: SmallVector.h:52
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
char & LoopSimplifyID
A function analysis which provides an AssumptionCache.
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
This header defines the LoopLoadEliminationPass object.
Analysis pass which computes BlockFrequencyInfo.
bool isAlwaysTrue() const override
Implementation of the SCEVPredicate interface.
const SCEVUnionPredicate & getUnionPredicate() const
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1161
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:314
This struct is a compact representation of a valid (power of two) or undefined (0) alignment...
Definition: Alignment.h:117
const PointerInfo & getPointerInfo(unsigned PtrIdx) const
Return PointerInfo for pointer at index PtrIdx.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:248
Module.h This file contains the declarations for the Module class.
FunctionPass * createLoopLoadEliminationPass()
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:47
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:926
Drive the analysis of memory accesses in the loop.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
This class emits a version of the loop where run-time checks ensure that may-alias pointers can&#39;t ove...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
Class for arbitrary precision integers.
Definition: APInt.h:69
#define SEE(c)
Definition: regcomp.c:254
This class uses information about analyze scalars to rewrite expressions in canonical form...
static bool eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, function_ref< const LoopAccessInfo &(Loop &)> GetLAI)
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
Analysis pass that exposes the ScalarEvolution for a function.
unsigned getComplexity() const override
We estimate the complexity of a union predicate as the size number of predicates in the union...
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:464
This analysis provides dependence information for the memory accesses of a loop.
Type * getPointerOperandType() const
Definition: Instructions.h:292
cl::opt< bool > EnableMSSALoopDependency
Enables memory ssa as a dependency for loop passes.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
bool hasConvergentOp() const
Return true if there is a convergent operation in the loop.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2047
OutputIt transform(R &&Range, OutputIt d_first, UnaryPredicate P)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere...
Definition: STLExtras.h:1247
bool empty() const
Definition: LoopInfo.h:151
Analysis pass providing the TargetLibraryInfo.
iterator_range< df_iterator< T > > depth_first(const T &G)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static cl::opt< unsigned > CheckPerElim("runtime-check-per-loop-load-elim", cl::Hidden, cl::desc("Max number of memchecks allowed per eliminated load on average"), cl::init(1))
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
LLVM Value Representation.
Definition: Value.h:74
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:1208
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:259
This pass exposes codegen information to IR-level passes.
UnaryPredicate for_each(R &&Range, UnaryPredicate P)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1158
This header defines various interfaces for pass management in LLVM.
const SmallVector< PointerCheck, 4 > & getChecks() const
Returns the checks that generateChecks created.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
static bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L, DominatorTree *DT)
Check if the store dominates all latches, so as long as there is no intervening store this value will...
for(unsigned i=Desc.getNumOperands(), e=OldMI.getNumOperands();i !=e;++i)
Value * getPointerOperand()
Definition: Instructions.h:418
const BasicBlock * getParent() const
Definition: Instruction.h:66
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1045