LLVM 23.0.0git
LoopLoadElimination.cpp
Go to the documentation of this file.
1//===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implement a loop-aware load elimination pass.
10//
11// It uses LoopAccessAnalysis to identify loop-carried dependences with a
12// distance of one between stores and loads. These form the candidates for the
// transformation. The source value of each store is then propagated to the user
14// of the corresponding load. This makes the load dead.
15//
16// The pass can also version the loop and add memchecks in order to prove that
17// may-aliasing stores can't change the value in memory before it's read by the
18// load.
19//
20//===----------------------------------------------------------------------===//
21
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/DenseMap.h"
26#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/Statistic.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/Dominators.h"
45#include "llvm/IR/PassManager.h"
46#include "llvm/IR/Type.h"
47#include "llvm/IR/Value.h"
50#include "llvm/Support/Debug.h"
56#include <algorithm>
57#include <cassert>
58#include <forward_list>
59#include <tuple>
60#include <utility>
61
62using namespace llvm;
63
64#define LLE_OPTION "loop-load-elim"
65#define DEBUG_TYPE LLE_OPTION
66
68 "runtime-check-per-loop-load-elim", cl::Hidden,
69 cl::desc("Max number of memchecks allowed per eliminated load on average"),
70 cl::init(1));
71
73 "loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden,
74 cl::desc("The maximum number of SCEV checks allowed for Loop "
75 "Load Elimination"));
76
77STATISTIC(NumLoopLoadEliminted, "Number of loads eliminated by LLE");
78
79namespace {
80
81/// Represent a store-to-forwarding candidate.
82struct StoreToLoadForwardingCandidate {
83 LoadInst *Load;
84 StoreInst *Store;
85
86 StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store)
87 : Load(Load), Store(Store) {}
88
89 /// Return true if the dependence from the store to the load has an
90 /// absolute distance of one.
91 /// E.g. A[i+1] = A[i] (or A[i-1] = A[i] for descending loop)
92 bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE, Loop *L,
93 const DominatorTree &DT) const {
94 Value *LoadPtr = Load->getPointerOperand();
95 Value *StorePtr = Store->getPointerOperand();
96 Type *LoadType = getLoadStoreType(Load);
97 auto &DL = Load->getDataLayout();
98
100 StorePtr->getType()->getPointerAddressSpace() &&
101 DL.getTypeSizeInBits(LoadType) ==
102 DL.getTypeSizeInBits(getLoadStoreType(Store)) &&
103 "Should be a known dependence");
104
105 int64_t StrideLoad =
106 getPtrStride(PSE, LoadType, LoadPtr, L, DT).value_or(0);
107 int64_t StrideStore =
108 getPtrStride(PSE, LoadType, StorePtr, L, DT).value_or(0);
109 if (!StrideLoad || !StrideStore || StrideLoad != StrideStore)
110 return false;
111
112 // TODO: This check for stride values other than 1 and -1 can be eliminated.
113 // However, doing so may cause the LoopAccessAnalysis to overcompensate,
114 // generating numerous non-wrap runtime checks that may undermine the
115 // benefits of load elimination. To safely implement support for non-unit
116 // strides, we would need to ensure either that the processed case does not
117 // require these additional checks, or improve the LAA to handle them more
118 // efficiently, or potentially both.
119 if (std::abs(StrideLoad) != 1)
120 return false;
121
122 unsigned TypeByteSize = DL.getTypeAllocSize(LoadType);
123
124 auto *LoadPtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(LoadPtr));
125 auto *StorePtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(StorePtr));
126
127 // We don't need to check non-wrapping here because forward/backward
128 // dependence wouldn't be valid if these weren't monotonic accesses.
129 auto *Dist = dyn_cast<SCEVConstant>(
130 PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
131 if (!Dist)
132 return false;
133 const APInt &Val = Dist->getAPInt();
134 return Val == TypeByteSize * StrideLoad;
135 }
136
137 Value *getLoadPtr() const { return Load->getPointerOperand(); }
138
139#ifndef NDEBUG
140 friend raw_ostream &operator<<(raw_ostream &OS,
141 const StoreToLoadForwardingCandidate &Cand) {
142 OS << *Cand.Store << " -->\n";
143 OS.indent(2) << *Cand.Load << "\n";
144 return OS;
145 }
146#endif
147};
148
149} // end anonymous namespace
150
151/// Check if the store dominates all latches, so as long as there is no
152/// intervening store this value will be loaded in the next iteration.
153static bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L,
154 DominatorTree *DT) {
156 L->getLoopLatches(Latches);
157 return llvm::all_of(Latches, [&](const BasicBlock *Latch) {
158 return DT->dominates(StoreBlock, Latch);
159 });
160}
161
162/// Return true if the load is not executed on all paths in the loop.
163static bool isLoadConditional(LoadInst *Load, Loop *L) {
164 return Load->getParent() != L->getHeader();
165}
166
167namespace {
168
169/// The per-loop class that does most of the work.
170class LoadEliminationForLoop {
171public:
172 LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
173 DominatorTree *DT, BlockFrequencyInfo *BFI,
174 ProfileSummaryInfo* PSI)
175 : L(L), LI(LI), LAI(LAI), DT(DT), BFI(BFI), PSI(PSI), PSE(LAI.getPSE()) {}
176
177 /// Look through the loop-carried and loop-independent dependences in
178 /// this loop and find store->load dependences.
179 ///
180 /// Note that no candidate is returned if LAA has failed to analyze the loop
181 /// (e.g. if it's not bottom-tested, contains volatile memops, etc.)
182 std::forward_list<StoreToLoadForwardingCandidate>
183 findStoreToLoadDependences(const LoopAccessInfo &LAI) {
184 std::forward_list<StoreToLoadForwardingCandidate> Candidates;
185
186 const auto &DepChecker = LAI.getDepChecker();
187 const auto *Deps = DepChecker.getDependences();
188 if (!Deps)
189 return Candidates;
190
191 // Find store->load dependences (consequently true dep). Both lexically
192 // forward and backward dependences qualify. Disqualify loads that have
193 // other unknown dependences.
194
195 SmallPtrSet<Instruction *, 4> LoadsWithUnknownDependence;
196
197 for (const auto &Dep : *Deps) {
198 Instruction *Source = Dep.getSource(DepChecker);
199 Instruction *Destination = Dep.getDestination(DepChecker);
200
204 if (isa<LoadInst>(Source))
205 LoadsWithUnknownDependence.insert(Source);
206 if (isa<LoadInst>(Destination))
207 LoadsWithUnknownDependence.insert(Destination);
208 continue;
209 }
210
211 if (Dep.isBackward())
212 // Note that the designations source and destination follow the program
213 // order, i.e. source is always first. (The direction is given by the
214 // DepType.)
215 std::swap(Source, Destination);
216 else
217 assert(Dep.isForward() && "Needs to be a forward dependence");
218
219 auto *Store = dyn_cast<StoreInst>(Source);
220 if (!Store)
221 continue;
222 auto *Load = dyn_cast<LoadInst>(Destination);
223 if (!Load)
224 continue;
225
226 // Only propagate if the stored values are bit/pointer castable.
229 Store->getDataLayout()))
230 continue;
231
232 Candidates.emplace_front(Load, Store);
233 }
234
235 if (!LoadsWithUnknownDependence.empty())
236 Candidates.remove_if([&](const StoreToLoadForwardingCandidate &C) {
237 return LoadsWithUnknownDependence.count(C.Load);
238 });
239
240 return Candidates;
241 }
242
243 /// Return the index of the instruction according to program order.
244 unsigned getInstrIndex(Instruction *Inst) {
245 auto I = InstOrder.find(Inst);
246 assert(I != InstOrder.end() && "No index for instruction");
247 return I->second;
248 }
249
250 /// If a load has multiple candidates associated (i.e. different
251 /// stores), it means that it could be forwarding from multiple stores
252 /// depending on control flow. Remove these candidates.
253 ///
254 /// Here, we rely on LAA to include the relevant loop-independent dependences.
255 /// LAA is known to omit these in the very simple case when the read and the
256 /// write within an alias set always takes place using the *same* pointer.
257 ///
258 /// However, we know that this is not the case here, i.e. we can rely on LAA
259 /// to provide us with loop-independent dependences for the cases we're
260 /// interested. Consider the case for example where a loop-independent
261 /// dependece S1->S2 invalidates the forwarding S3->S2.
262 ///
263 /// A[i] = ... (S1)
264 /// ... = A[i] (S2)
265 /// A[i+1] = ... (S3)
266 ///
267 /// LAA will perform dependence analysis here because there are two
268 /// *different* pointers involved in the same alias set (&A[i] and &A[i+1]).
269 void removeDependencesFromMultipleStores(
270 std::forward_list<StoreToLoadForwardingCandidate> &Candidates) {
271 // If Store is nullptr it means that we have multiple stores forwarding to
272 // this store.
273 using LoadToSingleCandT =
274 DenseMap<LoadInst *, const StoreToLoadForwardingCandidate *>;
275 LoadToSingleCandT LoadToSingleCand;
276
277 for (const auto &Cand : Candidates) {
278 bool NewElt;
279 LoadToSingleCandT::iterator Iter;
280
281 std::tie(Iter, NewElt) =
282 LoadToSingleCand.insert(std::make_pair(Cand.Load, &Cand));
283 if (!NewElt) {
284 const StoreToLoadForwardingCandidate *&OtherCand = Iter->second;
285 // Already multiple stores forward to this load.
286 if (OtherCand == nullptr)
287 continue;
288
289 // Handle the very basic case when the two stores are in the same block
290 // so deciding which one forwards is easy. The later one forwards as
291 // long as they both have a dependence distance of one to the load.
292 if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
293 Cand.isDependenceDistanceOfOne(PSE, L, *DT) &&
294 OtherCand->isDependenceDistanceOfOne(PSE, L, *DT)) {
295 // They are in the same block, the later one will forward to the load.
296 if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store))
297 OtherCand = &Cand;
298 } else
299 OtherCand = nullptr;
300 }
301 }
302
303 Candidates.remove_if([&](const StoreToLoadForwardingCandidate &Cand) {
304 if (LoadToSingleCand[Cand.Load] != &Cand) {
306 dbgs() << "Removing from candidates: \n"
307 << Cand
308 << " The load may have multiple stores forwarding to "
309 << "it\n");
310 return true;
311 }
312 return false;
313 });
314 }
315
316 /// Given two pointers operations by their RuntimePointerChecking
317 /// indices, return true if they require an alias check.
318 ///
319 /// We need a check if one is a pointer for a candidate load and the other is
320 /// a pointer for a possibly intervening store.
321 bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2,
322 const SmallPtrSetImpl<Value *> &PtrsWrittenOnFwdingPath,
323 const SmallPtrSetImpl<Value *> &CandLoadPtrs) {
324 Value *Ptr1 =
325 LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx1).PointerValue;
326 Value *Ptr2 =
327 LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx2).PointerValue;
328 return ((PtrsWrittenOnFwdingPath.count(Ptr1) && CandLoadPtrs.count(Ptr2)) ||
329 (PtrsWrittenOnFwdingPath.count(Ptr2) && CandLoadPtrs.count(Ptr1)));
330 }
331
332 /// Return pointers that are possibly written to on the path from a
333 /// forwarding store to a load.
334 ///
335 /// These pointers need to be alias-checked against the forwarding candidates.
336 SmallPtrSet<Value *, 4> findPointersWrittenOnForwardingPath(
337 const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) {
338 // From FirstStore to LastLoad neither of the elimination candidate loads
339 // should overlap with any of the stores.
340 //
341 // E.g.:
342 //
343 // st1 C[i]
344 // ld1 B[i] <-------,
345 // ld0 A[i] <----, | * LastLoad
346 // ... | |
347 // st2 E[i] | |
348 // st3 B[i+1] -- | -' * FirstStore
349 // st0 A[i+1] ---'
350 // st4 D[i]
351 //
352 // st0 forwards to ld0 if the accesses in st4 and st1 don't overlap with
353 // ld0.
354
355 LoadInst *LastLoad =
356 llvm::max_element(Candidates,
357 [&](const StoreToLoadForwardingCandidate &A,
358 const StoreToLoadForwardingCandidate &B) {
359 return getInstrIndex(A.Load) <
360 getInstrIndex(B.Load);
361 })
362 ->Load;
363 StoreInst *FirstStore =
364 llvm::min_element(Candidates,
365 [&](const StoreToLoadForwardingCandidate &A,
366 const StoreToLoadForwardingCandidate &B) {
367 return getInstrIndex(A.Store) <
368 getInstrIndex(B.Store);
369 })
370 ->Store;
371
372 // We're looking for stores after the first forwarding store until the end
373 // of the loop, then from the beginning of the loop until the last
374 // forwarded-to load. Collect the pointer for the stores.
375 SmallPtrSet<Value *, 4> PtrsWrittenOnFwdingPath;
376
377 auto InsertStorePtr = [&](Instruction *I) {
378 if (auto *S = dyn_cast<StoreInst>(I))
379 PtrsWrittenOnFwdingPath.insert(S->getPointerOperand());
380 };
381 const auto &MemInstrs = LAI.getDepChecker().getMemoryInstructions();
382 std::for_each(MemInstrs.begin() + getInstrIndex(FirstStore) + 1,
383 MemInstrs.end(), InsertStorePtr);
384 std::for_each(MemInstrs.begin(), &MemInstrs[getInstrIndex(LastLoad)],
385 InsertStorePtr);
386
387 return PtrsWrittenOnFwdingPath;
388 }
389
390 /// Determine the pointer alias checks to prove that there are no
391 /// intervening stores.
392 SmallVector<RuntimePointerCheck, 4> collectMemchecks(
393 const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) {
394
395 SmallPtrSet<Value *, 4> PtrsWrittenOnFwdingPath =
396 findPointersWrittenOnForwardingPath(Candidates);
397
398 // Collect the pointers of the candidate loads.
399 SmallPtrSet<Value *, 4> CandLoadPtrs;
400 for (const auto &Candidate : Candidates)
401 CandLoadPtrs.insert(Candidate.getLoadPtr());
402
403 const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
404 SmallVector<RuntimePointerCheck, 4> Checks;
405
406 copy_if(AllChecks, std::back_inserter(Checks),
407 [&](const RuntimePointerCheck &Check) {
408 for (auto PtrIdx1 : Check.first->Members)
409 for (auto PtrIdx2 : Check.second->Members)
410 if (needsChecking(PtrIdx1, PtrIdx2, PtrsWrittenOnFwdingPath,
411 CandLoadPtrs))
412 return true;
413 return false;
414 });
415
416 LLVM_DEBUG(dbgs() << "\nPointer Checks (count: " << Checks.size()
417 << "):\n");
418 LLVM_DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks));
419
420 return Checks;
421 }
422
423 /// Perform the transformation for a candidate.
424 void
425 propagateStoredValueToLoadUsers(const StoreToLoadForwardingCandidate &Cand,
426 SCEVExpander &SEE) {
427 // loop:
428 // %x = load %gep_i
429 // = ... %x
430 // store %y, %gep_i_plus_1
431 //
432 // =>
433 //
434 // ph:
435 // %x.initial = load %gep_0
436 // loop:
437 // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
438 // %x = load %gep_i <---- now dead
439 // = ... %x.storeforward
440 // store %y, %gep_i_plus_1
441
442 Value *Ptr = Cand.Load->getPointerOperand();
443 auto *PtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(Ptr));
444 auto *PH = L->getLoopPreheader();
445 assert(PH && "Preheader should exist!");
446 Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
447 PH->getTerminator());
449 new LoadInst(Cand.Load->getType(), InitialPtr, "load_initial",
450 /* isVolatile */ false, Cand.Load->getAlign(),
451 PH->getTerminator()->getIterator());
452 // We don't give any debug location to Initial, because it is inserted
453 // into the loop's preheader. A debug location inside the loop will cause
454 // a misleading stepping when debugging. The test update-debugloc-store
455 // -forwarded.ll checks this.
456 Initial->setDebugLoc(DebugLoc::getDropped());
457
458 PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded");
459 PHI->insertBefore(L->getHeader()->begin());
460 PHI->addIncoming(Initial, PH);
461
462 Type *LoadType = Initial->getType();
463 Type *StoreType = Cand.Store->getValueOperand()->getType();
464 auto &DL = Cand.Load->getDataLayout();
465 (void)DL;
466
467 assert(DL.getTypeSizeInBits(LoadType) == DL.getTypeSizeInBits(StoreType) &&
468 "The type sizes should match!");
469
470 Value *StoreValue = Cand.Store->getValueOperand();
471 if (LoadType != StoreType) {
472 StoreValue = CastInst::CreateBitOrPointerCast(StoreValue, LoadType,
473 "store_forward_cast",
474 Cand.Store->getIterator());
475 // Because it casts the old `load` value and is used by the new `phi`
476 // which replaces the old `load`, we give the `load`'s debug location
477 // to it.
478 cast<Instruction>(StoreValue)->setDebugLoc(Cand.Load->getDebugLoc());
479 }
480
481 PHI->addIncoming(StoreValue, L->getLoopLatch());
482
483 Cand.Load->replaceAllUsesWith(PHI);
484 PHI->setDebugLoc(Cand.Load->getDebugLoc());
485 }
486
487 /// Top-level driver for each loop: find store->load forwarding
488 /// candidates, add run-time checks and perform transformation.
489 bool processLoop() {
490 LLVM_DEBUG(dbgs() << "\nIn \"" << L->getHeader()->getParent()->getName()
491 << "\" checking " << *L << "\n");
492
493 // Look for store-to-load forwarding cases across the
494 // backedge. E.g.:
495 //
496 // loop:
497 // %x = load %gep_i
498 // = ... %x
499 // store %y, %gep_i_plus_1
500 //
501 // =>
502 //
503 // ph:
504 // %x.initial = load %gep_0
505 // loop:
506 // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
507 // %x = load %gep_i <---- now dead
508 // = ... %x.storeforward
509 // store %y, %gep_i_plus_1
510
511 // First start with store->load dependences.
512 auto StoreToLoadDependences = findStoreToLoadDependences(LAI);
513 if (StoreToLoadDependences.empty())
514 return false;
515
516 // Generate an index for each load and store according to the original
517 // program order. This will be used later.
518 InstOrder = LAI.getDepChecker().generateInstructionOrderMap();
519
520 // To keep things simple for now, remove those where the load is potentially
521 // fed by multiple stores.
522 removeDependencesFromMultipleStores(StoreToLoadDependences);
523 if (StoreToLoadDependences.empty())
524 return false;
525
526 // Filter the candidates further.
528 for (const StoreToLoadForwardingCandidate &Cand : StoreToLoadDependences) {
529 LLVM_DEBUG(dbgs() << "Candidate " << Cand);
530
531 // Make sure that the stored values is available everywhere in the loop in
532 // the next iteration.
533 if (!doesStoreDominatesAllLatches(Cand.Store->getParent(), L, DT))
534 continue;
535
536 // If the load is conditional we can't hoist its 0-iteration instance to
537 // the preheader because that would make it unconditional. Thus we would
538 // access a memory location that the original loop did not access.
539 if (isLoadConditional(Cand.Load, L))
540 continue;
541
542 // Check whether the SCEV difference is the same as the induction step,
543 // thus we load the value in the next iteration.
544 if (!Cand.isDependenceDistanceOfOne(PSE, L, *DT))
545 continue;
546
547 assert(isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Load->getPointerOperand())) &&
548 "Loading from something other than indvar?");
549 assert(
550 isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Store->getPointerOperand())) &&
551 "Storing to something other than indvar?");
552
553 Candidates.push_back(Cand);
555 dbgs()
556 << Candidates.size()
557 << ". Valid store-to-load forwarding across the loop backedge\n");
558 }
559 if (Candidates.empty())
560 return false;
561
562 // Check intervening may-alias stores. These need runtime checks for alias
563 // disambiguation.
564 SmallVector<RuntimePointerCheck, 4> Checks = collectMemchecks(Candidates);
565
566 // Too many checks are likely to outweigh the benefits of forwarding.
567 if (Checks.size() > Candidates.size() * CheckPerElim) {
568 LLVM_DEBUG(dbgs() << "Too many run-time checks needed.\n");
569 return false;
570 }
571
572 if (LAI.getPSE().getPredicate().getComplexity() >
574 LLVM_DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
575 return false;
576 }
577
578 if (!L->isLoopSimplifyForm()) {
579 LLVM_DEBUG(dbgs() << "Loop is not is loop-simplify form");
580 return false;
581 }
582
583 if (!Checks.empty() || !LAI.getPSE().getPredicate().isAlwaysTrue()) {
584 if (LAI.hasConvergentOp()) {
585 LLVM_DEBUG(dbgs() << "Versioning is needed but not allowed with "
586 "convergent calls\n");
587 return false;
588 }
589
590 auto *HeaderBB = L->getHeader();
591 if (llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI,
592 PGSOQueryType::IRPass)) {
594 dbgs() << "Versioning is needed but not allowed when optimizing "
595 "for size.\n");
596 return false;
597 }
598
599 // Point of no-return, start the transformation. First, version the loop
600 // if necessary.
601
602 LoopVersioning LV(LAI, Checks, L, LI, DT, PSE.getSE());
603 LV.versionLoop();
604
605 // After versioning, some of the candidates' pointers could stop being
606 // SCEVAddRecs. We need to filter them out.
607 auto NoLongerGoodCandidate = [this](
608 const StoreToLoadForwardingCandidate &Cand) {
609 return !isa<SCEVAddRecExpr>(
610 PSE.getSCEV(Cand.Load->getPointerOperand())) ||
612 PSE.getSCEV(Cand.Store->getPointerOperand()));
613 };
614 llvm::erase_if(Candidates, NoLongerGoodCandidate);
615 }
616
617 // Next, propagate the value stored by the store to the users of the load.
618 // Also for the first iteration, generate the initial value of the load.
619 SCEVExpander SEE(*PSE.getSE(), "storeforward");
620 for (const auto &Cand : Candidates)
621 propagateStoredValueToLoadUsers(Cand, SEE);
622 NumLoopLoadEliminted += Candidates.size();
623
624 return true;
625 }
626
627private:
628 Loop *L;
629
630 /// Maps the load/store instructions to their index according to
631 /// program order.
632 DenseMap<Instruction *, unsigned> InstOrder;
633
634 // Analyses used.
635 LoopInfo *LI;
636 const LoopAccessInfo &LAI;
637 DominatorTree *DT;
638 BlockFrequencyInfo *BFI;
639 ProfileSummaryInfo *PSI;
640 PredicatedScalarEvolution PSE;
641};
642
643} // end anonymous namespace
644
646 DominatorTree &DT,
650 LoopAccessInfoManager &LAIs) {
651 // Build up a worklist of inner-loops to transform to avoid iterator
652 // invalidation.
653 // FIXME: This logic comes from other passes that actually change the loop
654 // nest structure. It isn't clear this is necessary (or useful) for a pass
655 // which merely optimizes the use of loads in a loop.
656 SmallVector<Loop *, 8> Worklist;
657
658 bool Changed = false;
659
660 for (Loop *TopLevelLoop : LI)
661 for (Loop *L : depth_first(TopLevelLoop)) {
662 Changed |= simplifyLoop(L, &DT, &LI, SE, AC, /*MSSAU*/ nullptr, false);
663 // We only handle inner-most loops.
664 if (L->isInnermost())
665 Worklist.push_back(L);
666 }
667
668 // Now walk the identified inner loops.
669 for (Loop *L : Worklist) {
670 // Match historical behavior
671 if (!L->isRotatedForm() || !L->getExitingBlock())
672 continue;
673 // The actual work is performed by LoadEliminationForLoop.
674 LoadEliminationForLoop LEL(L, &LI, LAIs.getInfo(*L), &DT, BFI, PSI);
675 Changed |= LEL.processLoop();
676 if (Changed)
677 LAIs.clear();
678 }
679 return Changed;
680}
681
684 auto &LI = AM.getResult<LoopAnalysis>(F);
685 // There are no loops in the function. Return before computing other expensive
686 // analyses.
687 if (LI.empty())
688 return PreservedAnalyses::all();
689 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
690 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
691 auto &AC = AM.getResult<AssumptionAnalysis>(F);
692 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
693 auto *PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
694 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
695 &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
697
698 bool Changed = eliminateLoadsAcrossLoops(F, LI, DT, BFI, PSI, &SE, &AC, LAIs);
699
700 if (!Changed)
701 return PreservedAnalyses::all();
702
706 return PA;
707}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines the DenseMap class.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
#define Check(C,...)
This is the interface for a simple mod/ref and alias analysis over globals.
This header defines various interfaces for pass management in LLVM.
This header provides classes for managing per-loop analyses.
static bool eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, ScalarEvolution *SE, AssumptionCache *AC, LoopAccessInfoManager &LAIs)
static cl::opt< unsigned > LoadElimSCEVCheckThreshold("loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Load Elimination"))
static bool isLoadConditional(LoadInst *Load, Loop *L)
Return true if the load is not executed on all paths in the loop.
static bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L, DominatorTree *DT)
Check if the store dominates all latches, so as long as there is no intervening store this value will...
static cl::opt< unsigned > CheckPerElim("runtime-check-per-loop-load-elim", cl::Hidden, cl::desc("Max number of memchecks allowed per eliminated load on average"), cl::init(1))
This header defines the LoopLoadEliminationPass object.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static DebugLoc getDropped()
Definition DebugLoc.h:163
Analysis pass which computes a DominatorTree.
Definition Dominators.h:278
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
An instruction for reading from memory.
Value * getPointerOperand()
Align getAlign() const
Return the alignment of the access that is being performed.
This analysis provides dependence information for the memory accesses of a loop.
LLVM_ABI const LoopAccessInfo & getInfo(Loop &L, bool AllowPartial=false)
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getValueOperand()
Value * getPointerOperand()
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
Changed
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2078
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
std::pair< const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup * > RuntimePointerCheck
A memcheck which made up of a pair of grouped pointers.
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1791
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2088
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
iterator_range< df_iterator< T > > depth_first(const T &G)
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define SEE(c)
Definition regcomp.c:248
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)