LLVM 23.0.0git
LoopPeel.cpp
Go to the documentation of this file.
1//===- LoopPeel.cpp -------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Loop Peeling Utilities.
10//===----------------------------------------------------------------------===//
11
13#include "llvm/ADT/DenseMap.h"
14#include "llvm/ADT/MapVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/Analysis/Loads.h"
24#include "llvm/IR/BasicBlock.h"
25#include "llvm/IR/Dominators.h"
26#include "llvm/IR/Function.h"
27#include "llvm/IR/InstrTypes.h"
28#include "llvm/IR/Instruction.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/IR/MDBuilder.h"
37#include "llvm/Support/Debug.h"
45#include <algorithm>
46#include <cassert>
47#include <cstdint>
48#include <optional>
49
50using namespace llvm;
51using namespace llvm::PatternMatch;
52using namespace llvm::SCEVPatternMatch;
53
54#define DEBUG_TYPE "loop-peel"
55
56STATISTIC(NumPeeled, "Number of loops peeled");
57STATISTIC(NumPeeledEnd, "Number of loops peeled from end");
58
namespace llvm {

// Command-line knobs controlling the peeling heuristics in this file.
// NOTE(review): several `static cl::opt<...> Name(` declaration headers in
// this block were lost in extraction; the cl::desc() strings below document
// each flag. Restore the headers from the original source.

// Testing knob: force the exact unroll peeling count.
    "unroll-peel-count", cl::Hidden,
    cl::desc("Set the unroll peeling count, for testing purposes"));

// Master switch (default on) for peeling loops whose dynamic trip count is
// believed to be low.
static cl::opt<bool>
    UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
                       cl::desc("Allows loops to be peeled when the dynamic "
                                "trip count is known to be low."));

// By default only innermost loops are peeled; this extends peeling to whole
// loop nests.
static cl::opt<bool>
    UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling",
                                cl::init(false), cl::Hidden,
                                cl::desc("Allows loop nests to be peeled."));

// Upper bound on the total number of peeled iterations (default 7).
// NOTE(review): declaration header lost in extraction.
    "unroll-peel-max-count", cl::init(7), cl::Hidden,
    cl::desc("Max average trip count which will cause loop peeling."));

// When non-zero, overrides any profile-guided peel count decision.
// NOTE(review): declaration header lost in extraction.
    "unroll-force-peel-count", cl::init(0), cl::Hidden,
    cl::desc("Force a peel count regardless of profiling information."));

// Kill switch for advanced peeling; see D134803 for the convergent-target
// issues it works around.
// NOTE(review): declaration header (and its cl::desc( opener) lost in
// extraction.
    "disable-advanced-peeling", cl::init(false), cl::Hidden,
    "Disable advance peeling. Issues for convergent targets (D134803)."));

// Enables the PhiAnalyzer heuristic that peels to turn header phis into
// induction variables.
// NOTE(review): declaration header lost in extraction.
    "enable-peeling-for-iv", cl::init(false), cl::Hidden,
    cl::desc("Enable peeling to convert Phi nodes into IVs"));

// Loop-metadata key recording how many iterations were already peeled off,
// so repeated runs respect the overall UnrollPeelMaxCount budget.
static const char *PeeledCountMetaData = "llvm.loop.peeled.count";

} // namespace llvm
95
// Check whether we are capable of peeling this loop.
// Requires loop-simplify form (preheader, single backedge, dedicated exits).
// Note the exit-block requirement below is a profitability filter, not a
// legality one.
bool llvm::canPeel(const Loop *L) {
  // Make sure the loop is in simplified form
  if (!L->isLoopSimplifyForm())
    return false;
  // NOTE(review): the guard condition preceding this early return was lost
  // in extraction (presumably testing the advanced-peeling flag above) —
  // restore from the original source.
    return true;

  // NOTE(review): the declaration of `Exits` was lost in extraction.
  L->getUniqueNonLatchExitBlocks(Exits);
  // The latch must either be the only exiting block or all non-latch exit
  // blocks have either a deopt or unreachable terminator or compose a chain of
  // blocks where the last one is either deopt or unreachable terminated. Both
  // deopt and unreachable terminators are a strong indication they are not
  // taken. Note that this is a profitability check, not a legality check. Also
  // note that LoopPeeling currently can only update the branch weights of latch
  // blocks and branch weights to blocks with deopt or unreachable do not need
  // updating.
  // NOTE(review): the final return expression (checking the Exits vector)
  // was lost in extraction.
}
116
117namespace {
118
119// As a loop is peeled, it may be the case that Phi nodes become
120// loop-invariant (ie, known because there is only one choice).
121// For example, consider the following function:
122// void g(int);
123// void binary() {
124// int x = 0;
125// int y = 0;
126// int a = 0;
127// for(int i = 0; i <100000; ++i) {
128// g(x);
129// x = y;
130// g(a);
131// y = a + 1;
132// a = 5;
133// }
134// }
135// Peeling 3 iterations is beneficial because the values for x, y and a
136// become known. The IR for this loop looks something like the following:
137//
138// %i = phi i32 [ 0, %entry ], [ %inc, %if.end ]
139// %a = phi i32 [ 0, %entry ], [ 5, %if.end ]
140// %y = phi i32 [ 0, %entry ], [ %add, %if.end ]
141// %x = phi i32 [ 0, %entry ], [ %y, %if.end ]
142// ...
143// tail call void @_Z1gi(i32 signext %x)
144// tail call void @_Z1gi(i32 signext %a)
145// %add = add nuw nsw i32 %a, 1
146// %inc = add nuw nsw i32 %i, 1
147// %exitcond = icmp eq i32 %inc, 100000
148// br i1 %exitcond, label %for.cond.cleanup, label %for.body
149//
150// The arguments for the calls to g will become known after 3 iterations
151// of the loop, because the phi nodes values become known after 3 iterations
152// of the loop (ie, they are known on the 4th iteration, so peel 3 iterations).
153// The first iteration has g(0), g(0); the second has g(0), g(5); the
154// third has g(1), g(5) and the fourth (and all subsequent) have g(6), g(5).
155// Now consider the phi nodes:
156// %a is a phi with constants so it is determined after iteration 1.
157// %y is a phi based on a constant and %a so it is determined on
158// the iteration after %a is determined, so iteration 2.
159// %x is a phi based on a constant and %y so it is determined on
160// the iteration after %y, so iteration 3.
161// %i is based on itself (and is an induction variable) so it is
162// never determined.
163// This means that peeling off 3 iterations will result in being able to
164// remove the phi nodes for %a, %y, and %x. The arguments for the
165// corresponding calls to g are determined and the code for computing
166// x, y, and a can be removed.
167//
168// Similarly, there are cases where peeling makes Phi nodes loop-inductions
169// (i.e., the value is increased or decreased by a fixed amount on every
170// iteration). For example, consider the following function.
171//
172// #define N 100
173// void f(int a[], int b[]) {
174// int im = N - 1;
175// for (int i = 0; i < N; i++) {
176// a[i] = b[i] + b[im];
177// im = i;
178// }
179// }
180//
181// The IR of the loop will look something like the following.
182//
183// %i = phi i32 [ 0, %entry ], [ %i.next, %for.body ]
184// %im = phi i32 [ 99, %entry ], [ %i, %for.body ]
185// ...
186// %i.next = add nuw nsw i32 %i, 1
187// ...
188//
189// In this case, %im becomes a loop-induction variable by peeling 1 iteration,
190// because %i is a loop-induction one. The peeling count can be determined by
191// the same algorithm with loop-invariant case. Such peeling is profitable for
192// loop-vectorization.
193//
194// The PhiAnalyzer class calculates how many times a loop should be
195// peeled based on the above analysis of the phi nodes in the loop while
196// respecting the maximum specified.
197class PhiAnalyzer {
198public:
199 PhiAnalyzer(const Loop &L, unsigned MaxIterations, bool PeelForIV);
200
201 // Calculate the sufficient minimum number of iterations of the loop to peel
202 // such that phi instructions become determined (subject to allowable limits)
203 std::optional<unsigned> calculateIterationsToPeel();
204
205protected:
206 enum class PeelCounterType {
207 Invariant,
208 Induction,
209 };
210
211 using PeelCounterValue = std::pair<unsigned, PeelCounterType>;
212 using PeelCounter = std::optional<PeelCounterValue>;
213 const PeelCounter Unknown = std::nullopt;
214
215 // Add 1 respecting Unknown and return Unknown if result over MaxIterations
216 PeelCounter addOne(PeelCounter PC) const {
217 if (PC == Unknown)
218 return Unknown;
219 auto [Val, Ty] = *PC;
220 return (Val + 1 <= MaxIterations) ? PeelCounter({Val + 1, Ty}) : Unknown;
221 }
222
223 // Return a value representing zero for the given counter type.
224 PeelCounter makeZero(PeelCounterType Ty) const {
225 return PeelCounter({0, Ty});
226 }
227
228 // Calculate the number of iterations after which the given value becomes an
229 // invariant or an induction.
230 PeelCounter calculate(const Value &);
231
232 // Auxiliary function to calculate the number of iterations for a comparison
233 // instruction or a binary operator.
234 PeelCounter mergeTwoCounters(const Instruction &CmpOrBinaryOp,
235 const PeelCounterValue &LHS,
236 const PeelCounterValue &RHS) const;
237
238 // Returns true if the \p Phi is an induction in the target loop. This is a
239 // lightweight check and possible to detect an IV in some cases.
240 bool isInductionPHI(const PHINode *Phi) const;
241
242 const Loop &L;
243 const unsigned MaxIterations;
244 const bool PeelForIV;
245
246 // Map of Values to number of iterations to invariance or induction
247 SmallDenseMap<const Value *, PeelCounter> IterationsToInvarianceOrInduction;
248};
249
250PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations, bool PeelForIV)
251 : L(L), MaxIterations(MaxIterations), PeelForIV(PeelForIV) {
252 assert(canPeel(&L) && "loop is not suitable for peeling");
253 assert(MaxIterations > 0 && "no peeling is allowed?");
254}
255
256/// Test whether \p Phi is an induction variable. Although this can be
257/// determined using SCEV analysis, it is expensive to compute here. Instead,
258/// we perform cheaper checks that may not detect complex cases but are
259/// sufficient for some situations.
260bool PhiAnalyzer::isInductionPHI(const PHINode *Phi) const {
261 // Currently we only support a loop that has single latch.
262 BasicBlock *Latch = L.getLoopLatch();
263 if (Latch == nullptr)
264 return false;
265
266 Value *Cur = Phi->getIncomingValueForBlock(Latch);
267 SmallPtrSet<Value *, 4> Visited;
268 bool VisitBinOp = false;
269
270 // Starting from the incoming value of the Phi, we follow the use-def chain.
271 // We consider Phi to be an IV if we can reach it again by traversing only
272 // add, sub, or cast instructions.
273 while (true) {
274 if (Cur == Phi)
275 break;
276
277 // Avoid infinite loop.
278 if (!Visited.insert(Cur).second)
279 return false;
280
281 auto *I = dyn_cast<Instruction>(Cur);
282 if (!I || !L.contains(I))
283 return false;
284
285 if (auto *Cast = dyn_cast<CastInst>(I)) {
286 Cur = Cast->getOperand(0);
287 } else if (auto *BinOp = dyn_cast<BinaryOperator>(I)) {
288 if (BinOp->getOpcode() != Instruction::Add &&
289 BinOp->getOpcode() != Instruction::Sub)
290 return false;
291 if (!isa<ConstantInt>(BinOp->getOperand(1)))
292 return false;
293
294 VisitBinOp = true;
295 Cur = BinOp->getOperand(0);
296 } else {
297 return false;
298 }
299 }
300
301 // Ignore cases where no binary operations are visited.
302 return VisitBinOp;
303}
304
305/// When either \p LHS or \p RHS is an IV, the result of \p CmpOrBinaryOp is
306/// considered an IV only if it is an addition or a subtraction. Otherwise the
307/// result can be a value that is neither a loop-invariant nor an IV.
308///
309/// If both \p LHS and \p RHS are loop-invariants, then the result of
310/// \CmpOrBinaryOp is also a loop-invariant.
311PhiAnalyzer::PeelCounter
312PhiAnalyzer::mergeTwoCounters(const Instruction &CmpOrBinaryOp,
313 const PeelCounterValue &LHS,
314 const PeelCounterValue &RHS) const {
315 auto &[LVal, LTy] = LHS;
316 auto &[RVal, RTy] = RHS;
317 unsigned NewVal = std::max(LVal, RVal);
318
319 if (LTy == PeelCounterType::Induction || RTy == PeelCounterType::Induction) {
320 if (const auto *BinOp = dyn_cast<BinaryOperator>(&CmpOrBinaryOp)) {
321 if (BinOp->getOpcode() == Instruction::Add ||
322 BinOp->getOpcode() == Instruction::Sub)
323 return PeelCounter({NewVal, PeelCounterType::Induction});
324 }
325 return Unknown;
326 }
327 return PeelCounter({NewVal, PeelCounterType::Invariant});
328}
329
// This function calculates the number of iterations after which the value
// becomes an invariant or an induction. The pre-calculated values are
// memoized in a map (the memoization also serves as the cycle-breaker for
// the recursion below).
// N.B. This number will be Unknown or <= MaxIterations.
// The function is calculated according to the following definition:
// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
//   F(%x) = G(%y) + 1 (N.B. [MaxIterations | Unknown] + 1 => Unknown)
//   G(%y) = 0 if %y is a loop invariant
//   G(%y) = G(%BackEdgeValue) if %y is a phi in the header block
//   G(%y) = TODO: if %y is an expression based on phis and loop invariants
//     The example looks like:
//     %x = phi(0, %a)  <-- becomes invariant starting from 3rd iteration.
//     %y = phi(0, 5)
//     %a = %y + 1
//   G(%y) = Unknown otherwise (including phi not in header block)
PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
  // If we already know the answer, take it from the map.
  // Otherwise, place Unknown to map to avoid infinite recursion. Such
  // cycles can never stop on an invariant.
  auto [I, Inserted] =
      IterationsToInvarianceOrInduction.try_emplace(&V, Unknown);
  if (!Inserted)
    return I->second;

  if (L.isLoopInvariant(&V))
    // Loop invariant so known at start.
    return (IterationsToInvarianceOrInduction[&V] =
                makeZero(PeelCounterType::Invariant));
  if (const PHINode *Phi = dyn_cast<PHINode>(&V)) {
    if (Phi->getParent() != L.getHeader()) {
      // Phi is not in header block so Unknown.
      assert(IterationsToInvarianceOrInduction[&V] == Unknown &&
             "unexpected value saved");
      return Unknown;
    }

    // If Phi is an induction, register it as a starting point (zero
    // iterations needed, counter type Induction).
    if (PeelForIV && isInductionPHI(Phi))
      return (IterationsToInvarianceOrInduction[&V] =
                  makeZero(PeelCounterType::Induction));

    // We need to analyze the input from the back edge and add 1. Note the
    // recursive call may update the map, so re-index rather than reuse the
    // iterator obtained above.
    Value *Input = Phi->getIncomingValueForBlock(L.getLoopLatch());
    PeelCounter Iterations = calculate(*Input);
    assert(IterationsToInvarianceOrInduction[Input] == Iterations &&
           "unexpected value saved");
    return (IterationsToInvarianceOrInduction[Phi] = addOne(Iterations));
  }
  if (const Instruction *I = dyn_cast<Instruction>(&V)) {
    if (isa<CmpInst>(I) || I->isBinaryOp()) {
      // Binary instructions get the max of the operands (merged via
      // mergeTwoCounters, which also decides the resulting counter type).
      PeelCounter LHS = calculate(*I->getOperand(0));
      if (LHS == Unknown)
        return Unknown;
      PeelCounter RHS = calculate(*I->getOperand(1));
      if (RHS == Unknown)
        return Unknown;
      return (IterationsToInvarianceOrInduction[I] =
                  mergeTwoCounters(*I, *LHS, *RHS));
    }
    if (I->isCast())
      // Cast instructions get the value of the operand.
      return (IterationsToInvarianceOrInduction[I] =
                  calculate(*I->getOperand(0)));
  }
  // TODO: handle more expressions

  // Everything else is Unknown.
  assert(IterationsToInvarianceOrInduction[&V] == Unknown &&
         "unexpected value saved");
  return Unknown;
}
401
402std::optional<unsigned> PhiAnalyzer::calculateIterationsToPeel() {
403 unsigned Iterations = 0;
404 for (auto &PHI : L.getHeader()->phis()) {
405 PeelCounter ToInvarianceOrInduction = calculate(PHI);
406 if (ToInvarianceOrInduction != Unknown) {
407 unsigned Val = ToInvarianceOrInduction->first;
408 assert(Val <= MaxIterations && "bad result in phi analysis");
409 Iterations = std::max(Iterations, Val);
410 if (Iterations == MaxIterations)
411 break;
412 }
413 }
414 assert((Iterations <= MaxIterations) && "bad result in phi analysis");
415 return Iterations ? std::optional<unsigned>(Iterations) : std::nullopt;
416}
417
418} // unnamed namespace
419
// Try to find any invariant memory reads that will become dereferenceable in
// the remainder loop after peeling. The load must also be used (transitively)
// by an exit condition. Returns the number of iterations to peel off (at the
// moment either 0 or 1).
// NOTE(review): the first line of this definition (return type, name and the
// leading Loop parameter) was lost in extraction — restore from the original
// source.
                                                   DominatorTree &DT,
                                                   AssumptionCache *AC) {
  // Skip loops with a single exiting block, because there should be no benefit
  // for the heuristic below.
  if (L.getExitingBlock())
    return 0;

  // All non-latch exit blocks must have an UnreachableInst terminator.
  // Otherwise the heuristic below may not be profitable.
  // NOTE(review): the declaration of `Exits` was lost in extraction.
  L.getUniqueNonLatchExitBlocks(Exits);
  if (any_of(Exits, [](const BasicBlock *BB) {
        return !isa<UnreachableInst>(BB->getTerminator());
      }))
    return 0;

  // Now look for invariant loads that dominate the latch and are not known to
  // be dereferenceable. If there are such loads and no writes, they will become
  // dereferenceable in the loop if the first iteration is peeled off. Also
  // collect the set of instructions controlled by such loads. Only peel if an
  // exit condition uses (transitively) such a load.
  BasicBlock *Header = L.getHeader();
  BasicBlock *Latch = L.getLoopLatch();
  SmallPtrSet<Value *, 8> LoadUsers;
  const DataLayout &DL = L.getHeader()->getDataLayout();
  for (BasicBlock *BB : L.blocks()) {
    for (Instruction &I : *BB) {
      // Any (other) write in the loop could clobber the load, so bail out.
      // Calls that only access inaccessible memory can never alias with loads.
      if (I.mayWriteToMemory() &&
          !(isa<CallBase>(I) &&
            cast<CallBase>(I).onlyAccessesInaccessibleMemory()))
        return 0;

      // Propagate membership transitively: users of a tracked instruction
      // are tracked too (blocks are visited in a loop-consistent order).
      if (LoadUsers.contains(&I))
        LoadUsers.insert_range(I.users());
      // Do not look for reads in the header; they can already be hoisted
      // without peeling.
      if (BB == Header)
        continue;
      if (auto *LI = dyn_cast<LoadInst>(&I)) {
        Value *Ptr = LI->getPointerOperand();
        // Track loads of invariant pointers that dominate the latch but are
        // not already provably dereferenceable.
        if (DT.dominates(BB, Latch) && L.isLoopInvariant(Ptr) &&
            !isDereferenceablePointer(Ptr, LI->getType(), DL, LI, AC, &DT))
          LoadUsers.insert_range(I.users());
      }
    }
  }
  // Peel exactly one iteration when an exit condition transitively depends
  // on such a load.
  SmallVector<BasicBlock *> ExitingBlocks;
  L.getExitingBlocks(ExitingBlocks);
  if (any_of(ExitingBlocks, [&LoadUsers](BasicBlock *Exiting) {
        return LoadUsers.contains(Exiting->getTerminator());
      }))
    return 1;
  return 0;
}
480
// Decides whether peeling code generation can adjust this loop's exit
// condition to peel the final iteration.
// NOTE(review): the first line of this definition (return type, name and
// parameters — the body uses a Loop `L` and ScalarEvolution `SE`) was lost
// in extraction — restore from the original source.
  const SCEV *BTC = SE.getBackedgeTakenCount(&L);
  // NOTE(review): the guard condition for this early return (a check on BTC)
  // was lost in extraction.
    return false;

  // Check if the exit condition of the loop can be adjusted by the peeling
  // codegen. For now, it must
  // * exit via the latch,
  // * the exit condition must be a NE/EQ compare of an induction with step
  //   of 1 and must only be used by the exiting branch.
  BasicBlock *Latch = L.getLoopLatch();
  Value *Inc;
  Value *Bound;
  CmpPredicate Pred;
  BasicBlock *Succ1;
  BasicBlock *Succ2;
  return Latch && Latch == L.getExitingBlock() &&
         match(Latch->getTerminator(),
               m_Br(m_OneUse(m_ICmp(Pred, m_Value(Inc), m_Value(Bound))),
                    m_BasicBlock(Succ1), m_BasicBlock(Succ2))) &&
         ((Pred == CmpInst::ICMP_EQ && Succ2 == L.getHeader()) ||
          (Pred == CmpInst::ICMP_NE && Succ1 == L.getHeader())) &&
         Bound->getType()->isIntegerTy() &&
         SE.isLoopInvariant(SE.getSCEV(Bound), &L) &&
         match(SE.getSCEV(Inc),
               // NOTE(review): the final SCEV pattern argument and closing
               // `);` of this match() call were lost in extraction.
}
508
/// Returns true if the last iteration can be peeled off and the condition (Pred
/// LeftAR, RightSCEV) is known at the last iteration and the inverse condition
/// is known at the second-to-last.
// NOTE(review): the first line of this definition (return type, name and the
// leading parameters — a Loop and the compare predicate) was lost in
// extraction — restore from the original source.
                                  const SCEVAddRecExpr *LeftAR,
                                  const SCEV *RightSCEV, ScalarEvolution &SE,
                                  const TargetTransformInfo &TTI) {
  if (!canPeelLastIteration(L, SE))
    return false;

  const SCEV *BTC = SE.getBackedgeTakenCount(&L);
  SCEVExpander Expander(SE, "loop-peel");
  // NOTE(review): part of this condition was lost in extraction (presumably
  // an expandability check for BTC at the loop predecessor's terminator).
  if (!SE.isKnownNonZero(BTC) &&
          L.getLoopPredecessor()->getTerminator()))
    return false;

  // Sharpen BTC and the bound with loop-guard facts before querying the
  // predicates below.
  auto Guards = ScalarEvolution::LoopGuards::collect(&L, SE);
  BTC = SE.applyLoopGuards(BTC, Guards);
  RightSCEV = SE.applyLoopGuards(RightSCEV, Guards);
  const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration(BTC, SE);
  const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration(
      SE.getMinusSCEV(BTC, SE.getOne(BTC->getType())), SE);

  // Profitable only when the compare is statically known to flip exactly at
  // the final iteration.
  return SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), ValAtLastIter,
                             RightSCEV) &&
         SE.isKnownPredicate(Pred, ValAtSecondToLastIter, RightSCEV);
}
537
// Return the number of iterations to peel off from the beginning and end of the
// loop respectively, that make conditions in the body true/false. For example,
// if we peel 2 iterations off the loop below, the condition i < 2 can be
// evaluated at compile time.
//
//  for (i = 0; i < n; i++)
//    if (i < 2)
//      ..
//    else
//      ..
//  }
static std::pair<unsigned, unsigned>
countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
                         const TargetTransformInfo &TTI) {
  assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
  // Peel counts for the first iterations and the last iteration respectively.
  unsigned DesiredPeelCount = 0;
  unsigned DesiredPeelCountLast = 0;

  // Do not peel the entire loop.
  const SCEV *BE = SE.getConstantMaxBackedgeTakenCount(&L);
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(BE))
    MaxPeelCount =
        std::min((unsigned)SC->getAPInt().getLimitedValue() - 1, MaxPeelCount);

  // Increase PeelCount while (IterVal Pred BoundSCEV) condition is satisfied;
  // return true if inversed condition become known before reaching the
  // MaxPeelCount limit.
  auto PeelWhilePredicateIsKnown =
      [&](unsigned &PeelCount, const SCEV *&IterVal, const SCEV *BoundSCEV,
          const SCEV *Step, ICmpInst::Predicate Pred) {
        while (PeelCount < MaxPeelCount &&
               SE.isKnownPredicate(Pred, IterVal, BoundSCEV)) {
          IterVal = SE.getAddExpr(IterVal, Step);
          ++PeelCount;
        }
        return SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
                                   BoundSCEV);
      };

  // Bound on the recursion through and/or conditions below.
  const unsigned MaxDepth = 4;
  std::function<void(Value *, unsigned)> ComputePeelCount =
      [&](Value *Condition, unsigned Depth) -> void {
    if (!Condition->getType()->isIntegerTy() || Depth >= MaxDepth)
      return;

    // Recurse into and/or so every leaf compare is considered.
    Value *LeftVal, *RightVal;
    if (match(Condition, m_And(m_Value(LeftVal), m_Value(RightVal))) ||
        match(Condition, m_Or(m_Value(LeftVal), m_Value(RightVal)))) {
      ComputePeelCount(LeftVal, Depth + 1);
      ComputePeelCount(RightVal, Depth + 1);
      return;
    }

    CmpPredicate Pred;
    if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
      return;

    const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
    const SCEV *RightSCEV = SE.getSCEV(RightVal);

    // Do not consider predicates that are known to be true or false
    // independently of the loop iteration.
    if (SE.evaluatePredicate(Pred, LeftSCEV, RightSCEV))
      return;

    // Check if we have a condition with one AddRec and one non AddRec
    // expression. Normalize LeftSCEV to be the AddRec.
    if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
      if (isa<SCEVAddRecExpr>(RightSCEV)) {
        std::swap(LeftSCEV, RightSCEV);
        // NOTE(review): the statement swapping the predicate direction to
        // match the operand swap was lost in extraction.
      } else
        return;
    }

    const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);

    // Avoid huge SCEV computations in the loop below, make sure we only
    // consider AddRecs of the loop we are trying to peel.
    if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
      return;
    if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) &&
        !SE.getMonotonicPredicateType(LeftAR, Pred))
      return;

    // Check if extending the current DesiredPeelCount lets us evaluate Pred
    // or !Pred in the loop body statically.
    unsigned NewPeelCount = DesiredPeelCount;

    const SCEV *IterVal = LeftAR->evaluateAtIteration(
        SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);

    // If the original condition is not known, get the negated predicate
    // (which holds on the else branch) and check if it is known. This allows
    // us to peel of iterations that make the original condition false.
    if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
      // NOTE(review): the if-body statement (inverting Pred) was lost in
      // extraction; as written the `if` erroneously guards the following
      // declaration — restore from the original source.

    const SCEV *Step = LeftAR->getStepRecurrence(SE);
    if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, RightSCEV, Step,
                                   Pred)) {
      // Peeling from the front cannot settle the compare; see if peeling the
      // last iteration can.
      if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE, TTI))
        DesiredPeelCountLast = 1;
      return;
    }

    // However, for equality comparisons, that isn't always sufficient to
    // eliminate the comparsion in loop body, we may need to peel one more
    // iteration. See if that makes !Pred become unknown again.
    const SCEV *NextIterVal = SE.getAddExpr(IterVal, Step);
    if (ICmpInst::isEquality(Pred) &&
        !SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), NextIterVal,
                             RightSCEV) &&
        !SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
        SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) {
      if (NewPeelCount >= MaxPeelCount)
        return; // Need to peel one more iteration, but can't. Give up.
      ++NewPeelCount; // Great!
    }

    DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount);
    DesiredPeelCountLast = std::max(DesiredPeelCountLast, NewPeelCount);
  };

  // Same idea for min/max intrinsics: peel until the loop-variant operand is
  // known to win (or lose) against the invariant bound.
  auto ComputePeelCountMinMax = [&](MinMaxIntrinsic *MinMax) {
    if (!MinMax->getType()->isIntegerTy())
      return;
    Value *LHS = MinMax->getLHS(), *RHS = MinMax->getRHS();
    const SCEV *BoundSCEV, *IterSCEV;
    if (L.isLoopInvariant(LHS)) {
      BoundSCEV = SE.getSCEV(LHS);
      IterSCEV = SE.getSCEV(RHS);
    } else if (L.isLoopInvariant(RHS)) {
      BoundSCEV = SE.getSCEV(RHS);
      IterSCEV = SE.getSCEV(LHS);
    } else
      return;
    const auto *AddRec = dyn_cast<SCEVAddRecExpr>(IterSCEV);
    // For simplicity, we support only affine recurrences.
    if (!AddRec || !AddRec->isAffine() || AddRec->getLoop() != &L)
      return;
    const SCEV *Step = AddRec->getStepRecurrence(SE);
    bool IsSigned = MinMax->isSigned();
    // To minimize number of peeled iterations, we use strict relational
    // predicates here.
    // NOTE(review): the declaration of `Pred` was lost in extraction.
    if (SE.isKnownPositive(Step))
      Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
    else if (SE.isKnownNegative(Step))
      Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
    else
      return;
    // Check that AddRec is not wrapping.
    if (!(IsSigned ? AddRec->hasNoSignedWrap() : AddRec->hasNoUnsignedWrap()))
      return;
    unsigned NewPeelCount = DesiredPeelCount;
    const SCEV *IterVal = AddRec->evaluateAtIteration(
        SE.getConstant(AddRec->getType(), NewPeelCount), SE);
    if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, BoundSCEV, Step,
                                   Pred)) {
      if (shouldPeelLastIteration(L, Pred, AddRec, BoundSCEV, SE, TTI))
        DesiredPeelCountLast = 1;
      return;
    }
    DesiredPeelCount = NewPeelCount;
  };

  for (BasicBlock *BB : L.blocks()) {
    for (Instruction &I : *BB) {
      // NOTE(review): the two `if` statements selecting a SelectInst (`SI`)
      // and a MinMaxIntrinsic (`MinMax`) that guard these calls were lost in
      // extraction — restore from the original source.
        ComputePeelCount(SI->getCondition(), 0);
        ComputePeelCountMinMax(MinMax);
    }

    auto *BI = dyn_cast<CondBrInst>(BB->getTerminator());
    if (!BI)
      continue;

    // Ignore loop exit condition.
    if (L.getLoopLatch() == BB)
      continue;

    ComputePeelCount(BI->getCondition(), 0);
  }

  return {DesiredPeelCount, DesiredPeelCountLast};
}
726
/// This "heuristic" exactly matches implicit behavior which used to exist
/// inside getLoopEstimatedTripCount. It was added here to keep an
/// improvement inside that API from causing peeling to become more aggressive.
/// This should probably be removed.
// NOTE(review): the first line of this definition (static return type, name
// and Loop parameter) was lost in extraction — restore from the original
// source.
  BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return true;

  CondBrInst *LatchBR = dyn_cast<CondBrInst>(Latch->getTerminator());
  if (!LatchBR || !L->isLoopExiting(Latch))
    return true;

  assert((LatchBR->getSuccessor(0) == L->getHeader() ||
          LatchBR->getSuccessor(1) == L->getHeader()) &&
         "At least one edge out of the latch must go to the header");

  // NOTE(review): the declaration of `ExitBlocks` was lost in extraction.
  L->getUniqueNonLatchExitBlocks(ExitBlocks);
  // Violation when any non-latch exit is not a deoptimize exit.
  return any_of(ExitBlocks, [](const BasicBlock *EB) {
    return !EB->getTerminatingDeoptimizeCall();
  });
}
750
751
// Return the number of iterations we want to peel off, recorded into PP
// (PeelCount / PeelLast / PeelProfiledIterations). Heuristics are tried in
// priority order: user-forced count, phi/compare analysis, invariant-load
// dereferenceability, last-iteration peeling, then profile data.
// NOTE(review): two lines of this signature (between the visible parameter
// lines; the peeling preferences `PP` and the TTI/SE analyses used below)
// were lost in extraction — restore from the original source.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
                            unsigned TripCount, DominatorTree &DT,
                            AssumptionCache *AC, unsigned Threshold) {
  assert(LoopSize > 0 && "Zero loop size is not allowed!");
  // Save the PP.PeelCount value set by the target in
  // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
  unsigned TargetPeelCount = PP.PeelCount;
  PP.PeelCount = 0;
  PP.PeelLast = false;
  if (!canPeel(L))
    return;

  // Only try to peel innermost loops by default.
  // The constraint can be relaxed by the target in TTI.getPeelingPreferences
  // or by the flag -unroll-allow-loop-nests-peeling.
  if (!PP.AllowLoopNestsPeeling && !L->isInnermost())
    return;

  // If the user provided a peel count, use that.
  bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
  if (UserPeelCount) {
    LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
                      << " iterations.\n");
    // NOTE(review): the assignment of the forced count into PP.PeelCount was
    // lost in extraction.
    PP.PeelProfiledIterations = true;
    return;
  }

  // Skip peeling if it's disabled.
  if (!PP.AllowPeeling)
    return;

  // Check that we can peel at least one iteration.
  if (2 * LoopSize > Threshold)
    return;

  unsigned AlreadyPeeled = 0;
  // NOTE(review): the metadata lookup (PeeledCountMetaData, binding `Peeled`)
  // guarding this assignment was lost in extraction.
    AlreadyPeeled = *Peeled;
  // Stop if we already peeled off the maximum number of iterations.
  if (AlreadyPeeled >= UnrollPeelMaxCount)
    return;

  // Pay respect to limitations implied by loop size and the max peel count.
  unsigned MaxPeelCount = UnrollPeelMaxCount;
  MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1);

  // Start the max computation with the PP.PeelCount value set by the target
  // in TTI.getPeelingPreferences or by the flag -unroll-peel-count.
  unsigned DesiredPeelCount = TargetPeelCount;

  // Here we try to get rid of Phis which become invariants or inductions after
  // 1, 2, ..., N iterations of the loop. For this we compute the number for
  // iterations after which every Phi is guaranteed to become an invariant or an
  // induction, and try to peel the maximum number of iterations among these
  // values, thus turning all those Phis into invariants or inductions.
  if (MaxPeelCount > DesiredPeelCount) {
    // Check how many iterations are useful for resolving Phis
    auto NumPeels = PhiAnalyzer(*L, MaxPeelCount, EnablePeelingForIV)
                        .calculateIterationsToPeel();
    if (NumPeels)
      DesiredPeelCount = std::max(DesiredPeelCount, *NumPeels);
  }

  const auto &[CountToEliminateCmps, CountToEliminateCmpsLast] =
      countToEliminateCompares(*L, MaxPeelCount, SE, TTI);
  DesiredPeelCount = std::max(DesiredPeelCount, CountToEliminateCmps);

  // Fall back to the invariant-load heuristic only when nothing else asked
  // for peeling.
  if (DesiredPeelCount == 0)
    DesiredPeelCount = peelToTurnInvariantLoadsDereferenceable(*L, DT, AC);

  if (DesiredPeelCount > 0) {
    DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
    // Consider max peel count limitation.
    assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
    if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
      LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
                        << " iteration(s) to turn"
                        << " some Phis into invariants or inductions.\n");
      PP.PeelCount = DesiredPeelCount;
      PP.PeelProfiledIterations = false;
      PP.PeelLast = false;
      return;
    }
  }

  // Front peeling did not fit the budget; try peeling the last iteration.
  if (CountToEliminateCmpsLast > 0) {
    unsigned DesiredPeelCountLast =
        std::min(CountToEliminateCmpsLast, MaxPeelCount);
    // Consider max peel count limitation.
    assert(DesiredPeelCountLast > 0 && "Wrong loop size estimation?");
    if (DesiredPeelCountLast + AlreadyPeeled <= UnrollPeelMaxCount) {
      LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
                        << " iteration(s) to turn"
                        << " some Phis into invariants.\n");
      PP.PeelCount = DesiredPeelCountLast;
      PP.PeelProfiledIterations = false;
      PP.PeelLast = true;
      return;
    }
  }

  // Bail if we know the statically calculated trip count.
  // In this case we rather prefer partial unrolling.
  if (TripCount)
    return;

  // Do not apply profile base peeling if it is disabled.
  // NOTE(review): the condition guarding this early return was lost in
  // extraction.
    return;
  // If we don't know the trip count, but have reason to believe the average
  // trip count is low, peeling should be beneficial, since we will usually
  // hit the peeled section.
  // We only do this in the presence of profile information, since otherwise
  // our estimates of the trip count are not reliable enough.
  if (L->getHeader()->getParent()->hasProfileData()) {
    // NOTE(review): the condition guarding this early return (presumably the
    // legacy multi-exit check defined above) was lost in extraction.
      return;
    std::optional<unsigned> EstimatedTripCount = getLoopEstimatedTripCount(L);
    if (!EstimatedTripCount)
      return;

    LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is "
                      << *EstimatedTripCount << "\n");

    // Peel the whole estimated trip count if that stays within the budget;
    // checkedAddUnsigned guards against overflow of the sum.
    std::optional<unsigned> TotalPeeled =
        llvm::checkedAddUnsigned(*EstimatedTripCount, AlreadyPeeled);
    if (TotalPeeled && *TotalPeeled <= MaxPeelCount) {
      unsigned PeelCount = *EstimatedTripCount;
      LLVM_DEBUG(dbgs() << "Peeling first " << PeelCount << " iterations.\n");
      PP.PeelCount = PeelCount;
      return;
    }
    LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n");
    LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
    LLVM_DEBUG(dbgs() << "Loop cost: " << LoopSize << "\n");
    LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n");
    LLVM_DEBUG(dbgs() << "Max peel count by cost: "
                      << (Threshold / LoopSize - 1) << "\n");
  }
}
896
897/// Clones the body of the loop L, putting it between \p InsertTop and \p
898/// InsertBot.
899/// \param IterNumber The serial number of the iteration currently being
900/// peeled off.
901/// \param PeelLast Peel off the last iterations from \p L.
902/// \param ExitEdges The exit edges of the original loop.
903/// \param[out] NewBlocks A list of the blocks in the newly created clone
904/// \param[out] VMap The value map between the loop and the new clone.
905/// \param LoopBlocks A helper for DFS-traversal of the loop.
906/// \param LVMap A value-map that maps instructions from the original loop to
907/// instructions in the last peeled-off iteration.
908static void cloneLoopBlocks(
909 Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop,
910 BasicBlock *InsertBot, BasicBlock *OrigPreHeader,
911 SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
912 SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
914 LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes,
915 ScalarEvolution &SE) {
916 BasicBlock *Header = L->getHeader();
917 BasicBlock *Latch = L->getLoopLatch();
918 BasicBlock *PreHeader = L->getLoopPreheader();
919
920 Function *F = Header->getParent();
921 LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
922 LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
923 Loop *ParentLoop = L->getParentLoop();
924
925 // For each block in the original loop, create a new copy,
926 // and update the value map with the newly created values.
927 for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
928 BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
929 NewBlocks.push_back(NewBB);
930
931 // If an original block is an immediate child of the loop L, its copy
932 // is a child of a ParentLoop after peeling. If a block is a child of
933 // a nested loop, it is handled in the cloneLoop() call below.
934 if (ParentLoop && LI->getLoopFor(*BB) == L)
935 ParentLoop->addBasicBlockToLoop(NewBB, *LI);
936
937 VMap[*BB] = NewBB;
938
939 // If dominator tree is available, insert nodes to represent cloned blocks.
940 if (DT) {
941 if (Header == *BB)
942 DT->addNewBlock(NewBB, InsertTop);
943 else {
944 DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
945 // VMap must contain entry for IDom, as the iteration order is RPO.
946 DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
947 }
948 }
949 }
950
951 {
952 // Identify what other metadata depends on the cloned version. After
953 // cloning, replace the metadata with the corrected version for both
954 // memory instructions and noalias intrinsics.
955 std::string Ext = (Twine("Peel") + Twine(IterNumber)).str();
956 cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
957 Header->getContext(), Ext);
958 }
959
960 // Recursively create the new Loop objects for nested loops, if any,
961 // to preserve LoopInfo.
962 for (Loop *ChildLoop : *L) {
963 cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr);
964 }
965
966 // Hook-up the control flow for the newly inserted blocks.
967 // The new header is hooked up directly to the "top", which is either
968 // the original loop preheader (for the first iteration) or the previous
969 // iteration's exiting block (for every other iteration)
970 InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));
971
972 // Similarly, for the latch:
973 // The original exiting edge is still hooked up to the loop exit.
974 BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
975 if (PeelLast) {
976 // This is the last iteration and we definitely will go to the exit. Just
977 // set both successors to InsertBot and let the branch be simplified later.
978 assert(IterNumber == 0 && "Only peeling a single iteration implemented.");
979 auto *LatchTerm = cast<CondBrInst>(NewLatch->getTerminator());
980 LatchTerm->setSuccessor(0, InsertBot);
981 LatchTerm->setSuccessor(1, InsertBot);
982 } else {
983 auto *LatchTerm = cast<Instruction>(NewLatch->getTerminator());
984 // The backedge now goes to the "bottom", which is either the loop's real
985 // header (for the last peeled iteration) or the copied header of the next
986 // iteration (for every other iteration)
987 for (unsigned idx = 0, e = LatchTerm->getNumSuccessors(); idx < e; ++idx) {
988 if (LatchTerm->getSuccessor(idx) == Header) {
989 LatchTerm->setSuccessor(idx, InsertBot);
990 break;
991 }
992 }
993 }
994 if (DT)
995 DT->changeImmediateDominator(InsertBot, NewLatch);
996
997 // The new copy of the loop body starts with a bunch of PHI nodes
998 // that pick an incoming value from either the preheader, or the previous
999 // loop iteration. Since this copy is no longer part of the loop, we
1000 // resolve this statically:
1001 if (PeelLast) {
1002 // For the last iteration, we introduce new phis for each header phi in
1003 // InsertTop, using the incoming value from the preheader for the original
1004 // preheader (when skipping the main loop) and the incoming value from the
1005 // latch for the latch (when continuing from the main loop).
1006 IRBuilder<> B(InsertTop, InsertTop->getFirstNonPHIIt());
1007 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
1008 PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
1009 PHINode *PN = B.CreatePHI(NewPHI->getType(), 2);
1010 NewPHI->eraseFromParent();
1011 if (OrigPreHeader)
1012 PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(PreHeader),
1013 OrigPreHeader);
1014
1015 PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(Latch),
1016 Latch);
1017 VMap[&*I] = PN;
1018 }
1019 } else {
1020 // For the first iteration, we use the value from the preheader directly.
1021 // For any other iteration, we replace the phi with the value generated by
1022 // the immediately preceding clone of the loop body (which represents
1023 // the previous iteration).
1024 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
1025 PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
1026 if (IterNumber == 0) {
1027 VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
1028 } else {
1029 Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
1030 Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
1031 if (LatchInst && L->contains(LatchInst))
1032 VMap[&*I] = LVMap[LatchInst];
1033 else
1034 VMap[&*I] = LatchVal;
1035 }
1036 NewPHI->eraseFromParent();
1037 }
1038 }
1039
1040 // Fix up the outgoing values - we need to add a value for the iteration
1041 // we've just created. Note that this must happen *after* the incoming
1042 // values are adjusted, since the value going out of the latch may also be
1043 // a value coming into the header.
1044 for (auto Edge : ExitEdges)
1045 for (PHINode &PHI : Edge.second->phis()) {
1046 Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
1047 Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
1048 if (LatchInst && L->contains(LatchInst))
1049 LatchVal = VMap[LatchVal];
1050 PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
1052 }
1053
1054 // LastValueMap is updated with the values for the current loop
1055 // which are used the next time this function is called.
1056 for (auto KV : VMap)
1057 LVMap[KV.first] = KV.second;
1058}
1059
1060TargetTransformInfo::PeelingPreferences
1062 const TargetTransformInfo &TTI,
1063 std::optional<bool> UserAllowPeeling,
1064 std::optional<bool> UserAllowProfileBasedPeeling,
1065 bool UnrollingSpecficValues) {
1067
1068 // Set the default values.
1069 PP.PeelCount = 0;
1070 PP.AllowPeeling = true;
1071 PP.AllowLoopNestsPeeling = false;
1072 PP.PeelLast = false;
1073 PP.PeelProfiledIterations = true;
1074
1075 // Get the target specifc values.
1076 TTI.getPeelingPreferences(L, SE, PP);
1077
1078 // User specified values using cl::opt.
1079 if (UnrollingSpecficValues) {
1080 if (UnrollPeelCount.getNumOccurrences() > 0)
1082 if (UnrollAllowPeeling.getNumOccurrences() > 0)
1084 if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
1086 }
1087
1088 // User specifed values provided by argument.
1089 if (UserAllowPeeling)
1090 PP.AllowPeeling = *UserAllowPeeling;
1091 if (UserAllowProfileBasedPeeling)
1092 PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
1093
1094 return PP;
1095}
1096
/// Peel off the first \p PeelCount iterations of loop \p L (or, when
/// \p PeelLast is set, a single last iteration).
///
/// Note that this does not peel them off as a single straight-line block.
/// Rather, each iteration is peeled off separately, and needs to check the
/// exit condition.
/// For loops that dynamically execute \p PeelCount iterations or less
/// this provides a benefit, since the peeled off iterations, which account
/// for the bulk of dynamic execution, can be further simplified by scalar
/// optimizations.
// NOTE(review): this copy appears to have lost a continuation line of the
// signature declaring `DominatorTree &DT, ScalarEvolution *SE,
// AssumptionCache *AC,` — the body uses DT, SE and AC. Restore from upstream.
void llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
                    bool PreserveLCSSA, ValueToValueMapTy &LVMap) {
  assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
  assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
  assert((!PeelLast || (canPeelLastIteration(*L, *SE) && PeelCount == 1)) &&
         "when peeling the last iteration, the loop must be supported and can "
         "only peel a single iteration");

  LoopBlocksDFS LoopBlocks(L);
  LoopBlocks.perform(LI);

  BasicBlock *Header = L->getHeader();
  BasicBlock *PreHeader = L->getLoopPreheader();
  BasicBlock *Latch = L->getLoopLatch();
  // NOTE(review): the declaration of ExitEdges (a SmallVector of
  // (exiting, exit) block pairs) appears to be missing in this copy.
  L->getExitEdges(ExitEdges);

  // Remember dominators of blocks we might reach through exits to change them
  // later. Immediate dominator of such block might change, because we add more
  // routes which can lead to the exit: we can reach it from the peeled
  // iterations too.
  MapVector<BasicBlock *, BasicBlock *> NonLoopBlocksIDom;
  for (auto *BB : L->blocks()) {
    auto *BBDomNode = DT.getNode(BB);
    SmallVector<BasicBlock *, 16> ChildrenToUpdate;
    for (auto *ChildDomNode : BBDomNode->children()) {
      auto *ChildBB = ChildDomNode->getBlock();
      if (!L->contains(ChildBB))
        ChildrenToUpdate.push_back(ChildBB);
    }
    // The new idom of the block will be the nearest common dominator
    // of all copies of the previous idom. This is equivalent to the
    // nearest common dominator of the previous idom and the first latch,
    // which dominates all copies of the previous idom.
    BasicBlock *NewIDom = DT.findNearestCommonDominator(BB, Latch);
    for (auto *ChildBB : ChildrenToUpdate)
      NonLoopBlocksIDom[ChildBB] = NewIDom;
  }

  Function *F = Header->getParent();

  // Set up all the necessary basic blocks.
  BasicBlock *InsertTop;
  BasicBlock *InsertBot;
  BasicBlock *NewPreHeader = nullptr;
  // NOTE(review): the declaration of ExitValues (mapping each exit-block phi
  // to its incoming value from the latch) appears to be missing in this copy.
  if (PeelLast) {
    // It is convenient to split the single exit block from the latch the
    // into 3 parts - two blocks to anchor the peeled copy of the loop body,
    // and a new final exit block.

    // Peeling the last iteration transforms.
    //
    // PreHeader:
    // ...
    // Header:
    //   LoopBody
    //   If (cond) goto Header
    // Exit:
    //
    // into
    //
    // Header:
    //   LoopBody
    //   If (cond) goto Header
    // InsertTop:
    //   LoopBody
    //   If (!cond) goto InsertBot
    // InsertBot:
    // Exit:
    //   ...
    BasicBlock *Exit = L->getExitBlock();
    for (PHINode &P : Exit->phis())
      ExitValues[&P] = P.getIncomingValueForBlock(Latch);

    const SCEV *BTC = SE->getBackedgeTakenCount(L);

    InsertTop = SplitEdge(Latch, Exit, &DT, LI);
    InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);

    InsertTop->setName(Exit->getName() + ".peel.begin");
    InsertBot->setName(Exit->getName() + ".peel.next");
    NewPreHeader = nullptr;

    // If the original loop may only execute a single iteration we need to
    // insert a trip count check and skip the original loop with the last
    // iteration peeled off if necessary. Either way, we must update branch
    // weights to maintain the loop body frequency.
    if (SE->isKnownNonZero(BTC)) {
      // We have just proven that, when reached, the original loop always
      // executes at least two iterations. Thus, we unconditionally execute
      // both the remaining loop's initial iteration and the peeled iteration.
      // But that increases the latter's frequency above its frequency in the
      // original loop. To maintain the total frequency, we compensate by
      // decreasing the remaining loop body's frequency to indicate one less
      // iteration.
      //
      // We use this formula to convert probability to/from frequency:
      //   Sum(i=0..inf)(P^i) = 1/(1-P) = Freq.
      if (BranchProbability P = getLoopProbability(L); !P.isUnknown()) {
        // Trying to subtract one from an infinite loop is pointless, and our
        // formulas then produce division by zero, so skip that case.
        if (BranchProbability ExitP = P.getCompl(); !ExitP.isZero()) {
          double Freq = 1 / ExitP.toDouble();
          // No branch weights can produce a frequency of less than one given
          // the initial iteration, and our formulas produce a negative
          // probability if we try.
          assert(Freq >= 1.0 && "expected freq >= 1 due to initial iteration");
          double NewFreq = std::max(Freq - 1, 1.0);
          // NOTE(review): the call head for the next line (presumably the
          // loop-probability setter paired with getLoopProbability above)
          // appears to be missing in this copy — restore from upstream.
              L, BranchProbability::getBranchProbability(1 - 1 / NewFreq));
        }
      }
    } else {
      NewPreHeader = SplitEdge(PreHeader, Header, &DT, LI);
      SCEVExpander Expander(*SE, "loop-peel");

      // Guard the (now single-iteration-shorter) loop with a check of the
      // backedge-taken count: if BTC == 0 only the peeled copy runs.
      Instruction *PreHeaderBR = PreHeader->getTerminator();
      Value *BTCValue =
          Expander.expandCodeFor(BTC, BTC->getType(), PreHeaderBR);
      IRBuilder<> B(PreHeaderBR);
      Value *Cond =
          B.CreateICmpNE(BTCValue, ConstantInt::get(BTCValue->getType(), 0));
      auto *BI = B.CreateCondBr(Cond, NewPreHeader, InsertTop);
      SmallVector<uint32_t> Weights;
      auto *OrigLatchBr = Latch->getTerminator();
      auto HasBranchWeights = !ProfcheckDisableMetadataFixes &&
                              extractBranchWeights(*OrigLatchBr, Weights);
      if (HasBranchWeights) {
        // The probability that the new guard skips the loop to execute just one
        // iteration is the original loop's probability of exiting at the latch
        // after any iteration. That should maintain the original loop body
        // frequency. Upon arriving at the loop, due to the guard, the
        // probability of reaching iteration i of the new loop is the
        // probability of reaching iteration i+1 of the original loop. The
        // probability of reaching the peeled iteration is 1, which is the
        // probability of reaching iteration 0 of the original loop.
        if (L->getExitBlock() == OrigLatchBr->getSuccessor(0))
          std::swap(Weights[0], Weights[1]);
        setBranchWeights(*BI, Weights, /*IsExpected=*/false);
      }
      PreHeaderBR->eraseFromParent();

      // PreHeader now dominates InsertTop.
      DT.changeImmediateDominator(InsertTop, PreHeader);
    }
  } else {
    // It is convenient to split the preheader into 3 parts - two blocks to
    // anchor the peeled copy of the loop body, and a new preheader for the
    // "real" loop.

    // Peeling the first iteration transforms.
    //
    // PreHeader:
    // ...
    // Header:
    //   LoopBody
    //   If (cond) goto Header
    // Exit:
    //
    // into
    //
    // InsertTop:
    //   LoopBody
    //   If (!cond) goto Exit
    // InsertBot:
    // NewPreHeader:
    // ...
    // Header:
    //   LoopBody
    //   If (cond) goto Header
    // Exit:
    //
    // Each following iteration will split the current bottom anchor in two,
    // and put the new copy of the loop body between these two blocks. That
    // is, after peeling another iteration from the example above, we'll
    // split InsertBot, and get:
    //
    // InsertTop:
    //   LoopBody
    //   If (!cond) goto Exit
    // InsertBot:
    //   LoopBody
    //   If (!cond) goto Exit
    // InsertBot.next:
    // NewPreHeader:
    // ...
    // Header:
    //   LoopBody
    //   If (cond) goto Header
    // Exit:
    //
    InsertTop = SplitEdge(PreHeader, Header, &DT, LI);
    InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
    NewPreHeader = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);

    InsertTop->setName(Header->getName() + ".peel.begin");
    InsertBot->setName(Header->getName() + ".peel.next");
    NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
  }

  // Capture the original latch terminator before cloning; each peeled copy's
  // clone of it (looked up via VMap below) must drop its loop metadata.
  Instruction *LatchTerm =
      cast<Instruction>(cast<BasicBlock>(Latch)->getTerminator());

  // Identify what noalias metadata is inside the loop: if it is inside the
  // loop, the associated metadata must be cloned for each iteration.
  SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
  identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);

  // For each peeled-off iteration, make a copy of the loop.
  ValueToValueMapTy VMap;
  for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
    // NOTE(review): the declaration of NewBlocks
    // (SmallVector<BasicBlock *, 8>) appears to be missing in this copy.

    cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot,
                    NewPreHeader ? PreHeader : nullptr, ExitEdges, NewBlocks,
                    LoopBlocks, VMap, LVMap, &DT, LI,
                    LoopLocalNoAliasDeclScopes, *SE);

    // Remap to use values from the current iteration instead of the
    // previous one.
    remapInstructionsInBlocks(NewBlocks, VMap);

    if (Iter == 0) {
      if (PeelLast) {
        // Adjust the exit condition so the loop exits one iteration early.
        // For now we simply subtract one from the second operand of the
        // exit condition. This relies on the peel count computation to
        // check that this is actually legal. In particular, it ensures that
        // the first operand of the compare is an AddRec with step 1 and we
        // execute more than one iteration.
        auto *Cmp =
            cast<ICmpInst>(L->getLoopLatch()->getTerminator()->getOperand(0));
        IRBuilder B(Cmp);
        Cmp->setOperand(
            1, B.CreateSub(Cmp->getOperand(1),
                           ConstantInt::get(Cmp->getOperand(1)->getType(), 1)));
      } else {
        // Update IDoms of the blocks reachable through exits.
        for (auto BBIDom : NonLoopBlocksIDom)
          DT.changeImmediateDominator(BBIDom.first,
                                      cast<BasicBlock>(LVMap[BBIDom.second]));
      }
    }

#ifdef EXPENSIVE_CHECKS
    assert(DT.verify(DominatorTree::VerificationLevel::Fast));
#endif

    // Remove Loop metadata from the latch branch instruction
    // because it is not the Loop's latch branch anymore.
    auto *LatchTermCopy = cast<Instruction>(VMap[LatchTerm]);
    LatchTermCopy->setMetadata(LLVMContext::MD_loop, nullptr);

    // Advance the anchors: the old bottom becomes the top of the next
    // peeled iteration, and a fresh bottom block is split off.
    InsertTop = InsertBot;
    InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
    InsertBot->setName(Header->getName() + ".peel.next");

    // Move the freshly cloned blocks into place between the anchors.
    F->splice(InsertTop->getIterator(), F, NewBlocks[0]->getIterator(),
              F->end());
  }

  if (PeelLast) {
    // Now adjust users of the original exit values by replacing them with the
    // exit value from the peeled iteration and remove them.
    for (const auto &[P, E] : ExitValues) {
      Instruction *ExitInst = dyn_cast<Instruction>(E);
      if (ExitInst && L->contains(ExitInst))
        P->replaceAllUsesWith(&*VMap[ExitInst]);
      else
        P->replaceAllUsesWith(E);
      P->eraseFromParent();
    }
    formLCSSA(*L, DT, LI, SE);
  } else {
    // Now adjust the phi nodes in the loop header to get their initial values
    // from the last peeled-off iteration instead of the preheader.
    for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
      // NOTE(review): the line declaring `PHINode *PHI = cast<PHINode>(I);`
      // appears to be missing in this copy.
      Value *NewVal = PHI->getIncomingValueForBlock(Latch);
      Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
      if (LatchInst && L->contains(LatchInst))
        NewVal = LVMap[LatchInst];

      PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
    }
  }

  // Update Metadata for count of peeled off iterations.
  unsigned AlreadyPeeled = 0;
  // NOTE(review): the guarding `if (auto Peeled = ...)` reading the existing
  // peeled-count loop attribute appears to be missing in this copy.
    AlreadyPeeled = *Peeled;
  unsigned TotalPeeled = AlreadyPeeled + PeelCount;
  // NOTE(review): the call writing TotalPeeled back as loop metadata appears
  // to be missing in this copy.

  // Update metadata for the estimated trip count. The original branch weight
  // metadata is already correct for both the remaining loop and the peeled loop
  // iterations, so do not adjust it.
  //
  // For example, consider what happens when peeling 2 iterations from a loop
  // with an estimated trip count of 10 and inserting them before the remaining
  // loop. Each of the peeled iterations and each iteration in the remaining
  // loop still has the same probability of exiting the *entire original* loop
  // as it did when in the original loop, and thus it should still have the same
  // branch weights. The peeled iterations' non-zero probabilities of exiting
  // already appropriately reduce the probability of reaching the remaining
  // iterations just as they did in the original loop. Trying to also adjust
  // the remaining loop's branch weights to reflect its new trip count of 8 will
  // erroneously further reduce its block frequencies. However, in case an
  // analysis later needs to determine the trip count of the remaining loop
  // while examining it in isolation without considering the probability of
  // actually reaching it, we store the new trip count as separate metadata.
  if (auto EstimatedTripCount = getLoopEstimatedTripCount(L)) {
    unsigned EstimatedTripCountNew = *EstimatedTripCount;
    if (EstimatedTripCountNew < TotalPeeled)
      EstimatedTripCountNew = 0;
    else
      EstimatedTripCountNew -= TotalPeeled;
    setLoopEstimatedTripCount(L, EstimatedTripCountNew);
  }

  if (Loop *ParentLoop = L->getParentLoop())
    L = ParentLoop;

  // We modified the loop, update SE.
  SE->forgetTopmostLoop(L);
  // NOTE(review): a follow-up SE invalidation call (likely
  // forgetBlockAndLoopDispositions) appears to be missing in this copy.

#ifdef EXPENSIVE_CHECKS
  // Finally DominatorTree must be correct.
  assert(DT.verify(DominatorTree::VerificationLevel::Fast));
#endif

  // FIXME: Incrementally update loop-simplify
  simplifyLoop(L, &DT, LI, SE, AC, nullptr, PreserveLCSSA);

  NumPeeled++;
  NumPeeledEnd += PeelLast;
}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines the DenseMap class.
static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred, const SCEVAddRecExpr *LeftAR, const SCEV *RightSCEV, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Returns true if the last iteration can be peeled off and the condition (Pred LeftAR,...
Definition LoopPeel.cpp:512
static bool violatesLegacyMultiExitLoopCheck(Loop *L)
This "heuristic" exactly matches implicit behavior which used to exist inside getLoopEstimatedTripCou...
Definition LoopPeel.cpp:731
static std::pair< unsigned, unsigned > countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Definition LoopPeel.cpp:550
static void cloneLoopBlocks(Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *OrigPreHeader, SmallVectorImpl< std::pair< BasicBlock *, BasicBlock * > > &ExitEdges, SmallVectorImpl< BasicBlock * > &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, LoopInfo *LI, ArrayRef< MDNode * > LoopLocalNoAliasDeclScopes, ScalarEvolution &SE)
Clones the body of the loop L, putting it between InsertTop and InsertBot.
Definition LoopPeel.cpp:908
static unsigned peelToTurnInvariantLoadsDereferenceable(Loop &L, DominatorTree &DT, AssumptionCache *AC)
Definition LoopPeel.cpp:424
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file implements a map that provides insertion order iteration.
#define P(N)
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_NE
not equal
Definition InstrTypes.h:698
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Conditional Branch instruction.
BasicBlock * getSuccessor(unsigned i) const
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
DomTreeNodeBase * getIDom() const
NodeT * getBlock() const
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2858
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Store the result of a depth first search within basic blocks contained by a single loop.
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
RPOIterator endRPO() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:38
This class represents min/max intrinsics.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
This node represents a polynomial recurrence on the trip count of the specified loop.
LLVM_ABI const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
SCEVUse getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
This class uses information about analyze scalars to rewrite expressions in canonical form.
bool isHighCostExpansion(ArrayRef< const SCEV * > Exprs, Loop *L, unsigned Budget, const TargetTransformInfo *TTI, const Instruction *At)
Return true for expressions that can't be evaluated at runtime within given Budget.
LLVM_ABI Value * expandCodeFor(SCEVUse SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
static LLVM_ABI LoopGuards collect(const Loop *L, ScalarEvolution &SE)
Collect rewrite map for loop guards for loop L, together with flags indicating if NUW and NSW can be ...
The main scalar evolution driver.
const SCEV * getConstantMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEVConstant that is greater than or equal to (i.e.
LLVM_ABI bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI void forgetTopmostLoop(const Loop *L)
LLVM_ABI void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
LLVM_ABI void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V)
Forget LCSSA phi node V of loop L to which a new predecessor was added, such that it may no longer be...
LLVM_ABI std::optional< MonotonicPredicateType > getMonotonicPredicateType(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred)
If, for all loop invariant X, the predicate "LHS `Pred` X" is monotonically increasing or decreasing,...
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
LLVM_ABI std::optional< bool > evaluatePredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Check whether the condition described by Pred, LHS, and RHS is true or false.
This class represents the LLVM 'select' instruction.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:393
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
self_iterator getIterator()
Definition ilist_node.h:123
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_BasicBlock()
Match an arbitrary basic block value and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
brc_match< Cond_t, match_bind< BasicBlock >, match_bind< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
cst_pred_ty< is_one > m_scev_One()
Match an integer 1.
specificloop_ty m_SpecificLoop(const Loop *L)
bool match(const SCEV *S, const Pattern &P)
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
cl::opt< bool > ProfcheckDisableMetadataFixes
Definition LoopInfo.cpp:60
LLVM_ABI std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Return either:
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
static cl::opt< bool > DisableAdvancedPeeling("disable-advanced-peeling", cl::init(false), cl::Hidden, cl::desc("Disable advance peeling. Issues for convergent targets (D134803)."))
static cl::opt< bool > UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low."))
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
static cl::opt< bool > EnablePeelingForIV("enable-peeling-for-iv", cl::init(false), cl::Hidden, cl::desc("Enable peeling to convert Phi nodes into IVs"))
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool canPeel(const Loop *L)
Definition LoopPeel.cpp:97
bool canPeelLastIteration(const Loop &L, ScalarEvolution &SE)
Returns true if the last iteration of L can be peeled off.
Definition LoopPeel.cpp:481
LLVM_ABI void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V=0)
Set input string into loop metadata by keeping other values intact.
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
static const char * PeeledCountMetaData
Definition LoopPeel.cpp:91
DomTreeNodeBase< BasicBlock > DomTreeNode
Definition Dominators.h:94
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache *AC=nullptr, unsigned Threshold=UINT_MAX)
Definition LoopPeel.cpp:753
LLVM_ABI cl::opt< unsigned > SCEVCheapExpansionBudget
BranchProbability getLoopProbability(Loop *L)
Based on branch weight metadata, return either:
static cl::opt< unsigned > UnrollForcePeelCount("unroll-force-peel-count", cl::init(0), cl::Hidden, cl::desc("Force a peel count regardless of profiling information."))
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
void peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA, ValueToValueMapTy &VMap)
VMap is the value-map that maps instructions from the original loop to instructions in the last peele...
LLVM_ABI std::optional< int > getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name)
Find named metadata for a loop with an integer value.
bool setLoopProbability(Loop *L, BranchProbability P)
Set branch weight metadata for the latch of L to indicate that, at the end of any iteration,...
TargetTransformInfo TTI
static cl::opt< bool > UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden, cl::desc("Allows loop nests to be peeled."))
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
static cl::opt< unsigned > UnrollPeelMaxCount("unroll-peel-max-count", cl::init(7), cl::Hidden, cl::desc("Max average trip count which will cause loop peeling."))
LLVM_ABI void cloneAndAdaptNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, ArrayRef< BasicBlock * > NewBlocks, LLVMContext &Context, StringRef Ext)
Clone the specified noalias decl scopes.
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
std::enable_if_t< std::is_unsigned_v< T >, std::optional< T > > checkedAddUnsigned(T LHS, T RHS)
Add two unsigned integers LHS and RHS.
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:250
LLVM_ABI bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, std::optional< unsigned > EstimatedLoopInvocationWeight=std::nullopt)
Set llvm.loop.estimated_trip_count with the value EstimatedTripCount in the loop metadata of L.
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
LLVM_ABI bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition LCSSA.cpp:427
static cl::opt< unsigned > UnrollPeelCount("unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes"))
LLVM_ABI Loop * cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM)
Recursively clone the specified loop and all of its children, mapping the blocks with the specified m...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
bool AllowPeeling
Allow peeling off loop iterations.
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
bool PeelLast
Peel off the last PeelCount loop iterations.
bool PeelProfiledIterations
Allow peeling basing on profile.
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...