LLVM  16.0.0git
LoopPeel.cpp
Go to the documentation of this file.
1 //===- LoopPeel.cpp -------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Loop Peeling Utilities.
10 //===----------------------------------------------------------------------===//
11 
13 #include "llvm/ADT/DenseMap.h"
14 #include "llvm/ADT/Optional.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/Loads.h"
18 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/IR/BasicBlock.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/InstrTypes.h"
27 #include "llvm/IR/Instruction.h"
28 #include "llvm/IR/Instructions.h"
29 #include "llvm/IR/LLVMContext.h"
30 #include "llvm/IR/MDBuilder.h"
31 #include "llvm/IR/PatternMatch.h"
32 #include "llvm/IR/ProfDataUtils.h"
33 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/Debug.h"
42 #include <algorithm>
43 #include <cassert>
44 #include <cstdint>
45 #include <optional>
46 
47 using namespace llvm;
48 using namespace llvm::PatternMatch;
49 
50 #define DEBUG_TYPE "loop-peel"
51 
52 STATISTIC(NumPeeled, "Number of loops peeled");
53 
55  "unroll-peel-count", cl::Hidden,
56  cl::desc("Set the unroll peeling count, for testing purposes"));
57 
58 static cl::opt<bool>
59  UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
60  cl::desc("Allows loops to be peeled when the dynamic "
61  "trip count is known to be low."));
62 
63 static cl::opt<bool>
64  UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling",
65  cl::init(false), cl::Hidden,
66  cl::desc("Allows loop nests to be peeled."));
67 
69  "unroll-peel-max-count", cl::init(7), cl::Hidden,
70  cl::desc("Max average trip count which will cause loop peeling."));
71 
73  "unroll-force-peel-count", cl::init(0), cl::Hidden,
74  cl::desc("Force a peel count regardless of profiling information."));
75 
77  "disable-advanced-peeling", cl::init(false), cl::Hidden,
78  cl::desc(
79  "Disable advance peeling. Issues for convergent targets (D134803)."));
80 
81 static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
82 
83 // Check whether we are capable of peeling this loop.
84 bool llvm::canPeel(const Loop *L) {
85  // Make sure the loop is in simplified form
86  if (!L->isLoopSimplifyForm())
87  return false;
89  return true;
90 
93  // The latch must either be the only exiting block or all non-latch exit
94  // blocks have either a deopt or unreachable terminator or compose a chain of
95  // blocks where the last one is either deopt or unreachable terminated. Both
96  // deopt and unreachable terminators are a strong indication they are not
97  // taken. Note that this is a profitability check, not a legality check. Also
98  // note that LoopPeeling currently can only update the branch weights of latch
99  // blocks and branch weights to blocks with deopt or unreachable do not need
100  // updating.
102 }
103 
104 namespace {
105 
106 // As a loop is peeled, it may be the case that Phi nodes become
107 // loop-invariant (ie, known because there is only one choice).
108 // For example, consider the following function:
109 // void g(int);
110 // void binary() {
111 // int x = 0;
112 // int y = 0;
113 // int a = 0;
114 // for(int i = 0; i <100000; ++i) {
115 // g(x);
116 // x = y;
117 // g(a);
118 // y = a + 1;
119 // a = 5;
120 // }
121 // }
122 // Peeling 3 iterations is beneficial because the values for x, y and a
123 // become known. The IR for this loop looks something like the following:
124 //
125 // %i = phi i32 [ 0, %entry ], [ %inc, %if.end ]
126 // %a = phi i32 [ 0, %entry ], [ 5, %if.end ]
127 // %y = phi i32 [ 0, %entry ], [ %add, %if.end ]
128 // %x = phi i32 [ 0, %entry ], [ %y, %if.end ]
129 // ...
130 // tail call void @_Z1gi(i32 signext %x)
131 // tail call void @_Z1gi(i32 signext %a)
132 // %add = add nuw nsw i32 %a, 1
133 // %inc = add nuw nsw i32 %i, 1
134 // %exitcond = icmp eq i32 %inc, 100000
135 // br i1 %exitcond, label %for.cond.cleanup, label %for.body
136 //
137 // The arguments for the calls to g will become known after 3 iterations
138 // of the loop, because the phi nodes values become known after 3 iterations
139 // of the loop (ie, they are known on the 4th iteration, so peel 3 iterations).
140 // The first iteration has g(0), g(0); the second has g(0), g(5); the
141 // third has g(1), g(5) and the fourth (and all subsequent) have g(6), g(5).
142 // Now consider the phi nodes:
143 // %a is a phi with constants so it is determined after iteration 1.
144 // %y is a phi based on a constant and %a so it is determined on
145 // the iteration after %a is determined, so iteration 2.
146 // %x is a phi based on a constant and %y so it is determined on
147 // the iteration after %y, so iteration 3.
148 // %i is based on itself (and is an induction variable) so it is
149 // never determined.
150 // This means that peeling off 3 iterations will result in being able to
151 // remove the phi nodes for %a, %y, and %x. The arguments for the
152 // corresponding calls to g are determined and the code for computing
153 // x, y, and a can be removed.
154 //
155 // The PhiAnalyzer class calculates how many times a loop should be
156 // peeled based on the above analysis of the phi nodes in the loop while
157 // respecting the maximum specified.
158 class PhiAnalyzer {
159 public:
160  PhiAnalyzer(const Loop &L, unsigned MaxIterations);
161 
162  // Calculate the sufficient minimum number of iterations of the loop to peel
163  // such that phi instructions become determined (subject to allowable limits)
164  Optional<unsigned> calculateIterationsToPeel();
165 
166 protected:
167  using PeelCounter = std::optional<unsigned>;
168  const PeelCounter Unknown = None;
169 
170  // Add 1 respecting Unknown and return Unknown if result over MaxIterations
171  PeelCounter addOne(PeelCounter PC) const {
172  if (PC == Unknown)
173  return Unknown;
174  return (*PC + 1 <= MaxIterations) ? PeelCounter{*PC + 1} : Unknown;
175  }
176 
177  // Calculate the number of iterations after which the given value
178  // becomes an invariant.
179  PeelCounter calculate(const Value &);
180 
181  const Loop &L;
182  const unsigned MaxIterations;
183 
184  // Map of Values to number of iterations to invariance
185  SmallDenseMap<const Value *, PeelCounter> IterationsToInvariance;
186 };
187 
188 PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations)
189  : L(L), MaxIterations(MaxIterations) {
190  assert(canPeel(&L) && "loop is not suitable for peeling");
191  assert(MaxIterations > 0 && "no peeling is allowed?");
192 }
193 
194 // This function calculates the number of iterations after which the value
195 // becomes an invariant. The pre-calculated values are memorized in a map.
196 // N.B. This number will be Unknown or <= MaxIterations.
197 // The function is calculated according to the following definition:
198 // Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
199 // F(%x) = G(%y) + 1 (N.B. [MaxIterations | Unknown] + 1 => Unknown)
200 // G(%y) = 0 if %y is a loop invariant
201 // G(%y) = G(%BackEdgeValue) if %y is a phi in the header block
202 // G(%y) = TODO: if %y is an expression based on phis and loop invariants
203 // The example looks like:
204 // %x = phi(0, %a) <-- becomes invariant starting from 3rd iteration.
205 // %y = phi(0, 5)
206 // %a = %y + 1
207 // G(%y) = Unknown otherwise (including phi not in header block)
208 PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
209  // If we already know the answer, take it from the map.
210  auto I = IterationsToInvariance.find(&V);
211  if (I != IterationsToInvariance.end())
212  return I->second;
213 
214  // Place Unknown to map to avoid infinite recursion. Such
215  // cycles can never stop on an invariant.
216  IterationsToInvariance[&V] = Unknown;
217 
218  if (L.isLoopInvariant(&V))
219  // Loop invariant so known at start.
220  return (IterationsToInvariance[&V] = 0);
221  if (const PHINode *Phi = dyn_cast<PHINode>(&V)) {
222  if (Phi->getParent() != L.getHeader()) {
223  // Phi is not in header block so Unknown.
224  assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved");
225  return Unknown;
226  }
227  // We need to analyze the input from the back edge and add 1.
228  Value *Input = Phi->getIncomingValueForBlock(L.getLoopLatch());
229  PeelCounter Iterations = calculate(*Input);
230  assert(IterationsToInvariance[Input] == Iterations &&
231  "unexpected value saved");
232  return (IterationsToInvariance[Phi] = addOne(Iterations));
233  }
234  // TODO: handle expressions
235 
236  // Everything else is Unknown.
237  assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved");
238  return Unknown;
239 }
240 
241 Optional<unsigned> PhiAnalyzer::calculateIterationsToPeel() {
242  unsigned Iterations = 0;
243  for (auto &PHI : L.getHeader()->phis()) {
244  PeelCounter ToInvariance = calculate(PHI);
245  if (ToInvariance != Unknown) {
246  assert(*ToInvariance <= MaxIterations && "bad result in phi analysis");
247  Iterations = std::max(Iterations, *ToInvariance);
248  if (Iterations == MaxIterations)
249  break;
250  }
251  }
252  assert((Iterations <= MaxIterations) && "bad result in phi analysis");
253  return Iterations ? Optional<unsigned>(Iterations) : None;
254 }
255 
256 } // unnamed namespace
257 
258 // Try to find any invariant memory reads that will become dereferenceable in
259 // the remainder loop after peeling. The load must also be used (transitively)
260 // by an exit condition. Returns the number of iterations to peel off (at the
261 // moment either 0 or 1).
263  DominatorTree &DT,
264  AssumptionCache *AC) {
265  // Skip loops with a single exiting block, because there should be no benefit
266  // for the heuristic below.
267  if (L.getExitingBlock())
268  return 0;
269 
270  // All non-latch exit blocks must have an UnreachableInst terminator.
271  // Otherwise the heuristic below may not be profitable.
274  if (any_of(Exits, [](const BasicBlock *BB) {
275  return !isa<UnreachableInst>(BB->getTerminator());
276  }))
277  return 0;
278 
279  // Now look for invariant loads that dominate the latch and are not known to
280  // be dereferenceable. If there are such loads and no writes, they will become
281  // dereferenceable in the loop if the first iteration is peeled off. Also
282  // collect the set of instructions controlled by such loads. Only peel if an
283  // exit condition uses (transitively) such a load.
284  BasicBlock *Header = L.getHeader();
285  BasicBlock *Latch = L.getLoopLatch();
286  SmallPtrSet<Value *, 8> LoadUsers;
287  const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
288  for (BasicBlock *BB : L.blocks()) {
289  for (Instruction &I : *BB) {
290  if (I.mayWriteToMemory())
291  return 0;
292 
293  auto Iter = LoadUsers.find(&I);
294  if (Iter != LoadUsers.end()) {
295  for (Value *U : I.users())
296  LoadUsers.insert(U);
297  }
298  // Do not look for reads in the header; they can already be hoisted
299  // without peeling.
300  if (BB == Header)
301  continue;
302  if (auto *LI = dyn_cast<LoadInst>(&I)) {
303  Value *Ptr = LI->getPointerOperand();
304  if (DT.dominates(BB, Latch) && L.isLoopInvariant(Ptr) &&
305  !isDereferenceablePointer(Ptr, LI->getType(), DL, LI, AC, &DT))
306  for (Value *U : I.users())
307  LoadUsers.insert(U);
308  }
309  }
310  }
311  SmallVector<BasicBlock *> ExitingBlocks;
312  L.getExitingBlocks(ExitingBlocks);
313  if (any_of(ExitingBlocks, [&LoadUsers](BasicBlock *Exiting) {
314  return LoadUsers.contains(Exiting->getTerminator());
315  }))
316  return 1;
317  return 0;
318 }
319 
320 // Return the number of iterations to peel off that make conditions in the
321 // body true/false. For example, if we peel 2 iterations off the loop below,
322 // the condition i < 2 can be evaluated at compile time.
323 // for (i = 0; i < n; i++)
324 // if (i < 2)
325 // ..
326 // else
327 // ..
328 // }
329 static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
330  ScalarEvolution &SE) {
331  assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
332  unsigned DesiredPeelCount = 0;
333 
334  for (auto *BB : L.blocks()) {
335  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
336  if (!BI || BI->isUnconditional())
337  continue;
338 
339  // Ignore loop exit condition.
340  if (L.getLoopLatch() == BB)
341  continue;
342 
343  Value *Condition = BI->getCondition();
344  Value *LeftVal, *RightVal;
345  CmpInst::Predicate Pred;
346  if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
347  continue;
348 
349  const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
350  const SCEV *RightSCEV = SE.getSCEV(RightVal);
351 
352  // Do not consider predicates that are known to be true or false
353  // independently of the loop iteration.
354  if (SE.evaluatePredicate(Pred, LeftSCEV, RightSCEV))
355  continue;
356 
357  // Check if we have a condition with one AddRec and one non AddRec
358  // expression. Normalize LeftSCEV to be the AddRec.
359  if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
360  if (isa<SCEVAddRecExpr>(RightSCEV)) {
361  std::swap(LeftSCEV, RightSCEV);
362  Pred = ICmpInst::getSwappedPredicate(Pred);
363  } else
364  continue;
365  }
366 
367  const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
368 
369  // Avoid huge SCEV computations in the loop below, make sure we only
370  // consider AddRecs of the loop we are trying to peel.
371  if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
372  continue;
373  if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) &&
374  !SE.getMonotonicPredicateType(LeftAR, Pred))
375  continue;
376 
377  // Check if extending the current DesiredPeelCount lets us evaluate Pred
378  // or !Pred in the loop body statically.
379  unsigned NewPeelCount = DesiredPeelCount;
380 
381  const SCEV *IterVal = LeftAR->evaluateAtIteration(
382  SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);
383 
384  // If the original condition is not known, get the negated predicate
385  // (which holds on the else branch) and check if it is known. This allows
386  // us to peel of iterations that make the original condition false.
387  if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
388  Pred = ICmpInst::getInversePredicate(Pred);
389 
390  const SCEV *Step = LeftAR->getStepRecurrence(SE);
391  const SCEV *NextIterVal = SE.getAddExpr(IterVal, Step);
392  auto PeelOneMoreIteration = [&IterVal, &NextIterVal, &SE, Step,
393  &NewPeelCount]() {
394  IterVal = NextIterVal;
395  NextIterVal = SE.getAddExpr(IterVal, Step);
396  NewPeelCount++;
397  };
398 
399  auto CanPeelOneMoreIteration = [&NewPeelCount, &MaxPeelCount]() {
400  return NewPeelCount < MaxPeelCount;
401  };
402 
403  while (CanPeelOneMoreIteration() &&
404  SE.isKnownPredicate(Pred, IterVal, RightSCEV))
405  PeelOneMoreIteration();
406 
407  // With *that* peel count, does the predicate !Pred become known in the
408  // first iteration of the loop body after peeling?
409  if (!SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
410  RightSCEV))
411  continue; // If not, give up.
412 
413  // However, for equality comparisons, that isn't always sufficient to
414  // eliminate the comparsion in loop body, we may need to peel one more
415  // iteration. See if that makes !Pred become unknown again.
416  if (ICmpInst::isEquality(Pred) &&
417  !SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), NextIterVal,
418  RightSCEV) &&
419  !SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
420  SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) {
421  if (!CanPeelOneMoreIteration())
422  continue; // Need to peel one more iteration, but can't. Give up.
423  PeelOneMoreIteration(); // Great!
424  }
425 
426  DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount);
427  }
428 
429  return DesiredPeelCount;
430 }
431 
432 /// This "heuristic" exactly matches implicit behavior which used to exist
433 /// inside getLoopEstimatedTripCount. It was added here to keep an
434 /// improvement inside that API from causing peeling to become more aggressive.
435 /// This should probably be removed.
437  BasicBlock *Latch = L->getLoopLatch();
438  if (!Latch)
439  return true;
440 
441  BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
442  if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
443  return true;
444 
445  assert((LatchBR->getSuccessor(0) == L->getHeader() ||
446  LatchBR->getSuccessor(1) == L->getHeader()) &&
447  "At least one edge out of the latch must go to the header");
448 
449  SmallVector<BasicBlock *, 4> ExitBlocks;
450  L->getUniqueNonLatchExitBlocks(ExitBlocks);
451  return any_of(ExitBlocks, [](const BasicBlock *EB) {
452  return !EB->getTerminatingDeoptimizeCall();
453  });
454 }
455 
456 
457 // Return the number of iterations we want to peel off.
458 void llvm::computePeelCount(Loop *L, unsigned LoopSize,
460  unsigned TripCount, DominatorTree &DT,
462  unsigned Threshold) {
463  assert(LoopSize > 0 && "Zero loop size is not allowed!");
464  // Save the PP.PeelCount value set by the target in
465  // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
466  unsigned TargetPeelCount = PP.PeelCount;
467  PP.PeelCount = 0;
468  if (!canPeel(L))
469  return;
470 
471  // Only try to peel innermost loops by default.
472  // The constraint can be relaxed by the target in TTI.getPeelingPreferences
473  // or by the flag -unroll-allow-loop-nests-peeling.
474  if (!PP.AllowLoopNestsPeeling && !L->isInnermost())
475  return;
476 
477  // If the user provided a peel count, use that.
478  bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
479  if (UserPeelCount) {
480  LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
481  << " iterations.\n");
483  PP.PeelProfiledIterations = true;
484  return;
485  }
486 
487  // Skip peeling if it's disabled.
488  if (!PP.AllowPeeling)
489  return;
490 
491  // Check that we can peel at least one iteration.
492  if (2 * LoopSize > Threshold)
493  return;
494 
495  unsigned AlreadyPeeled = 0;
496  if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
497  AlreadyPeeled = *Peeled;
498  // Stop if we already peeled off the maximum number of iterations.
499  if (AlreadyPeeled >= UnrollPeelMaxCount)
500  return;
501 
502  // Pay respect to limitations implied by loop size and the max peel count.
503  unsigned MaxPeelCount = UnrollPeelMaxCount;
504  MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1);
505 
506  // Start the max computation with the PP.PeelCount value set by the target
507  // in TTI.getPeelingPreferences or by the flag -unroll-peel-count.
508  unsigned DesiredPeelCount = TargetPeelCount;
509 
510  // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
511  // iterations of the loop. For this we compute the number for iterations after
512  // which every Phi is guaranteed to become an invariant, and try to peel the
513  // maximum number of iterations among these values, thus turning all those
514  // Phis into invariants.
515  if (MaxPeelCount > DesiredPeelCount) {
516  // Check how many iterations are useful for resolving Phis
517  auto NumPeels = PhiAnalyzer(*L, MaxPeelCount).calculateIterationsToPeel();
518  if (NumPeels)
519  DesiredPeelCount = std::max(DesiredPeelCount, *NumPeels);
520  }
521 
522  DesiredPeelCount = std::max(DesiredPeelCount,
523  countToEliminateCompares(*L, MaxPeelCount, SE));
524 
525  if (DesiredPeelCount == 0)
526  DesiredPeelCount = peelToTurnInvariantLoadsDerefencebale(*L, DT, AC);
527 
528  if (DesiredPeelCount > 0) {
529  DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
530  // Consider max peel count limitation.
531  assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
532  if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
533  LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
534  << " iteration(s) to turn"
535  << " some Phis into invariants.\n");
536  PP.PeelCount = DesiredPeelCount;
537  PP.PeelProfiledIterations = false;
538  return;
539  }
540  }
541 
542  // Bail if we know the statically calculated trip count.
543  // In this case we rather prefer partial unrolling.
544  if (TripCount)
545  return;
546 
547  // Do not apply profile base peeling if it is disabled.
548  if (!PP.PeelProfiledIterations)
549  return;
550  // If we don't know the trip count, but have reason to believe the average
551  // trip count is low, peeling should be beneficial, since we will usually
552  // hit the peeled section.
553  // We only do this in the presence of profile information, since otherwise
554  // our estimates of the trip count are not reliable enough.
555  if (L->getHeader()->getParent()->hasProfileData()) {
557  return;
558  Optional<unsigned> EstimatedTripCount = getLoopEstimatedTripCount(L);
559  if (!EstimatedTripCount)
560  return;
561 
562  LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is "
563  << *EstimatedTripCount << "\n");
564 
565  if (*EstimatedTripCount) {
566  if (*EstimatedTripCount + AlreadyPeeled <= MaxPeelCount) {
567  unsigned PeelCount = *EstimatedTripCount;
568  LLVM_DEBUG(dbgs() << "Peeling first " << PeelCount << " iterations.\n");
569  PP.PeelCount = PeelCount;
570  return;
571  }
572  LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n");
573  LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
574  LLVM_DEBUG(dbgs() << "Loop cost: " << LoopSize << "\n");
575  LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n");
576  LLVM_DEBUG(dbgs() << "Max peel count by cost: "
577  << (Threshold / LoopSize - 1) << "\n");
578  }
579  }
580 }
581 
582 struct WeightInfo {
583  // Weights for current iteration.
585  // Weights to subtract after each iteration.
587 };
588 
589 /// Update the branch weights of an exiting block of a peeled-off loop
590 /// iteration.
591 /// Let F is a weight of the edge to continue (fallthrough) into the loop.
592 /// Let E is a weight of the edge to an exit.
593 /// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to
594 /// go to exit.
595 /// Then, Estimated ExitCount = F / E.
596 /// For I-th (counting from 0) peeled off iteration we set the the weights for
597 /// the peeled exit as (EC - I, 1). It gives us reasonable distribution,
598 /// The probability to go to exit 1/(EC-I) increases. At the same time
599 /// the estimated exit count in the remainder loop reduces by I.
600 /// To avoid dealing with division rounding we can just multiple both part
601 /// of weights to E and use weight as (F - I * E, E).
603  MDBuilder MDB(Term->getContext());
604  Term->setMetadata(LLVMContext::MD_prof,
605  MDB.createBranchWeights(Info.Weights));
606  for (auto [Idx, SubWeight] : enumerate(Info.SubWeights))
607  if (SubWeight != 0)
608  Info.Weights[Idx] = Info.Weights[Idx] > SubWeight
609  ? Info.Weights[Idx] - SubWeight
610  : 1;
611 }
612 
613 /// Initialize the weights for all exiting blocks.
615  Loop *L) {
616  SmallVector<BasicBlock *> ExitingBlocks;
617  L->getExitingBlocks(ExitingBlocks);
618  for (BasicBlock *ExitingBlock : ExitingBlocks) {
619  Instruction *Term = ExitingBlock->getTerminator();
620  SmallVector<uint32_t> Weights;
621  if (!extractBranchWeights(*Term, Weights))
622  continue;
623 
624  // See the comment on updateBranchWeights() for an explanation of what we
625  // do here.
626  uint32_t FallThroughWeights = 0;
627  uint32_t ExitWeights = 0;
628  for (auto [Succ, Weight] : zip(successors(Term), Weights)) {
629  if (L->contains(Succ))
630  FallThroughWeights += Weight;
631  else
632  ExitWeights += Weight;
633  }
634 
635  // Don't try to update weights for degenerate case.
636  if (FallThroughWeights == 0)
637  continue;
638 
639  SmallVector<uint32_t> SubWeights;
640  for (auto [Succ, Weight] : zip(successors(Term), Weights)) {
641  if (!L->contains(Succ)) {
642  // Exit weights stay the same.
643  SubWeights.push_back(0);
644  continue;
645  }
646 
647  // Subtract exit weights on each iteration, distributed across all
648  // fallthrough edges.
649  double W = (double)Weight / (double)FallThroughWeights;
650  SubWeights.push_back((uint32_t)(ExitWeights * W));
651  }
652 
653  WeightInfos.insert({Term, {std::move(Weights), std::move(SubWeights)}});
654  }
655 }
656 
657 /// Update the weights of original exiting block after peeling off all
658 /// iterations.
660  MDBuilder MDB(Term->getContext());
661  Term->setMetadata(LLVMContext::MD_prof,
662  MDB.createBranchWeights(Info.Weights));
663 }
664 
665 /// Clones the body of the loop L, putting it between \p InsertTop and \p
666 /// InsertBot.
667 /// \param IterNumber The serial number of the iteration currently being
668 /// peeled off.
669 /// \param ExitEdges The exit edges of the original loop.
670 /// \param[out] NewBlocks A list of the blocks in the newly created clone
671 /// \param[out] VMap The value map between the loop and the new clone.
672 /// \param LoopBlocks A helper for DFS-traversal of the loop.
673 /// \param LVMap A value-map that maps instructions from the original loop to
674 /// instructions in the last peeled-off iteration.
675 static void cloneLoopBlocks(
676  Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot,
677  SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
678  SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
680  LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes,
681  ScalarEvolution &SE) {
682  BasicBlock *Header = L->getHeader();
683  BasicBlock *Latch = L->getLoopLatch();
684  BasicBlock *PreHeader = L->getLoopPreheader();
685 
686  Function *F = Header->getParent();
687  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
688  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
689  Loop *ParentLoop = L->getParentLoop();
690 
691  // For each block in the original loop, create a new copy,
692  // and update the value map with the newly created values.
693  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
694  BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
695  NewBlocks.push_back(NewBB);
696 
697  // If an original block is an immediate child of the loop L, its copy
698  // is a child of a ParentLoop after peeling. If a block is a child of
699  // a nested loop, it is handled in the cloneLoop() call below.
700  if (ParentLoop && LI->getLoopFor(*BB) == L)
701  ParentLoop->addBasicBlockToLoop(NewBB, *LI);
702 
703  VMap[*BB] = NewBB;
704 
705  // If dominator tree is available, insert nodes to represent cloned blocks.
706  if (DT) {
707  if (Header == *BB)
708  DT->addNewBlock(NewBB, InsertTop);
709  else {
710  DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
711  // VMap must contain entry for IDom, as the iteration order is RPO.
712  DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
713  }
714  }
715  }
716 
717  {
718  // Identify what other metadata depends on the cloned version. After
719  // cloning, replace the metadata with the corrected version for both
720  // memory instructions and noalias intrinsics.
721  std::string Ext = (Twine("Peel") + Twine(IterNumber)).str();
722  cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
723  Header->getContext(), Ext);
724  }
725 
726  // Recursively create the new Loop objects for nested loops, if any,
727  // to preserve LoopInfo.
728  for (Loop *ChildLoop : *L) {
729  cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr);
730  }
731 
732  // Hook-up the control flow for the newly inserted blocks.
733  // The new header is hooked up directly to the "top", which is either
734  // the original loop preheader (for the first iteration) or the previous
735  // iteration's exiting block (for every other iteration)
736  InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));
737 
738  // Similarly, for the latch:
739  // The original exiting edge is still hooked up to the loop exit.
740  // The backedge now goes to the "bottom", which is either the loop's real
741  // header (for the last peeled iteration) or the copied header of the next
742  // iteration (for every other iteration)
743  BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
744  auto *LatchTerm = cast<Instruction>(NewLatch->getTerminator());
745  for (unsigned idx = 0, e = LatchTerm->getNumSuccessors(); idx < e; ++idx)
746  if (LatchTerm->getSuccessor(idx) == Header) {
747  LatchTerm->setSuccessor(idx, InsertBot);
748  break;
749  }
750  if (DT)
751  DT->changeImmediateDominator(InsertBot, NewLatch);
752 
753  // The new copy of the loop body starts with a bunch of PHI nodes
754  // that pick an incoming value from either the preheader, or the previous
755  // loop iteration. Since this copy is no longer part of the loop, we
756  // resolve this statically:
757  // For the first iteration, we use the value from the preheader directly.
758  // For any other iteration, we replace the phi with the value generated by
759  // the immediately preceding clone of the loop body (which represents
760  // the previous iteration).
761  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
762  PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
763  if (IterNumber == 0) {
764  VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
765  } else {
766  Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
767  Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
768  if (LatchInst && L->contains(LatchInst))
769  VMap[&*I] = LVMap[LatchInst];
770  else
771  VMap[&*I] = LatchVal;
772  }
773  NewPHI->eraseFromParent();
774  }
775 
776  // Fix up the outgoing values - we need to add a value for the iteration
777  // we've just created. Note that this must happen *after* the incoming
778  // values are adjusted, since the value going out of the latch may also be
779  // a value coming into the header.
780  for (auto Edge : ExitEdges)
781  for (PHINode &PHI : Edge.second->phis()) {
782  Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
783  Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
784  if (LatchInst && L->contains(LatchInst))
785  LatchVal = VMap[LatchVal];
786  PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
787  SE.forgetValue(&PHI);
788  }
789 
790  // LastValueMap is updated with the values for the current loop
791  // which are used the next time this function is called.
792  for (auto KV : VMap)
793  LVMap[KV.first] = KV.second;
794 }
795 
798  Optional<bool> UserAllowPeeling,
799  Optional<bool> UserAllowProfileBasedPeeling, bool UnrollingSpecficValues) {
801 
802  // Set the default values.
803  PP.PeelCount = 0;
804  PP.AllowPeeling = true;
805  PP.AllowLoopNestsPeeling = false;
806  PP.PeelProfiledIterations = true;
807 
808  // Get the target specifc values.
809  TTI.getPeelingPreferences(L, SE, PP);
810 
811  // User specified values using cl::opt.
812  if (UnrollingSpecficValues) {
813  if (UnrollPeelCount.getNumOccurrences() > 0)
819  }
820 
821  // User specifed values provided by argument.
822  if (UserAllowPeeling)
823  PP.AllowPeeling = *UserAllowPeeling;
824  if (UserAllowProfileBasedPeeling)
825  PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
826 
827  return PP;
828 }
829 
830 /// Peel off the first \p PeelCount iterations of loop \p L.
831 ///
832 /// Note that this does not peel them off as a single straight-line block.
833 /// Rather, each iteration is peeled off separately, and needs to check the
834 /// exit condition.
835 /// For loops that dynamically execute \p PeelCount iterations or less
836 /// this provides a benefit, since the peeled off iterations, which account
837 /// for the bulk of dynamic execution, can be further simplified by scalar
838 /// optimizations.
839 bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
841  bool PreserveLCSSA) {
842  assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
843  assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
844 
845  LoopBlocksDFS LoopBlocks(L);
846  LoopBlocks.perform(LI);
847 
848  BasicBlock *Header = L->getHeader();
849  BasicBlock *PreHeader = L->getLoopPreheader();
850  BasicBlock *Latch = L->getLoopLatch();
852  L->getExitEdges(ExitEdges);
853 
854  // Remember dominators of blocks we might reach through exits to change them
855  // later. Immediate dominator of such block might change, because we add more
856  // routes which can lead to the exit: we can reach it from the peeled
857  // iterations too.
858  DenseMap<BasicBlock *, BasicBlock *> NonLoopBlocksIDom;
859  for (auto *BB : L->blocks()) {
860  auto *BBDomNode = DT.getNode(BB);
861  SmallVector<BasicBlock *, 16> ChildrenToUpdate;
862  for (auto *ChildDomNode : BBDomNode->children()) {
863  auto *ChildBB = ChildDomNode->getBlock();
864  if (!L->contains(ChildBB))
865  ChildrenToUpdate.push_back(ChildBB);
866  }
867  // The new idom of the block will be the nearest common dominator
868  // of all copies of the previous idom. This is equivalent to the
869  // nearest common dominator of the previous idom and the first latch,
870  // which dominates all copies of the previous idom.
871  BasicBlock *NewIDom = DT.findNearestCommonDominator(BB, Latch);
872  for (auto *ChildBB : ChildrenToUpdate)
873  NonLoopBlocksIDom[ChildBB] = NewIDom;
874  }
875 
876  Function *F = Header->getParent();
877 
878  // Set up all the necessary basic blocks. It is convenient to split the
879  // preheader into 3 parts - two blocks to anchor the peeled copy of the loop
880  // body, and a new preheader for the "real" loop.
881 
882  // Peeling the first iteration transforms.
883  //
884  // PreHeader:
885  // ...
886  // Header:
887  // LoopBody
888  // If (cond) goto Header
889  // Exit:
890  //
891  // into
892  //
893  // InsertTop:
894  // LoopBody
895  // If (!cond) goto Exit
896  // InsertBot:
897  // NewPreHeader:
898  // ...
899  // Header:
900  // LoopBody
901  // If (cond) goto Header
902  // Exit:
903  //
904  // Each following iteration will split the current bottom anchor in two,
905  // and put the new copy of the loop body between these two blocks. That is,
906  // after peeling another iteration from the example above, we'll split
907  // InsertBot, and get:
908  //
909  // InsertTop:
910  // LoopBody
911  // If (!cond) goto Exit
912  // InsertBot:
913  // LoopBody
914  // If (!cond) goto Exit
915  // InsertBot.next:
916  // NewPreHeader:
917  // ...
918  // Header:
919  // LoopBody
920  // If (cond) goto Header
921  // Exit:
922 
923  BasicBlock *InsertTop = SplitEdge(PreHeader, Header, &DT, LI);
924  BasicBlock *InsertBot =
925  SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
926  BasicBlock *NewPreHeader =
927  SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
928 
929  InsertTop->setName(Header->getName() + ".peel.begin");
930  InsertBot->setName(Header->getName() + ".peel.next");
931  NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
932 
933  ValueToValueMapTy LVMap;
934 
935  Instruction *LatchTerm =
936  cast<Instruction>(cast<BasicBlock>(Latch)->getTerminator());
937 
938  // If we have branch weight information, we'll want to update it for the
939  // newly created branches.
941  initBranchWeights(Weights, L);
942 
943  // Identify what noalias metadata is inside the loop: if it is inside the
944  // loop, the associated metadata must be cloned for each iteration.
945  SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
946  identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
947 
948  // For each peeled-off iteration, make a copy of the loop.
949  for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
951  ValueToValueMapTy VMap;
952 
953  cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
954  LoopBlocks, VMap, LVMap, &DT, LI,
955  LoopLocalNoAliasDeclScopes, *SE);
956 
957  // Remap to use values from the current iteration instead of the
958  // previous one.
959  remapInstructionsInBlocks(NewBlocks, VMap);
960 
961  // Update IDoms of the blocks reachable through exits.
962  if (Iter == 0)
963  for (auto BBIDom : NonLoopBlocksIDom)
964  DT.changeImmediateDominator(BBIDom.first,
965  cast<BasicBlock>(LVMap[BBIDom.second]));
966 #ifdef EXPENSIVE_CHECKS
968 #endif
969 
970  for (auto &[Term, Info] : Weights) {
971  auto *TermCopy = cast<Instruction>(VMap[Term]);
972  updateBranchWeights(TermCopy, Info);
973  }
974 
975  // Remove Loop metadata from the latch branch instruction
976  // because it is not the Loop's latch branch anymore.
977  auto *LatchTermCopy = cast<Instruction>(VMap[LatchTerm]);
978  LatchTermCopy->setMetadata(LLVMContext::MD_loop, nullptr);
979 
980  InsertTop = InsertBot;
981  InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
982  InsertBot->setName(Header->getName() + ".peel.next");
983 
984  F->getBasicBlockList().splice(InsertTop->getIterator(),
985  F->getBasicBlockList(),
986  NewBlocks[0]->getIterator(), F->end());
987  }
988 
989  // Now adjust the phi nodes in the loop header to get their initial values
990  // from the last peeled-off iteration instead of the preheader.
991  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
992  PHINode *PHI = cast<PHINode>(I);
993  Value *NewVal = PHI->getIncomingValueForBlock(Latch);
994  Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
995  if (LatchInst && L->contains(LatchInst))
996  NewVal = LVMap[LatchInst];
997 
998  PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
999  }
1000 
1001  for (const auto &[Term, Info] : Weights)
1003 
1004  // Update Metadata for count of peeled off iterations.
1005  unsigned AlreadyPeeled = 0;
1006  if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
1007  AlreadyPeeled = *Peeled;
1008  addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount);
1009 
1010  if (Loop *ParentLoop = L->getParentLoop())
1011  L = ParentLoop;
1012 
1013  // We modified the loop, update SE.
1014  SE->forgetTopmostLoop(L);
1015 
1016 #ifdef EXPENSIVE_CHECKS
1017  // Finally DomtTree must be correct.
1019 #endif
1020 
1021  // FIXME: Incrementally update loop-simplify
1022  simplifyLoop(L, &DT, LI, SE, AC, nullptr, PreserveLCSSA);
1023 
1024  NumPeeled++;
1025 
1026  return true;
1027 }
UnrollPeelCount
static cl::opt< unsigned > UnrollPeelCount("unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes"))
llvm::BasicBlock::getTerminatingDeoptimizeCall
const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Definition: BasicBlock.cpp:182
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::DominatorTreeBase::findNearestCommonDominator
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
Definition: GenericDomTree.h:468
LoopSimplify.h
Optional.h
ValueMapper.h
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:720
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
PHI
Rewrite undef for PHI
Definition: AMDGPURewriteUndefForPHI.cpp:101
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:370
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
Loads.h
violatesLegacyMultiExitLoopCheck
static bool violatesLegacyMultiExitLoopCheck(Loop *L)
This "heuristic" exactly matches implicit behavior which used to exist inside getLoopEstimatedTripCou...
Definition: LoopPeel.cpp:436
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
llvm::computePeelCount
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, AssumptionCache *AC=nullptr, unsigned Threshold=UINT_MAX)
Definition: LoopPeel.cpp:458
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:139
double
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in and only one load from a constant double
Definition: README-SSE.txt:85
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
Statistic.h
llvm::TargetTransformInfo::PeelingPreferences::AllowPeeling
bool AllowPeeling
Allow peeling off loop iterations.
Definition: TargetTransformInfo.h:535
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:266
llvm::enumerate
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are are pair (A,B) such that A is the 0-based ...
Definition: STLExtras.h:2263
UnrollPeelMaxCount
static cl::opt< unsigned > UnrollPeelMaxCount("unroll-peel-max-count", cl::init(7), cl::Hidden, cl::desc("Max average trip count which will cause loop peeling."))
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:173
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
ScalarEvolution.h
DenseMap.h
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:529
updateBranchWeights
static void updateBranchWeights(Instruction *Term, WeightInfo &Info)
Update the branch weights of an exiting block of a peeled-off loop iteration.
Definition: LoopPeel.cpp:602
llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition: GenericDomTree.h:351
llvm::LoopBase::getExitEdges
void getExitEdges(SmallVectorImpl< Edge > &ExitEdges) const
Return all pairs of (inside_block,outside_block).
Definition: LoopInfoImpl.h:164
llvm::Optional< unsigned >
llvm::BranchInst::getNumSuccessors
unsigned getNumSuccessors() const
Definition: Instructions.h:3225
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::ScalarEvolution::evaluatePredicate
Optional< bool > evaluatePredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Check whether the condition described by Pred, LHS, and RHS is true or false.
Definition: ScalarEvolution.cpp:10772
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:29
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::canPeel
bool canPeel(const Loop *L)
Definition: LoopPeel.cpp:84
llvm::DomTreeNodeBase::getIDom
DomTreeNodeBase * getIDom() const
Definition: GenericDomTree.h:89
llvm::Sched::Fast
@ Fast
Definition: TargetLowering.h:104
WeightInfo
Definition: LoopPeel.cpp:582
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::LoopBlocksDFS::beginRPO
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
Definition: LoopIterator.h:136
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
PeeledCountMetaData
static const char * PeeledCountMetaData
Definition: LoopPeel.cpp:81
Instruction.h
CommandLine.h
WeightInfo::SubWeights
const SmallVector< uint32_t > SubWeights
Definition: LoopPeel.cpp:586
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:114
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1734
llvm::TargetTransformInfo::PeelingPreferences::PeelProfiledIterations
bool PeelProfiledIterations
Allow peeling basing on profile.
Definition: TargetTransformInfo.h:542
WeightInfo::Weights
SmallVector< uint32_t > Weights
Definition: LoopPeel.cpp:584
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
countToEliminateCompares
static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE)
Definition: LoopPeel.cpp:329
InstrTypes.h
llvm::remapInstructionsInBlocks
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock * > &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
Definition: CloneFunction.cpp:926
llvm::MDBuilder::createBranchWeights
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
llvm::PPC::getSwappedPredicate
Predicate getSwappedPredicate(Predicate Opcode)
Assume the condition register is set by MI(a,b), return the predicate if we modify the instructions s...
Definition: PPCPredicates.cpp:52
UnrollForcePeelCount
static cl::opt< unsigned > UnrollForcePeelCount("unroll-force-peel-count", cl::init(0), cl::Hidden, cl::desc("Force a peel count regardless of profiling information."))
llvm::cloneLoop
Loop * cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM)
Recursively clone the specified loop and all of its children, mapping the blocks with the specified m...
Definition: LoopUtils.cpp:1542
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:195
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2884
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:188
llvm::ScalarEvolution::isKnownPredicate
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
Definition: ScalarEvolution.cpp:10757
llvm::Instruction
Definition: Instruction.h:42
llvm::LoopBlocksDFS::endRPO
RPOIterator endRPO() const
Definition: LoopIterator.h:140
MDBuilder.h
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:364
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:375
llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:33
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:403
LoopUtils.h
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:147
llvm::DominatorTreeBase::changeImmediateDominator
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
Definition: GenericDomTree.h:655
llvm::LoopBase::getExitingBlock
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:48
PatternMatch.h
LoopIterator.h
LoopInfo.h
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4442
llvm::LoopBlocksDFS::RPOIterator
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
Definition: LoopIterator.h:101
llvm::LoopBlocksDFS
Store the result of a depth first search within basic blocks contained by a single loop.
Definition: LoopIterator.h:97
llvm::cloneAndAdaptNoAliasScopes
void cloneAndAdaptNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, ArrayRef< BasicBlock * > NewBlocks, LLVMContext &Context, StringRef Ext)
Clone the specified noalias decl scopes.
Definition: CloneFunction.cpp:1130
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:479
llvm::CloneBasicBlock
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
Definition: CloneFunction.cpp:42
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1412
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:159
llvm::SmallPtrSetImpl::find
iterator find(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:386
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:81
llvm::DomTreeNodeBase::getBlock
NodeT * getBlock() const
Definition: GenericDomTree.h:88
llvm::pdb::Unknown
@ Unknown
Definition: PDBTypes.h:396
llvm::SmallPtrSetImpl::end
iterator end() const
Definition: SmallPtrSet.h:408
llvm::addStringMetadataToLoop
void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V=0)
Set input string into loop metadata by keeping other values intact.
Definition: LoopUtils.cpp:215
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
ProfDataUtils.h
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:53
llvm::DenseMap
Definition: DenseMap.h:714
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:992
I
#define I(x, y, z)
Definition: MD5.cpp:58
Cloning.h
llvm::SCEVNAryExpr::hasNoSelfWrap
bool hasNoSelfWrap() const
Definition: ScalarEvolutionExpressions.h:225
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:183
llvm::ScalarEvolution::forgetValue
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
Definition: ScalarEvolution.cpp:8432
llvm::ScalarEvolution::getMonotonicPredicateType
Optional< MonotonicPredicateType > getMonotonicPredicateType(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred)
If, for all loop invariant X, the predicate "LHS `Pred` X" is monotonically increasing or decreasing,...
Definition: ScalarEvolution.cpp:10816
llvm::LoopBlocksDFS::perform
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopInfo.cpp:1222
llvm::TargetTransformInfo::PeelingPreferences::AllowLoopNestsPeeling
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
Definition: TargetTransformInfo.h:537
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:232
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:60
llvm::peelLoop
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA)
Peel off the first PeelCount iterations of loop L.
Definition: LoopPeel.cpp:839
llvm::ScalarEvolution::forgetTopmostLoop
void forgetTopmostLoop(const Loop *L)
Definition: ScalarEvolution.cpp:8428
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::TargetTransformInfo::PeelingPreferences::PeelCount
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
Definition: TargetTransformInfo.h:533
llvm::getOptionalIntLoopAttribute
std::optional< int > getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name)
Find named metadata for a loop with an integer value.
Definition: LoopInfo.cpp:1089
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1108
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1741
llvm::isDereferenceablePointer
bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition: Loads.cpp:221
llvm::identifyNoAliasScopesToClone
void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
Definition: CloneFunction.cpp:1167
llvm::Instruction::setSuccessor
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
Definition: Instruction.cpp:838
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:472
UnrollAllowLoopNestsPeeling
static cl::opt< bool > UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden, cl::desc("Allows loop nests to be peeled."))
uint32_t
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::M68kBeads::Term
@ Term
Definition: M68kBaseInfo.h:71
UnrollAllowPeeling
static cl::opt< bool > UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low."))
llvm::DomTreeNodeBase< BasicBlock >
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
llvm::ValueMap< const Value *, WeakTrackingVH >
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::LoopBase::getUniqueNonLatchExitBlocks
void getUniqueNonLatchExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop except successors from Latch block are not considered...
Definition: LoopInfoImpl.h:149
llvm::DominatorTreeBase::addNewBlock
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
Definition: GenericDomTree.h:619
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:182
llvm::SplitEdge
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
Definition: BasicBlockUtils.cpp:597
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
fixupBranchWeights
static void fixupBranchWeights(Instruction *Term, const WeightInfo &Info)
Update the weights of original exiting block after peeling off all iterations.
Definition: LoopPeel.cpp:659
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:354
llvm::None
constexpr std::nullopt_t None
Definition: None.h:27
initBranchWeights
static void initBranchWeights(DenseMap< Instruction *, WeightInfo > &WeightInfos, Loop *L)
Initialize the weights for all exiting blocks.
Definition: LoopPeel.cpp:614
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:342
Casting.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:105
llvm::simplifyLoop
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
Definition: LoopSimplify.cpp:708
llvm::Function::hasProfileData
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:290
llvm::MDBuilder
Definition: MDBuilder.h:36
ScalarEvolutionExpressions.h
Instructions.h
llvm::IsBlockFollowedByDeoptOrUnreachable
bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
Definition: BasicBlockUtils.cpp:578
peelToTurnInvariantLoadsDerefencebale
static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L, DominatorTree &DT, AssumptionCache *AC)
Definition: LoopPeel.cpp:262
SmallVector.h
cloneLoopBlocks
static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot, SmallVectorImpl< std::pair< BasicBlock *, BasicBlock * >> &ExitEdges, SmallVectorImpl< BasicBlock * > &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, LoopInfo *LI, ArrayRef< MDNode * > LoopLocalNoAliasDeclScopes, ScalarEvolution &SE)
Clones the body of the loop L, putting it between InsertTop and InsertBot.
Definition: LoopPeel.cpp:675
llvm::LoopBase::addBasicBlockToLoop
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:258
llvm::zip
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:879
llvm::PatternMatch::m_ICmp
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:1394
Dominators.h
llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition: GenericDomTree.h:802
TargetTransformInfo.h
llvm::TargetTransformInfo::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) const
Get target-customized preferences for the generic loop peeling transformation.
Definition: TargetTransformInfo.cpp:341
llvm::PHINode
Definition: Instructions.h:2699
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:399
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:403
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2493
llvm::LoopBase::isLoopExiting
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Definition: LoopInfo.h:242
llvm::extractBranchWeights
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
Definition: ProfDataUtils.cpp:104
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
LLVMContext.h
llvm::cl::desc
Definition: CommandLine.h:413
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3134
raw_ostream.h
llvm::SmallPtrSetImpl::contains
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:389
LoopPeel.h
BasicBlockUtils.h
DisableAdvancedPeeling
static cl::opt< bool > DisableAdvancedPeeling("disable-advanced-peeling", cl::init(false), cl::Hidden, cl::desc("Disable advance peeling. Issues for convergent targets (D134803)."))
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:917
llvm::getLoopEstimatedTripCount
Optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:812
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::gatherPeelingPreferences
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, Optional< bool > UserAllowPeeling, Optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
Definition: LoopPeel.cpp:796
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3227
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:360
llvm::SCEVAddRecExpr::evaluateAtIteration
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
Definition: ScalarEvolution.cpp:1053
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365