LLVM  14.0.0git
LoopPeel.cpp
Go to the documentation of this file.
1 //===- LoopPeel.cpp -------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Loop Peeling Utilities.
10 //===----------------------------------------------------------------------===//
11 
13 #include "llvm/ADT/DenseMap.h"
14 #include "llvm/ADT/Optional.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/Loads.h"
18 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/IR/BasicBlock.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/InstrTypes.h"
27 #include "llvm/IR/Instruction.h"
28 #include "llvm/IR/Instructions.h"
29 #include "llvm/IR/LLVMContext.h"
30 #include "llvm/IR/MDBuilder.h"
31 #include "llvm/IR/Metadata.h"
32 #include "llvm/IR/PatternMatch.h"
33 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/Debug.h"
43 #include <algorithm>
44 #include <cassert>
45 #include <cstdint>
46 #include <limits>
47 
48 using namespace llvm;
49 using namespace llvm::PatternMatch;
50 
51 #define DEBUG_TYPE "loop-peel"
52 
53 STATISTIC(NumPeeled, "Number of loops peeled");
54 
56  "unroll-peel-count", cl::Hidden,
57  cl::desc("Set the unroll peeling count, for testing purposes"));
58 
59 static cl::opt<bool>
60  UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
61  cl::desc("Allows loops to be peeled when the dynamic "
62  "trip count is known to be low."));
63 
64 static cl::opt<bool>
65  UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling",
66  cl::init(false), cl::Hidden,
67  cl::desc("Allows loop nests to be peeled."));
68 
70  "unroll-peel-max-count", cl::init(7), cl::Hidden,
71  cl::desc("Max average trip count which will cause loop peeling."));
72 
74  "unroll-force-peel-count", cl::init(0), cl::Hidden,
75  cl::desc("Force a peel count regardless of profiling information."));
76 
77 static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
78 
79 // Designates that a Phi is estimated to become invariant after an "infinite"
80 // number of loop iterations (i.e. only may become an invariant if the loop is
81 // fully unrolled).
82 static const unsigned InfiniteIterationsToInvariance =
84 
85 // Check whether we are capable of peeling this loop.
86 bool llvm::canPeel(Loop *L) {
87  // Make sure the loop is in simplified form
88  if (!L->isLoopSimplifyForm())
89  return false;
90 
91  // Don't try to peel loops where the latch is not the exiting block.
92  // This can be an indication of two different things:
93  // 1) The loop is not rotated.
94  // 2) The loop contains irreducible control flow that involves the latch.
95  const BasicBlock *Latch = L->getLoopLatch();
96  if (!L->isLoopExiting(Latch))
97  return false;
98 
99  // Peeling is only supported if the latch is a branch.
100  if (!isa<BranchInst>(Latch->getTerminator()))
101  return false;
102 
104  L->getUniqueNonLatchExitBlocks(Exits);
105  // The latch must either be the only exiting block or all non-latch exit
106  // blocks have either a deopt or unreachable terminator. Both deopt and
107  // unreachable terminators are a strong indication they are not taken. Note
108  // that this is a profitability check, not a legality check. Also note that
109  // LoopPeeling currently can only update the branch weights of latch blocks
110  // and branch weights to blocks with deopt or unreachable do not need
111  // updating.
112  return all_of(Exits, [](const BasicBlock *BB) {
113  return BB->getTerminatingDeoptimizeCall() ||
114  isa<UnreachableInst>(BB->getTerminator());
115  });
116 }
117 
118 // This function calculates the number of iterations after which the given Phi
119 // becomes an invariant. The pre-calculated values are memorized in the map. The
120 // function (shortcut is I) is calculated according to the following definition:
121 // Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
122 // If %y is a loop invariant, then I(%x) = 1.
123 // If %y is a Phi from the loop header, I(%x) = I(%y) + 1.
124 // Otherwise, I(%x) is infinite.
125 // TODO: Actually if %y is an expression that depends only on Phi %z and some
126 // loop invariants, we can estimate I(%x) = I(%z) + 1. The example
127 // looks like:
128 // %x = phi(0, %a), <-- becomes invariant starting from 3rd iteration.
129 // %y = phi(0, 5),
130 // %a = %y + 1.
132  PHINode *Phi, Loop *L, BasicBlock *BackEdge,
133  SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) {
134  assert(Phi->getParent() == L->getHeader() &&
135  "Non-loop Phi should not be checked for turning into invariant.");
136  assert(BackEdge == L->getLoopLatch() && "Wrong latch?");
137  // If we already know the answer, take it from the map.
138  auto I = IterationsToInvariance.find(Phi);
139  if (I != IterationsToInvariance.end())
140  return I->second;
141 
142  // Otherwise we need to analyze the input from the back edge.
143  Value *Input = Phi->getIncomingValueForBlock(BackEdge);
144  // Place infinity to map to avoid infinite recursion for cycled Phis. Such
145  // cycles can never stop on an invariant.
146  IterationsToInvariance[Phi] = InfiniteIterationsToInvariance;
147  unsigned ToInvariance = InfiniteIterationsToInvariance;
148 
149  if (L->isLoopInvariant(Input))
150  ToInvariance = 1u;
151  else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
152  // Only consider Phis in header block.
153  if (IncPhi->getParent() != L->getHeader())
155  // If the input becomes an invariant after X iterations, then our Phi
156  // becomes an invariant after X + 1 iterations.
157  unsigned InputToInvariance = calculateIterationsToInvariance(
158  IncPhi, L, BackEdge, IterationsToInvariance);
159  if (InputToInvariance != InfiniteIterationsToInvariance)
160  ToInvariance = InputToInvariance + 1u;
161  }
162 
163  // If we found that this Phi lies in an invariant chain, update the map.
164  if (ToInvariance != InfiniteIterationsToInvariance)
165  IterationsToInvariance[Phi] = ToInvariance;
166  return ToInvariance;
167 }
168 
169 // Try to find any invariant memory reads that will become dereferenceable in
170 // the remainder loop after peeling. The load must also be used (transitively)
171 // by an exit condition. Returns the number of iterations to peel off (at the
172 // moment either 0 or 1).
174  DominatorTree &DT) {
175  // Skip loops with a single exiting block, because there should be no benefit
176  // for the heuristic below.
177  if (L.getExitingBlock())
178  return 0;
179 
180  // All non-latch exit blocks must have an UnreachableInst terminator.
181  // Otherwise the heuristic below may not be profitable.
184  if (any_of(Exits, [](const BasicBlock *BB) {
185  return !isa<UnreachableInst>(BB->getTerminator());
186  }))
187  return 0;
188 
189  // Now look for invariant loads that dominate the latch and are not known to
190  // be dereferenceable. If there are such loads and no writes, they will become
191  // dereferenceable in the loop if the first iteration is peeled off. Also
192  // collect the set of instructions controlled by such loads. Only peel if an
193  // exit condition uses (transitively) such a load.
194  BasicBlock *Header = L.getHeader();
195  BasicBlock *Latch = L.getLoopLatch();
196  SmallPtrSet<Value *, 8> LoadUsers;
197  const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
198  for (BasicBlock *BB : L.blocks()) {
199  for (Instruction &I : *BB) {
200  if (I.mayWriteToMemory())
201  return 0;
202 
203  auto Iter = LoadUsers.find(&I);
204  if (Iter != LoadUsers.end()) {
205  for (Value *U : I.users())
206  LoadUsers.insert(U);
207  }
208  // Do not look for reads in the header; they can already be hoisted
209  // without peeling.
210  if (BB == Header)
211  continue;
212  if (auto *LI = dyn_cast<LoadInst>(&I)) {
213  Value *Ptr = LI->getPointerOperand();
214  if (DT.dominates(BB, Latch) && L.isLoopInvariant(Ptr) &&
215  !isDereferenceablePointer(Ptr, LI->getType(), DL, LI, &DT))
216  for (Value *U : I.users())
217  LoadUsers.insert(U);
218  }
219  }
220  }
221  SmallVector<BasicBlock *> ExitingBlocks;
222  L.getExitingBlocks(ExitingBlocks);
223  if (any_of(ExitingBlocks, [&LoadUsers](BasicBlock *Exiting) {
224  return LoadUsers.contains(Exiting->getTerminator());
225  }))
226  return 1;
227  return 0;
228 }
229 
230 // Return the number of iterations to peel off that make conditions in the
231 // body true/false. For example, if we peel 2 iterations off the loop below,
232 // the condition i < 2 can be evaluated at compile time.
233 // for (i = 0; i < n; i++)
234 // if (i < 2)
235 // ..
236 // else
237 // ..
238 // }
239 static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
240  ScalarEvolution &SE) {
241  assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
242  unsigned DesiredPeelCount = 0;
243 
244  for (auto *BB : L.blocks()) {
245  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
246  if (!BI || BI->isUnconditional())
247  continue;
248 
249  // Ignore loop exit condition.
250  if (L.getLoopLatch() == BB)
251  continue;
252 
253  Value *Condition = BI->getCondition();
254  Value *LeftVal, *RightVal;
255  CmpInst::Predicate Pred;
256  if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
257  continue;
258 
259  const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
260  const SCEV *RightSCEV = SE.getSCEV(RightVal);
261 
262  // Do not consider predicates that are known to be true or false
263  // independently of the loop iteration.
264  if (SE.evaluatePredicate(Pred, LeftSCEV, RightSCEV))
265  continue;
266 
267  // Check if we have a condition with one AddRec and one non AddRec
268  // expression. Normalize LeftSCEV to be the AddRec.
269  if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
270  if (isa<SCEVAddRecExpr>(RightSCEV)) {
271  std::swap(LeftSCEV, RightSCEV);
272  Pred = ICmpInst::getSwappedPredicate(Pred);
273  } else
274  continue;
275  }
276 
277  const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
278 
279  // Avoid huge SCEV computations in the loop below, make sure we only
280  // consider AddRecs of the loop we are trying to peel.
281  if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
282  continue;
283  if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) &&
284  !SE.getMonotonicPredicateType(LeftAR, Pred))
285  continue;
286 
287  // Check if extending the current DesiredPeelCount lets us evaluate Pred
288  // or !Pred in the loop body statically.
289  unsigned NewPeelCount = DesiredPeelCount;
290 
291  const SCEV *IterVal = LeftAR->evaluateAtIteration(
292  SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);
293 
294  // If the original condition is not known, get the negated predicate
295  // (which holds on the else branch) and check if it is known. This allows
296  // us to peel of iterations that make the original condition false.
297  if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
298  Pred = ICmpInst::getInversePredicate(Pred);
299 
300  const SCEV *Step = LeftAR->getStepRecurrence(SE);
301  const SCEV *NextIterVal = SE.getAddExpr(IterVal, Step);
302  auto PeelOneMoreIteration = [&IterVal, &NextIterVal, &SE, Step,
303  &NewPeelCount]() {
304  IterVal = NextIterVal;
305  NextIterVal = SE.getAddExpr(IterVal, Step);
306  NewPeelCount++;
307  };
308 
309  auto CanPeelOneMoreIteration = [&NewPeelCount, &MaxPeelCount]() {
310  return NewPeelCount < MaxPeelCount;
311  };
312 
313  while (CanPeelOneMoreIteration() &&
314  SE.isKnownPredicate(Pred, IterVal, RightSCEV))
315  PeelOneMoreIteration();
316 
317  // With *that* peel count, does the predicate !Pred become known in the
318  // first iteration of the loop body after peeling?
319  if (!SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
320  RightSCEV))
321  continue; // If not, give up.
322 
323  // However, for equality comparisons, that isn't always sufficient to
324  // eliminate the comparsion in loop body, we may need to peel one more
325  // iteration. See if that makes !Pred become unknown again.
326  if (ICmpInst::isEquality(Pred) &&
327  !SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), NextIterVal,
328  RightSCEV) &&
329  !SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
330  SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) {
331  if (!CanPeelOneMoreIteration())
332  continue; // Need to peel one more iteration, but can't. Give up.
333  PeelOneMoreIteration(); // Great!
334  }
335 
336  DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount);
337  }
338 
339  return DesiredPeelCount;
340 }
341 
342 // Return the number of iterations we want to peel off.
343 void llvm::computePeelCount(Loop *L, unsigned LoopSize,
345  unsigned &TripCount, DominatorTree &DT,
346  ScalarEvolution &SE, unsigned Threshold) {
347  assert(LoopSize > 0 && "Zero loop size is not allowed!");
348  // Save the PP.PeelCount value set by the target in
349  // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
350  unsigned TargetPeelCount = PP.PeelCount;
351  PP.PeelCount = 0;
352  if (!canPeel(L))
353  return;
354 
355  // Only try to peel innermost loops by default.
356  // The constraint can be relaxed by the target in TTI.getUnrollingPreferences
357  // or by the flag -unroll-allow-loop-nests-peeling.
358  if (!PP.AllowLoopNestsPeeling && !L->isInnermost())
359  return;
360 
361  // If the user provided a peel count, use that.
362  bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
363  if (UserPeelCount) {
364  LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
365  << " iterations.\n");
367  PP.PeelProfiledIterations = true;
368  return;
369  }
370 
371  // Skip peeling if it's disabled.
372  if (!PP.AllowPeeling)
373  return;
374 
375  unsigned AlreadyPeeled = 0;
376  if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
377  AlreadyPeeled = *Peeled;
378  // Stop if we already peeled off the maximum number of iterations.
379  if (AlreadyPeeled >= UnrollPeelMaxCount)
380  return;
381 
382  // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
383  // iterations of the loop. For this we compute the number for iterations after
384  // which every Phi is guaranteed to become an invariant, and try to peel the
385  // maximum number of iterations among these values, thus turning all those
386  // Phis into invariants.
387  // First, check that we can peel at least one iteration.
388  if (2 * LoopSize <= Threshold && UnrollPeelMaxCount > 0) {
389  // Store the pre-calculated values here.
390  SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
391  // Now go through all Phis to calculate their the number of iterations they
392  // need to become invariants.
393  // Start the max computation with the UP.PeelCount value set by the target
394  // in TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
395  unsigned DesiredPeelCount = TargetPeelCount;
396  BasicBlock *BackEdge = L->getLoopLatch();
397  assert(BackEdge && "Loop is not in simplified form?");
398  for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
399  PHINode *Phi = cast<PHINode>(&*BI);
400  unsigned ToInvariance = calculateIterationsToInvariance(
401  Phi, L, BackEdge, IterationsToInvariance);
402  if (ToInvariance != InfiniteIterationsToInvariance)
403  DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance);
404  }
405 
406  // Pay respect to limitations implied by loop size and the max peel count.
407  unsigned MaxPeelCount = UnrollPeelMaxCount;
408  MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1);
409 
410  DesiredPeelCount = std::max(DesiredPeelCount,
411  countToEliminateCompares(*L, MaxPeelCount, SE));
412 
413  if (DesiredPeelCount == 0)
414  DesiredPeelCount = peelToTurnInvariantLoadsDerefencebale(*L, DT);
415 
416  if (DesiredPeelCount > 0) {
417  DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
418  // Consider max peel count limitation.
419  assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
420  if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
421  LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
422  << " iteration(s) to turn"
423  << " some Phis into invariants.\n");
424  PP.PeelCount = DesiredPeelCount;
425  PP.PeelProfiledIterations = false;
426  return;
427  }
428  }
429  }
430 
431  // Bail if we know the statically calculated trip count.
432  // In this case we rather prefer partial unrolling.
433  if (TripCount)
434  return;
435 
436  // Do not apply profile base peeling if it is disabled.
437  if (!PP.PeelProfiledIterations)
438  return;
439  // If we don't know the trip count, but have reason to believe the average
440  // trip count is low, peeling should be beneficial, since we will usually
441  // hit the peeled section.
442  // We only do this in the presence of profile information, since otherwise
443  // our estimates of the trip count are not reliable enough.
444  if (L->getHeader()->getParent()->hasProfileData()) {
446  if (!PeelCount)
447  return;
448 
449  LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount
450  << "\n");
451 
452  if (*PeelCount) {
453  if ((*PeelCount + AlreadyPeeled <= UnrollPeelMaxCount) &&
454  (LoopSize * (*PeelCount + 1) <= Threshold)) {
455  LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount
456  << " iterations.\n");
457  PP.PeelCount = *PeelCount;
458  return;
459  }
460  LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
461  LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n");
462  LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
463  LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1)
464  << "\n");
465  LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n");
466  }
467  }
468 }
469 
470 /// Update the branch weights of the latch of a peeled-off loop
471 /// iteration.
472 /// This sets the branch weights for the latch of the recently peeled off loop
473 /// iteration correctly.
474 /// Let F is a weight of the edge from latch to header.
475 /// Let E is a weight of the edge from latch to exit.
476 /// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to
477 /// go to exit.
478 /// Then, Estimated TripCount = F / E.
479 /// For I-th (counting from 0) peeled off iteration we set the the weights for
480 /// the peeled latch as (TC - I, 1). It gives us reasonable distribution,
481 /// The probability to go to exit 1/(TC-I) increases. At the same time
482 /// the estimated trip count of remaining loop reduces by I.
483 /// To avoid dealing with division rounding we can just multiple both part
484 /// of weights to E and use weight as (F - I * E, E).
485 ///
486 /// \param Header The copy of the header block that belongs to next iteration.
487 /// \param LatchBR The copy of the latch branch that belongs to this iteration.
488 /// \param[in,out] FallThroughWeight The weight of the edge from latch to
489 /// header before peeling (in) and after peeled off one iteration (out).
490 static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
491  uint64_t ExitWeight,
492  uint64_t &FallThroughWeight) {
493  // FallThroughWeight is 0 means that there is no branch weights on original
494  // latch block or estimated trip count is zero.
495  if (!FallThroughWeight)
496  return;
497 
498  unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
499  MDBuilder MDB(LatchBR->getContext());
500  MDNode *WeightNode =
501  HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)
502  : MDB.createBranchWeights(FallThroughWeight, ExitWeight);
503  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
504  FallThroughWeight =
505  FallThroughWeight > ExitWeight ? FallThroughWeight - ExitWeight : 1;
506 }
507 
508 /// Initialize the weights.
509 ///
510 /// \param Header The header block.
511 /// \param LatchBR The latch branch.
512 /// \param[out] ExitWeight The weight of the edge from Latch to Exit.
513 /// \param[out] FallThroughWeight The weight of the edge from Latch to Header.
514 static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
515  uint64_t &ExitWeight,
516  uint64_t &FallThroughWeight) {
517  uint64_t TrueWeight, FalseWeight;
518  if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight))
519  return;
520  unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
521  ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
522  FallThroughWeight = HeaderIdx ? FalseWeight : TrueWeight;
523 }
524 
525 /// Update the weights of original Latch block after peeling off all iterations.
526 ///
527 /// \param Header The header block.
528 /// \param LatchBR The latch branch.
529 /// \param ExitWeight The weight of the edge from Latch to Exit.
530 /// \param FallThroughWeight The weight of the edge from Latch to Header.
531 static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
532  uint64_t ExitWeight,
533  uint64_t FallThroughWeight) {
534  // FallThroughWeight is 0 means that there is no branch weights on original
535  // latch block or estimated trip count is zero.
536  if (!FallThroughWeight)
537  return;
538 
539  // Sets the branch weights on the loop exit.
540  MDBuilder MDB(LatchBR->getContext());
541  unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
542  MDNode *WeightNode =
543  HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)
544  : MDB.createBranchWeights(FallThroughWeight, ExitWeight);
545  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
546 }
547 
548 /// Clones the body of the loop L, putting it between \p InsertTop and \p
549 /// InsertBot.
550 /// \param IterNumber The serial number of the iteration currently being
551 /// peeled off.
552 /// \param ExitEdges The exit edges of the original loop.
553 /// \param[out] NewBlocks A list of the blocks in the newly created clone
554 /// \param[out] VMap The value map between the loop and the new clone.
555 /// \param LoopBlocks A helper for DFS-traversal of the loop.
556 /// \param LVMap A value-map that maps instructions from the original loop to
557 /// instructions in the last peeled-off iteration.
558 static void cloneLoopBlocks(
559  Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot,
560  SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
561  SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
563  LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes) {
564  BasicBlock *Header = L->getHeader();
565  BasicBlock *Latch = L->getLoopLatch();
566  BasicBlock *PreHeader = L->getLoopPreheader();
567 
568  Function *F = Header->getParent();
569  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
570  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
571  Loop *ParentLoop = L->getParentLoop();
572 
573  // For each block in the original loop, create a new copy,
574  // and update the value map with the newly created values.
575  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
576  BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
577  NewBlocks.push_back(NewBB);
578 
579  // If an original block is an immediate child of the loop L, its copy
580  // is a child of a ParentLoop after peeling. If a block is a child of
581  // a nested loop, it is handled in the cloneLoop() call below.
582  if (ParentLoop && LI->getLoopFor(*BB) == L)
583  ParentLoop->addBasicBlockToLoop(NewBB, *LI);
584 
585  VMap[*BB] = NewBB;
586 
587  // If dominator tree is available, insert nodes to represent cloned blocks.
588  if (DT) {
589  if (Header == *BB)
590  DT->addNewBlock(NewBB, InsertTop);
591  else {
592  DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
593  // VMap must contain entry for IDom, as the iteration order is RPO.
594  DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
595  }
596  }
597  }
598 
599  {
600  // Identify what other metadata depends on the cloned version. After
601  // cloning, replace the metadata with the corrected version for both
602  // memory instructions and noalias intrinsics.
603  std::string Ext = (Twine("Peel") + Twine(IterNumber)).str();
604  cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
605  Header->getContext(), Ext);
606  }
607 
608  // Recursively create the new Loop objects for nested loops, if any,
609  // to preserve LoopInfo.
610  for (Loop *ChildLoop : *L) {
611  cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr);
612  }
613 
614  // Hook-up the control flow for the newly inserted blocks.
615  // The new header is hooked up directly to the "top", which is either
616  // the original loop preheader (for the first iteration) or the previous
617  // iteration's exiting block (for every other iteration)
618  InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));
619 
620  // Similarly, for the latch:
621  // The original exiting edge is still hooked up to the loop exit.
622  // The backedge now goes to the "bottom", which is either the loop's real
623  // header (for the last peeled iteration) or the copied header of the next
624  // iteration (for every other iteration)
625  BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
626  BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator());
627  for (unsigned idx = 0, e = LatchBR->getNumSuccessors(); idx < e; ++idx)
628  if (LatchBR->getSuccessor(idx) == Header) {
629  LatchBR->setSuccessor(idx, InsertBot);
630  break;
631  }
632  if (DT)
633  DT->changeImmediateDominator(InsertBot, NewLatch);
634 
635  // The new copy of the loop body starts with a bunch of PHI nodes
636  // that pick an incoming value from either the preheader, or the previous
637  // loop iteration. Since this copy is no longer part of the loop, we
638  // resolve this statically:
639  // For the first iteration, we use the value from the preheader directly.
640  // For any other iteration, we replace the phi with the value generated by
641  // the immediately preceding clone of the loop body (which represents
642  // the previous iteration).
643  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
644  PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
645  if (IterNumber == 0) {
646  VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
647  } else {
648  Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
649  Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
650  if (LatchInst && L->contains(LatchInst))
651  VMap[&*I] = LVMap[LatchInst];
652  else
653  VMap[&*I] = LatchVal;
654  }
655  cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
656  }
657 
658  // Fix up the outgoing values - we need to add a value for the iteration
659  // we've just created. Note that this must happen *after* the incoming
660  // values are adjusted, since the value going out of the latch may also be
661  // a value coming into the header.
662  for (auto Edge : ExitEdges)
663  for (PHINode &PHI : Edge.second->phis()) {
664  Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
665  Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
666  if (LatchInst && L->contains(LatchInst))
667  LatchVal = VMap[LatchVal];
668  PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
669  }
670 
671  // LastValueMap is updated with the values for the current loop
672  // which are used the next time this function is called.
673  for (auto KV : VMap)
674  LVMap[KV.first] = KV.second;
675 }
676 
679  Optional<bool> UserAllowPeeling,
680  Optional<bool> UserAllowProfileBasedPeeling, bool UnrollingSpecficValues) {
682 
683  // Set the default values.
684  PP.PeelCount = 0;
685  PP.AllowPeeling = true;
686  PP.AllowLoopNestsPeeling = false;
687  PP.PeelProfiledIterations = true;
688 
689  // Get the target specifc values.
690  TTI.getPeelingPreferences(L, SE, PP);
691 
692  // User specified values using cl::opt.
693  if (UnrollingSpecficValues) {
694  if (UnrollPeelCount.getNumOccurrences() > 0)
700  }
701 
702  // User specifed values provided by argument.
703  if (UserAllowPeeling.hasValue())
704  PP.AllowPeeling = *UserAllowPeeling;
705  if (UserAllowProfileBasedPeeling.hasValue())
706  PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
707 
708  return PP;
709 }
710 
711 /// Peel off the first \p PeelCount iterations of loop \p L.
712 ///
713 /// Note that this does not peel them off as a single straight-line block.
714 /// Rather, each iteration is peeled off separately, and needs to check the
715 /// exit condition.
716 /// For loops that dynamically execute \p PeelCount iterations or less
717 /// this provides a benefit, since the peeled off iterations, which account
718 /// for the bulk of dynamic execution, can be further simplified by scalar
719 /// optimizations.
720 bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
722  bool PreserveLCSSA) {
723  assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
724  assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
725 
726  LoopBlocksDFS LoopBlocks(L);
727  LoopBlocks.perform(LI);
728 
729  BasicBlock *Header = L->getHeader();
730  BasicBlock *PreHeader = L->getLoopPreheader();
731  BasicBlock *Latch = L->getLoopLatch();
733  L->getExitEdges(ExitEdges);
734 
736  if (DT) {
737  // We'd like to determine the idom of exit block after peeling one
738  // iteration.
739  // Let Exit is exit block.
740  // Let ExitingSet - is a set of predecessors of Exit block. They are exiting
741  // blocks.
742  // Let Latch' and ExitingSet' are copies after a peeling.
743  // We'd like to find an idom'(Exit) - idom of Exit after peeling.
744  // It is an evident that idom'(Exit) will be the nearest common dominator
745  // of ExitingSet and ExitingSet'.
746  // idom(Exit) is a nearest common dominator of ExitingSet.
747  // idom(Exit)' is a nearest common dominator of ExitingSet'.
748  // Taking into account that we have a single Latch, Latch' will dominate
749  // Header and idom(Exit).
750  // So the idom'(Exit) is nearest common dominator of idom(Exit)' and Latch'.
751  // All these basic blocks are in the same loop, so what we find is
752  // (nearest common dominator of idom(Exit) and Latch)'.
753  // In the loop below we remember nearest common dominator of idom(Exit) and
754  // Latch to update idom of Exit later.
755  assert(L->hasDedicatedExits() && "No dedicated exits?");
756  for (auto Edge : ExitEdges) {
757  if (ExitIDom.count(Edge.second))
758  continue;
760  DT->getNode(Edge.second)->getIDom()->getBlock(), Latch);
761  assert(L->contains(BB) && "IDom is not in a loop");
762  ExitIDom[Edge.second] = BB;
763  }
764  }
765 
766  Function *F = Header->getParent();
767 
768  // Set up all the necessary basic blocks. It is convenient to split the
769  // preheader into 3 parts - two blocks to anchor the peeled copy of the loop
770  // body, and a new preheader for the "real" loop.
771 
772  // Peeling the first iteration transforms.
773  //
774  // PreHeader:
775  // ...
776  // Header:
777  // LoopBody
778  // If (cond) goto Header
779  // Exit:
780  //
781  // into
782  //
783  // InsertTop:
784  // LoopBody
785  // If (!cond) goto Exit
786  // InsertBot:
787  // NewPreHeader:
788  // ...
789  // Header:
790  // LoopBody
791  // If (cond) goto Header
792  // Exit:
793  //
794  // Each following iteration will split the current bottom anchor in two,
795  // and put the new copy of the loop body between these two blocks. That is,
796  // after peeling another iteration from the example above, we'll split
797  // InsertBot, and get:
798  //
799  // InsertTop:
800  // LoopBody
801  // If (!cond) goto Exit
802  // InsertBot:
803  // LoopBody
804  // If (!cond) goto Exit
805  // InsertBot.next:
806  // NewPreHeader:
807  // ...
808  // Header:
809  // LoopBody
810  // If (cond) goto Header
811  // Exit:
812 
813  BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI);
814  BasicBlock *InsertBot =
815  SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI);
816  BasicBlock *NewPreHeader =
817  SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
818 
819  InsertTop->setName(Header->getName() + ".peel.begin");
820  InsertBot->setName(Header->getName() + ".peel.next");
821  NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
822 
823  ValueToValueMapTy LVMap;
824 
825  // If we have branch weight information, we'll want to update it for the
826  // newly created branches.
827  BranchInst *LatchBR =
828  cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator());
829  uint64_t ExitWeight = 0, FallThroughWeight = 0;
830  initBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
831 
832  // Identify what noalias metadata is inside the loop: if it is inside the
833  // loop, the associated metadata must be cloned for each iteration.
834  SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
835  identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
836 
837  // For each peeled-off iteration, make a copy of the loop.
838  for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
840  ValueToValueMapTy VMap;
841 
842  cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
843  LoopBlocks, VMap, LVMap, DT, LI,
844  LoopLocalNoAliasDeclScopes);
845 
846  // Remap to use values from the current iteration instead of the
847  // previous one.
848  remapInstructionsInBlocks(NewBlocks, VMap);
849 
850  if (DT) {
851  // Latches of the cloned loops dominate over the loop exit, so idom of the
852  // latter is the first cloned loop body, as original PreHeader dominates
853  // the original loop body.
854  if (Iter == 0)
855  for (auto Exit : ExitIDom)
856  DT->changeImmediateDominator(Exit.first,
857  cast<BasicBlock>(LVMap[Exit.second]));
858 #ifdef EXPENSIVE_CHECKS
860 #endif
861  }
862 
863  auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
864  updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight);
865  // Remove Loop metadata from the latch branch instruction
866  // because it is not the Loop's latch branch anymore.
867  LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr);
868 
869  InsertTop = InsertBot;
870  InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
871  InsertBot->setName(Header->getName() + ".peel.next");
872 
873  F->getBasicBlockList().splice(InsertTop->getIterator(),
874  F->getBasicBlockList(),
875  NewBlocks[0]->getIterator(), F->end());
876  }
877 
878  // Now adjust the phi nodes in the loop header to get their initial values
879  // from the last peeled-off iteration instead of the preheader.
880  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
881  PHINode *PHI = cast<PHINode>(I);
882  Value *NewVal = PHI->getIncomingValueForBlock(Latch);
883  Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
884  if (LatchInst && L->contains(LatchInst))
885  NewVal = LVMap[LatchInst];
886 
887  PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
888  }
889 
890  fixupBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
891 
892  // Update Metadata for count of peeled off iterations.
893  unsigned AlreadyPeeled = 0;
894  if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
895  AlreadyPeeled = *Peeled;
896  addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount);
897 
898  if (Loop *ParentLoop = L->getParentLoop())
899  L = ParentLoop;
900 
901  // We modified the loop, update SE.
902  SE->forgetTopmostLoop(L);
903 
904  // Finally DomtTree must be correct.
906 
907  // FIXME: Incrementally update loop-simplify
908  simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA);
909 
910  NumPeeled++;
911 
912  return true;
913 }
UnrollPeelCount
static cl::opt< unsigned > UnrollPeelCount("unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes"))
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:64
fixupBranchWeights
static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR, uint64_t ExitWeight, uint64_t FallThroughWeight)
Update the weights of original Latch block after peeling off all iterations.
Definition: LoopPeel.cpp:531
llvm::CmpInst::getSwappedPredicate
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:836
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::DominatorTreeBase::findNearestCommonDominator
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
Definition: GenericDomTree.h:468
LoopSimplify.h
InfiniteIterationsToInvariance
static const unsigned InfiniteIterationsToInvariance
Definition: LoopPeel.cpp:82
Optional.h
initBranchWeights
static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR, uint64_t &ExitWeight, uint64_t &FallThroughWeight)
Initialize the weights.
Definition: LoopPeel.cpp:514
ValueMapper.h
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::peelLoop
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA)
Peel off the first PeelCount iterations of loop L.
Definition: LoopPeel.cpp:720
Metadata.h
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:720
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:379
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
Loads.h
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::TargetTransformInfo::PeelingPreferences::AllowPeeling
bool AllowPeeling
Allow peeling off loop iterations.
Definition: TargetTransformInfo.h:541
UnrollPeelMaxCount
static cl::opt< unsigned > UnrollPeelMaxCount("unroll-peel-max-count", cl::init(7), cl::Hidden, cl::desc("Max average trip count which will cause loop peeling."))
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::computePeelCount
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned &TripCount, DominatorTree &DT, ScalarEvolution &SE, unsigned Threshold=UINT_MAX)
Definition: LoopPeel.cpp:343
calculateIterationsToInvariance
static unsigned calculateIterationsToInvariance(PHINode *Phi, Loop *L, BasicBlock *BackEdge, SmallDenseMap< PHINode *, unsigned > &IterationsToInvariance)
Definition: LoopPeel.cpp:131
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:820
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
ScalarEvolution.h
DenseMap.h
llvm::ICmpInst::isEquality
bool isEquality() const
Return true if this predicate is either EQ or NE.
Definition: Instructions.h:1298
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:535
llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition: GenericDomTree.h:351
llvm::LoopBase::getExitEdges
void getExitEdges(SmallVectorImpl< Edge > &ExitEdges) const
Return all pairs of (inside_block,outside_block).
Definition: LoopInfoImpl.h:148
llvm::Optional< unsigned >
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::BranchInst::getNumSuccessors
unsigned getNumSuccessors() const
Definition: Instructions.h:3159
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::ScalarEvolution::evaluatePredicate
Optional< bool > evaluatePredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Check whether the condition described by Pred, LHS, and RHS is true or false.
Definition: ScalarEvolution.cpp:9988
llvm::DomTreeNodeBase::getIDom
DomTreeNodeBase * getIDom() const
Definition: GenericDomTree.h:89
peelToTurnInvariantLoadsDerefencebale
static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L, DominatorTree &DT)
Definition: LoopPeel.cpp:173
llvm::Sched::Fast
@ Fast
Definition: TargetLowering.h:105
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::LoopBlocksDFS::beginRPO
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
Definition: LoopIterator.h:136
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BranchInst::setSuccessor
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Definition: Instructions.h:3166
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1336
llvm::canPeel
bool canPeel(Loop *L)
Definition: LoopPeel.cpp:86
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:288
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:115
PeeledCountMetaData
static const char * PeeledCountMetaData
Definition: LoopPeel.cpp:77
Instruction.h
CommandLine.h
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1551
llvm::TargetTransformInfo::PeelingPreferences::PeelProfiledIterations
bool PeelProfiledIterations
Allow peeling basing on profile.
Definition: TargetTransformInfo.h:548
llvm::getOptionalIntLoopAttribute
llvm::Optional< int > getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name)
Find named metadata for a loop with an integer value.
Definition: LoopInfo.cpp:1091
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
countToEliminateCompares
static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE)
Definition: LoopPeel.cpp:239
InstrTypes.h
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::remapInstructionsInBlocks
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock * > &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
Definition: CloneFunction.cpp:787
llvm::MDBuilder::createBranchWeights
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
UnrollForcePeelCount
static cl::opt< unsigned > UnrollForcePeelCount("unroll-force-peel-count", cl::init(0), cl::Hidden, cl::desc("Force a peel count regardless of profiling information."))
llvm::cloneLoop
Loop * cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM)
Recursively clone the specified loop and all of its children, mapping the blocks with the specified m...
Definition: LoopUtils.cpp:1495
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2818
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::ScalarEvolution::isKnownPredicate
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
Definition: ScalarEvolution.cpp:9973
llvm::Instruction
Definition: Instruction.h:45
llvm::LoopBlocksDFS::endRPO
RPOIterator endRPO() const
Definition: LoopIterator.h:140
llvm::isDereferenceablePointer
bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition: Loads.cpp:234
MDBuilder.h
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:376
llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:34
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:402
LoopUtils.h
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:148
llvm::DominatorTreeBase::changeImmediateDominator
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
Definition: GenericDomTree.h:655
llvm::LoopBase::getExitingBlock
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
PatternMatch.h
llvm::Instruction::extractProfMetadata
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
Definition: Metadata.cpp:1405
LoopIterator.h
LoopInfo.h
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4090
llvm::LoopBlocksDFS::RPOIterator
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
Definition: LoopIterator.h:101
llvm::LoopBlocksDFS
Store the result of a depth first search within basic blocks contained by a single loop.
Definition: LoopIterator.h:97
llvm::cloneAndAdaptNoAliasScopes
void cloneAndAdaptNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, ArrayRef< BasicBlock * > NewBlocks, LLVMContext &Context, StringRef Ext)
Clone the specified noalias decl scopes.
Definition: CloneFunction.cpp:991
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:478
llvm::CloneBasicBlock
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
Definition: CloneFunction.cpp:43
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1432
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:77
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:156
llvm::SmallPtrSetImpl::find
iterator find(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:385
llvm::DomTreeNodeBase::getBlock
NodeT * getBlock() const
Definition: GenericDomTree.h:88
uint64_t
llvm::SmallPtrSetImpl::end
iterator end() const
Definition: SmallPtrSet.h:407
llvm::addStringMetadataToLoop
void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V=0)
Set input string into loop metadata by keeping other values intact.
Definition: LoopUtils.cpp:222
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:714
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:967
I
#define I(x, y, z)
Definition: MD5.cpp:59
Cloning.h
llvm::SCEVNAryExpr::hasNoSelfWrap
bool hasNoSelfWrap() const
Definition: ScalarEvolutionExpressions.h:225
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::ScalarEvolution::getMonotonicPredicateType
Optional< MonotonicPredicateType > getMonotonicPredicateType(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred)
If, for all loop invariant X, the predicate "LHS `Pred` X" is monotonically increasing or decreasing,...
Definition: ScalarEvolution.cpp:10032
llvm::LoopBlocksDFS::perform
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopInfo.cpp:1220
llvm::TargetTransformInfo::PeelingPreferences::AllowLoopNestsPeeling
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
Definition: TargetTransformInfo.h:543
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::LoopBase::hasDedicatedExits
bool hasDedicatedExits() const
Return true if no exit block for the loop has a predecessor that is outside the loop.
Definition: LoopInfoImpl.h:92
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::PHINode::setIncomingValueForBlock
void setIncomingValueForBlock(const BasicBlock *BB, Value *V)
Set every incoming value(s) for block BB to V.
Definition: Instructions.h:2825
llvm::ScalarEvolution::forgetTopmostLoop
void forgetTopmostLoop(const Loop *L)
Definition: ScalarEvolution.cpp:7579
llvm::MDNode
Metadata node.
Definition: Metadata.h:906
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::TargetTransformInfo::PeelingPreferences::PeelCount
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
Definition: TargetTransformInfo.h:539
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1558
llvm::identifyNoAliasScopesToClone
void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
Definition: CloneFunction.cpp:1028
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::Instruction::setSuccessor
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
Definition: Instruction.cpp:789
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:446
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
UnrollAllowLoopNestsPeeling
static cl::opt< bool > UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden, cl::desc("Allows loop nests to be peeled."))
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:990
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
UnrollAllowPeeling
static cl::opt< bool > UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low."))
llvm::DomTreeNodeBase< BasicBlock >
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::ValueMap< const Value *, WeakTrackingVH >
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
llvm::LoopBase::getUniqueNonLatchExitBlocks
void getUniqueNonLatchExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop except successors from Latch block are not considered...
Definition: LoopInfoImpl.h:129
llvm::DominatorTreeBase::addNewBlock
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
Definition: GenericDomTree.h:619
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:36
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
llvm::SplitEdge
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
Definition: BasicBlockUtils.cpp:493
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:364
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:83
updateBranchWeights
static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR, uint64_t ExitWeight, uint64_t &FallThroughWeight)
Update the branch weights of the latch of a peeled-off loop iteration.
Definition: LoopPeel.cpp:490
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:352
Casting.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::simplifyLoop
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
Definition: LoopSimplify.cpp:719
llvm::Function::hasProfileData
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:303
llvm::MDBuilder
Definition: MDBuilder.h:35
ScalarEvolutionExpressions.h
Instructions.h
SmallVector.h
llvm::LoopBase::addBasicBlockToLoop
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:242
llvm::PatternMatch::m_ICmp
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:1404
Dominators.h
UnrollLoop.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition: GenericDomTree.h:802
TargetTransformInfo.h
llvm::TargetTransformInfo::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) const
Get target-customized preferences for the generic loop peeling transformation.
Definition: TargetTransformInfo.cpp:326
llvm::PHINode
Definition: Instructions.h:2633
Threshold
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:401
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:379
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2419
llvm::LoopBase::isLoopExiting
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Definition: LoopInfo.h:225
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
LLVMContext.h
llvm::cl::desc
Definition: CommandLine.h:412
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3068
raw_ostream.h
llvm::SmallPtrSetImpl::contains
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:388
LoopPeel.h
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:814
cloneLoopBlocks
static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot, SmallVectorImpl< std::pair< BasicBlock *, BasicBlock * >> &ExitEdges, SmallVectorImpl< BasicBlock * > &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, LoopInfo *LI, ArrayRef< MDNode * > LoopLocalNoAliasDeclScopes)
Clones the body of the loop L, putting it between InsertTop and InsertBot.
Definition: LoopPeel.cpp:558
llvm::getLoopEstimatedTripCount
Optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:807
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::gatherPeelingPreferences
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, Optional< bool > UserAllowPeeling, Optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
Definition: LoopPeel.cpp:677
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3161
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:370
llvm::SCEVAddRecExpr::evaluateAtIteration
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
Definition: ScalarEvolution.cpp:1025
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364