LLVM  15.0.0git
LoopPeel.cpp
Go to the documentation of this file.
1 //===- LoopPeel.cpp -------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Loop Peeling Utilities.
10 //===----------------------------------------------------------------------===//
11 
13 #include "llvm/ADT/DenseMap.h"
14 #include "llvm/ADT/Optional.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/Loads.h"
18 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/IR/BasicBlock.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/InstrTypes.h"
27 #include "llvm/IR/Instruction.h"
28 #include "llvm/IR/Instructions.h"
29 #include "llvm/IR/LLVMContext.h"
30 #include "llvm/IR/MDBuilder.h"
31 #include "llvm/IR/PatternMatch.h"
32 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/Debug.h"
41 #include <algorithm>
42 #include <cassert>
43 #include <cstdint>
44 
45 using namespace llvm;
46 using namespace llvm::PatternMatch;
47 
48 #define DEBUG_TYPE "loop-peel"
49 
50 STATISTIC(NumPeeled, "Number of loops peeled");
51 
53  "unroll-peel-count", cl::Hidden,
54  cl::desc("Set the unroll peeling count, for testing purposes"));
55 
56 static cl::opt<bool>
57  UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
58  cl::desc("Allows loops to be peeled when the dynamic "
59  "trip count is known to be low."));
60 
61 static cl::opt<bool>
62  UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling",
63  cl::init(false), cl::Hidden,
64  cl::desc("Allows loop nests to be peeled."));
65 
67  "unroll-peel-max-count", cl::init(7), cl::Hidden,
68  cl::desc("Max average trip count which will cause loop peeling."));
69 
71  "unroll-force-peel-count", cl::init(0), cl::Hidden,
72  cl::desc("Force a peel count regardless of profiling information."));
73 
74 static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
75 
76 // Check whether we are capable of peeling this loop.
77 bool llvm::canPeel(Loop *L) {
78  // Make sure the loop is in simplified form
79  if (!L->isLoopSimplifyForm())
80  return false;
81 
82  // Don't try to peel loops where the latch is not the exiting block.
83  // This can be an indication of two different things:
84  // 1) The loop is not rotated.
85  // 2) The loop contains irreducible control flow that involves the latch.
86  const BasicBlock *Latch = L->getLoopLatch();
87  if (!L->isLoopExiting(Latch))
88  return false;
89 
90  // Peeling is only supported if the latch is a branch.
91  if (!isa<BranchInst>(Latch->getTerminator()))
92  return false;
93 
96  // The latch must either be the only exiting block or all non-latch exit
97  // blocks have either a deopt or unreachable terminator or compose a chain of
98  // blocks where the last one is either deopt or unreachable terminated. Both
99  // deopt and unreachable terminators are a strong indication they are not
100  // taken. Note that this is a profitability check, not a legality check. Also
101  // note that LoopPeeling currently can only update the branch weights of latch
102  // blocks and branch weights to blocks with deopt or unreachable do not need
103  // updating.
105 }
106 
107 // This function calculates the number of iterations after which the given Phi
108 // becomes an invariant. The pre-calculated values are memorized in the map. The
109 // function (shortcut is I) is calculated according to the following definition:
110 // Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
111 // If %y is a loop invariant, then I(%x) = 1.
112 // If %y is a Phi from the loop header, I(%x) = I(%y) + 1.
113 // Otherwise, I(%x) is infinite.
114 // TODO: Actually if %y is an expression that depends only on Phi %z and some
115 // loop invariants, we can estimate I(%x) = I(%z) + 1. The example
116 // looks like:
117 // %x = phi(0, %a), <-- becomes invariant starting from 3rd iteration.
118 // %y = phi(0, 5),
119 // %a = %y + 1.
121  PHINode *Phi, Loop *L, BasicBlock *BackEdge,
122  SmallDenseMap<PHINode *, Optional<unsigned> > &IterationsToInvariance) {
123  assert(Phi->getParent() == L->getHeader() &&
124  "Non-loop Phi should not be checked for turning into invariant.");
125  assert(BackEdge == L->getLoopLatch() && "Wrong latch?");
126  // If we already know the answer, take it from the map.
127  auto I = IterationsToInvariance.find(Phi);
128  if (I != IterationsToInvariance.end())
129  return I->second;
130 
131  // Otherwise we need to analyze the input from the back edge.
132  Value *Input = Phi->getIncomingValueForBlock(BackEdge);
133  // Place infinity to map to avoid infinite recursion for cycled Phis. Such
134  // cycles can never stop on an invariant.
135  IterationsToInvariance[Phi] = None;
136  Optional<unsigned> ToInvariance = None;
137 
138  if (L->isLoopInvariant(Input))
139  ToInvariance = 1u;
140  else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
141  // Only consider Phis in header block.
142  if (IncPhi->getParent() != L->getHeader())
143  return None;
144  // If the input becomes an invariant after X iterations, then our Phi
145  // becomes an invariant after X + 1 iterations.
146  auto InputToInvariance = calculateIterationsToInvariance(
147  IncPhi, L, BackEdge, IterationsToInvariance);
148  if (InputToInvariance)
149  ToInvariance = *InputToInvariance + 1u;
150  }
151 
152  // If we found that this Phi lies in an invariant chain, update the map.
153  if (ToInvariance)
154  IterationsToInvariance[Phi] = ToInvariance;
155  return ToInvariance;
156 }
157 
158 // Try to find any invariant memory reads that will become dereferenceable in
159 // the remainder loop after peeling. The load must also be used (transitively)
160 // by an exit condition. Returns the number of iterations to peel off (at the
161 // moment either 0 or 1).
163  DominatorTree &DT) {
164  // Skip loops with a single exiting block, because there should be no benefit
165  // for the heuristic below.
166  if (L.getExitingBlock())
167  return 0;
168 
169  // All non-latch exit blocks must have an UnreachableInst terminator.
170  // Otherwise the heuristic below may not be profitable.
173  if (any_of(Exits, [](const BasicBlock *BB) {
174  return !isa<UnreachableInst>(BB->getTerminator());
175  }))
176  return 0;
177 
178  // Now look for invariant loads that dominate the latch and are not known to
179  // be dereferenceable. If there are such loads and no writes, they will become
180  // dereferenceable in the loop if the first iteration is peeled off. Also
181  // collect the set of instructions controlled by such loads. Only peel if an
182  // exit condition uses (transitively) such a load.
183  BasicBlock *Header = L.getHeader();
184  BasicBlock *Latch = L.getLoopLatch();
185  SmallPtrSet<Value *, 8> LoadUsers;
186  const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
187  for (BasicBlock *BB : L.blocks()) {
188  for (Instruction &I : *BB) {
189  if (I.mayWriteToMemory())
190  return 0;
191 
192  auto Iter = LoadUsers.find(&I);
193  if (Iter != LoadUsers.end()) {
194  for (Value *U : I.users())
195  LoadUsers.insert(U);
196  }
197  // Do not look for reads in the header; they can already be hoisted
198  // without peeling.
199  if (BB == Header)
200  continue;
201  if (auto *LI = dyn_cast<LoadInst>(&I)) {
202  Value *Ptr = LI->getPointerOperand();
203  if (DT.dominates(BB, Latch) && L.isLoopInvariant(Ptr) &&
204  !isDereferenceablePointer(Ptr, LI->getType(), DL, LI, &DT))
205  for (Value *U : I.users())
206  LoadUsers.insert(U);
207  }
208  }
209  }
210  SmallVector<BasicBlock *> ExitingBlocks;
211  L.getExitingBlocks(ExitingBlocks);
212  if (any_of(ExitingBlocks, [&LoadUsers](BasicBlock *Exiting) {
213  return LoadUsers.contains(Exiting->getTerminator());
214  }))
215  return 1;
216  return 0;
217 }
218 
219 // Return the number of iterations to peel off that make conditions in the
220 // body true/false. For example, if we peel 2 iterations off the loop below,
221 // the condition i < 2 can be evaluated at compile time.
222 // for (i = 0; i < n; i++)
223 // if (i < 2)
224 // ..
225 // else
226 // ..
227 // }
228 static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
229  ScalarEvolution &SE) {
230  assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
231  unsigned DesiredPeelCount = 0;
232 
233  for (auto *BB : L.blocks()) {
234  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
235  if (!BI || BI->isUnconditional())
236  continue;
237 
238  // Ignore loop exit condition.
239  if (L.getLoopLatch() == BB)
240  continue;
241 
242  Value *Condition = BI->getCondition();
243  Value *LeftVal, *RightVal;
244  CmpInst::Predicate Pred;
245  if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
246  continue;
247 
248  const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
249  const SCEV *RightSCEV = SE.getSCEV(RightVal);
250 
251  // Do not consider predicates that are known to be true or false
252  // independently of the loop iteration.
253  if (SE.evaluatePredicate(Pred, LeftSCEV, RightSCEV))
254  continue;
255 
256  // Check if we have a condition with one AddRec and one non AddRec
257  // expression. Normalize LeftSCEV to be the AddRec.
258  if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
259  if (isa<SCEVAddRecExpr>(RightSCEV)) {
260  std::swap(LeftSCEV, RightSCEV);
261  Pred = ICmpInst::getSwappedPredicate(Pred);
262  } else
263  continue;
264  }
265 
266  const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
267 
268  // Avoid huge SCEV computations in the loop below, make sure we only
269  // consider AddRecs of the loop we are trying to peel.
270  if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
271  continue;
272  if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) &&
273  !SE.getMonotonicPredicateType(LeftAR, Pred))
274  continue;
275 
276  // Check if extending the current DesiredPeelCount lets us evaluate Pred
277  // or !Pred in the loop body statically.
278  unsigned NewPeelCount = DesiredPeelCount;
279 
280  const SCEV *IterVal = LeftAR->evaluateAtIteration(
281  SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);
282 
283  // If the original condition is not known, get the negated predicate
284  // (which holds on the else branch) and check if it is known. This allows
285  // us to peel of iterations that make the original condition false.
286  if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
287  Pred = ICmpInst::getInversePredicate(Pred);
288 
289  const SCEV *Step = LeftAR->getStepRecurrence(SE);
290  const SCEV *NextIterVal = SE.getAddExpr(IterVal, Step);
291  auto PeelOneMoreIteration = [&IterVal, &NextIterVal, &SE, Step,
292  &NewPeelCount]() {
293  IterVal = NextIterVal;
294  NextIterVal = SE.getAddExpr(IterVal, Step);
295  NewPeelCount++;
296  };
297 
298  auto CanPeelOneMoreIteration = [&NewPeelCount, &MaxPeelCount]() {
299  return NewPeelCount < MaxPeelCount;
300  };
301 
302  while (CanPeelOneMoreIteration() &&
303  SE.isKnownPredicate(Pred, IterVal, RightSCEV))
304  PeelOneMoreIteration();
305 
306  // With *that* peel count, does the predicate !Pred become known in the
307  // first iteration of the loop body after peeling?
308  if (!SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
309  RightSCEV))
310  continue; // If not, give up.
311 
312  // However, for equality comparisons, that isn't always sufficient to
313  // eliminate the comparsion in loop body, we may need to peel one more
314  // iteration. See if that makes !Pred become unknown again.
315  if (ICmpInst::isEquality(Pred) &&
316  !SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), NextIterVal,
317  RightSCEV) &&
318  !SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
319  SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) {
320  if (!CanPeelOneMoreIteration())
321  continue; // Need to peel one more iteration, but can't. Give up.
322  PeelOneMoreIteration(); // Great!
323  }
324 
325  DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount);
326  }
327 
328  return DesiredPeelCount;
329 }
330 
331 /// This "heuristic" exactly matches implicit behavior which used to exist
332 /// inside getLoopEstimatedTripCount. It was added here to keep an
333 /// improvement inside that API from causing peeling to become more agressive.
334 /// This should probably be removed.
336  BasicBlock *Latch = L->getLoopLatch();
337  if (!Latch)
338  return true;
339 
340  BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
341  if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
342  return true;
343 
344  assert((LatchBR->getSuccessor(0) == L->getHeader() ||
345  LatchBR->getSuccessor(1) == L->getHeader()) &&
346  "At least one edge out of the latch must go to the header");
347 
348  SmallVector<BasicBlock *, 4> ExitBlocks;
349  L->getUniqueNonLatchExitBlocks(ExitBlocks);
350  return any_of(ExitBlocks, [](const BasicBlock *EB) {
351  return !EB->getTerminatingDeoptimizeCall();
352  });
353 }
354 
355 
356 // Return the number of iterations we want to peel off.
357 void llvm::computePeelCount(Loop *L, unsigned LoopSize,
359  unsigned TripCount, DominatorTree &DT,
360  ScalarEvolution &SE, unsigned Threshold) {
361  assert(LoopSize > 0 && "Zero loop size is not allowed!");
362  // Save the PP.PeelCount value set by the target in
363  // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
364  unsigned TargetPeelCount = PP.PeelCount;
365  PP.PeelCount = 0;
366  if (!canPeel(L))
367  return;
368 
369  // Only try to peel innermost loops by default.
370  // The constraint can be relaxed by the target in TTI.getPeelingPreferences
371  // or by the flag -unroll-allow-loop-nests-peeling.
372  if (!PP.AllowLoopNestsPeeling && !L->isInnermost())
373  return;
374 
375  // If the user provided a peel count, use that.
376  bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
377  if (UserPeelCount) {
378  LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
379  << " iterations.\n");
381  PP.PeelProfiledIterations = true;
382  return;
383  }
384 
385  // Skip peeling if it's disabled.
386  if (!PP.AllowPeeling)
387  return;
388 
389  // Check that we can peel at least one iteration.
390  if (2 * LoopSize > Threshold)
391  return;
392 
393  unsigned AlreadyPeeled = 0;
394  if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
395  AlreadyPeeled = *Peeled;
396  // Stop if we already peeled off the maximum number of iterations.
397  if (AlreadyPeeled >= UnrollPeelMaxCount)
398  return;
399 
400  // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
401  // iterations of the loop. For this we compute the number for iterations after
402  // which every Phi is guaranteed to become an invariant, and try to peel the
403  // maximum number of iterations among these values, thus turning all those
404  // Phis into invariants.
405 
406  // Store the pre-calculated values here.
407  SmallDenseMap<PHINode *, Optional<unsigned>> IterationsToInvariance;
408  // Now go through all Phis to calculate their the number of iterations they
409  // need to become invariants.
410  // Start the max computation with the PP.PeelCount value set by the target
411  // in TTI.getPeelingPreferences or by the flag -unroll-peel-count.
412  unsigned DesiredPeelCount = TargetPeelCount;
413  BasicBlock *BackEdge = L->getLoopLatch();
414  assert(BackEdge && "Loop is not in simplified form?");
415  for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
416  PHINode *Phi = cast<PHINode>(&*BI);
417  auto ToInvariance = calculateIterationsToInvariance(Phi, L, BackEdge,
418  IterationsToInvariance);
419  if (ToInvariance)
420  DesiredPeelCount = std::max(DesiredPeelCount, *ToInvariance);
421  }
422 
423  // Pay respect to limitations implied by loop size and the max peel count.
424  unsigned MaxPeelCount = UnrollPeelMaxCount;
425  MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1);
426 
427  DesiredPeelCount = std::max(DesiredPeelCount,
428  countToEliminateCompares(*L, MaxPeelCount, SE));
429 
430  if (DesiredPeelCount == 0)
431  DesiredPeelCount = peelToTurnInvariantLoadsDerefencebale(*L, DT);
432 
433  if (DesiredPeelCount > 0) {
434  DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
435  // Consider max peel count limitation.
436  assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
437  if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
438  LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
439  << " iteration(s) to turn"
440  << " some Phis into invariants.\n");
441  PP.PeelCount = DesiredPeelCount;
442  PP.PeelProfiledIterations = false;
443  return;
444  }
445  }
446 
447  // Bail if we know the statically calculated trip count.
448  // In this case we rather prefer partial unrolling.
449  if (TripCount)
450  return;
451 
452  // Do not apply profile base peeling if it is disabled.
453  if (!PP.PeelProfiledIterations)
454  return;
455  // If we don't know the trip count, but have reason to believe the average
456  // trip count is low, peeling should be beneficial, since we will usually
457  // hit the peeled section.
458  // We only do this in the presence of profile information, since otherwise
459  // our estimates of the trip count are not reliable enough.
460  if (L->getHeader()->getParent()->hasProfileData()) {
462  return;
463  Optional<unsigned> EstimatedTripCount = getLoopEstimatedTripCount(L);
464  if (!EstimatedTripCount)
465  return;
466 
467  LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is "
468  << *EstimatedTripCount << "\n");
469 
470  if (*EstimatedTripCount) {
471  if (*EstimatedTripCount + AlreadyPeeled <= MaxPeelCount) {
472  unsigned PeelCount = *EstimatedTripCount;
473  LLVM_DEBUG(dbgs() << "Peeling first " << PeelCount << " iterations.\n");
474  PP.PeelCount = PeelCount;
475  return;
476  }
477  LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n");
478  LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
479  LLVM_DEBUG(dbgs() << "Loop cost: " << LoopSize << "\n");
480  LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n");
481  LLVM_DEBUG(dbgs() << "Max peel count by cost: "
482  << (Threshold / LoopSize - 1) << "\n");
483  }
484  }
485 }
486 
487 /// Update the branch weights of the latch of a peeled-off loop
488 /// iteration.
489 /// This sets the branch weights for the latch of the recently peeled off loop
490 /// iteration correctly.
491 /// Let F is a weight of the edge from latch to header.
492 /// Let E is a weight of the edge from latch to exit.
493 /// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to
494 /// go to exit.
495 /// Then, Estimated TripCount = F / E.
496 /// For I-th (counting from 0) peeled off iteration we set the the weights for
497 /// the peeled latch as (TC - I, 1). It gives us reasonable distribution,
498 /// The probability to go to exit 1/(TC-I) increases. At the same time
499 /// the estimated trip count of remaining loop reduces by I.
500 /// To avoid dealing with division rounding we can just multiple both part
501 /// of weights to E and use weight as (F - I * E, E).
502 ///
503 /// \param Header The copy of the header block that belongs to next iteration.
504 /// \param LatchBR The copy of the latch branch that belongs to this iteration.
505 /// \param[in,out] FallThroughWeight The weight of the edge from latch to
506 /// header before peeling (in) and after peeled off one iteration (out).
507 static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
508  uint64_t ExitWeight,
509  uint64_t &FallThroughWeight) {
510  // FallThroughWeight is 0 means that there is no branch weights on original
511  // latch block or estimated trip count is zero.
512  if (!FallThroughWeight)
513  return;
514 
515  unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
516  MDBuilder MDB(LatchBR->getContext());
517  MDNode *WeightNode =
518  HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)
519  : MDB.createBranchWeights(FallThroughWeight, ExitWeight);
520  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
521  FallThroughWeight =
522  FallThroughWeight > ExitWeight ? FallThroughWeight - ExitWeight : 1;
523 }
524 
525 /// Initialize the weights.
526 ///
527 /// \param Header The header block.
528 /// \param LatchBR The latch branch.
529 /// \param[out] ExitWeight The weight of the edge from Latch to Exit.
530 /// \param[out] FallThroughWeight The weight of the edge from Latch to Header.
531 static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
532  uint64_t &ExitWeight,
533  uint64_t &FallThroughWeight) {
534  uint64_t TrueWeight, FalseWeight;
535  if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight))
536  return;
537  unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
538  ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
539  FallThroughWeight = HeaderIdx ? FalseWeight : TrueWeight;
540 }
541 
542 /// Update the weights of original Latch block after peeling off all iterations.
543 ///
544 /// \param Header The header block.
545 /// \param LatchBR The latch branch.
546 /// \param ExitWeight The weight of the edge from Latch to Exit.
547 /// \param FallThroughWeight The weight of the edge from Latch to Header.
548 static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
549  uint64_t ExitWeight,
550  uint64_t FallThroughWeight) {
551  // FallThroughWeight is 0 means that there is no branch weights on original
552  // latch block or estimated trip count is zero.
553  if (!FallThroughWeight)
554  return;
555 
556  // Sets the branch weights on the loop exit.
557  MDBuilder MDB(LatchBR->getContext());
558  unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
559  MDNode *WeightNode =
560  HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)
561  : MDB.createBranchWeights(FallThroughWeight, ExitWeight);
562  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
563 }
564 
565 /// Clones the body of the loop L, putting it between \p InsertTop and \p
566 /// InsertBot.
567 /// \param IterNumber The serial number of the iteration currently being
568 /// peeled off.
569 /// \param ExitEdges The exit edges of the original loop.
570 /// \param[out] NewBlocks A list of the blocks in the newly created clone
571 /// \param[out] VMap The value map between the loop and the new clone.
572 /// \param LoopBlocks A helper for DFS-traversal of the loop.
573 /// \param LVMap A value-map that maps instructions from the original loop to
574 /// instructions in the last peeled-off iteration.
575 static void cloneLoopBlocks(
576  Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot,
577  SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
578  SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
580  LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes) {
581  BasicBlock *Header = L->getHeader();
582  BasicBlock *Latch = L->getLoopLatch();
583  BasicBlock *PreHeader = L->getLoopPreheader();
584 
585  Function *F = Header->getParent();
586  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
587  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
588  Loop *ParentLoop = L->getParentLoop();
589 
590  // For each block in the original loop, create a new copy,
591  // and update the value map with the newly created values.
592  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
593  BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
594  NewBlocks.push_back(NewBB);
595 
596  // If an original block is an immediate child of the loop L, its copy
597  // is a child of a ParentLoop after peeling. If a block is a child of
598  // a nested loop, it is handled in the cloneLoop() call below.
599  if (ParentLoop && LI->getLoopFor(*BB) == L)
600  ParentLoop->addBasicBlockToLoop(NewBB, *LI);
601 
602  VMap[*BB] = NewBB;
603 
604  // If dominator tree is available, insert nodes to represent cloned blocks.
605  if (DT) {
606  if (Header == *BB)
607  DT->addNewBlock(NewBB, InsertTop);
608  else {
609  DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
610  // VMap must contain entry for IDom, as the iteration order is RPO.
611  DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
612  }
613  }
614  }
615 
616  {
617  // Identify what other metadata depends on the cloned version. After
618  // cloning, replace the metadata with the corrected version for both
619  // memory instructions and noalias intrinsics.
620  std::string Ext = (Twine("Peel") + Twine(IterNumber)).str();
621  cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
622  Header->getContext(), Ext);
623  }
624 
625  // Recursively create the new Loop objects for nested loops, if any,
626  // to preserve LoopInfo.
627  for (Loop *ChildLoop : *L) {
628  cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr);
629  }
630 
631  // Hook-up the control flow for the newly inserted blocks.
632  // The new header is hooked up directly to the "top", which is either
633  // the original loop preheader (for the first iteration) or the previous
634  // iteration's exiting block (for every other iteration)
635  InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));
636 
637  // Similarly, for the latch:
638  // The original exiting edge is still hooked up to the loop exit.
639  // The backedge now goes to the "bottom", which is either the loop's real
640  // header (for the last peeled iteration) or the copied header of the next
641  // iteration (for every other iteration)
642  BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
643  BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator());
644  for (unsigned idx = 0, e = LatchBR->getNumSuccessors(); idx < e; ++idx)
645  if (LatchBR->getSuccessor(idx) == Header) {
646  LatchBR->setSuccessor(idx, InsertBot);
647  break;
648  }
649  if (DT)
650  DT->changeImmediateDominator(InsertBot, NewLatch);
651 
652  // The new copy of the loop body starts with a bunch of PHI nodes
653  // that pick an incoming value from either the preheader, or the previous
654  // loop iteration. Since this copy is no longer part of the loop, we
655  // resolve this statically:
656  // For the first iteration, we use the value from the preheader directly.
657  // For any other iteration, we replace the phi with the value generated by
658  // the immediately preceding clone of the loop body (which represents
659  // the previous iteration).
660  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
661  PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
662  if (IterNumber == 0) {
663  VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
664  } else {
665  Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
666  Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
667  if (LatchInst && L->contains(LatchInst))
668  VMap[&*I] = LVMap[LatchInst];
669  else
670  VMap[&*I] = LatchVal;
671  }
672  cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
673  }
674 
675  // Fix up the outgoing values - we need to add a value for the iteration
676  // we've just created. Note that this must happen *after* the incoming
677  // values are adjusted, since the value going out of the latch may also be
678  // a value coming into the header.
679  for (auto Edge : ExitEdges)
680  for (PHINode &PHI : Edge.second->phis()) {
681  Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
682  Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
683  if (LatchInst && L->contains(LatchInst))
684  LatchVal = VMap[LatchVal];
685  PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
686  }
687 
688  // LastValueMap is updated with the values for the current loop
689  // which are used the next time this function is called.
690  for (auto KV : VMap)
691  LVMap[KV.first] = KV.second;
692 }
693 
696  Optional<bool> UserAllowPeeling,
697  Optional<bool> UserAllowProfileBasedPeeling, bool UnrollingSpecficValues) {
699 
700  // Set the default values.
701  PP.PeelCount = 0;
702  PP.AllowPeeling = true;
703  PP.AllowLoopNestsPeeling = false;
704  PP.PeelProfiledIterations = true;
705 
706  // Get the target specifc values.
707  TTI.getPeelingPreferences(L, SE, PP);
708 
709  // User specified values using cl::opt.
710  if (UnrollingSpecficValues) {
711  if (UnrollPeelCount.getNumOccurrences() > 0)
717  }
718 
719  // User specifed values provided by argument.
720  if (UserAllowPeeling.hasValue())
721  PP.AllowPeeling = *UserAllowPeeling;
722  if (UserAllowProfileBasedPeeling.hasValue())
723  PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
724 
725  return PP;
726 }
727 
728 /// Peel off the first \p PeelCount iterations of loop \p L.
729 ///
730 /// Note that this does not peel them off as a single straight-line block.
731 /// Rather, each iteration is peeled off separately, and needs to check the
732 /// exit condition.
733 /// For loops that dynamically execute \p PeelCount iterations or less
734 /// this provides a benefit, since the peeled off iterations, which account
735 /// for the bulk of dynamic execution, can be further simplified by scalar
736 /// optimizations.
737 bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
739  bool PreserveLCSSA) {
740  assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
741  assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
742 
743  LoopBlocksDFS LoopBlocks(L);
744  LoopBlocks.perform(LI);
745 
746  BasicBlock *Header = L->getHeader();
747  BasicBlock *PreHeader = L->getLoopPreheader();
748  BasicBlock *Latch = L->getLoopLatch();
750  L->getExitEdges(ExitEdges);
751 
752  // Remember dominators of blocks we might reach through exits to change them
753  // later. Immediate dominator of such block might change, because we add more
754  // routes which can lead to the exit: we can reach it from the peeled
755  // iterations too.
756  DenseMap<BasicBlock *, BasicBlock *> NonLoopBlocksIDom;
757  for (auto *BB : L->blocks()) {
758  auto *BBDomNode = DT.getNode(BB);
759  SmallVector<BasicBlock *, 16> ChildrenToUpdate;
760  for (auto *ChildDomNode : BBDomNode->children()) {
761  auto *ChildBB = ChildDomNode->getBlock();
762  if (!L->contains(ChildBB))
763  ChildrenToUpdate.push_back(ChildBB);
764  }
765  // The new idom of the block will be the nearest common dominator
766  // of all copies of the previous idom. This is equivalent to the
767  // nearest common dominator of the previous idom and the first latch,
768  // which dominates all copies of the previous idom.
769  BasicBlock *NewIDom = DT.findNearestCommonDominator(BB, Latch);
770  for (auto *ChildBB : ChildrenToUpdate)
771  NonLoopBlocksIDom[ChildBB] = NewIDom;
772  }
773 
774  Function *F = Header->getParent();
775 
776  // Set up all the necessary basic blocks. It is convenient to split the
777  // preheader into 3 parts - two blocks to anchor the peeled copy of the loop
778  // body, and a new preheader for the "real" loop.
779 
780  // Peeling the first iteration transforms.
781  //
782  // PreHeader:
783  // ...
784  // Header:
785  // LoopBody
786  // If (cond) goto Header
787  // Exit:
788  //
789  // into
790  //
791  // InsertTop:
792  // LoopBody
793  // If (!cond) goto Exit
794  // InsertBot:
795  // NewPreHeader:
796  // ...
797  // Header:
798  // LoopBody
799  // If (cond) goto Header
800  // Exit:
801  //
802  // Each following iteration will split the current bottom anchor in two,
803  // and put the new copy of the loop body between these two blocks. That is,
804  // after peeling another iteration from the example above, we'll split
805  // InsertBot, and get:
806  //
807  // InsertTop:
808  // LoopBody
809  // If (!cond) goto Exit
810  // InsertBot:
811  // LoopBody
812  // If (!cond) goto Exit
813  // InsertBot.next:
814  // NewPreHeader:
815  // ...
816  // Header:
817  // LoopBody
818  // If (cond) goto Header
819  // Exit:
820 
821  BasicBlock *InsertTop = SplitEdge(PreHeader, Header, &DT, LI);
822  BasicBlock *InsertBot =
823  SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
824  BasicBlock *NewPreHeader =
825  SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
826 
827  InsertTop->setName(Header->getName() + ".peel.begin");
828  InsertBot->setName(Header->getName() + ".peel.next");
829  NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
830 
831  ValueToValueMapTy LVMap;
832 
833  // If we have branch weight information, we'll want to update it for the
834  // newly created branches.
835  BranchInst *LatchBR =
836  cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator());
837  uint64_t ExitWeight = 0, FallThroughWeight = 0;
838  initBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
839 
840  // Identify what noalias metadata is inside the loop: if it is inside the
841  // loop, the associated metadata must be cloned for each iteration.
842  SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
843  identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
844 
845  // For each peeled-off iteration, make a copy of the loop.
846  for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
848  ValueToValueMapTy VMap;
849 
850  cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
851  LoopBlocks, VMap, LVMap, &DT, LI,
852  LoopLocalNoAliasDeclScopes);
853 
854  // Remap to use values from the current iteration instead of the
855  // previous one.
856  remapInstructionsInBlocks(NewBlocks, VMap);
857 
858  // Update IDoms of the blocks reachable through exits.
859  if (Iter == 0)
860  for (auto BBIDom : NonLoopBlocksIDom)
861  DT.changeImmediateDominator(BBIDom.first,
862  cast<BasicBlock>(LVMap[BBIDom.second]));
863 #ifdef EXPENSIVE_CHECKS
865 #endif
866 
867  auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
868  updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight);
869  // Remove Loop metadata from the latch branch instruction
870  // because it is not the Loop's latch branch anymore.
871  LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr);
872 
873  InsertTop = InsertBot;
874  InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
875  InsertBot->setName(Header->getName() + ".peel.next");
876 
877  F->getBasicBlockList().splice(InsertTop->getIterator(),
878  F->getBasicBlockList(),
879  NewBlocks[0]->getIterator(), F->end());
880  }
881 
882  // Now adjust the phi nodes in the loop header to get their initial values
883  // from the last peeled-off iteration instead of the preheader.
884  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
885  PHINode *PHI = cast<PHINode>(I);
886  Value *NewVal = PHI->getIncomingValueForBlock(Latch);
887  Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
888  if (LatchInst && L->contains(LatchInst))
889  NewVal = LVMap[LatchInst];
890 
891  PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
892  }
893 
894  fixupBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
895 
896  // Update Metadata for count of peeled off iterations.
897  unsigned AlreadyPeeled = 0;
898  if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
899  AlreadyPeeled = *Peeled;
900  addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount);
901 
902  if (Loop *ParentLoop = L->getParentLoop())
903  L = ParentLoop;
904 
905  // We modified the loop, update SE.
906  SE->forgetTopmostLoop(L);
907 
908 #ifdef EXPENSIVE_CHECKS
909  // Finally DomtTree must be correct.
911 #endif
912 
913  // FIXME: Incrementally update loop-simplify
914  simplifyLoop(L, &DT, LI, SE, AC, nullptr, PreserveLCSSA);
915 
916  NumPeeled++;
917 
918  return true;
919 }
UnrollPeelCount
static cl::opt< unsigned > UnrollPeelCount("unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes"))
llvm::BasicBlock::getTerminatingDeoptimizeCall
const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Definition: BasicBlock.cpp:182
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
fixupBranchWeights
static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR, uint64_t ExitWeight, uint64_t FallThroughWeight)
Update the weights of original Latch block after peeling off all iterations.
Definition: LoopPeel.cpp:548
llvm::CmpInst::getSwappedPredicate
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:849
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::DominatorTreeBase::findNearestCommonDominator
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
Definition: GenericDomTree.h:468
LoopSimplify.h
Optional.h
initBranchWeights
static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR, uint64_t &ExitWeight, uint64_t &FallThroughWeight)
Initialize the weights.
Definition: LoopPeel.cpp:531
ValueMapper.h
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:719
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:370
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
Loads.h
violatesLegacyMultiExitLoopCheck
static bool violatesLegacyMultiExitLoopCheck(Loop *L)
This "heuristic" exactly matches implicit behavior which used to exist inside getLoopEstimatedTripCou...
Definition: LoopPeel.cpp:335
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
calculateIterationsToInvariance
static Optional< unsigned > calculateIterationsToInvariance(PHINode *Phi, Loop *L, BasicBlock *BackEdge, SmallDenseMap< PHINode *, Optional< unsigned > > &IterationsToInvariance)
Definition: LoopPeel.cpp:120
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Statistic.h
llvm::TargetTransformInfo::PeelingPreferences::AllowPeeling
bool AllowPeeling
Allow peeling off loop iterations.
Definition: TargetTransformInfo.h:543
UnrollPeelMaxCount
static cl::opt< unsigned > UnrollPeelMaxCount("unroll-peel-max-count", cl::init(7), cl::Hidden, cl::desc("Max average trip count which will cause loop peeling."))
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
llvm::SmallDenseMap
Definition: DenseMap.h:882
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:833
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
ScalarEvolution.h
DenseMap.h
llvm::ICmpInst::isEquality
bool isEquality() const
Return true if this predicate is either EQ or NE.
Definition: Instructions.h:1281
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:537
llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition: GenericDomTree.h:351
llvm::LoopBase::getExitEdges
void getExitEdges(SmallVectorImpl< Edge > &ExitEdges) const
Return all pairs of (inside_block,outside_block).
Definition: LoopInfoImpl.h:147
llvm::Optional< unsigned >
llvm::BranchInst::getNumSuccessors
unsigned getNumSuccessors() const
Definition: Instructions.h:3190
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::ScalarEvolution::evaluatePredicate
Optional< bool > evaluatePredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Check whether the condition described by Pred, LHS, and RHS is true or false.
Definition: ScalarEvolution.cpp:10427
llvm::DomTreeNodeBase::getIDom
DomTreeNodeBase * getIDom() const
Definition: GenericDomTree.h:89
peelToTurnInvariantLoadsDerefencebale
static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L, DominatorTree &DT)
Definition: LoopPeel.cpp:162
llvm::Sched::Fast
@ Fast
Definition: TargetLowering.h:104
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::LoopBlocksDFS::beginRPO
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
Definition: LoopIterator.h:136
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BranchInst::setSuccessor
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Definition: Instructions.h:3197
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1368
llvm::canPeel
bool canPeel(Loop *L)
Definition: LoopPeel.cpp:77
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:283
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
PeeledCountMetaData
static const char * PeeledCountMetaData
Definition: LoopPeel.cpp:74
Instruction.h
CommandLine.h
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1607
llvm::TargetTransformInfo::PeelingPreferences::PeelProfiledIterations
bool PeelProfiledIterations
Allow peeling basing on profile.
Definition: TargetTransformInfo.h:550
llvm::getOptionalIntLoopAttribute
llvm::Optional< int > getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name)
Find named metadata for a loop with an integer value.
Definition: LoopInfo.cpp:1088
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
countToEliminateCompares
static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE)
Definition: LoopPeel.cpp:228
InstrTypes.h
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:297
llvm::remapInstructionsInBlocks
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock * > &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
Definition: CloneFunction.cpp:882
llvm::MDBuilder::createBranchWeights
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
UnrollForcePeelCount
static cl::opt< unsigned > UnrollForcePeelCount("unroll-force-peel-count", cl::init(0), cl::Hidden, cl::desc("Force a peel count regardless of profiling information."))
llvm::cloneLoop
Loop * cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM)
Recursively clone the specified loop and all of its children, mapping the blocks with the specified m...
Definition: LoopUtils.cpp:1489
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2849
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::ScalarEvolution::isKnownPredicate
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
Definition: ScalarEvolution.cpp:10412
llvm::Instruction
Definition: Instruction.h:42
llvm::computePeelCount
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, unsigned Threshold=UINT_MAX)
Definition: LoopPeel.cpp:357
llvm::LoopBlocksDFS::endRPO
RPOIterator endRPO() const
Definition: LoopIterator.h:140
llvm::isDereferenceablePointer
bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition: Loads.cpp:227
MDBuilder.h
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:372
llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:33
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:395
LoopUtils.h
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:147
llvm::DominatorTreeBase::changeImmediateDominator
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
Definition: GenericDomTree.h:655
llvm::LoopBase::getExitingBlock
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:48
PatternMatch.h
llvm::Instruction::extractProfMetadata
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
Definition: Metadata.cpp:1437
llvm::None
const NoneType None
Definition: None.h:24
LoopIterator.h
LoopInfo.h
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4407
llvm::LoopBlocksDFS::RPOIterator
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
Definition: LoopIterator.h:101
llvm::LoopBlocksDFS
Store the result of a depth first search within basic blocks contained by a single loop.
Definition: LoopIterator.h:97
llvm::cloneAndAdaptNoAliasScopes
void cloneAndAdaptNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, ArrayRef< BasicBlock * > NewBlocks, LLVMContext &Context, StringRef Ext)
Clone the specified noalias decl scopes.
Definition: CloneFunction.cpp:1086
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:475
llvm::CloneBasicBlock
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
Definition: CloneFunction.cpp:41
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1392
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:159
llvm::SmallPtrSetImpl::find
iterator find(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:386
llvm::DomTreeNodeBase::getBlock
NodeT * getBlock() const
Definition: GenericDomTree.h:88
uint64_t
llvm::SmallPtrSetImpl::end
iterator end() const
Definition: SmallPtrSet.h:408
llvm::addStringMetadataToLoop
void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V=0)
Set input string into loop metadata by keeping other values intact.
Definition: LoopUtils.cpp:217
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:716
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:970
I
#define I(x, y, z)
Definition: MD5.cpp:58
Cloning.h
llvm::SCEVNAryExpr::hasNoSelfWrap
bool hasNoSelfWrap() const
Definition: ScalarEvolutionExpressions.h:225
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:166
llvm::ScalarEvolution::getMonotonicPredicateType
Optional< MonotonicPredicateType > getMonotonicPredicateType(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred)
If, for all loop invariant X, the predicate "LHS `Pred` X" is monotonically increasing or decreasing,...
Definition: ScalarEvolution.cpp:10471
llvm::LoopBlocksDFS::perform
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopInfo.cpp:1221
llvm::TargetTransformInfo::PeelingPreferences::AllowLoopNestsPeeling
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
Definition: TargetTransformInfo.h:545
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:215
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::PHINode::setIncomingValueForBlock
void setIncomingValueForBlock(const BasicBlock *BB, Value *V)
Set every incoming value(s) for block BB to V.
Definition: Instructions.h:2856
llvm::peelLoop
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA)
Peel off the first PeelCount iterations of loop L.
Definition: LoopPeel.cpp:737
llvm::ScalarEvolution::forgetTopmostLoop
void forgetTopmostLoop(const Loop *L)
Definition: ScalarEvolution.cpp:8093
llvm::MDNode
Metadata node.
Definition: Metadata.h:937
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::TargetTransformInfo::PeelingPreferences::PeelCount
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
Definition: TargetTransformInfo.h:541
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1614
llvm::identifyNoAliasScopesToClone
void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
Definition: CloneFunction.cpp:1123
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::Instruction::setSuccessor
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
Definition: Instruction.cpp:801
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:462
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
UnrollAllowLoopNestsPeeling
static cl::opt< bool > UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden, cl::desc("Allows loop nests to be peeled."))
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:991
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
UnrollAllowPeeling
static cl::opt< bool > UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low."))
llvm::DomTreeNodeBase< BasicBlock >
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
llvm::ValueMap< const Value *, WeakTrackingVH >
llvm::LoopBase::getUniqueNonLatchExitBlocks
void getUniqueNonLatchExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop except successors from Latch block are not considered...
Definition: LoopInfoImpl.h:128
llvm::DominatorTreeBase::addNewBlock
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
Definition: GenericDomTree.h:619
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
llvm::SplitEdge
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
Definition: BasicBlockUtils.cpp:516
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:354
updateBranchWeights
static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR, uint64_t ExitWeight, uint64_t &FallThroughWeight)
Update the branch weights of the latch of a peeled-off loop iteration.
Definition: LoopPeel.cpp:507
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:342
Casting.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::simplifyLoop
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
Definition: LoopSimplify.cpp:713
llvm::Function::hasProfileData
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:290
llvm::MDBuilder
Definition: MDBuilder.h:35
ScalarEvolutionExpressions.h
Instructions.h
llvm::IsBlockFollowedByDeoptOrUnreachable
bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
Definition: BasicBlockUtils.cpp:497
SmallVector.h
llvm::LoopBase::addBasicBlockToLoop
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:241
llvm::PatternMatch::m_ICmp
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:1404
Dominators.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition: GenericDomTree.h:802
TargetTransformInfo.h
llvm::TargetTransformInfo::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) const
Get target-customized preferences for the generic loop peeling transformation.
Definition: TargetTransformInfo.cpp:329
llvm::PHINode
Definition: Instructions.h:2664
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:393
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2454
llvm::LoopBase::isLoopExiting
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Definition: LoopInfo.h:225
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
LLVMContext.h
llvm::cl::desc
Definition: CommandLine.h:405
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3099
raw_ostream.h
llvm::SmallPtrSetImpl::contains
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:389
LoopPeel.h
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:837
cloneLoopBlocks
static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot, SmallVectorImpl< std::pair< BasicBlock *, BasicBlock * >> &ExitEdges, SmallVectorImpl< BasicBlock * > &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, LoopInfo *LI, ArrayRef< MDNode * > LoopLocalNoAliasDeclScopes)
Clones the body of the loop L, putting it between InsertTop and InsertBot.
Definition: LoopPeel.cpp:575
llvm::getLoopEstimatedTripCount
Optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:819
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::gatherPeelingPreferences
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, Optional< bool > UserAllowPeeling, Optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
Definition: LoopPeel.cpp:694
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3192
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:360
llvm::SCEVAddRecExpr::evaluateAtIteration
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
Definition: ScalarEvolution.cpp:1043
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365