LLVM  9.0.0svn
LoopUnrollPeel.cpp
Go to the documentation of this file.
1 //===- UnrollLoopPeel.cpp - Loop peeling utilities ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements some loop unrolling utilities for peeling loops
10 // with dynamically inferred (from PGO) trip counts. See LoopUnroll.cpp for
11 // unrolling loops with compile-time constant trip counts.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/Optional.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/LoopInfo.h"
24 #include "llvm/IR/BasicBlock.h"
25 #include "llvm/IR/Dominators.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/IR/InstrTypes.h"
28 #include "llvm/IR/Instruction.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/IR/MDBuilder.h"
32 #include "llvm/IR/Metadata.h"
33 #include "llvm/IR/PatternMatch.h"
34 #include "llvm/Support/Casting.h"
36 #include "llvm/Support/Debug.h"
44 #include <algorithm>
45 #include <cassert>
46 #include <cstdint>
47 #include <limits>
48 
49 using namespace llvm;
50 using namespace llvm::PatternMatch;
51 
52 #define DEBUG_TYPE "loop-unroll"
53 
54 STATISTIC(NumPeeled, "Number of loops peeled");
55 
57  "unroll-peel-max-count", cl::init(7), cl::Hidden,
58  cl::desc("Max average trip count which will cause loop peeling."));
59 
61  "unroll-force-peel-count", cl::init(0), cl::Hidden,
62  cl::desc("Force a peel count regardless of profiling information."));
63 
64 // Designates that a Phi is estimated to become invariant after an "infinite"
65 // number of loop iterations (i.e. only may become an invariant if the loop is
66 // fully unrolled).
67 static const unsigned InfiniteIterationsToInvariance =
69 
70 // Check whether we are capable of peeling this loop.
71 bool llvm::canPeel(Loop *L) {
72  // Make sure the loop is in simplified form
73  if (!L->isLoopSimplifyForm())
74  return false;
75 
76  // Only peel loops that contain a single exit
77  if (!L->getExitingBlock() || !L->getUniqueExitBlock())
78  return false;
79 
80  // Don't try to peel loops where the latch is not the exiting block.
81  // This can be an indication of two different things:
82  // 1) The loop is not rotated.
83  // 2) The loop contains irreducible control flow that involves the latch.
84  if (L->getLoopLatch() != L->getExitingBlock())
85  return false;
86 
87  return true;
88 }
89 
90 // This function calculates the number of iterations after which the given Phi
91 // becomes an invariant. The pre-calculated values are memorized in the map. The
92 // function (shortcut is I) is calculated according to the following definition:
93 // Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
94 // If %y is a loop invariant, then I(%x) = 1.
95 // If %y is a Phi from the loop header, I(%x) = I(%y) + 1.
96 // Otherwise, I(%x) is infinite.
97 // TODO: Actually if %y is an expression that depends only on Phi %z and some
98 // loop invariants, we can estimate I(%x) = I(%z) + 1. The example
99 // looks like:
100 // %x = phi(0, %a), <-- becomes invariant starting from 3rd iteration.
101 // %y = phi(0, 5),
102 // %a = %y + 1.
104  PHINode *Phi, Loop *L, BasicBlock *BackEdge,
105  SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) {
106  assert(Phi->getParent() == L->getHeader() &&
107  "Non-loop Phi should not be checked for turning into invariant.");
108  assert(BackEdge == L->getLoopLatch() && "Wrong latch?");
109  // If we already know the answer, take it from the map.
110  auto I = IterationsToInvariance.find(Phi);
111  if (I != IterationsToInvariance.end())
112  return I->second;
113 
114  // Otherwise we need to analyze the input from the back edge.
115  Value *Input = Phi->getIncomingValueForBlock(BackEdge);
116  // Place infinity to map to avoid infinite recursion for cycled Phis. Such
117  // cycles can never stop on an invariant.
118  IterationsToInvariance[Phi] = InfiniteIterationsToInvariance;
119  unsigned ToInvariance = InfiniteIterationsToInvariance;
120 
121  if (L->isLoopInvariant(Input))
122  ToInvariance = 1u;
123  else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
124  // Only consider Phis in header block.
125  if (IncPhi->getParent() != L->getHeader())
126  return InfiniteIterationsToInvariance;
127  // If the input becomes an invariant after X iterations, then our Phi
128  // becomes an invariant after X + 1 iterations.
129  unsigned InputToInvariance = calculateIterationsToInvariance(
130  IncPhi, L, BackEdge, IterationsToInvariance);
131  if (InputToInvariance != InfiniteIterationsToInvariance)
132  ToInvariance = InputToInvariance + 1u;
133  }
134 
135  // If we found that this Phi lies in an invariant chain, update the map.
136  if (ToInvariance != InfiniteIterationsToInvariance)
137  IterationsToInvariance[Phi] = ToInvariance;
138  return ToInvariance;
139 }
140 
141 // Return the number of iterations to peel off that make conditions in the
142 // body true/false. For example, if we peel 2 iterations off the loop below,
143 // the condition i < 2 can be evaluated at compile time.
144 // for (i = 0; i < n; i++)
145 // if (i < 2)
146 // ..
147 // else
148 // ..
149 // }
150 static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
151  ScalarEvolution &SE) {
152  assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
153  unsigned DesiredPeelCount = 0;
154 
155  for (auto *BB : L.blocks()) {
156  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
157  if (!BI || BI->isUnconditional())
158  continue;
159 
160  // Ignore loop exit condition.
161  if (L.getLoopLatch() == BB)
162  continue;
163 
164  Value *Condition = BI->getCondition();
165  Value *LeftVal, *RightVal;
166  CmpInst::Predicate Pred;
167  if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
168  continue;
169 
170  const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
171  const SCEV *RightSCEV = SE.getSCEV(RightVal);
172 
173  // Do not consider predicates that are known to be true or false
174  // independently of the loop iteration.
175  if (SE.isKnownPredicate(Pred, LeftSCEV, RightSCEV) ||
177  RightSCEV))
178  continue;
179 
180  // Check if we have a condition with one AddRec and one non AddRec
181  // expression. Normalize LeftSCEV to be the AddRec.
182  if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
183  if (isa<SCEVAddRecExpr>(RightSCEV)) {
184  std::swap(LeftSCEV, RightSCEV);
185  Pred = ICmpInst::getSwappedPredicate(Pred);
186  } else
187  continue;
188  }
189 
190  const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
191 
192  // Avoid huge SCEV computations in the loop below, make sure we only
193  // consider AddRecs of the loop we are trying to peel and avoid
194  // non-monotonic predicates, as we will not be able to simplify the loop
195  // body.
196  // FIXME: For the non-monotonic predicates ICMP_EQ and ICMP_NE we can
197  // simplify the loop, if we peel 1 additional iteration, if there
198  // is no wrapping.
199  bool Increasing;
200  if (!LeftAR->isAffine() || LeftAR->getLoop() != &L ||
201  !SE.isMonotonicPredicate(LeftAR, Pred, Increasing))
202  continue;
203  (void)Increasing;
204 
205  // Check if extending the current DesiredPeelCount lets us evaluate Pred
206  // or !Pred in the loop body statically.
207  unsigned NewPeelCount = DesiredPeelCount;
208 
209  const SCEV *IterVal = LeftAR->evaluateAtIteration(
210  SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);
211 
212  // If the original condition is not known, get the negated predicate
213  // (which holds on the else branch) and check if it is known. This allows
214  // us to peel of iterations that make the original condition false.
215  if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
216  Pred = ICmpInst::getInversePredicate(Pred);
217 
218  const SCEV *Step = LeftAR->getStepRecurrence(SE);
219  while (NewPeelCount < MaxPeelCount &&
220  SE.isKnownPredicate(Pred, IterVal, RightSCEV)) {
221  IterVal = SE.getAddExpr(IterVal, Step);
222  NewPeelCount++;
223  }
224 
225  // Only peel the loop if the monotonic predicate !Pred becomes known in the
226  // first iteration of the loop body after peeling.
227  if (NewPeelCount > DesiredPeelCount &&
229  RightSCEV))
230  DesiredPeelCount = NewPeelCount;
231  }
232 
233  return DesiredPeelCount;
234 }
235 
236 // Return the number of iterations we want to peel off.
237 void llvm::computePeelCount(Loop *L, unsigned LoopSize,
239  unsigned &TripCount, ScalarEvolution &SE) {
240  assert(LoopSize > 0 && "Zero loop size is not allowed!");
241  // Save the UP.PeelCount value set by the target in
242  // TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
243  unsigned TargetPeelCount = UP.PeelCount;
244  UP.PeelCount = 0;
245  if (!canPeel(L))
246  return;
247 
248  // Only try to peel innermost loops.
249  if (!L->empty())
250  return;
251 
252  // If the user provided a peel count, use that.
253  bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
254  if (UserPeelCount) {
255  LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
256  << " iterations.\n");
258  return;
259  }
260 
261  // Skip peeling if it's disabled.
262  if (!UP.AllowPeeling)
263  return;
264 
265  // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
266  // iterations of the loop. For this we compute the number for iterations after
267  // which every Phi is guaranteed to become an invariant, and try to peel the
268  // maximum number of iterations among these values, thus turning all those
269  // Phis into invariants.
270  // First, check that we can peel at least one iteration.
271  if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) {
272  // Store the pre-calculated values here.
273  SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
274  // Now go through all Phis to calculate their the number of iterations they
275  // need to become invariants.
276  // Start the max computation with the UP.PeelCount value set by the target
277  // in TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
278  unsigned DesiredPeelCount = TargetPeelCount;
279  BasicBlock *BackEdge = L->getLoopLatch();
280  assert(BackEdge && "Loop is not in simplified form?");
281  for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
282  PHINode *Phi = cast<PHINode>(&*BI);
283  unsigned ToInvariance = calculateIterationsToInvariance(
284  Phi, L, BackEdge, IterationsToInvariance);
285  if (ToInvariance != InfiniteIterationsToInvariance)
286  DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance);
287  }
288 
289  // Pay respect to limitations implied by loop size and the max peel count.
290  unsigned MaxPeelCount = UnrollPeelMaxCount;
291  MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1);
292 
293  DesiredPeelCount = std::max(DesiredPeelCount,
294  countToEliminateCompares(*L, MaxPeelCount, SE));
295 
296  if (DesiredPeelCount > 0) {
297  DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
298  // Consider max peel count limitation.
299  assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
300  LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
301  << " iteration(s) to turn"
302  << " some Phis into invariants.\n");
303  UP.PeelCount = DesiredPeelCount;
304  return;
305  }
306  }
307 
308  // Bail if we know the statically calculated trip count.
309  // In this case we rather prefer partial unrolling.
310  if (TripCount)
311  return;
312 
313  // If we don't know the trip count, but have reason to believe the average
314  // trip count is low, peeling should be beneficial, since we will usually
315  // hit the peeled section.
316  // We only do this in the presence of profile information, since otherwise
317  // our estimates of the trip count are not reliable enough.
318  if (L->getHeader()->getParent()->hasProfileData()) {
320  if (!PeelCount)
321  return;
322 
323  LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount
324  << "\n");
325 
326  if (*PeelCount) {
327  if ((*PeelCount <= UnrollPeelMaxCount) &&
328  (LoopSize * (*PeelCount + 1) <= UP.Threshold)) {
329  LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount
330  << " iterations.\n");
331  UP.PeelCount = *PeelCount;
332  return;
333  }
334  LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
335  LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
336  LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1)
337  << "\n");
338  LLVM_DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n");
339  }
340  }
341 }
342 
343 /// Update the branch weights of the latch of a peeled-off loop
344 /// iteration.
345 /// This sets the branch weights for the latch of the recently peeled off loop
346 /// iteration correctly.
347 /// Our goal is to make sure that:
348 /// a) The total weight of all the copies of the loop body is preserved.
349 /// b) The total weight of the loop exit is preserved.
350 /// c) The body weight is reasonably distributed between the peeled iterations.
351 ///
352 /// \param Header The copy of the header block that belongs to next iteration.
353 /// \param LatchBR The copy of the latch branch that belongs to this iteration.
354 /// \param IterNumber The serial number of the iteration that was just
355 /// peeled off.
356 /// \param AvgIters The average number of iterations we expect the loop to have.
357 /// \param[in,out] PeeledHeaderWeight The total number of dynamic loop
358 /// iterations that are unaccounted for. As an input, it represents the number
359 /// of times we expect to enter the header of the iteration currently being
360 /// peeled off. The output is the number of times we expect to enter the
361 /// header of the next iteration.
362 static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
363  unsigned IterNumber, unsigned AvgIters,
364  uint64_t &PeeledHeaderWeight) {
365  // FIXME: Pick a more realistic distribution.
366  // Currently the proportion of weight we assign to the fall-through
367  // side of the branch drops linearly with the iteration number, and we use
368  // a 0.9 fudge factor to make the drop-off less sharp...
369  if (PeeledHeaderWeight) {
370  uint64_t FallThruWeight =
371  PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9);
372  uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight;
373  PeeledHeaderWeight -= ExitWeight;
374 
375  unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
376  MDBuilder MDB(LatchBR->getContext());
377  MDNode *WeightNode =
378  HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight)
379  : MDB.createBranchWeights(FallThruWeight, ExitWeight);
380  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
381  }
382 }
383 
384 /// Clones the body of the loop L, putting it between \p InsertTop and \p
385 /// InsertBot.
386 /// \param IterNumber The serial number of the iteration currently being
387 /// peeled off.
388 /// \param Exit The exit block of the original loop.
389 /// \param[out] NewBlocks A list of the blocks in the newly created clone
390 /// \param[out] VMap The value map between the loop and the new clone.
391 /// \param LoopBlocks A helper for DFS-traversal of the loop.
392 /// \param LVMap A value-map that maps instructions from the original loop to
393 /// instructions in the last peeled-off iteration.
394 static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
395  BasicBlock *InsertBot, BasicBlock *Exit,
397  LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
398  ValueToValueMapTy &LVMap, DominatorTree *DT,
399  LoopInfo *LI) {
400  BasicBlock *Header = L->getHeader();
401  BasicBlock *Latch = L->getLoopLatch();
402  BasicBlock *PreHeader = L->getLoopPreheader();
403 
404  Function *F = Header->getParent();
405  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
406  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
407  Loop *ParentLoop = L->getParentLoop();
408 
409  // For each block in the original loop, create a new copy,
410  // and update the value map with the newly created values.
411  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
412  BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
413  NewBlocks.push_back(NewBB);
414 
415  if (ParentLoop)
416  ParentLoop->addBasicBlockToLoop(NewBB, *LI);
417 
418  VMap[*BB] = NewBB;
419 
420  // If dominator tree is available, insert nodes to represent cloned blocks.
421  if (DT) {
422  if (Header == *BB)
423  DT->addNewBlock(NewBB, InsertTop);
424  else {
425  DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
426  // VMap must contain entry for IDom, as the iteration order is RPO.
427  DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
428  }
429  }
430  }
431 
432  // Hook-up the control flow for the newly inserted blocks.
433  // The new header is hooked up directly to the "top", which is either
434  // the original loop preheader (for the first iteration) or the previous
435  // iteration's exiting block (for every other iteration)
436  InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));
437 
438  // Similarly, for the latch:
439  // The original exiting edge is still hooked up to the loop exit.
440  // The backedge now goes to the "bottom", which is either the loop's real
441  // header (for the last peeled iteration) or the copied header of the next
442  // iteration (for every other iteration)
443  BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
444  BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator());
445  unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
446  LatchBR->setSuccessor(HeaderIdx, InsertBot);
447  LatchBR->setSuccessor(1 - HeaderIdx, Exit);
448  if (DT)
449  DT->changeImmediateDominator(InsertBot, NewLatch);
450 
451  // The new copy of the loop body starts with a bunch of PHI nodes
452  // that pick an incoming value from either the preheader, or the previous
453  // loop iteration. Since this copy is no longer part of the loop, we
454  // resolve this statically:
455  // For the first iteration, we use the value from the preheader directly.
456  // For any other iteration, we replace the phi with the value generated by
457  // the immediately preceding clone of the loop body (which represents
458  // the previous iteration).
459  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
460  PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
461  if (IterNumber == 0) {
462  VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
463  } else {
464  Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
465  Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
466  if (LatchInst && L->contains(LatchInst))
467  VMap[&*I] = LVMap[LatchInst];
468  else
469  VMap[&*I] = LatchVal;
470  }
471  cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
472  }
473 
474  // Fix up the outgoing values - we need to add a value for the iteration
475  // we've just created. Note that this must happen *after* the incoming
476  // values are adjusted, since the value going out of the latch may also be
477  // a value coming into the header.
478  for (BasicBlock::iterator I = Exit->begin(); isa<PHINode>(I); ++I) {
479  PHINode *PHI = cast<PHINode>(I);
480  Value *LatchVal = PHI->getIncomingValueForBlock(Latch);
481  Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
482  if (LatchInst && L->contains(LatchInst))
483  LatchVal = VMap[LatchVal];
484  PHI->addIncoming(LatchVal, cast<BasicBlock>(VMap[Latch]));
485  }
486 
487  // LastValueMap is updated with the values for the current loop
488  // which are used the next time this function is called.
489  for (const auto &KV : VMap)
490  LVMap[KV.first] = KV.second;
491 }
492 
493 /// Peel off the first \p PeelCount iterations of loop \p L.
494 ///
495 /// Note that this does not peel them off as a single straight-line block.
496 /// Rather, each iteration is peeled off separately, and needs to check the
497 /// exit condition.
498 /// For loops that dynamically execute \p PeelCount iterations or less
499 /// this provides a benefit, since the peeled off iterations, which account
500 /// for the bulk of dynamic execution, can be further simplified by scalar
501 /// optimizations.
502 bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
504  AssumptionCache *AC, bool PreserveLCSSA) {
505  assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
506  assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
507 
508  LoopBlocksDFS LoopBlocks(L);
509  LoopBlocks.perform(LI);
510 
511  BasicBlock *Header = L->getHeader();
512  BasicBlock *PreHeader = L->getLoopPreheader();
513  BasicBlock *Latch = L->getLoopLatch();
514  BasicBlock *Exit = L->getUniqueExitBlock();
515 
516  Function *F = Header->getParent();
517 
518  // Set up all the necessary basic blocks. It is convenient to split the
519  // preheader into 3 parts - two blocks to anchor the peeled copy of the loop
520  // body, and a new preheader for the "real" loop.
521 
522  // Peeling the first iteration transforms.
523  //
524  // PreHeader:
525  // ...
526  // Header:
527  // LoopBody
528  // If (cond) goto Header
529  // Exit:
530  //
531  // into
532  //
533  // InsertTop:
534  // LoopBody
535  // If (!cond) goto Exit
536  // InsertBot:
537  // NewPreHeader:
538  // ...
539  // Header:
540  // LoopBody
541  // If (cond) goto Header
542  // Exit:
543  //
544  // Each following iteration will split the current bottom anchor in two,
545  // and put the new copy of the loop body between these two blocks. That is,
546  // after peeling another iteration from the example above, we'll split
547  // InsertBot, and get:
548  //
549  // InsertTop:
550  // LoopBody
551  // If (!cond) goto Exit
552  // InsertBot:
553  // LoopBody
554  // If (!cond) goto Exit
555  // InsertBot.next:
556  // NewPreHeader:
557  // ...
558  // Header:
559  // LoopBody
560  // If (cond) goto Header
561  // Exit:
562 
563  BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI);
564  BasicBlock *InsertBot =
565  SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI);
566  BasicBlock *NewPreHeader =
567  SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
568 
569  InsertTop->setName(Header->getName() + ".peel.begin");
570  InsertBot->setName(Header->getName() + ".peel.next");
571  NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
572 
573  ValueToValueMapTy LVMap;
574 
575  // If we have branch weight information, we'll want to update it for the
576  // newly created branches.
577  BranchInst *LatchBR =
578  cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator());
579  unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
580 
581  uint64_t TrueWeight, FalseWeight;
582  uint64_t ExitWeight = 0, CurHeaderWeight = 0;
583  if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
584  ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
585  // The # of times the loop body executes is the sum of the exit block
586  // weight and the # of times the backedges are taken.
587  CurHeaderWeight = TrueWeight + FalseWeight;
588  }
589 
590  // For each peeled-off iteration, make a copy of the loop.
591  for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
593  ValueToValueMapTy VMap;
594 
595  // Subtract the exit weight from the current header weight -- the exit
596  // weight is exactly the weight of the previous iteration's header.
597  // FIXME: due to the way the distribution is constructed, we need a
598  // guard here to make sure we don't end up with non-positive weights.
599  if (ExitWeight < CurHeaderWeight)
600  CurHeaderWeight -= ExitWeight;
601  else
602  CurHeaderWeight = 1;
603 
604  cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit,
605  NewBlocks, LoopBlocks, VMap, LVMap, DT, LI);
606 
607  // Remap to use values from the current iteration instead of the
608  // previous one.
609  remapInstructionsInBlocks(NewBlocks, VMap);
610 
611  if (DT) {
612  // Latches of the cloned loops dominate over the loop exit, so idom of the
613  // latter is the first cloned loop body, as original PreHeader dominates
614  // the original loop body.
615  if (Iter == 0)
616  DT->changeImmediateDominator(Exit, cast<BasicBlock>(LVMap[Latch]));
617 #ifdef EXPENSIVE_CHECKS
619 #endif
620  }
621 
622  auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
623  updateBranchWeights(InsertBot, LatchBRCopy, Iter,
624  PeelCount, ExitWeight);
625  // Remove Loop metadata from the latch branch instruction
626  // because it is not the Loop's latch branch anymore.
627  LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr);
628 
629  InsertTop = InsertBot;
630  InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
631  InsertBot->setName(Header->getName() + ".peel.next");
632 
633  F->getBasicBlockList().splice(InsertTop->getIterator(),
634  F->getBasicBlockList(),
635  NewBlocks[0]->getIterator(), F->end());
636  }
637 
638  // Now adjust the phi nodes in the loop header to get their initial values
639  // from the last peeled-off iteration instead of the preheader.
640  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
641  PHINode *PHI = cast<PHINode>(I);
642  Value *NewVal = PHI->getIncomingValueForBlock(Latch);
643  Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
644  if (LatchInst && L->contains(LatchInst))
645  NewVal = LVMap[LatchInst];
646 
647  PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
648  }
649 
650  // Adjust the branch weights on the loop exit.
651  if (ExitWeight) {
652  // The backedge count is the difference of current header weight and
653  // current loop exit weight. If the current header weight is smaller than
654  // the current loop exit weight, we mark the loop backedge weight as 1.
655  uint64_t BackEdgeWeight = 0;
656  if (ExitWeight < CurHeaderWeight)
657  BackEdgeWeight = CurHeaderWeight - ExitWeight;
658  else
659  BackEdgeWeight = 1;
660  MDBuilder MDB(LatchBR->getContext());
661  MDNode *WeightNode =
662  HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
663  : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
664  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
665  }
666 
667  if (Loop *ParentLoop = L->getParentLoop())
668  L = ParentLoop;
669 
670  // We modified the loop, update SE.
671  SE->forgetTopmostLoop(L);
672 
673  // FIXME: Incrementally update loop-simplify
674  simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA);
675 
676  NumPeeled++;
677 
678  return true;
679 }
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:70
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:224
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
const SCEV * getConstant(ConstantInt *V)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
iterator end()
Definition: Function.h:674
void setIncomingValueForBlock(const BasicBlock *BB, Value *V)
Set every incoming value(s) for block BB to V.
void push_back(const T &Elt)
Definition: SmallVector.h:211
The main scalar evolution driver.
This file contains the declarations for metadata subclasses.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:173
A cache of @llvm.assume calls within a function.
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:720
BasicBlock * getSuccessor(unsigned i) const
bool isMonotonicPredicate(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred, bool &Increasing)
Return true if, for all loop invariant X, the predicate "LHS `Pred` X" is monotonically increasing or...
STATISTIC(NumFunctions, "Total number of functions")
Metadata node.
Definition: Metadata.h:863
F(f)
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Split the edge connecting specified block.
static unsigned calculateIterationsToInvariance(PHINode *Phi, Loop *L, BasicBlock *BackEdge, SmallDenseMap< PHINode *, unsigned > &IterationsToInvariance)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:47
static cl::opt< unsigned > UnrollPeelMaxCount("unroll-peel-max-count", cl::init(7), cl::Hidden, cl::desc("Max average trip count which will cause loop peeling."))
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
Definition: InstrTypes.h:831
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR, unsigned IterNumber, unsigned AvgIters, uint64_t &PeeledHeaderWeight)
Update the branch weights of the latch of a peeled-off loop iteration.
bool AllowPeeling
Allow peeling off loop iterations for loops with low dynamic tripcount.
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:285
RPOIterator endRPO() const
Definition: LoopIterator.h:140
BlockT * getHeader() const
Definition: LoopInfo.h:102
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, unsigned &TripCount, ScalarEvolution &SE)
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS...
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:250
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopInfo.cpp:1061
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
NodeT * getBlock() const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
Conditional or Unconditional Branch instruction.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node&#39;s...
static cl::opt< unsigned > UnrollForcePeelCount("unroll-force-peel-count", cl::init(0), cl::Hidden, cl::desc("Force a peel count regardless of profiling information."))
DomTreeNodeBase * getIDom() const
Value * getIncomingValueForBlock(const BasicBlock *BB) const
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA)
Peel off the first PeelCount iterations of loop L.
static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE)
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
void splice(iterator where, iplist_impl &L2)
Definition: ilist.h:327
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:732
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
Definition: LoopIterator.h:101
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
self_iterator getIterator()
Definition: ilist_node.h:81
Optional< unsigned > getLoopEstimatedTripCount(Loop *L)
Get a loop&#39;s estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:623
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:308
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:144
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1222
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:61
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:112
Iterator for intrusive lists based on ilist_node.
Type * getType() const
Return the LLVM type of this SCEV expression.
static const unsigned InfiniteIterationsToInvariance
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Exit, SmallVectorImpl< BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, LoopInfo *LI)
Clones the body of the loop L, putting it between InsertTop and InsertBot.
bool canPeel(Loop *L)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
Store the result of a depth first search within basic blocks contained by a single loop...
Definition: LoopIterator.h:97
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
LoopT * getParentLoop() const
Definition: LoopInfo.h:103
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:425
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
unsigned Threshold
The cost threshold for the unrolled loop.
This class represents an analyzed expression in the program.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:467
Parameters that control the generic loop unrolling transformation.
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
const BasicBlockListType & getBasicBlockList() const
Get the underlying elements of the Function...
Definition: Function.h:649
bool empty() const
Definition: LoopInfo.h:148
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:72
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
Definition: LoopIterator.h:136
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Split the specified block at the specified instruction - everything before SplitPt stays in Old and e...
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:847
This pass exposes codegen information to IR-level passes.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
Definition: Metadata.cpp:1311
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:158
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
const BasicBlock * getParent() const
Definition: Instruction.h:66
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
void forgetTopmostLoop(const Loop *L)
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock *> &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.