LLVM  14.0.0git
LoopUnrollRuntime.cpp
Go to the documentation of this file.
1 //===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements some loop unrolling utilities for loops with run-time
10 // trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
11 // trip counts.
12 //
13 // The functions in this file are used to generate extra code when the
14 // run-time trip count modulo the unroll factor is not 0. When this is the
15 // case, we need to generate code to execute these 'left over' iterations.
16 //
17 // The current strategy generates an if-then-else sequence prior to the
18 // unrolled loop to execute the 'left over' iterations before or after the
19 // unrolled loop.
20 //
21 //===----------------------------------------------------------------------===//
22 
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/Statistic.h"
28 #include "llvm/IR/BasicBlock.h"
29 #include "llvm/IR/Dominators.h"
30 #include "llvm/IR/MDBuilder.h"
31 #include "llvm/IR/Metadata.h"
32 #include "llvm/IR/Module.h"
34 #include "llvm/Support/Debug.h"
36 #include "llvm/Transforms/Utils.h"
43 #include <algorithm>
44 
45 using namespace llvm;
46 
47 #define DEBUG_TYPE "loop-unroll"
48 
49 STATISTIC(NumRuntimeUnrolled,
50  "Number of loops unrolled with run-time trip counts");
52  "unroll-runtime-multi-exit", cl::init(false), cl::Hidden,
53  cl::desc("Allow runtime unrolling for loops with multiple exits, when "
54  "epilog is generated"));
56  "unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden,
57  cl::desc("Assume the non latch exit block to be predictable"));
58 
59 /// Connect the unrolling prolog code to the original loop.
60 /// The unrolling prolog code contains code to execute the
61 /// 'extra' iterations if the run-time trip count modulo the
62 /// unroll count is non-zero.
63 ///
64 /// This function performs the following:
65 /// - Create PHI nodes at prolog end block to combine values
66 /// that exit the prolog code and jump around the prolog.
67 /// - Add a PHI operand to a PHI node at the loop exit block
68 /// for values that exit the prolog and go around the loop.
69 /// - Branch around the original loop if the trip count is less
70 /// than the unroll factor.
71 ///
72 static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
73  BasicBlock *PrologExit,
74  BasicBlock *OriginalLoopLatchExit,
75  BasicBlock *PreHeader, BasicBlock *NewPreHeader,
77  LoopInfo *LI, bool PreserveLCSSA) {
78  // Loop structure should be the following:
79  // Preheader
80  // PrologHeader
81  // ...
82  // PrologLatch
83  // PrologExit
84  // NewPreheader
85  // Header
86  // ...
87  // Latch
88  // LatchExit
89  BasicBlock *Latch = L->getLoopLatch();
90  assert(Latch && "Loop must have a latch");
91  BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
92 
93  // Create a PHI node for each outgoing value from the original loop
94  // (which means it is an outgoing value from the prolog code too).
95  // The new PHI node is inserted in the prolog end basic block.
96  // The new PHI node value is added as an operand of a PHI node in either
97  // the loop header or the loop exit block.
98  for (BasicBlock *Succ : successors(Latch)) {
99  for (PHINode &PN : Succ->phis()) {
100  // Add a new PHI node to the prolog end block and add the
101  // appropriate incoming values.
102  // TODO: This code assumes that the PrologExit (or the LatchExit block for
103  // prolog loop) contains only one predecessor from the loop, i.e. the
104  // PrologLatch. When supporting multiple-exiting block loops, we can have
105  // two or more blocks that have the LatchExit as the target in the
106  // original loop.
107  PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
108  PrologExit->getFirstNonPHI());
109  // Adding a value to the new PHI node from the original loop preheader.
110  // This is the value that skips all the prolog code.
111  if (L->contains(&PN)) {
112  // Succ is loop header.
113  NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader),
114  PreHeader);
115  } else {
116  // Succ is LatchExit.
117  NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader);
118  }
119 
120  Value *V = PN.getIncomingValueForBlock(Latch);
121  if (Instruction *I = dyn_cast<Instruction>(V)) {
122  if (L->contains(I)) {
123  V = VMap.lookup(I);
124  }
125  }
126  // Adding a value to the new PHI node from the last prolog block
127  // that was created.
128  NewPN->addIncoming(V, PrologLatch);
129 
130  // Update the existing PHI node operand with the value from the
131  // new PHI node. How this is done depends on if the existing
132  // PHI node is in the original loop block, or the exit block.
133  if (L->contains(&PN))
134  PN.setIncomingValueForBlock(NewPreHeader, NewPN);
135  else
136  PN.addIncoming(NewPN, PrologExit);
137  }
138  }
139 
140  // Make sure that created prolog loop is in simplified form
141  SmallVector<BasicBlock *, 4> PrologExitPreds;
142  Loop *PrologLoop = LI->getLoopFor(PrologLatch);
143  if (PrologLoop) {
144  for (BasicBlock *PredBB : predecessors(PrologExit))
145  if (PrologLoop->contains(PredBB))
146  PrologExitPreds.push_back(PredBB);
147 
148  SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI,
149  nullptr, PreserveLCSSA);
150  }
151 
152  // Create a branch around the original loop, which is taken if there are no
153  // iterations remaining to be executed after running the prologue.
154  Instruction *InsertPt = PrologExit->getTerminator();
155  IRBuilder<> B(InsertPt);
156 
157  assert(Count != 0 && "nonsensical Count!");
158 
159  // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
160  // This means %xtraiter is (BECount + 1) and all of the iterations of this
161  // loop were executed by the prologue. Note that if BECount <u (Count - 1)
162  // then (BECount + 1) cannot unsigned-overflow.
163  Value *BrLoopExit =
164  B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
165  // Split the exit to maintain loop canonicalization guarantees
166  SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit));
167  SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
168  nullptr, PreserveLCSSA);
169  // Add the branch to the exit block (around the unrolled loop)
170  B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
171  InsertPt->eraseFromParent();
172  if (DT) {
173  auto *NewDom = DT->findNearestCommonDominator(OriginalLoopLatchExit,
174  PrologExit);
175  DT->changeImmediateDominator(OriginalLoopLatchExit, NewDom);
176  }
177 }
178 
179 /// Connect the unrolling epilog code to the original loop.
180 /// The unrolling epilog code contains code to execute the
181 /// 'extra' iterations if the run-time trip count modulo the
182 /// unroll count is non-zero.
183 ///
184 /// This function performs the following:
185 /// - Update PHI nodes at the unrolling loop exit and epilog loop exit
186 /// - Create PHI nodes at the unrolling loop exit to combine
187 /// values that exit the unrolling loop code and jump around it.
188 /// - Update PHI operands in the epilog loop by the new PHI nodes
189 /// - Branch around the epilog loop if extra iters (ModVal) is zero.
190 ///
191 static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
192  BasicBlock *Exit, BasicBlock *PreHeader,
193  BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
194  ValueToValueMapTy &VMap, DominatorTree *DT,
195  LoopInfo *LI, bool PreserveLCSSA) {
196  BasicBlock *Latch = L->getLoopLatch();
197  assert(Latch && "Loop must have a latch");
198  BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
199 
200  // Loop structure should be the following:
201  //
202  // PreHeader
203  // NewPreHeader
204  // Header
205  // ...
206  // Latch
207  // NewExit (PN)
208  // EpilogPreHeader
209  // EpilogHeader
210  // ...
211  // EpilogLatch
212  // Exit (EpilogPN)
213 
214  // Update PHI nodes at NewExit and Exit.
215  for (PHINode &PN : NewExit->phis()) {
216  // PN should be used in another PHI located in Exit block as
217  // Exit was split by SplitBlockPredecessors into Exit and NewExit
218  // Basicaly it should look like:
219  // NewExit:
220  // PN = PHI [I, Latch]
221  // ...
222  // Exit:
223  // EpilogPN = PHI [PN, EpilogPreHeader], [X, Exit2], [Y, Exit2.epil]
224  //
225  // Exits from non-latch blocks point to the original exit block and the
226  // epilogue edges have already been added.
227  //
228  // There is EpilogPreHeader incoming block instead of NewExit as
229  // NewExit was spilt 1 more time to get EpilogPreHeader.
230  assert(PN.hasOneUse() && "The phi should have 1 use");
231  PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser());
232  assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
233 
234  // Add incoming PreHeader from branch around the Loop
235  PN.addIncoming(UndefValue::get(PN.getType()), PreHeader);
236 
237  Value *V = PN.getIncomingValueForBlock(Latch);
238  Instruction *I = dyn_cast<Instruction>(V);
239  if (I && L->contains(I))
240  // If value comes from an instruction in the loop add VMap value.
241  V = VMap.lookup(I);
242  // For the instruction out of the loop, constant or undefined value
243  // insert value itself.
244  EpilogPN->addIncoming(V, EpilogLatch);
245 
246  assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
247  "EpilogPN should have EpilogPreHeader incoming block");
248  // Change EpilogPreHeader incoming block to NewExit.
249  EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
250  NewExit);
251  // Now PHIs should look like:
252  // NewExit:
253  // PN = PHI [I, Latch], [undef, PreHeader]
254  // ...
255  // Exit:
256  // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
257  }
258 
259  // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
260  // Update corresponding PHI nodes in epilog loop.
261  for (BasicBlock *Succ : successors(Latch)) {
262  // Skip this as we already updated phis in exit blocks.
263  if (!L->contains(Succ))
264  continue;
265  for (PHINode &PN : Succ->phis()) {
266  // Add new PHI nodes to the loop exit block and update epilog
267  // PHIs with the new PHI values.
268  PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
269  NewExit->getFirstNonPHI());
270  // Adding a value to the new PHI node from the unrolling loop preheader.
271  NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
272  // Adding a value to the new PHI node from the unrolling loop latch.
273  NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch);
274 
275  // Update the existing PHI node operand with the value from the new PHI
276  // node. Corresponding instruction in epilog loop should be PHI.
277  PHINode *VPN = cast<PHINode>(VMap[&PN]);
278  VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN);
279  }
280  }
281 
282  Instruction *InsertPt = NewExit->getTerminator();
283  IRBuilder<> B(InsertPt);
284  Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
285  assert(Exit && "Loop must have a single exit block only");
286  // Split the epilogue exit to maintain loop canonicalization guarantees
288  SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
289  PreserveLCSSA);
290  // Add the branch to the exit block (around the unrolling loop)
291  B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
292  InsertPt->eraseFromParent();
293  if (DT) {
294  auto *NewDom = DT->findNearestCommonDominator(Exit, NewExit);
295  DT->changeImmediateDominator(Exit, NewDom);
296  }
297 
298  // Split the main loop exit to maintain canonicalization guarantees.
299  SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
300  SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr,
301  PreserveLCSSA);
302 }
303 
304 /// Create a clone of the blocks in a loop and connect them together. A new
305 /// loop will be created including all cloned blocks, and the iterator of the
306 /// new loop switched to count NewIter down to 0.
307 /// The cloned blocks should be inserted between InsertTop and InsertBot.
308 /// InsertTop should be new preheader, InsertBot new loop exit.
309 /// Returns the new cloned loop that is created.
310 static Loop *
311 CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
312  const bool UnrollRemainder,
313  BasicBlock *InsertTop,
314  BasicBlock *InsertBot, BasicBlock *Preheader,
315  std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
316  ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
317  StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
318  BasicBlock *Header = L->getHeader();
319  BasicBlock *Latch = L->getLoopLatch();
320  Function *F = Header->getParent();
321  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
322  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
323  Loop *ParentLoop = L->getParentLoop();
324  NewLoopsMap NewLoops;
325  NewLoops[ParentLoop] = ParentLoop;
326 
327  // For each block in the original loop, create a new copy,
328  // and update the value map with the newly created values.
329  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
330  BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
331  NewBlocks.push_back(NewBB);
332 
333  addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
334 
335  VMap[*BB] = NewBB;
336  if (Header == *BB) {
337  // For the first block, add a CFG connection to this newly
338  // created block.
339  InsertTop->getTerminator()->setSuccessor(0, NewBB);
340  }
341 
342  if (DT) {
343  if (Header == *BB) {
344  // The header is dominated by the preheader.
345  DT->addNewBlock(NewBB, InsertTop);
346  } else {
347  // Copy information from original loop to unrolled loop.
348  BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
349  DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
350  }
351  }
352 
353  if (Latch == *BB) {
354  // For the last block, create a loop back to cloned head.
355  VMap.erase((*BB)->getTerminator());
356  // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.
357  // Subtle: NewIter can be 0 if we wrapped when computing the trip count,
358  // thus we must compare the post-increment (wrapping) value.
359  BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
360  BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
361  IRBuilder<> Builder(LatchBR);
362  PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
363  suffix + ".iter",
364  FirstLoopBB->getFirstNonPHI());
365  auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
366  auto *One = ConstantInt::get(NewIdx->getType(), 1);
367  Value *IdxNext = Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
368  Value *IdxCmp = Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp");
369  Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
370  NewIdx->addIncoming(Zero, InsertTop);
371  NewIdx->addIncoming(IdxNext, NewBB);
372  LatchBR->eraseFromParent();
373  }
374  }
375 
376  // Change the incoming values to the ones defined in the preheader or
377  // cloned loop.
378  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
379  PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
380  unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
381  NewPHI->setIncomingBlock(idx, InsertTop);
382  BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
383  idx = NewPHI->getBasicBlockIndex(Latch);
384  Value *InVal = NewPHI->getIncomingValue(idx);
385  NewPHI->setIncomingBlock(idx, NewLatch);
386  if (Value *V = VMap.lookup(InVal))
387  NewPHI->setIncomingValue(idx, V);
388  }
389 
390  Loop *NewLoop = NewLoops[L];
391  assert(NewLoop && "L should have been cloned");
392  MDNode *LoopID = NewLoop->getLoopID();
393 
394  // Only add loop metadata if the loop is not going to be completely
395  // unrolled.
396  if (UnrollRemainder)
397  return NewLoop;
398 
401  if (NewLoopID.hasValue()) {
402  NewLoop->setLoopID(NewLoopID.getValue());
403 
404  // Do not setLoopAlreadyUnrolled if loop attributes have been defined
405  // explicitly.
406  return NewLoop;
407  }
408 
409  // Add unroll disable metadata to disable future unrolling for this loop.
410  NewLoop->setLoopAlreadyUnrolled();
411  return NewLoop;
412 }
413 
414 /// Returns true if we can profitably unroll the multi-exit loop L. Currently,
415 /// we return true only if UnrollRuntimeMultiExit is set to true.
417  Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
418  bool UseEpilogRemainder) {
419 
420  // Priority goes to UnrollRuntimeMultiExit if it's supplied.
422  return UnrollRuntimeMultiExit;
423 
424  // The main pain point with multi-exit loop unrolling is that once unrolled,
425  // we will not be able to merge all blocks into a straight line code.
426  // There are branches within the unrolled loop that go to the OtherExits.
427  // The second point is the increase in code size, but this is true
428  // irrespective of multiple exits.
429 
430  // Note: Both the heuristics below are coarse grained. We are essentially
431  // enabling unrolling of loops that have a single side exit other than the
432  // normal LatchExit (i.e. exiting into a deoptimize block).
433  // The heuristics considered are:
434  // 1. low number of branches in the unrolled version.
435  // 2. high predictability of these extra branches.
436  // We avoid unrolling loops that have more than two exiting blocks. This
437  // limits the total number of branches in the unrolled loop to be atmost
438  // the unroll factor (since one of the exiting blocks is the latch block).
439  SmallVector<BasicBlock*, 4> ExitingBlocks;
440  L->getExitingBlocks(ExitingBlocks);
441  if (ExitingBlocks.size() > 2)
442  return false;
443 
444  // Allow unrolling of loops with no non latch exit blocks.
445  if (OtherExits.size() == 0)
446  return true;
447 
448  // The second heuristic is that L has one exit other than the latchexit and
449  // that exit is a deoptimize block. We know that deoptimize blocks are rarely
450  // taken, which also implies the branch leading to the deoptimize block is
451  // highly predictable. When UnrollRuntimeOtherExitPredictable is specified, we
452  // assume the other exit branch is predictable even if it has no deoptimize
453  // call.
454  return (OtherExits.size() == 1 &&
456  OtherExits[0]->getTerminatingDeoptimizeCall()));
457  // TODO: These can be fine-tuned further to consider code size or deopt states
458  // that are captured by the deoptimize exit block.
459  // Also, we can extend this to support more cases, if we actually
460  // know of kinds of multiexit loops that would benefit from unrolling.
461 }
462 
463 // Assign the maximum possible trip count as the back edge weight for the
464 // remainder loop if the original loop comes with a branch weight.
466  Loop *RemainderLoop,
467  uint64_t UnrollFactor) {
468  uint64_t TrueWeight, FalseWeight;
469  BranchInst *LatchBR =
470  cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator());
471  if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight))
472  return;
473  uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader()
474  ? FalseWeight
475  : TrueWeight;
476  assert(UnrollFactor > 1);
477  uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight;
478  BasicBlock *Header = RemainderLoop->getHeader();
479  BasicBlock *Latch = RemainderLoop->getLoopLatch();
480  auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator());
481  unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1);
482  MDBuilder MDB(RemainderLatchBR->getContext());
483  MDNode *WeightNode =
484  HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
485  : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
486  RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
487 }
488 
489 /// Calculate ModVal = (BECount + 1) % Count on the abstract integer domain
490 /// accounting for the possibility of unsigned overflow in the 2s complement
491 /// domain. Preconditions:
492 /// 1) TripCount = BECount + 1 (allowing overflow)
493 /// 2) Log2(Count) <= BitWidth(BECount)
495  Value *TripCount, unsigned Count) {
496  // Note that TripCount is BECount + 1.
497  if (isPowerOf2_32(Count))
498  // If the expression is zero, then either:
499  // 1. There are no iterations to be run in the prolog/epilog loop.
500  // OR
501  // 2. The addition computing TripCount overflowed.
502  //
503  // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
504  // the number of iterations that remain to be run in the original loop is a
505  // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (a
506  // precondition of this method).
507  return B.CreateAnd(TripCount, Count - 1, "xtraiter");
508 
509  // As (BECount + 1) can potentially unsigned overflow we count
510  // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
511  Constant *CountC = ConstantInt::get(BECount->getType(), Count);
512  Value *ModValTmp = B.CreateURem(BECount, CountC);
513  Value *ModValAdd = B.CreateAdd(ModValTmp,
514  ConstantInt::get(ModValTmp->getType(), 1));
515  // At that point (BECount % Count) + 1 could be equal to Count.
516  // To handle this case we need to take mod by Count one more time.
517  return B.CreateURem(ModValAdd, CountC, "xtraiter");
518 }
519 
520 
521 /// Insert code in the prolog/epilog code when unrolling a loop with a
522 /// run-time trip-count.
523 ///
524 /// This method assumes that the loop unroll factor is total number
525 /// of loop bodies in the loop after unrolling. (Some folks refer
526 /// to the unroll factor as the number of *extra* copies added).
527 /// We assume also that the loop unroll factor is a power-of-two. So, after
528 /// unrolling the loop, the number of loop bodies executed is 2,
529 /// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch
530 /// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
531 /// the switch instruction is generated.
532 ///
533 /// ***Prolog case***
534 /// extraiters = tripcount % loopfactor
535 /// if (extraiters == 0) jump Loop:
536 /// else jump Prol:
537 /// Prol: LoopBody;
538 /// extraiters -= 1 // Omitted if unroll factor is 2.
539 /// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.
540 /// if (tripcount < loopfactor) jump End:
541 /// Loop:
542 /// ...
543 /// End:
544 ///
545 /// ***Epilog case***
546 /// extraiters = tripcount % loopfactor
547 /// if (tripcount < loopfactor) jump LoopExit:
548 /// unroll_iters = tripcount - extraiters
549 /// Loop: LoopBody; (executes unroll_iter times);
550 /// unroll_iter -= 1
551 /// if (unroll_iter != 0) jump Loop:
552 /// LoopExit:
553 /// if (extraiters == 0) jump EpilExit:
554 /// Epil: LoopBody; (executes extraiters times)
555 /// extraiters -= 1 // Omitted if unroll factor is 2.
556 /// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
557 /// EpilExit:
558 
560  Loop *L, unsigned Count, bool AllowExpensiveTripCount,
561  bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV,
563  const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop) {
564  LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
565  LLVM_DEBUG(L->dump());
566  LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
567  : dbgs() << "Using prolog remainder.\n");
568 
569  // Make sure the loop is in canonical form.
570  if (!L->isLoopSimplifyForm()) {
571  LLVM_DEBUG(dbgs() << "Not in simplify form!\n");
572  return false;
573  }
574 
575  // Guaranteed by LoopSimplifyForm.
576  BasicBlock *Latch = L->getLoopLatch();
577  BasicBlock *Header = L->getHeader();
578 
579  BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
580 
581  if (!LatchBR || LatchBR->isUnconditional()) {
582  // The loop-rotate pass can be helpful to avoid this in many cases.
583  LLVM_DEBUG(
584  dbgs()
585  << "Loop latch not terminated by a conditional branch.\n");
586  return false;
587  }
588 
589  unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
590  BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
591 
592  if (L->contains(LatchExit)) {
593  // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
594  // targets of the Latch be an exit block out of the loop.
595  LLVM_DEBUG(
596  dbgs()
597  << "One of the loop latch successors must be the exit block.\n");
598  return false;
599  }
600 
601  // These are exit blocks other than the target of the latch exiting block.
602  SmallVector<BasicBlock *, 4> OtherExits;
603  L->getUniqueNonLatchExitBlocks(OtherExits);
604  // Support only single exit and exiting block unless multi-exit loop
605  // unrolling is enabled.
606  if (!L->getExitingBlock() || OtherExits.size()) {
607  // We rely on LCSSA form being preserved when the exit blocks are transformed.
608  // (Note that only an off-by-default mode of the old PM disables PreserveLCCA.)
609  if (!PreserveLCSSA)
610  return false;
611 
612  if (!canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit,
613  UseEpilogRemainder)) {
614  LLVM_DEBUG(
615  dbgs()
616  << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
617  "enabled!\n");
618  return false;
619  }
620  }
621  // Use Scalar Evolution to compute the trip count. This allows more loops to
622  // be unrolled than relying on induction var simplification.
623  if (!SE)
624  return false;
625 
626  // Only unroll loops with a computable trip count.
627  // We calculate the backedge count by using getExitCount on the Latch block,
628  // which is proven to be the only exiting block in this loop. This is same as
629  // calculating getBackedgeTakenCount on the loop (which computes SCEV for all
630  // exiting blocks).
631  const SCEV *BECountSC = SE->getExitCount(L, Latch);
632  if (isa<SCEVCouldNotCompute>(BECountSC)) {
633  LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n");
634  return false;
635  }
636 
637  unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
638 
639  // Add 1 since the backedge count doesn't include the first loop iteration.
640  // (Note that overflow can occur, this is handled explicitly below)
641  const SCEV *TripCountSC =
642  SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
643  if (isa<SCEVCouldNotCompute>(TripCountSC)) {
644  LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
645  return false;
646  }
647 
648  BasicBlock *PreHeader = L->getLoopPreheader();
649  BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
650  const DataLayout &DL = Header->getModule()->getDataLayout();
651  SCEVExpander Expander(*SE, DL, "loop-unroll");
652  if (!AllowExpensiveTripCount &&
653  Expander.isHighCostExpansion(TripCountSC, L, SCEVCheapExpansionBudget,
654  TTI, PreHeaderBR)) {
655  LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
656  return false;
657  }
658 
659  // This constraint lets us deal with an overflowing trip count easily; see the
660  // comment on ModVal below.
661  if (Log2_32(Count) > BEWidth) {
662  LLVM_DEBUG(
663  dbgs()
664  << "Count failed constraint on overflow trip count calculation.\n");
665  return false;
666  }
667 
668  // Loop structure is the following:
669  //
670  // PreHeader
671  // Header
672  // ...
673  // Latch
674  // LatchExit
675 
676  BasicBlock *NewPreHeader;
677  BasicBlock *NewExit = nullptr;
678  BasicBlock *PrologExit = nullptr;
679  BasicBlock *EpilogPreHeader = nullptr;
680  BasicBlock *PrologPreHeader = nullptr;
681 
682  if (UseEpilogRemainder) {
683  // If epilog remainder
684  // Split PreHeader to insert a branch around loop for unrolling.
685  NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
686  NewPreHeader->setName(PreHeader->getName() + ".new");
687  // Split LatchExit to create phi nodes from branch above.
688  NewExit = SplitBlockPredecessors(LatchExit, {Latch}, ".unr-lcssa", DT, LI,
689  nullptr, PreserveLCSSA);
690  // NewExit gets its DebugLoc from LatchExit, which is not part of the
691  // original Loop.
692  // Fix this by setting Loop's DebugLoc to NewExit.
693  auto *NewExitTerminator = NewExit->getTerminator();
694  NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());
695  // Split NewExit to insert epilog remainder loop.
696  EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
697  EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
698 
699  // If the latch exits from multiple level of nested loops, then
700  // by assumption there must be another loop exit which branches to the
701  // outer loop and we must adjust the loop for the newly inserted blocks
702  // to account for the fact that our epilogue is still in the same outer
703  // loop. Note that this leaves loopinfo temporarily out of sync with the
704  // CFG until the actual epilogue loop is inserted.
705  if (auto *ParentL = L->getParentLoop())
706  if (LI->getLoopFor(LatchExit) != ParentL) {
707  LI->removeBlock(NewExit);
708  ParentL->addBasicBlockToLoop(NewExit, *LI);
709  LI->removeBlock(EpilogPreHeader);
710  ParentL->addBasicBlockToLoop(EpilogPreHeader, *LI);
711  }
712 
713  } else {
714  // If prolog remainder
715  // Split the original preheader twice to insert prolog remainder loop
716  PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
717  PrologPreHeader->setName(Header->getName() + ".prol.preheader");
718  PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
719  DT, LI);
720  PrologExit->setName(Header->getName() + ".prol.loopexit");
721  // Split PrologExit to get NewPreHeader.
722  NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
723  NewPreHeader->setName(PreHeader->getName() + ".new");
724  }
725  // Loop structure should be the following:
726  // Epilog Prolog
727  //
728  // PreHeader PreHeader
729  // *NewPreHeader *PrologPreHeader
730  // Header *PrologExit
731  // ... *NewPreHeader
732  // Latch Header
733  // *NewExit ...
734  // *EpilogPreHeader Latch
735  // LatchExit LatchExit
736 
737  // Calculate conditions for branch around loop for unrolling
738  // in epilog case and around prolog remainder loop in prolog case.
739  // Compute the number of extra iterations required, which is:
740  // extra iterations = run-time trip count % loop unroll factor
741  PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
742  Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
743  PreHeaderBR);
744  Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
745  PreHeaderBR);
746  IRBuilder<> B(PreHeaderBR);
747  Value * const ModVal = CreateTripRemainder(B, BECount, TripCount, Count);
748 
749  Value *BranchVal =
750  UseEpilogRemainder ? B.CreateICmpULT(BECount,
751  ConstantInt::get(BECount->getType(),
752  Count - 1)) :
753  B.CreateIsNotNull(ModVal, "lcmp.mod");
754  BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
755  BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
756  // Branch to either remainder (extra iterations) loop or unrolling loop.
757  B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
758  PreHeaderBR->eraseFromParent();
759  if (DT) {
760  if (UseEpilogRemainder)
761  DT->changeImmediateDominator(NewExit, PreHeader);
762  else
763  DT->changeImmediateDominator(PrologExit, PreHeader);
764  }
765  Function *F = Header->getParent();
766  // Get an ordered list of blocks in the loop to help with the ordering of the
767  // cloned blocks in the prolog/epilog code
768  LoopBlocksDFS LoopBlocks(L);
769  LoopBlocks.perform(LI);
770 
771  //
772  // For each extra loop iteration, create a copy of the loop's basic blocks
773  // and generate a condition that branches to the copy depending on the
774  // number of 'left over' iterations.
775  //
776  std::vector<BasicBlock *> NewBlocks;
777  ValueToValueMapTy VMap;
778 
779  // Clone all the basic blocks in the loop. If Count is 2, we don't clone
780  // the loop, otherwise we create a cloned loop to execute the extra
781  // iterations. This function adds the appropriate CFG connections.
782  BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
783  BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
784  Loop *remainderLoop = CloneLoopBlocks(
785  L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot,
786  NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
787 
788  // Assign the maximum possible trip count as the back edge weight for the
789  // remainder loop if the original loop comes with a branch weight.
790  if (remainderLoop && !UnrollRemainder)
791  updateLatchBranchWeightsForRemainderLoop(L, remainderLoop, Count);
792 
793  // Insert the cloned blocks into the function.
794  F->getBasicBlockList().splice(InsertBot->getIterator(),
795  F->getBasicBlockList(),
796  NewBlocks[0]->getIterator(),
797  F->end());
798 
799  // Now the loop blocks are cloned and the other exiting blocks from the
800  // remainder are connected to the original Loop's exit blocks. The remaining
801  // work is to update the phi nodes in the original loop, and take in the
802  // values from the cloned region.
803  for (auto *BB : OtherExits) {
804  // Given we preserve LCSSA form, we know that the values used outside the
805  // loop will be used through these phi nodes at the exit blocks that are
806  // transformed below.
807  for (PHINode &PN : BB->phis()) {
808  unsigned oldNumOperands = PN.getNumIncomingValues();
809  // Add the incoming values from the remainder code to the end of the phi
810  // node.
811  for (unsigned i = 0; i < oldNumOperands; i++){
812  auto *PredBB =PN.getIncomingBlock(i);
813  if (PredBB == Latch)
814  // The latch exit is handled seperately, see connectX
815  continue;
816  if (!L->contains(PredBB))
817  // Even if we had dedicated exits, the code above inserted an
818  // extra branch which can reach the latch exit.
819  continue;
820 
821  auto *V = PN.getIncomingValue(i);
822  if (Instruction *I = dyn_cast<Instruction>(V))
823  if (L->contains(I))
824  V = VMap.lookup(I);
825  PN.addIncoming(V, cast<BasicBlock>(VMap[PredBB]));
826  }
827  }
828 #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
829  for (BasicBlock *SuccBB : successors(BB)) {
830  assert(!(llvm::is_contained(OtherExits, SuccBB) || SuccBB == LatchExit) &&
831  "Breaks the definition of dedicated exits!");
832  }
833 #endif
834  }
835 
836  // Update the immediate dominator of the exit blocks and blocks that are
837  // reachable from the exit blocks. This is needed because we now have paths
838  // from both the original loop and the remainder code reaching the exit
839  // blocks. While the IDom of these exit blocks were from the original loop,
840  // now the IDom is the preheader (which decides whether the original loop or
841  // remainder code should run).
842  if (DT && !L->getExitingBlock()) {
843  SmallVector<BasicBlock *, 16> ChildrenToUpdate;
844  // NB! We have to examine the dom children of all loop blocks, not just
845  // those which are the IDom of the exit blocks. This is because blocks
846  // reachable from the exit blocks can have their IDom as the nearest common
847  // dominator of the exit blocks.
848  for (auto *BB : L->blocks()) {
849  auto *DomNodeBB = DT->getNode(BB);
850  for (auto *DomChild : DomNodeBB->children()) {
851  auto *DomChildBB = DomChild->getBlock();
852  if (!L->contains(LI->getLoopFor(DomChildBB)))
853  ChildrenToUpdate.push_back(DomChildBB);
854  }
855  }
856  for (auto *BB : ChildrenToUpdate)
857  DT->changeImmediateDominator(BB, PreHeader);
858  }
859 
860  // Loop structure should be the following:
861  // Epilog Prolog
862  //
863  // PreHeader PreHeader
864  // NewPreHeader PrologPreHeader
865  // Header PrologHeader
866  // ... ...
867  // Latch PrologLatch
868  // NewExit PrologExit
869  // EpilogPreHeader NewPreHeader
870  // EpilogHeader Header
871  // ... ...
872  // EpilogLatch Latch
873  // LatchExit LatchExit
874 
875  // Rewrite the cloned instruction operands to use the values created when the
876  // clone is created.
877  for (BasicBlock *BB : NewBlocks) {
878  for (Instruction &I : *BB) {
879  RemapInstruction(&I, VMap,
881  }
882  }
883 
884  if (UseEpilogRemainder) {
885  // Connect the epilog code to the original loop and update the
886  // PHI functions.
887  ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader,
888  EpilogPreHeader, NewPreHeader, VMap, DT, LI,
889  PreserveLCSSA);
890 
891  // Update counter in loop for unrolling.
892  // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.
893  // Subtle: TestVal can be 0 if we wrapped when computing the trip count,
894  // thus we must compare the post-increment (wrapping) value.
895  IRBuilder<> B2(NewPreHeader->getTerminator());
896  Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
897  BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
898  PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
899  Header->getFirstNonPHI());
900  B2.SetInsertPoint(LatchBR);
901  auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
902  auto *One = ConstantInt::get(NewIdx->getType(), 1);
903  Value *IdxNext = B2.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
904  auto Pred = LatchBR->getSuccessor(0) == Header ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
905  Value *IdxCmp = B2.CreateICmp(Pred, IdxNext, TestVal, NewIdx->getName() + ".ncmp");
906  NewIdx->addIncoming(Zero, NewPreHeader);
907  NewIdx->addIncoming(IdxNext, Latch);
908  LatchBR->setCondition(IdxCmp);
909  } else {
910  // Connect the prolog code to the original loop and update the
911  // PHI functions.
912  ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
913  NewPreHeader, VMap, DT, LI, PreserveLCSSA);
914  }
915 
916  // If this loop is nested, then the loop unroller changes the code in the any
917  // of its parent loops, so the Scalar Evolution pass needs to be run again.
918  SE->forgetTopmostLoop(L);
919 
920  // Verify that the Dom Tree and Loop Info are correct.
921 #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
922  if (DT) {
924  LI->verify(*DT);
925  }
926 #endif
927 
928  // For unroll factor 2 remainder loop will have 1 iteration.
929  if (Count == 2 && DT && LI && SE) {
930  // TODO: This code could probably be pulled out into a helper function
931  // (e.g. breakLoopBackedgeAndSimplify) and reused in loop-deletion.
932  BasicBlock *RemainderLatch = remainderLoop->getLoopLatch();
933  assert(RemainderLatch);
934  SmallVector<BasicBlock*> RemainderBlocks(remainderLoop->getBlocks().begin(),
935  remainderLoop->getBlocks().end());
936  breakLoopBackedge(remainderLoop, *DT, *SE, *LI, nullptr);
937  remainderLoop = nullptr;
938 
939  // Simplify loop values after breaking the backedge
940  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
942  for (BasicBlock *BB : RemainderBlocks) {
943  for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
944  if (Value *V = SimplifyInstruction(&Inst, {DL, nullptr, DT, AC}))
945  if (LI->replacementPreservesLCSSAForm(&Inst, V))
946  Inst.replaceAllUsesWith(V);
947  if (isInstructionTriviallyDead(&Inst))
948  DeadInsts.emplace_back(&Inst);
949  }
950  // We can't do recursive deletion until we're done iterating, as we might
951  // have a phi which (potentially indirectly) uses instructions later in
952  // the block we're iterating through.
954  }
955 
956  // Merge latch into exit block.
957  auto *ExitBB = RemainderLatch->getSingleSuccessor();
958  assert(ExitBB && "required after breaking cond br backedge");
960  MergeBlockIntoPredecessor(ExitBB, &DTU, LI);
961  }
962 
963  // Canonicalize to LoopSimplifyForm both original and remainder loops. We
964  // cannot rely on the LoopUnrollPass to do this because it only does
965  // canonicalization for parent/subloops and not the sibling loops.
966  if (OtherExits.size() > 0) {
967  // Generate dedicated exit blocks for the original loop, to preserve
968  // LoopSimplifyForm.
969  formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA);
970  // Generate dedicated exit blocks for the remainder loop if one exists, to
971  // preserve LoopSimplifyForm.
972  if (remainderLoop)
973  formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA);
974  }
975 
976  auto UnrollResult = LoopUnrollResult::Unmodified;
977  if (remainderLoop && UnrollRemainder) {
978  LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
979  UnrollResult =
980  UnrollLoop(remainderLoop,
981  {/*Count*/ Count - 1, /*Force*/ false, /*Runtime*/ false,
982  /*AllowExpensiveTripCount*/ false,
983  /*UnrollRemainder*/ false, ForgetAllSCEV},
984  LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA);
985  }
986 
987  if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
988  *ResultLoop = remainderLoop;
989  NumRuntimeUnrolled++;
990  return true;
991 }
UnrollRuntimeMultiExit
static cl::opt< bool > UnrollRuntimeMultiExit("unroll-runtime-multi-exit", cl::init(false), cl::Hidden, cl::desc("Allow runtime unrolling for loops with multiple exits, when " "epilog is generated"))
i
i
Definition: README.txt:29
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:523
llvm::IRBuilderBase::SetInsertPoint
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:184
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::DominatorTreeBase::findNearestCommonDominator
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
Definition: GenericDomTree.h:468
llvm::addClonedBlockToLoopInfo
const Loop * addClonedBlockToLoopInfo(BasicBlock *OriginalBB, BasicBlock *ClonedBB, LoopInfo *LI, NewLoopsMap &NewLoops)
Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary and adds a mapping from the o...
Definition: LoopUnroll.cpp:147
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:742
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
Metadata.h
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
ScalarEvolutionExpander.h
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:66
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1177
llvm::LoopInfoBase::verify
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:690
Statistic.h
llvm::LLVMLoopUnrollFollowupRemainder
const char *const LLVMLoopUnrollFollowupRemainder
Definition: UnrollLoop.h:44
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::IRBuilder<>
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:743
llvm::makeFollowupLoopID
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:272
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::SCEVExpander::expandCodeFor
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
Definition: ScalarEvolutionExpander.h:278
canProfitablyUnrollMultiExitLoop
static bool canProfitablyUnrollMultiExitLoop(Loop *L, SmallVectorImpl< BasicBlock * > &OtherExits, BasicBlock *LatchExit, bool UseEpilogRemainder)
Returns true if we can profitably unroll the multi-exit loop L.
Definition: LoopUnrollRuntime.cpp:416
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
ConnectProlog
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *PrologExit, BasicBlock *OriginalLoopLatchExit, BasicBlock *PreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
Connect the unrolling prolog code to the original loop.
Definition: LoopUnrollRuntime.cpp:72
ScalarEvolution.h
Module.h
llvm::BasicBlock::getSingleSuccessor
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:298
llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition: GenericDomTree.h:351
llvm::Optional
Definition: APInt.h:33
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:31
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
llvm::DomTreeNodeBase::getIDom
DomTreeNodeBase * getIDom() const
Definition: GenericDomTree.h:89
llvm::JumpTable::Full
@ Full
Definition: TargetOptions.h:50
llvm::PHINode::setIncomingValue
void setIncomingValue(unsigned i, Value *V)
Definition: Instructions.h:2756
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::LoopBlocksDFS::beginRPO
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
Definition: LoopIterator.h:136
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:289
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
CommandLine.h
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::RemapInstruction
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:254
llvm::Loop::setLoopAlreadyUnrolled
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop's loop id metadata.
Definition: LoopInfo.cpp:540
llvm::SCEVExpander::isHighCostExpansion
bool isHighCostExpansion(const SCEV *Expr, Loop *L, unsigned Budget, const TargetTransformInfo *TTI, const Instruction *At)
Return true for expressions that can't be evaluated at runtime within given Budget.
Definition: ScalarEvolutionExpander.h:241
llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
Definition: Instructions.h:2753
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::MDBuilder::createBranchWeights
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::LoopInfo::replacementPreservesLCSSAForm
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:1118
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:596
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::Instruction
Definition: Instruction.h:45
llvm::LoopBlocksDFS::endRPO
RPOIterator endRPO() const
Definition: LoopIterator.h:140
UnrollRuntimeOtherExitPredictable
static cl::opt< bool > UnrollRuntimeOtherExitPredictable("unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden, cl::desc("Assume the non latch exit block to be predictable"))
llvm::BranchInst::setCondition
void setCondition(Value *V)
Definition: Instructions.h:3178
llvm::SimplifyInstruction
Value * SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
Definition: InstructionSimplify.cpp:6363
MDBuilder.h
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:354
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:32
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:376
llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:34
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1782
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:402
llvm::DomTreeUpdater
Definition: DomTreeUpdater.h:28
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:932
LoopUtils.h
SmallPtrSet.h
llvm::SplitBlockPredecessors
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
Definition: BasicBlockUtils.cpp:1174
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:148
llvm::DominatorTreeBase::changeImmediateDominator
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
Definition: GenericDomTree.h:655
llvm::LoopBase::getExitingBlock
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:216
Utils.h
llvm::Instruction::extractProfMetadata
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
Definition: Metadata.cpp:1405
llvm::RF_NoModuleLevelChanges
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:71
LoopIterator.h
llvm::LoopUnrollResult::FullyUnrolled
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
llvm::breakLoopBackedge
void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, MemorySSA *MSSA)
Remove the backedge of the specified loop.
Definition: LoopUtils.cpp:701
CreateTripRemainder
static Value * CreateTripRemainder(IRBuilder<> &B, Value *BECount, Value *TripCount, unsigned Count)
Calculate ModVal = (BECount + 1) % Count on the abstract integer domain accounting for the possibilit...
Definition: LoopUnrollRuntime.cpp:494
llvm::LLVMLoopUnrollFollowupAll
const char *const LLVMLoopUnrollFollowupAll
Definition: UnrollLoop.h:41
llvm::LoopBlocksDFS::RPOIterator
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
Definition: LoopIterator.h:101
llvm::LoopBlocksDFS
Store the result of a depth first search within basic blocks contained by a single loop.
Definition: LoopIterator.h:97
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:479
llvm::CloneBasicBlock
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
Definition: CloneFunction.cpp:43
BasicBlock.h
llvm::cl::opt< bool >
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:77
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
llvm::UnrollLoop
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:269
llvm::IRBuilderBase::CreateICmp
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2203
llvm::DomTreeNodeBase::getBlock
NodeT * getBlock() const
Definition: GenericDomTree.h:88
uint64_t
llvm::PHINode::setIncomingBlock
void setIncomingBlock(unsigned i, BasicBlock *BB)
Definition: Instructions.h:2792
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2807
llvm::ValueMap::erase
bool erase(const KeyT &Val)
Definition: ValueMap.h:191
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:970
I
#define I(x, y, z)
Definition: MD5.cpp:58
Cloning.h
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:642
llvm::PHINode::getBasicBlockIndex
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
Definition: Instructions.h:2835
llvm::SCEVCheapExpansionBudget
cl::opt< unsigned > SCEVCheapExpansionBudget
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1714
llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:367
llvm::IRBuilderBase::CreateAdd
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1212
llvm::LoopBlocksDFS::perform
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopInfo.cpp:1221
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::PHINode::setIncomingValueForBlock
void setIncomingValueForBlock(const BasicBlock *BB, Value *V)
Set every incoming value(s) for block BB to V.
Definition: Instructions.h:2849
updateLatchBranchWeightsForRemainderLoop
static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop, Loop *RemainderLoop, uint64_t UnrollFactor)
Definition: LoopUnrollRuntime.cpp:465
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:163
llvm::ScalarEvolution::forgetTopmostLoop
void forgetTopmostLoop(const Loop *L)
Definition: ScalarEvolution.cpp:7907
llvm::MDNode
Metadata node.
Definition: Metadata.h:906
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:650
llvm::BranchInst::isUnconditional
bool isUnconditional() const
Definition: Instructions.h:3170
llvm::UnrollRuntimeLoopRemainder
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop=nullptr)
Insert code in the prolog/epilog code when unrolling a loop with a run-time trip-count.
Definition: LoopUnrollRuntime.cpp:559
llvm::isInstructionTriviallyDead
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:398
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::MergeBlockIntoPredecessor
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false)
Attempts to merge a block into its predecessor, if possible.
Definition: BasicBlockUtils.cpp:180
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::Instruction::setSuccessor
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
Definition: Instruction.cpp:799
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:450
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::DomTreeUpdater::UpdateStrategy::Eager
@ Eager
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::ValueMap< const Value *, WeakTrackingVH >
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
llvm::LoopBase::getUniqueNonLatchExitBlocks
void getUniqueNonLatchExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop except successors from Latch block are not considered...
Definition: LoopInfoImpl.h:129
llvm::DominatorTreeBase::addNewBlock
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
Definition: GenericDomTree.h:619
llvm::RF_IgnoreMissingLocals
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:89
llvm::SplitEdge
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
Definition: BasicBlockUtils.cpp:518
llvm::formDedicatedExitBlocks
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:63
llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:528
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2699
llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:504
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::Loop::dump
void dump() const
Definition: LoopInfo.cpp:670
llvm::MDBuilder
Definition: MDBuilder.h:35
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:370
Dominators.h
UnrollLoop.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
InstructionSimplify.h
llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition: GenericDomTree.h:802
llvm::PHINode
Definition: Instructions.h:2657
llvm::SmallVectorImpl< BasicBlock * >
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:401
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:381
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2440
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::ValueMap::lookup
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: ValueMap.h:165
CloneLoopBlocks
static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector< BasicBlock * > &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI)
Create a clone of the blocks in a loop and connect them together.
Definition: LoopUnrollRuntime.cpp:311
llvm::IRBuilderBase::CreateSub
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1228
llvm::cl::desc
Definition: CommandLine.h:412
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3092
llvm::LoopUnrollResult::Unmodified
@ Unmodified
The loop was not modified.
raw_ostream.h
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:839
ConnectEpilog
static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, BasicBlock *Exit, BasicBlock *PreHeader, BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
Connect the unrolling epilog code to the original loop.
Definition: LoopUnrollRuntime.cpp:191
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::ScalarEvolution::getExitCount
const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
Definition: ScalarEvolution.cpp:7701
Debug.h
llvm::Optional::getValue
constexpr const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:283
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3185
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:917