LLVM  9.0.0svn
LoopUnrollRuntime.cpp
Go to the documentation of this file.
1 //===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements some loop unrolling utilities for loops with run-time
11 // trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
12 // trip counts.
13 //
14 // The functions in this file are used to generate extra code when the
15 // run-time trip count modulo the unroll factor is not 0. When this is the
16 // case, we need to generate code to execute these 'left over' iterations.
17 //
18 // The current strategy generates an if-then-else sequence prior to the
19 // unrolled loop to execute the 'left over' iterations before or after the
20 // unrolled loop.
21 //
22 //===----------------------------------------------------------------------===//
23 
24 #include "llvm/ADT/SmallPtrSet.h"
25 #include "llvm/ADT/Statistic.h"
30 #include "llvm/IR/BasicBlock.h"
31 #include "llvm/IR/Dominators.h"
32 #include "llvm/IR/Metadata.h"
33 #include "llvm/IR/Module.h"
34 #include "llvm/Support/Debug.h"
36 #include "llvm/Transforms/Utils.h"
41 #include <algorithm>
42 
43 using namespace llvm;
44 
45 #define DEBUG_TYPE "loop-unroll"
46 
47 STATISTIC(NumRuntimeUnrolled,
48  "Number of loops unrolled with run-time trip counts");
50  "unroll-runtime-multi-exit", cl::init(false), cl::Hidden,
51  cl::desc("Allow runtime unrolling for loops with multiple exits, when "
52  "epilog is generated"));
53 
54 /// Connect the unrolling prolog code to the original loop.
55 /// The unrolling prolog code contains code to execute the
56 /// 'extra' iterations if the run-time trip count modulo the
57 /// unroll count is non-zero.
58 ///
59 /// This function performs the following:
60 /// - Create PHI nodes at prolog end block to combine values
61 /// that exit the prolog code and jump around the prolog.
62 /// - Add a PHI operand to a PHI node at the loop exit block
63 /// for values that exit the prolog and go around the loop.
64 /// - Branch around the original loop if the trip count is less
65 /// than the unroll factor.
66 ///
67 static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
68  BasicBlock *PrologExit,
69  BasicBlock *OriginalLoopLatchExit,
70  BasicBlock *PreHeader, BasicBlock *NewPreHeader,
72  LoopInfo *LI, bool PreserveLCSSA) {
73  // Loop structure should be the following:
74  // Preheader
75  // PrologHeader
76  // ...
77  // PrologLatch
78  // PrologExit
79  // NewPreheader
80  // Header
81  // ...
82  // Latch
83  // LatchExit
84  BasicBlock *Latch = L->getLoopLatch();
85  assert(Latch && "Loop must have a latch");
86  BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
87 
88  // Create a PHI node for each outgoing value from the original loop
89  // (which means it is an outgoing value from the prolog code too).
90  // The new PHI node is inserted in the prolog end basic block.
91  // The new PHI node value is added as an operand of a PHI node in either
92  // the loop header or the loop exit block.
93  for (BasicBlock *Succ : successors(Latch)) {
94  for (PHINode &PN : Succ->phis()) {
95  // Add a new PHI node to the prolog end block and add the
96  // appropriate incoming values.
97  // TODO: This code assumes that the PrologExit (or the LatchExit block for
98  // prolog loop) contains only one predecessor from the loop, i.e. the
99  // PrologLatch. When supporting multiple-exiting block loops, we can have
100  // two or more blocks that have the LatchExit as the target in the
101  // original loop.
102  PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
103  PrologExit->getFirstNonPHI());
104  // Adding a value to the new PHI node from the original loop preheader.
105  // This is the value that skips all the prolog code.
106  if (L->contains(&PN)) {
107  // Succ is loop header.
108  NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader),
109  PreHeader);
110  } else {
111  // Succ is LatchExit.
112  NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader);
113  }
114 
115  Value *V = PN.getIncomingValueForBlock(Latch);
116  if (Instruction *I = dyn_cast<Instruction>(V)) {
117  if (L->contains(I)) {
118  V = VMap.lookup(I);
119  }
120  }
121  // Adding a value to the new PHI node from the last prolog block
122  // that was created.
123  NewPN->addIncoming(V, PrologLatch);
124 
125  // Update the existing PHI node operand with the value from the
126  // new PHI node. How this is done depends on if the existing
127  // PHI node is in the original loop block, or the exit block.
128  if (L->contains(&PN)) {
129  PN.setIncomingValue(PN.getBasicBlockIndex(NewPreHeader), NewPN);
130  } else {
131  PN.addIncoming(NewPN, PrologExit);
132  }
133  }
134  }
135 
136  // Make sure that created prolog loop is in simplified form
137  SmallVector<BasicBlock *, 4> PrologExitPreds;
138  Loop *PrologLoop = LI->getLoopFor(PrologLatch);
139  if (PrologLoop) {
140  for (BasicBlock *PredBB : predecessors(PrologExit))
141  if (PrologLoop->contains(PredBB))
142  PrologExitPreds.push_back(PredBB);
143 
144  SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI,
145  nullptr, PreserveLCSSA);
146  }
147 
148  // Create a branch around the original loop, which is taken if there are no
149  // iterations remaining to be executed after running the prologue.
150  Instruction *InsertPt = PrologExit->getTerminator();
151  IRBuilder<> B(InsertPt);
152 
153  assert(Count != 0 && "nonsensical Count!");
154 
155  // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
156  // This means %xtraiter is (BECount + 1) and all of the iterations of this
157  // loop were executed by the prologue. Note that if BECount <u (Count - 1)
158  // then (BECount + 1) cannot unsigned-overflow.
159  Value *BrLoopExit =
160  B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
161  // Split the exit to maintain loop canonicalization guarantees
162  SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit));
163  SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
164  nullptr, PreserveLCSSA);
165  // Add the branch to the exit block (around the unrolled loop)
166  B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
167  InsertPt->eraseFromParent();
168  if (DT)
169  DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit);
170 }
171 
172 /// Connect the unrolling epilog code to the original loop.
173 /// The unrolling epilog code contains code to execute the
174 /// 'extra' iterations if the run-time trip count modulo the
175 /// unroll count is non-zero.
176 ///
177 /// This function performs the following:
178 /// - Update PHI nodes at the unrolling loop exit and epilog loop exit
179 /// - Create PHI nodes at the unrolling loop exit to combine
180 /// values that exit the unrolling loop code and jump around it.
181 /// - Update PHI operands in the epilog loop by the new PHI nodes
182 /// - Branch around the epilog loop if extra iters (ModVal) is zero.
183 ///
184 static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
185  BasicBlock *Exit, BasicBlock *PreHeader,
186  BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
187  ValueToValueMapTy &VMap, DominatorTree *DT,
188  LoopInfo *LI, bool PreserveLCSSA) {
189  BasicBlock *Latch = L->getLoopLatch();
190  assert(Latch && "Loop must have a latch");
191  BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
192 
193  // Loop structure should be the following:
194  //
195  // PreHeader
196  // NewPreHeader
197  // Header
198  // ...
199  // Latch
200  // NewExit (PN)
201  // EpilogPreHeader
202  // EpilogHeader
203  // ...
204  // EpilogLatch
205  // Exit (EpilogPN)
206 
207  // Update PHI nodes at NewExit and Exit.
208  for (PHINode &PN : NewExit->phis()) {
209  // PN should be used in another PHI located in Exit block as
210  // Exit was split by SplitBlockPredecessors into Exit and NewExit
211  // Basicaly it should look like:
212  // NewExit:
213  // PN = PHI [I, Latch]
214  // ...
215  // Exit:
216  // EpilogPN = PHI [PN, EpilogPreHeader]
217  //
218  // There is EpilogPreHeader incoming block instead of NewExit as
219  // NewExit was spilt 1 more time to get EpilogPreHeader.
220  assert(PN.hasOneUse() && "The phi should have 1 use");
221  PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser());
222  assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
223 
224  // Add incoming PreHeader from branch around the Loop
225  PN.addIncoming(UndefValue::get(PN.getType()), PreHeader);
226 
227  Value *V = PN.getIncomingValueForBlock(Latch);
229  if (I && L->contains(I))
230  // If value comes from an instruction in the loop add VMap value.
231  V = VMap.lookup(I);
232  // For the instruction out of the loop, constant or undefined value
233  // insert value itself.
234  EpilogPN->addIncoming(V, EpilogLatch);
235 
236  assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
237  "EpilogPN should have EpilogPreHeader incoming block");
238  // Change EpilogPreHeader incoming block to NewExit.
239  EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
240  NewExit);
241  // Now PHIs should look like:
242  // NewExit:
243  // PN = PHI [I, Latch], [undef, PreHeader]
244  // ...
245  // Exit:
246  // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
247  }
248 
249  // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
250  // Update corresponding PHI nodes in epilog loop.
251  for (BasicBlock *Succ : successors(Latch)) {
252  // Skip this as we already updated phis in exit blocks.
253  if (!L->contains(Succ))
254  continue;
255  for (PHINode &PN : Succ->phis()) {
256  // Add new PHI nodes to the loop exit block and update epilog
257  // PHIs with the new PHI values.
258  PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
259  NewExit->getFirstNonPHI());
260  // Adding a value to the new PHI node from the unrolling loop preheader.
261  NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
262  // Adding a value to the new PHI node from the unrolling loop latch.
263  NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch);
264 
265  // Update the existing PHI node operand with the value from the new PHI
266  // node. Corresponding instruction in epilog loop should be PHI.
267  PHINode *VPN = cast<PHINode>(VMap[&PN]);
268  VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN);
269  }
270  }
271 
272  Instruction *InsertPt = NewExit->getTerminator();
273  IRBuilder<> B(InsertPt);
274  Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
275  assert(Exit && "Loop must have a single exit block only");
276  // Split the epilogue exit to maintain loop canonicalization guarantees
278  SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
279  PreserveLCSSA);
280  // Add the branch to the exit block (around the unrolling loop)
281  B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
282  InsertPt->eraseFromParent();
283  if (DT)
284  DT->changeImmediateDominator(Exit, NewExit);
285 
286  // Split the main loop exit to maintain canonicalization guarantees.
287  SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
288  SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr,
289  PreserveLCSSA);
290 }
291 
292 /// Create a clone of the blocks in a loop and connect them together.
293 /// If CreateRemainderLoop is false, loop structure will not be cloned,
294 /// otherwise a new loop will be created including all cloned blocks, and the
295 /// iterator of it switches to count NewIter down to 0.
296 /// The cloned blocks should be inserted between InsertTop and InsertBot.
297 /// If loop structure is cloned InsertTop should be new preheader, InsertBot
298 /// new loop exit.
299 /// Return the new cloned loop that is created when CreateRemainderLoop is true.
300 static Loop *
301 CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
302  const bool UseEpilogRemainder, const bool UnrollRemainder,
303  BasicBlock *InsertTop,
304  BasicBlock *InsertBot, BasicBlock *Preheader,
305  std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
306  ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
307  StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
308  BasicBlock *Header = L->getHeader();
309  BasicBlock *Latch = L->getLoopLatch();
310  Function *F = Header->getParent();
311  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
312  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
313  Loop *ParentLoop = L->getParentLoop();
314  NewLoopsMap NewLoops;
315  NewLoops[ParentLoop] = ParentLoop;
316  if (!CreateRemainderLoop)
317  NewLoops[L] = ParentLoop;
318 
319  // For each block in the original loop, create a new copy,
320  // and update the value map with the newly created values.
321  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
322  BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
323  NewBlocks.push_back(NewBB);
324 
325  // If we're unrolling the outermost loop, there's no remainder loop,
326  // and this block isn't in a nested loop, then the new block is not
327  // in any loop. Otherwise, add it to loopinfo.
328  if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop)
329  addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
330 
331  VMap[*BB] = NewBB;
332  if (Header == *BB) {
333  // For the first block, add a CFG connection to this newly
334  // created block.
335  InsertTop->getTerminator()->setSuccessor(0, NewBB);
336  }
337 
338  if (DT) {
339  if (Header == *BB) {
340  // The header is dominated by the preheader.
341  DT->addNewBlock(NewBB, InsertTop);
342  } else {
343  // Copy information from original loop to unrolled loop.
344  BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
345  DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
346  }
347  }
348 
349  if (Latch == *BB) {
350  // For the last block, if CreateRemainderLoop is false, create a direct
351  // jump to InsertBot. If not, create a loop back to cloned head.
352  VMap.erase((*BB)->getTerminator());
353  BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
354  BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
355  IRBuilder<> Builder(LatchBR);
356  if (!CreateRemainderLoop) {
357  Builder.CreateBr(InsertBot);
358  } else {
359  PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
360  suffix + ".iter",
361  FirstLoopBB->getFirstNonPHI());
362  Value *IdxSub =
363  Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
364  NewIdx->getName() + ".sub");
365  Value *IdxCmp =
366  Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
367  Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
368  NewIdx->addIncoming(NewIter, InsertTop);
369  NewIdx->addIncoming(IdxSub, NewBB);
370  }
371  LatchBR->eraseFromParent();
372  }
373  }
374 
375  // Change the incoming values to the ones defined in the preheader or
376  // cloned loop.
377  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
378  PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
379  if (!CreateRemainderLoop) {
380  if (UseEpilogRemainder) {
381  unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
382  NewPHI->setIncomingBlock(idx, InsertTop);
383  NewPHI->removeIncomingValue(Latch, false);
384  } else {
385  VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
386  cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
387  }
388  } else {
389  unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
390  NewPHI->setIncomingBlock(idx, InsertTop);
391  BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
392  idx = NewPHI->getBasicBlockIndex(Latch);
393  Value *InVal = NewPHI->getIncomingValue(idx);
394  NewPHI->setIncomingBlock(idx, NewLatch);
395  if (Value *V = VMap.lookup(InVal))
396  NewPHI->setIncomingValue(idx, V);
397  }
398  }
399  if (CreateRemainderLoop) {
400  Loop *NewLoop = NewLoops[L];
401  MDNode *LoopID = NewLoop->getLoopID();
402  assert(NewLoop && "L should have been cloned");
403 
404  // Only add loop metadata if the loop is not going to be completely
405  // unrolled.
406  if (UnrollRemainder)
407  return NewLoop;
408 
411  if (NewLoopID.hasValue()) {
412  NewLoop->setLoopID(NewLoopID.getValue());
413 
414  // Do not setLoopAlreadyUnrolled if loop attributes have been defined
415  // explicitly.
416  return NewLoop;
417  }
418 
419  // Add unroll disable metadata to disable future unrolling for this loop.
420  NewLoop->setLoopAlreadyUnrolled();
421  return NewLoop;
422  }
423  else
424  return nullptr;
425 }
426 
427 /// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits
428 /// is populated with all the loop exit blocks other than the LatchExit block.
429 static bool
431  BasicBlock *LatchExit, bool PreserveLCSSA,
432  bool UseEpilogRemainder) {
433 
434  // We currently have some correctness constrains in unrolling a multi-exit
435  // loop. Check for these below.
436 
437  // We rely on LCSSA form being preserved when the exit blocks are transformed.
438  if (!PreserveLCSSA)
439  return false;
441  L->getUniqueExitBlocks(Exits);
442  for (auto *BB : Exits)
443  if (BB != LatchExit)
444  OtherExits.push_back(BB);
445 
446  // TODO: Support multiple exiting blocks jumping to the `LatchExit` when
447  // UnrollRuntimeMultiExit is true. This will need updating the logic in
448  // connectEpilog/connectProlog.
449  if (!LatchExit->getSinglePredecessor()) {
450  LLVM_DEBUG(
451  dbgs() << "Bailout for multi-exit handling when latch exit has >1 "
452  "predecessor.\n");
453  return false;
454  }
455  // FIXME: We bail out of multi-exit unrolling when epilog loop is generated
456  // and L is an inner loop. This is because in presence of multiple exits, the
457  // outer loop is incorrect: we do not add the EpilogPreheader and exit to the
458  // outer loop. This is automatically handled in the prolog case, so we do not
459  // have that bug in prolog generation.
460  if (UseEpilogRemainder && L->getParentLoop())
461  return false;
462 
463  // All constraints have been satisfied.
464  return true;
465 }
466 
467 /// Returns true if we can profitably unroll the multi-exit loop L. Currently,
468 /// we return true only if UnrollRuntimeMultiExit is set to true.
470  Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
471  bool PreserveLCSSA, bool UseEpilogRemainder) {
472 
473 #if !defined(NDEBUG)
474  SmallVector<BasicBlock *, 8> OtherExitsDummyCheck;
475  assert(canSafelyUnrollMultiExitLoop(L, OtherExitsDummyCheck, LatchExit,
476  PreserveLCSSA, UseEpilogRemainder) &&
477  "Should be safe to unroll before checking profitability!");
478 #endif
479 
480  // Priority goes to UnrollRuntimeMultiExit if it's supplied.
481  if (UnrollRuntimeMultiExit.getNumOccurrences())
482  return UnrollRuntimeMultiExit;
483 
484  // The main pain point with multi-exit loop unrolling is that once unrolled,
485  // we will not be able to merge all blocks into a straight line code.
486  // There are branches within the unrolled loop that go to the OtherExits.
487  // The second point is the increase in code size, but this is true
488  // irrespective of multiple exits.
489 
490  // Note: Both the heuristics below are coarse grained. We are essentially
491  // enabling unrolling of loops that have a single side exit other than the
492  // normal LatchExit (i.e. exiting into a deoptimize block).
493  // The heuristics considered are:
494  // 1. low number of branches in the unrolled version.
495  // 2. high predictability of these extra branches.
496  // We avoid unrolling loops that have more than two exiting blocks. This
497  // limits the total number of branches in the unrolled loop to be atmost
498  // the unroll factor (since one of the exiting blocks is the latch block).
499  SmallVector<BasicBlock*, 4> ExitingBlocks;
500  L->getExitingBlocks(ExitingBlocks);
501  if (ExitingBlocks.size() > 2)
502  return false;
503 
504  // The second heuristic is that L has one exit other than the latchexit and
505  // that exit is a deoptimize block. We know that deoptimize blocks are rarely
506  // taken, which also implies the branch leading to the deoptimize block is
507  // highly predictable.
508  return (OtherExits.size() == 1 &&
509  OtherExits[0]->getTerminatingDeoptimizeCall());
510  // TODO: These can be fine-tuned further to consider code size or deopt states
511  // that are captured by the deoptimize exit block.
512  // Also, we can extend this to support more cases, if we actually
513  // know of kinds of multiexit loops that would benefit from unrolling.
514 }
515 
516 /// Insert code in the prolog/epilog code when unrolling a loop with a
517 /// run-time trip-count.
518 ///
519 /// This method assumes that the loop unroll factor is total number
520 /// of loop bodies in the loop after unrolling. (Some folks refer
521 /// to the unroll factor as the number of *extra* copies added).
522 /// We assume also that the loop unroll factor is a power-of-two. So, after
523 /// unrolling the loop, the number of loop bodies executed is 2,
524 /// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch
525 /// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
526 /// the switch instruction is generated.
527 ///
528 /// ***Prolog case***
529 /// extraiters = tripcount % loopfactor
530 /// if (extraiters == 0) jump Loop:
531 /// else jump Prol:
532 /// Prol: LoopBody;
533 /// extraiters -= 1 // Omitted if unroll factor is 2.
534 /// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.
535 /// if (tripcount < loopfactor) jump End:
536 /// Loop:
537 /// ...
538 /// End:
539 ///
540 /// ***Epilog case***
541 /// extraiters = tripcount % loopfactor
542 /// if (tripcount < loopfactor) jump LoopExit:
543 /// unroll_iters = tripcount - extraiters
544 /// Loop: LoopBody; (executes unroll_iter times);
545 /// unroll_iter -= 1
546 /// if (unroll_iter != 0) jump Loop:
547 /// LoopExit:
548 /// if (extraiters == 0) jump EpilExit:
549 /// Epil: LoopBody; (executes extraiters times)
550 /// extraiters -= 1 // Omitted if unroll factor is 2.
551 /// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
552 /// EpilExit:
553 
554 bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
555  bool AllowExpensiveTripCount,
556  bool UseEpilogRemainder,
557  bool UnrollRemainder, LoopInfo *LI,
559  AssumptionCache *AC, bool PreserveLCSSA,
560  Loop **ResultLoop) {
561  LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
562  LLVM_DEBUG(L->dump());
563  LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
564  : dbgs() << "Using prolog remainder.\n");
565 
566  // Make sure the loop is in canonical form.
567  if (!L->isLoopSimplifyForm()) {
568  LLVM_DEBUG(dbgs() << "Not in simplify form!\n");
569  return false;
570  }
571 
572  // Guaranteed by LoopSimplifyForm.
573  BasicBlock *Latch = L->getLoopLatch();
574  BasicBlock *Header = L->getHeader();
575 
576  BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
577 
578  if (!LatchBR || LatchBR->isUnconditional()) {
579  // The loop-rotate pass can be helpful to avoid this in many cases.
580  LLVM_DEBUG(
581  dbgs()
582  << "Loop latch not terminated by a conditional branch.\n");
583  return false;
584  }
585 
586  unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
587  BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
588 
589  if (L->contains(LatchExit)) {
590  // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
591  // targets of the Latch be an exit block out of the loop.
592  LLVM_DEBUG(
593  dbgs()
594  << "One of the loop latch successors must be the exit block.\n");
595  return false;
596  }
597 
598  // These are exit blocks other than the target of the latch exiting block.
599  SmallVector<BasicBlock *, 4> OtherExits;
600  bool isMultiExitUnrollingEnabled =
601  canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
602  UseEpilogRemainder) &&
603  canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
604  UseEpilogRemainder);
605  // Support only single exit and exiting block unless multi-exit loop unrolling is enabled.
606  if (!isMultiExitUnrollingEnabled &&
607  (!L->getExitingBlock() || OtherExits.size())) {
608  LLVM_DEBUG(
609  dbgs()
610  << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
611  "enabled!\n");
612  return false;
613  }
614  // Use Scalar Evolution to compute the trip count. This allows more loops to
615  // be unrolled than relying on induction var simplification.
616  if (!SE)
617  return false;
618 
619  // Only unroll loops with a computable trip count, and the trip count needs
620  // to be an int value (allowing a pointer type is a TODO item).
621  // We calculate the backedge count by using getExitCount on the Latch block,
622  // which is proven to be the only exiting block in this loop. This is same as
623  // calculating getBackedgeTakenCount on the loop (which computes SCEV for all
624  // exiting blocks).
625  const SCEV *BECountSC = SE->getExitCount(L, Latch);
626  if (isa<SCEVCouldNotCompute>(BECountSC) ||
627  !BECountSC->getType()->isIntegerTy()) {
628  LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n");
629  return false;
630  }
631 
632  unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
633 
634  // Add 1 since the backedge count doesn't include the first loop iteration.
635  const SCEV *TripCountSC =
636  SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
637  if (isa<SCEVCouldNotCompute>(TripCountSC)) {
638  LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
639  return false;
640  }
641 
642  BasicBlock *PreHeader = L->getLoopPreheader();
643  BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
644  const DataLayout &DL = Header->getModule()->getDataLayout();
645  SCEVExpander Expander(*SE, DL, "loop-unroll");
646  if (!AllowExpensiveTripCount &&
647  Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) {
648  LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
649  return false;
650  }
651 
652  // This constraint lets us deal with an overflowing trip count easily; see the
653  // comment on ModVal below.
654  if (Log2_32(Count) > BEWidth) {
655  LLVM_DEBUG(
656  dbgs()
657  << "Count failed constraint on overflow trip count calculation.\n");
658  return false;
659  }
660 
661  // Loop structure is the following:
662  //
663  // PreHeader
664  // Header
665  // ...
666  // Latch
667  // LatchExit
668 
669  BasicBlock *NewPreHeader;
670  BasicBlock *NewExit = nullptr;
671  BasicBlock *PrologExit = nullptr;
672  BasicBlock *EpilogPreHeader = nullptr;
673  BasicBlock *PrologPreHeader = nullptr;
674 
675  if (UseEpilogRemainder) {
676  // If epilog remainder
677  // Split PreHeader to insert a branch around loop for unrolling.
678  NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
679  NewPreHeader->setName(PreHeader->getName() + ".new");
680  // Split LatchExit to create phi nodes from branch above.
681  SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
682  NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI,
683  nullptr, PreserveLCSSA);
684  // NewExit gets its DebugLoc from LatchExit, which is not part of the
685  // original Loop.
686  // Fix this by setting Loop's DebugLoc to NewExit.
687  auto *NewExitTerminator = NewExit->getTerminator();
688  NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());
689  // Split NewExit to insert epilog remainder loop.
690  EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
691  EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
692  } else {
693  // If prolog remainder
694  // Split the original preheader twice to insert prolog remainder loop
695  PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
696  PrologPreHeader->setName(Header->getName() + ".prol.preheader");
697  PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
698  DT, LI);
699  PrologExit->setName(Header->getName() + ".prol.loopexit");
700  // Split PrologExit to get NewPreHeader.
701  NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
702  NewPreHeader->setName(PreHeader->getName() + ".new");
703  }
704  // Loop structure should be the following:
705  // Epilog Prolog
706  //
707  // PreHeader PreHeader
708  // *NewPreHeader *PrologPreHeader
709  // Header *PrologExit
710  // ... *NewPreHeader
711  // Latch Header
712  // *NewExit ...
713  // *EpilogPreHeader Latch
714  // LatchExit LatchExit
715 
716  // Calculate conditions for branch around loop for unrolling
717  // in epilog case and around prolog remainder loop in prolog case.
718  // Compute the number of extra iterations required, which is:
719  // extra iterations = run-time trip count % loop unroll factor
720  PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
721  Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
722  PreHeaderBR);
723  Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
724  PreHeaderBR);
725  IRBuilder<> B(PreHeaderBR);
726  Value *ModVal;
727  // Calculate ModVal = (BECount + 1) % Count.
728  // Note that TripCount is BECount + 1.
729  if (isPowerOf2_32(Count)) {
730  // When Count is power of 2 we don't BECount for epilog case, however we'll
731  // need it for a branch around unrolling loop for prolog case.
732  ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
733  // 1. There are no iterations to be run in the prolog/epilog loop.
734  // OR
735  // 2. The addition computing TripCount overflowed.
736  //
737  // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
738  // the number of iterations that remain to be run in the original loop is a
739  // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
740  // explicitly check this above).
741  } else {
742  // As (BECount + 1) can potentially unsigned overflow we count
743  // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
744  Value *ModValTmp = B.CreateURem(BECount,
745  ConstantInt::get(BECount->getType(),
746  Count));
747  Value *ModValAdd = B.CreateAdd(ModValTmp,
748  ConstantInt::get(ModValTmp->getType(), 1));
749  // At that point (BECount % Count) + 1 could be equal to Count.
750  // To handle this case we need to take mod by Count one more time.
751  ModVal = B.CreateURem(ModValAdd,
752  ConstantInt::get(BECount->getType(), Count),
753  "xtraiter");
754  }
755  Value *BranchVal =
756  UseEpilogRemainder ? B.CreateICmpULT(BECount,
757  ConstantInt::get(BECount->getType(),
758  Count - 1)) :
759  B.CreateIsNotNull(ModVal, "lcmp.mod");
760  BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
761  BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
762  // Branch to either remainder (extra iterations) loop or unrolling loop.
763  B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
764  PreHeaderBR->eraseFromParent();
765  if (DT) {
766  if (UseEpilogRemainder)
767  DT->changeImmediateDominator(NewExit, PreHeader);
768  else
769  DT->changeImmediateDominator(PrologExit, PreHeader);
770  }
771  Function *F = Header->getParent();
772  // Get an ordered list of blocks in the loop to help with the ordering of the
773  // cloned blocks in the prolog/epilog code
774  LoopBlocksDFS LoopBlocks(L);
775  LoopBlocks.perform(LI);
776 
777  //
778  // For each extra loop iteration, create a copy of the loop's basic blocks
779  // and generate a condition that branches to the copy depending on the
780  // number of 'left over' iterations.
781  //
782  std::vector<BasicBlock *> NewBlocks;
783  ValueToValueMapTy VMap;
784 
785  // For unroll factor 2 remainder loop will have 1 iterations.
786  // Do not create 1 iteration loop.
787  bool CreateRemainderLoop = (Count != 2);
788 
789  // Clone all the basic blocks in the loop. If Count is 2, we don't clone
790  // the loop, otherwise we create a cloned loop to execute the extra
791  // iterations. This function adds the appropriate CFG connections.
792  BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
793  BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
794  Loop *remainderLoop = CloneLoopBlocks(
795  L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder,
796  InsertTop, InsertBot,
797  NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
798 
799  // Insert the cloned blocks into the function.
800  F->getBasicBlockList().splice(InsertBot->getIterator(),
801  F->getBasicBlockList(),
802  NewBlocks[0]->getIterator(),
803  F->end());
804 
805  // Now the loop blocks are cloned and the other exiting blocks from the
806  // remainder are connected to the original Loop's exit blocks. The remaining
807  // work is to update the phi nodes in the original loop, and take in the
808  // values from the cloned region.
809  for (auto *BB : OtherExits) {
810  for (auto &II : *BB) {
811 
812  // Given we preserve LCSSA form, we know that the values used outside the
813  // loop will be used through these phi nodes at the exit blocks that are
814  // transformed below.
815  if (!isa<PHINode>(II))
816  break;
817  PHINode *Phi = cast<PHINode>(&II);
818  unsigned oldNumOperands = Phi->getNumIncomingValues();
819  // Add the incoming values from the remainder code to the end of the phi
820  // node.
821  for (unsigned i =0; i < oldNumOperands; i++){
822  Value *newVal = VMap.lookup(Phi->getIncomingValue(i));
823  // newVal can be a constant or derived from values outside the loop, and
824  // hence need not have a VMap value. Also, since lookup already generated
825  // a default "null" VMap entry for this value, we need to populate that
826  // VMap entry correctly, with the mapped entry being itself.
827  if (!newVal) {
828  newVal = Phi->getIncomingValue(i);
829  VMap[Phi->getIncomingValue(i)] = Phi->getIncomingValue(i);
830  }
831  Phi->addIncoming(newVal,
832  cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)]));
833  }
834  }
835 #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
836  for (BasicBlock *SuccBB : successors(BB)) {
837  assert(!(any_of(OtherExits,
838  [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) ||
839  SuccBB == LatchExit) &&
840  "Breaks the definition of dedicated exits!");
841  }
842 #endif
843  }
844 
845  // Update the immediate dominator of the exit blocks and blocks that are
846  // reachable from the exit blocks. This is needed because we now have paths
847  // from both the original loop and the remainder code reaching the exit
848  // blocks. While the IDom of these exit blocks were from the original loop,
849  // now the IDom is the preheader (which decides whether the original loop or
850  // remainder code should run).
851  if (DT && !L->getExitingBlock()) {
852  SmallVector<BasicBlock *, 16> ChildrenToUpdate;
853  // NB! We have to examine the dom children of all loop blocks, not just
854  // those which are the IDom of the exit blocks. This is because blocks
855  // reachable from the exit blocks can have their IDom as the nearest common
856  // dominator of the exit blocks.
857  for (auto *BB : L->blocks()) {
858  auto *DomNodeBB = DT->getNode(BB);
859  for (auto *DomChild : DomNodeBB->getChildren()) {
860  auto *DomChildBB = DomChild->getBlock();
861  if (!L->contains(LI->getLoopFor(DomChildBB)))
862  ChildrenToUpdate.push_back(DomChildBB);
863  }
864  }
865  for (auto *BB : ChildrenToUpdate)
866  DT->changeImmediateDominator(BB, PreHeader);
867  }
868 
869  // Loop structure should be the following:
870  // Epilog Prolog
871  //
872  // PreHeader PreHeader
873  // NewPreHeader PrologPreHeader
874  // Header PrologHeader
875  // ... ...
876  // Latch PrologLatch
877  // NewExit PrologExit
878  // EpilogPreHeader NewPreHeader
879  // EpilogHeader Header
880  // ... ...
881  // EpilogLatch Latch
882  // LatchExit LatchExit
883 
884  // Rewrite the cloned instruction operands to use the values created when the
885  // clone is created.
886  for (BasicBlock *BB : NewBlocks) {
887  for (Instruction &I : *BB) {
888  RemapInstruction(&I, VMap,
890  }
891  }
892 
893  if (UseEpilogRemainder) {
894  // Connect the epilog code to the original loop and update the
895  // PHI functions.
896  ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader,
897  EpilogPreHeader, NewPreHeader, VMap, DT, LI,
898  PreserveLCSSA);
899 
900  // Update counter in loop for unrolling.
901  // I should be multiply of Count.
902  IRBuilder<> B2(NewPreHeader->getTerminator());
903  Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
904  BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
905  B2.SetInsertPoint(LatchBR);
906  PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
907  Header->getFirstNonPHI());
908  Value *IdxSub =
909  B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
910  NewIdx->getName() + ".nsub");
911  Value *IdxCmp;
912  if (LatchBR->getSuccessor(0) == Header)
913  IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
914  else
915  IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
916  NewIdx->addIncoming(TestVal, NewPreHeader);
917  NewIdx->addIncoming(IdxSub, Latch);
918  LatchBR->setCondition(IdxCmp);
919  } else {
920  // Connect the prolog code to the original loop and update the
921  // PHI functions.
922  ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
923  NewPreHeader, VMap, DT, LI, PreserveLCSSA);
924  }
925 
926  // If this loop is nested, then the loop unroller changes the code in the any
927  // of its parent loops, so the Scalar Evolution pass needs to be run again.
928  SE->forgetTopmostLoop(L);
929 
930  // Verify that the Dom Tree is correct.
931 #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
932  if (DT)
934 #endif
935 
936  // Canonicalize to LoopSimplifyForm both original and remainder loops. We
937  // cannot rely on the LoopUnrollPass to do this because it only does
938  // canonicalization for parent/subloops and not the sibling loops.
939  if (OtherExits.size() > 0) {
940  // Generate dedicated exit blocks for the original loop, to preserve
941  // LoopSimplifyForm.
942  formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA);
943  // Generate dedicated exit blocks for the remainder loop if one exists, to
944  // preserve LoopSimplifyForm.
945  if (remainderLoop)
946  formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA);
947  }
948 
949  auto UnrollResult = LoopUnrollResult::Unmodified;
950  if (remainderLoop && UnrollRemainder) {
951  LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
952  UnrollResult =
953  UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1,
954  /*Force*/ false, /*AllowRuntime*/ false,
955  /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
956  /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
957  /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC,
958  /*ORE*/ nullptr, PreserveLCSSA);
959  }
960 
961  if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
962  *ResultLoop = remainderLoop;
963  NumRuntimeUnrolled++;
964  return true;
965 }
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:68
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional &#39;br Cond, TrueDest, FalseDest&#39; instruction.
Definition: IRBuilder.h:854
static bool canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl< BasicBlock *> &OtherExits, BasicBlock *LatchExit, bool PreserveLCSSA, bool UseEpilogRemainder)
Returns true if we can safely unroll a multi-exit/exiting loop.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:225
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA, Loop **ResultLoop=nullptr)
Insert code in the prolog/epilog code when unrolling a loop with a run-time trip-count.
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return an i1 value testing if Arg is not null.
Definition: IRBuilder.h:2116
const SCEV * getConstant(ConstantInt *V)
This class represents lattice values for constants.
Definition: AllocatorList.h:24
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1855
iterator end()
Definition: Function.h:658
The main scalar evolution driver.
This file contains the declarations for metadata subclasses.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:174
bool isHighCostExpansion(const SCEV *Expr, Loop *L, const Instruction *At=nullptr)
Return true for expressions that may incur non-trivial cost to evaluate at runtime.
A cache of @llvm.assume calls within a function.
BasicBlock * getSuccessor(unsigned i) const
STATISTIC(NumFunctions, "Total number of functions")
Metadata node.
Definition: Metadata.h:864
F(f)
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:138
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
LoopUnrollResult UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst, unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:335
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:269
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Split the edge connecting specified block.
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:134
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:371
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:690
static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector< BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI)
Create a clone of the blocks in a loop and connect them together.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:743
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1014
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:285
RPOIterator endRPO() const
Definition: LoopIterator.h:141
BlockT * getHeader() const
Definition: LoopInfo.h:100
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:239
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:163
const char *const LLVMLoopUnrollFollowupRemainder
Definition: UnrollLoop.h:43
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: ValueMap.h:171
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopInfo.cpp:817
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:145
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:73
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *PrologExit, BasicBlock *OriginalLoopLatchExit, BasicBlock *PreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
Connect the unrolling prolog code to the original loop.
The loop was fully unrolled into straight-line code.
NodeT * getBlock() const
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock *> Preds, const char *Suffix, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
static cl::opt< bool > UnrollRuntimeMultiExit("unroll-runtime-multi-exit", cl::init(false), cl::Hidden, cl::desc("Allow runtime unrolling for loops with multiple exits, when " "epilog is generated"))
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:190
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:308
void dump() const
Definition: LoopInfo.cpp:401
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:234
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:429
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
Conditional or Unconditional Branch instruction.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node&#39;s...
DomTreeNodeBase * getIDom() const
Value * getIncomingValueForBlock(const BasicBlock *BB) const
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop&#39;s loop id metadata.
Definition: LoopInfo.cpp:257
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
static bool canProfitablyUnrollMultiExitLoop(Loop *L, SmallVectorImpl< BasicBlock *> &OtherExits, BasicBlock *LatchExit, bool PreserveLCSSA, bool UseEpilogRemainder)
Returns true if we can profitably unroll the multi-exit loop L.
void splice(iterator where, iplist_impl &L2)
Definition: ilist.h:329
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1193
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
Definition: LoopIterator.h:102
self_iterator getIterator()
Definition: ilist_node.h:82
void getExitingBlocks(SmallVectorImpl< BlockT *> &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:35
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1415
size_t size() const
Definition: SmallVector.h:53
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, BasicBlock *Exit, BasicBlock *PreHeader, BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
Connect the unrolling epilog code to the original loop.
const char *const LLVMLoopUnrollFollowupAll
Definition: UnrollLoop.h:40
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
Iterator for intrusive lists based on ilist_node.
Type * getType() const
Return the LLVM type of this SCEV expression.
void setIncomingBlock(unsigned i, BasicBlock *BB)
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
Module.h This file contains the declarations for the Module class.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:622
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:125
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Value * CreateURem(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1093
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:243
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:539
const Loop * addClonedBlockToLoopInfo(BasicBlock *OriginalBB, BasicBlock *ClonedBB, LoopInfo *LI, NewLoopsMap &NewLoops)
Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary and adds a mapping from the o...
Definition: LoopUnroll.cpp:190
Store the result of a depth first search within basic blocks contained by a single loop...
Definition: LoopIterator.h:98
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM...
Definition: ValueMapper.h:251
This class uses information about analyze scalars to rewrite expressions in canonical form...
If this flag is set, the remapper ignores missing function-local entries (Argument, Instruction, BasicBlock) that are not in the value map.
Definition: ValueMapper.h:91
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
bool hasValue() const
Definition: Optional.h:167
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:193
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:215
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:311
This class represents an analyzed expression in the program.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:465
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
#define I(x, y, z)
Definition: MD5.cpp:58
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
const BasicBlockListType & getBasicBlockList() const
Get the underlying elements of the Function...
Definition: Function.h:633
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:325
bool isUnconditional() const
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1164
void setCondition(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
succ_range successors(Instruction *I)
Definition: CFG.h:264
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
Definition: LoopIterator.h:137
The loop was not modified.
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Split the specified block at the specified instruction - everything before SplitPt stays in Old and e...
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:49
void getUniqueExitBlocks(SmallVectorImpl< BlockT *> &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:100
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
const SCEV * getExitCount(const Loop *L, BasicBlock *ExitingBlock)
Get the expression for the number of loop iterations for which this loop is guaranteed not to exit vi...
void setIncomingValue(unsigned i, Value *V)
#define LLVM_DEBUG(X)
Definition: Debug.h:123
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:50
bool erase(const KeyT &Val)
Definition: ValueMap.h:197
void forgetTopmostLoop(const Loop *L)