LLVM  13.0.0git
LoopUnrollRuntime.cpp
Go to the documentation of this file.
1 //===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements some loop unrolling utilities for loops with run-time
10 // trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
11 // trip counts.
12 //
13 // The functions in this file are used to generate extra code when the
14 // run-time trip count modulo the unroll factor is not 0. When this is the
15 // case, we need to generate code to execute these 'left over' iterations.
16 //
17 // The current strategy generates an if-then-else sequence prior to the
18 // unrolled loop to execute the 'left over' iterations before or after the
19 // unrolled loop.
20 //
21 //===----------------------------------------------------------------------===//
22 
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/Statistic.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/Dominators.h"
29 #include "llvm/IR/MDBuilder.h"
30 #include "llvm/IR/Metadata.h"
31 #include "llvm/IR/Module.h"
33 #include "llvm/Support/Debug.h"
35 #include "llvm/Transforms/Utils.h"
41 #include <algorithm>
42 
43 using namespace llvm;
44 
45 #define DEBUG_TYPE "loop-unroll"
46 
47 STATISTIC(NumRuntimeUnrolled,
48  "Number of loops unrolled with run-time trip counts");
50  "unroll-runtime-multi-exit", cl::init(false), cl::Hidden,
51  cl::desc("Allow runtime unrolling for loops with multiple exits, when "
52  "epilog is generated"));
54  "unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden,
55  cl::desc("Assume the non latch exit block to be predictable"));
56 
57 /// Connect the unrolling prolog code to the original loop.
58 /// The unrolling prolog code contains code to execute the
59 /// 'extra' iterations if the run-time trip count modulo the
60 /// unroll count is non-zero.
61 ///
62 /// This function performs the following:
63 /// - Create PHI nodes at prolog end block to combine values
64 /// that exit the prolog code and jump around the prolog.
65 /// - Add a PHI operand to a PHI node at the loop exit block
66 /// for values that exit the prolog and go around the loop.
67 /// - Branch around the original loop if the trip count is less
68 /// than the unroll factor.
69 ///
70 static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
71  BasicBlock *PrologExit,
72  BasicBlock *OriginalLoopLatchExit,
73  BasicBlock *PreHeader, BasicBlock *NewPreHeader,
75  LoopInfo *LI, bool PreserveLCSSA) {
76  // Loop structure should be the following:
77  // Preheader
78  // PrologHeader
79  // ...
80  // PrologLatch
81  // PrologExit
82  // NewPreheader
83  // Header
84  // ...
85  // Latch
86  // LatchExit
87  BasicBlock *Latch = L->getLoopLatch();
88  assert(Latch && "Loop must have a latch");
89  BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
90 
91  // Create a PHI node for each outgoing value from the original loop
92  // (which means it is an outgoing value from the prolog code too).
93  // The new PHI node is inserted in the prolog end basic block.
94  // The new PHI node value is added as an operand of a PHI node in either
95  // the loop header or the loop exit block.
96  for (BasicBlock *Succ : successors(Latch)) {
97  for (PHINode &PN : Succ->phis()) {
98  // Add a new PHI node to the prolog end block and add the
99  // appropriate incoming values.
100  // TODO: This code assumes that the PrologExit (or the LatchExit block for
101  // prolog loop) contains only one predecessor from the loop, i.e. the
102  // PrologLatch. When supporting multiple-exiting block loops, we can have
103  // two or more blocks that have the LatchExit as the target in the
104  // original loop.
105  PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
106  PrologExit->getFirstNonPHI());
107  // Adding a value to the new PHI node from the original loop preheader.
108  // This is the value that skips all the prolog code.
109  if (L->contains(&PN)) {
110  // Succ is loop header.
111  NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader),
112  PreHeader);
113  } else {
114  // Succ is LatchExit.
115  NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader);
116  }
117 
118  Value *V = PN.getIncomingValueForBlock(Latch);
119  if (Instruction *I = dyn_cast<Instruction>(V)) {
120  if (L->contains(I)) {
121  V = VMap.lookup(I);
122  }
123  }
124  // Adding a value to the new PHI node from the last prolog block
125  // that was created.
126  NewPN->addIncoming(V, PrologLatch);
127 
128  // Update the existing PHI node operand with the value from the
129  // new PHI node. How this is done depends on if the existing
130  // PHI node is in the original loop block, or the exit block.
131  if (L->contains(&PN))
132  PN.setIncomingValueForBlock(NewPreHeader, NewPN);
133  else
134  PN.addIncoming(NewPN, PrologExit);
135  }
136  }
137 
138  // Make sure that created prolog loop is in simplified form
139  SmallVector<BasicBlock *, 4> PrologExitPreds;
140  Loop *PrologLoop = LI->getLoopFor(PrologLatch);
141  if (PrologLoop) {
142  for (BasicBlock *PredBB : predecessors(PrologExit))
143  if (PrologLoop->contains(PredBB))
144  PrologExitPreds.push_back(PredBB);
145 
146  SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI,
147  nullptr, PreserveLCSSA);
148  }
149 
150  // Create a branch around the original loop, which is taken if there are no
151  // iterations remaining to be executed after running the prologue.
152  Instruction *InsertPt = PrologExit->getTerminator();
153  IRBuilder<> B(InsertPt);
154 
155  assert(Count != 0 && "nonsensical Count!");
156 
157  // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
158  // This means %xtraiter is (BECount + 1) and all of the iterations of this
159  // loop were executed by the prologue. Note that if BECount <u (Count - 1)
160  // then (BECount + 1) cannot unsigned-overflow.
161  Value *BrLoopExit =
162  B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
163  // Split the exit to maintain loop canonicalization guarantees
164  SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit));
165  SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
166  nullptr, PreserveLCSSA);
167  // Add the branch to the exit block (around the unrolled loop)
168  B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
169  InsertPt->eraseFromParent();
170  if (DT)
171  DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit);
172 }
173 
174 /// Connect the unrolling epilog code to the original loop.
175 /// The unrolling epilog code contains code to execute the
176 /// 'extra' iterations if the run-time trip count modulo the
177 /// unroll count is non-zero.
178 ///
179 /// This function performs the following:
180 /// - Update PHI nodes at the unrolling loop exit and epilog loop exit
181 /// - Create PHI nodes at the unrolling loop exit to combine
182 /// values that exit the unrolling loop code and jump around it.
183 /// - Update PHI operands in the epilog loop by the new PHI nodes
184 /// - Branch around the epilog loop if extra iters (ModVal) is zero.
185 ///
186 static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
187  BasicBlock *Exit, BasicBlock *PreHeader,
188  BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
189  ValueToValueMapTy &VMap, DominatorTree *DT,
190  LoopInfo *LI, bool PreserveLCSSA) {
191  BasicBlock *Latch = L->getLoopLatch();
192  assert(Latch && "Loop must have a latch");
193  BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
194 
195  // Loop structure should be the following:
196  //
197  // PreHeader
198  // NewPreHeader
199  // Header
200  // ...
201  // Latch
202  // NewExit (PN)
203  // EpilogPreHeader
204  // EpilogHeader
205  // ...
206  // EpilogLatch
207  // Exit (EpilogPN)
208 
209  // Update PHI nodes at NewExit and Exit.
210  for (PHINode &PN : NewExit->phis()) {
211  // PN should be used in another PHI located in Exit block as
212  // Exit was split by SplitBlockPredecessors into Exit and NewExit
213  // Basicaly it should look like:
214  // NewExit:
215  // PN = PHI [I, Latch]
216  // ...
217  // Exit:
218  // EpilogPN = PHI [PN, EpilogPreHeader]
219  //
220  // There is EpilogPreHeader incoming block instead of NewExit as
221  // NewExit was spilt 1 more time to get EpilogPreHeader.
222  assert(PN.hasOneUse() && "The phi should have 1 use");
223  PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser());
224  assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
225 
226  // Add incoming PreHeader from branch around the Loop
227  PN.addIncoming(UndefValue::get(PN.getType()), PreHeader);
228 
229  Value *V = PN.getIncomingValueForBlock(Latch);
230  Instruction *I = dyn_cast<Instruction>(V);
231  if (I && L->contains(I))
232  // If value comes from an instruction in the loop add VMap value.
233  V = VMap.lookup(I);
234  // For the instruction out of the loop, constant or undefined value
235  // insert value itself.
236  EpilogPN->addIncoming(V, EpilogLatch);
237 
238  assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
239  "EpilogPN should have EpilogPreHeader incoming block");
240  // Change EpilogPreHeader incoming block to NewExit.
241  EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
242  NewExit);
243  // Now PHIs should look like:
244  // NewExit:
245  // PN = PHI [I, Latch], [undef, PreHeader]
246  // ...
247  // Exit:
248  // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
249  }
250 
251  // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
252  // Update corresponding PHI nodes in epilog loop.
253  for (BasicBlock *Succ : successors(Latch)) {
254  // Skip this as we already updated phis in exit blocks.
255  if (!L->contains(Succ))
256  continue;
257  for (PHINode &PN : Succ->phis()) {
258  // Add new PHI nodes to the loop exit block and update epilog
259  // PHIs with the new PHI values.
260  PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
261  NewExit->getFirstNonPHI());
262  // Adding a value to the new PHI node from the unrolling loop preheader.
263  NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
264  // Adding a value to the new PHI node from the unrolling loop latch.
265  NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch);
266 
267  // Update the existing PHI node operand with the value from the new PHI
268  // node. Corresponding instruction in epilog loop should be PHI.
269  PHINode *VPN = cast<PHINode>(VMap[&PN]);
270  VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN);
271  }
272  }
273 
274  Instruction *InsertPt = NewExit->getTerminator();
275  IRBuilder<> B(InsertPt);
276  Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
277  assert(Exit && "Loop must have a single exit block only");
278  // Split the epilogue exit to maintain loop canonicalization guarantees
280  SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
281  PreserveLCSSA);
282  // Add the branch to the exit block (around the unrolling loop)
283  B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
284  InsertPt->eraseFromParent();
285  if (DT)
286  DT->changeImmediateDominator(Exit, NewExit);
287 
288  // Split the main loop exit to maintain canonicalization guarantees.
289  SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
290  SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr,
291  PreserveLCSSA);
292 }
293 
294 /// Create a clone of the blocks in a loop and connect them together.
295 /// If CreateRemainderLoop is false, loop structure will not be cloned,
296 /// otherwise a new loop will be created including all cloned blocks, and the
297 /// iterator of it switches to count NewIter down to 0.
298 /// The cloned blocks should be inserted between InsertTop and InsertBot.
299 /// If loop structure is cloned InsertTop should be new preheader, InsertBot
300 /// new loop exit.
301 /// Return the new cloned loop that is created when CreateRemainderLoop is true.
302 static Loop *
303 CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
304  const bool UseEpilogRemainder, const bool UnrollRemainder,
305  BasicBlock *InsertTop,
306  BasicBlock *InsertBot, BasicBlock *Preheader,
307  std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
308  ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
309  StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
310  BasicBlock *Header = L->getHeader();
311  BasicBlock *Latch = L->getLoopLatch();
312  Function *F = Header->getParent();
313  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
314  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
315  Loop *ParentLoop = L->getParentLoop();
316  NewLoopsMap NewLoops;
317  NewLoops[ParentLoop] = ParentLoop;
318  if (!CreateRemainderLoop)
319  NewLoops[L] = ParentLoop;
320 
321  // For each block in the original loop, create a new copy,
322  // and update the value map with the newly created values.
323  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
324  BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
325  NewBlocks.push_back(NewBB);
326 
327  // If we're unrolling the outermost loop, there's no remainder loop,
328  // and this block isn't in a nested loop, then the new block is not
329  // in any loop. Otherwise, add it to loopinfo.
330  if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop)
331  addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
332 
333  VMap[*BB] = NewBB;
334  if (Header == *BB) {
335  // For the first block, add a CFG connection to this newly
336  // created block.
337  InsertTop->getTerminator()->setSuccessor(0, NewBB);
338  }
339 
340  if (DT) {
341  if (Header == *BB) {
342  // The header is dominated by the preheader.
343  DT->addNewBlock(NewBB, InsertTop);
344  } else {
345  // Copy information from original loop to unrolled loop.
346  BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
347  DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
348  }
349  }
350 
351  if (Latch == *BB) {
352  // For the last block, if CreateRemainderLoop is false, create a direct
353  // jump to InsertBot. If not, create a loop back to cloned head.
354  VMap.erase((*BB)->getTerminator());
355  BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
356  BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
357  IRBuilder<> Builder(LatchBR);
358  if (!CreateRemainderLoop) {
359  Builder.CreateBr(InsertBot);
360  } else {
361  PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
362  suffix + ".iter",
363  FirstLoopBB->getFirstNonPHI());
364  Value *IdxSub =
365  Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
366  NewIdx->getName() + ".sub");
367  Value *IdxCmp =
368  Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
369  Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
370  NewIdx->addIncoming(NewIter, InsertTop);
371  NewIdx->addIncoming(IdxSub, NewBB);
372  }
373  LatchBR->eraseFromParent();
374  }
375  }
376 
377  // Change the incoming values to the ones defined in the preheader or
378  // cloned loop.
379  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
380  PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
381  if (!CreateRemainderLoop) {
382  if (UseEpilogRemainder) {
383  unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
384  NewPHI->setIncomingBlock(idx, InsertTop);
385  NewPHI->removeIncomingValue(Latch, false);
386  } else {
387  VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
388  cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
389  }
390  } else {
391  unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
392  NewPHI->setIncomingBlock(idx, InsertTop);
393  BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
394  idx = NewPHI->getBasicBlockIndex(Latch);
395  Value *InVal = NewPHI->getIncomingValue(idx);
396  NewPHI->setIncomingBlock(idx, NewLatch);
397  if (Value *V = VMap.lookup(InVal))
398  NewPHI->setIncomingValue(idx, V);
399  }
400  }
401  if (CreateRemainderLoop) {
402  Loop *NewLoop = NewLoops[L];
403  assert(NewLoop && "L should have been cloned");
404  MDNode *LoopID = NewLoop->getLoopID();
405 
406  // Only add loop metadata if the loop is not going to be completely
407  // unrolled.
408  if (UnrollRemainder)
409  return NewLoop;
410 
413  if (NewLoopID.hasValue()) {
414  NewLoop->setLoopID(NewLoopID.getValue());
415 
416  // Do not setLoopAlreadyUnrolled if loop attributes have been defined
417  // explicitly.
418  return NewLoop;
419  }
420 
421  // Add unroll disable metadata to disable future unrolling for this loop.
422  NewLoop->setLoopAlreadyUnrolled();
423  return NewLoop;
424  }
425  else
426  return nullptr;
427 }
428 
429 /// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits
430 /// is populated with all the loop exit blocks other than the LatchExit block.
431 static bool canSafelyUnrollMultiExitLoop(Loop *L, BasicBlock *LatchExit,
432  bool PreserveLCSSA,
433  bool UseEpilogRemainder) {
434 
435  // We currently have some correctness constrains in unrolling a multi-exit
436  // loop. Check for these below.
437 
438  // We rely on LCSSA form being preserved when the exit blocks are transformed.
439  if (!PreserveLCSSA)
440  return false;
441 
442  // TODO: Support multiple exiting blocks jumping to the `LatchExit` when
443  // UnrollRuntimeMultiExit is true. This will need updating the logic in
444  // connectEpilog/connectProlog.
445  if (!LatchExit->getSinglePredecessor()) {
446  LLVM_DEBUG(
447  dbgs() << "Bailout for multi-exit handling when latch exit has >1 "
448  "predecessor.\n");
449  return false;
450  }
451  // FIXME: We bail out of multi-exit unrolling when epilog loop is generated
452  // and L is an inner loop. This is because in presence of multiple exits, the
453  // outer loop is incorrect: we do not add the EpilogPreheader and exit to the
454  // outer loop. This is automatically handled in the prolog case, so we do not
455  // have that bug in prolog generation.
456  if (UseEpilogRemainder && L->getParentLoop())
457  return false;
458 
459  // All constraints have been satisfied.
460  return true;
461 }
462 
463 /// Returns true if we can profitably unroll the multi-exit loop L. Currently,
464 /// we return true only if UnrollRuntimeMultiExit is set to true.
466  Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
467  bool PreserveLCSSA, bool UseEpilogRemainder) {
468 
469 #if !defined(NDEBUG)
470  assert(canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
471  UseEpilogRemainder) &&
472  "Should be safe to unroll before checking profitability!");
473 #endif
474 
475  // Priority goes to UnrollRuntimeMultiExit if it's supplied.
477  return UnrollRuntimeMultiExit;
478 
479  // The main pain point with multi-exit loop unrolling is that once unrolled,
480  // we will not be able to merge all blocks into a straight line code.
481  // There are branches within the unrolled loop that go to the OtherExits.
482  // The second point is the increase in code size, but this is true
483  // irrespective of multiple exits.
484 
485  // Note: Both the heuristics below are coarse grained. We are essentially
486  // enabling unrolling of loops that have a single side exit other than the
487  // normal LatchExit (i.e. exiting into a deoptimize block).
488  // The heuristics considered are:
489  // 1. low number of branches in the unrolled version.
490  // 2. high predictability of these extra branches.
491  // We avoid unrolling loops that have more than two exiting blocks. This
492  // limits the total number of branches in the unrolled loop to be atmost
493  // the unroll factor (since one of the exiting blocks is the latch block).
494  SmallVector<BasicBlock*, 4> ExitingBlocks;
495  L->getExitingBlocks(ExitingBlocks);
496  if (ExitingBlocks.size() > 2)
497  return false;
498 
499  // Allow unrolling of loops with no non latch exit blocks.
500  if (OtherExits.size() == 0)
501  return true;
502 
503  // The second heuristic is that L has one exit other than the latchexit and
504  // that exit is a deoptimize block. We know that deoptimize blocks are rarely
505  // taken, which also implies the branch leading to the deoptimize block is
506  // highly predictable. When UnrollRuntimeOtherExitPredictable is specified, we
507  // assume the other exit branch is predictable even if it has no deoptimize
508  // call.
509  return (OtherExits.size() == 1 &&
511  OtherExits[0]->getTerminatingDeoptimizeCall()));
512  // TODO: These can be fine-tuned further to consider code size or deopt states
513  // that are captured by the deoptimize exit block.
514  // Also, we can extend this to support more cases, if we actually
515  // know of kinds of multiexit loops that would benefit from unrolling.
516 }
517 
518 // Assign the maximum possible trip count as the back edge weight for the
519 // remainder loop if the original loop comes with a branch weight.
521  Loop *RemainderLoop,
522  uint64_t UnrollFactor) {
523  uint64_t TrueWeight, FalseWeight;
524  BranchInst *LatchBR =
525  cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator());
526  if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
527  uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader()
528  ? FalseWeight
529  : TrueWeight;
530  assert(UnrollFactor > 1);
531  uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight;
532  BasicBlock *Header = RemainderLoop->getHeader();
533  BasicBlock *Latch = RemainderLoop->getLoopLatch();
534  auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator());
535  unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1);
536  MDBuilder MDB(RemainderLatchBR->getContext());
537  MDNode *WeightNode =
538  HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
539  : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
540  RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
541  }
542 }
543 
544 /// Insert code in the prolog/epilog code when unrolling a loop with a
545 /// run-time trip-count.
546 ///
547 /// This method assumes that the loop unroll factor is total number
548 /// of loop bodies in the loop after unrolling. (Some folks refer
549 /// to the unroll factor as the number of *extra* copies added).
550 /// We assume also that the loop unroll factor is a power-of-two. So, after
551 /// unrolling the loop, the number of loop bodies executed is 2,
552 /// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch
553 /// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
554 /// the switch instruction is generated.
555 ///
556 /// ***Prolog case***
557 /// extraiters = tripcount % loopfactor
558 /// if (extraiters == 0) jump Loop:
559 /// else jump Prol:
560 /// Prol: LoopBody;
561 /// extraiters -= 1 // Omitted if unroll factor is 2.
562 /// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.
563 /// if (tripcount < loopfactor) jump End:
564 /// Loop:
565 /// ...
566 /// End:
567 ///
568 /// ***Epilog case***
569 /// extraiters = tripcount % loopfactor
570 /// if (tripcount < loopfactor) jump LoopExit:
571 /// unroll_iters = tripcount - extraiters
572 /// Loop: LoopBody; (executes unroll_iter times);
573 /// unroll_iter -= 1
574 /// if (unroll_iter != 0) jump Loop:
575 /// LoopExit:
576 /// if (extraiters == 0) jump EpilExit:
577 /// Epil: LoopBody; (executes extraiters times)
578 /// extraiters -= 1 // Omitted if unroll factor is 2.
579 /// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
580 /// EpilExit:
581 
583  Loop *L, unsigned Count, bool AllowExpensiveTripCount,
584  bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV,
586  const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop) {
587  LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
588  LLVM_DEBUG(L->dump());
589  LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
590  : dbgs() << "Using prolog remainder.\n");
591 
592  // Make sure the loop is in canonical form.
593  if (!L->isLoopSimplifyForm()) {
594  LLVM_DEBUG(dbgs() << "Not in simplify form!\n");
595  return false;
596  }
597 
598  // Guaranteed by LoopSimplifyForm.
599  BasicBlock *Latch = L->getLoopLatch();
600  BasicBlock *Header = L->getHeader();
601 
602  BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
603 
604  if (!LatchBR || LatchBR->isUnconditional()) {
605  // The loop-rotate pass can be helpful to avoid this in many cases.
606  LLVM_DEBUG(
607  dbgs()
608  << "Loop latch not terminated by a conditional branch.\n");
609  return false;
610  }
611 
612  unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
613  BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
614 
615  if (L->contains(LatchExit)) {
616  // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
617  // targets of the Latch be an exit block out of the loop.
618  LLVM_DEBUG(
619  dbgs()
620  << "One of the loop latch successors must be the exit block.\n");
621  return false;
622  }
623 
624  // These are exit blocks other than the target of the latch exiting block.
625  SmallVector<BasicBlock *, 4> OtherExits;
626  L->getUniqueNonLatchExitBlocks(OtherExits);
627  bool isMultiExitUnrollingEnabled =
628  canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
629  UseEpilogRemainder) &&
630  canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
631  UseEpilogRemainder);
632  // Support only single exit and exiting block unless multi-exit loop unrolling is enabled.
633  if (!isMultiExitUnrollingEnabled &&
634  (!L->getExitingBlock() || OtherExits.size())) {
635  LLVM_DEBUG(
636  dbgs()
637  << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
638  "enabled!\n");
639  return false;
640  }
641  // Use Scalar Evolution to compute the trip count. This allows more loops to
642  // be unrolled than relying on induction var simplification.
643  if (!SE)
644  return false;
645 
646  // Only unroll loops with a computable trip count, and the trip count needs
647  // to be an int value (allowing a pointer type is a TODO item).
648  // We calculate the backedge count by using getExitCount on the Latch block,
649  // which is proven to be the only exiting block in this loop. This is same as
650  // calculating getBackedgeTakenCount on the loop (which computes SCEV for all
651  // exiting blocks).
652  const SCEV *BECountSC = SE->getExitCount(L, Latch);
653  if (isa<SCEVCouldNotCompute>(BECountSC) ||
654  !BECountSC->getType()->isIntegerTy()) {
655  LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n");
656  return false;
657  }
658 
659  unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
660 
661  // Add 1 since the backedge count doesn't include the first loop iteration.
662  const SCEV *TripCountSC =
663  SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
664  if (isa<SCEVCouldNotCompute>(TripCountSC)) {
665  LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
666  return false;
667  }
668 
669  BasicBlock *PreHeader = L->getLoopPreheader();
670  BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
671  const DataLayout &DL = Header->getModule()->getDataLayout();
672  SCEVExpander Expander(*SE, DL, "loop-unroll");
673  if (!AllowExpensiveTripCount &&
674  Expander.isHighCostExpansion(TripCountSC, L, SCEVCheapExpansionBudget,
675  TTI, PreHeaderBR)) {
676  LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
677  return false;
678  }
679 
680  // This constraint lets us deal with an overflowing trip count easily; see the
681  // comment on ModVal below.
682  if (Log2_32(Count) > BEWidth) {
683  LLVM_DEBUG(
684  dbgs()
685  << "Count failed constraint on overflow trip count calculation.\n");
686  return false;
687  }
688 
689  // Loop structure is the following:
690  //
691  // PreHeader
692  // Header
693  // ...
694  // Latch
695  // LatchExit
696 
697  BasicBlock *NewPreHeader;
698  BasicBlock *NewExit = nullptr;
699  BasicBlock *PrologExit = nullptr;
700  BasicBlock *EpilogPreHeader = nullptr;
701  BasicBlock *PrologPreHeader = nullptr;
702 
703  if (UseEpilogRemainder) {
704  // If epilog remainder
705  // Split PreHeader to insert a branch around loop for unrolling.
706  NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
707  NewPreHeader->setName(PreHeader->getName() + ".new");
708  // Split LatchExit to create phi nodes from branch above.
709  SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
710  NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI,
711  nullptr, PreserveLCSSA);
712  // NewExit gets its DebugLoc from LatchExit, which is not part of the
713  // original Loop.
714  // Fix this by setting Loop's DebugLoc to NewExit.
715  auto *NewExitTerminator = NewExit->getTerminator();
716  NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());
717  // Split NewExit to insert epilog remainder loop.
718  EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
719  EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
720  } else {
721  // If prolog remainder
722  // Split the original preheader twice to insert prolog remainder loop
723  PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
724  PrologPreHeader->setName(Header->getName() + ".prol.preheader");
725  PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
726  DT, LI);
727  PrologExit->setName(Header->getName() + ".prol.loopexit");
728  // Split PrologExit to get NewPreHeader.
729  NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
730  NewPreHeader->setName(PreHeader->getName() + ".new");
731  }
732  // Loop structure should be the following:
733  // Epilog Prolog
734  //
735  // PreHeader PreHeader
736  // *NewPreHeader *PrologPreHeader
737  // Header *PrologExit
738  // ... *NewPreHeader
739  // Latch Header
740  // *NewExit ...
741  // *EpilogPreHeader Latch
742  // LatchExit LatchExit
743 
744  // Calculate conditions for branch around loop for unrolling
745  // in epilog case and around prolog remainder loop in prolog case.
746  // Compute the number of extra iterations required, which is:
747  // extra iterations = run-time trip count % loop unroll factor
748  PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
749  Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
750  PreHeaderBR);
751  Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
752  PreHeaderBR);
753  IRBuilder<> B(PreHeaderBR);
754  Value *ModVal;
755  // Calculate ModVal = (BECount + 1) % Count.
756  // Note that TripCount is BECount + 1.
757  if (isPowerOf2_32(Count)) {
758  // When Count is power of 2 we don't BECount for epilog case, however we'll
759  // need it for a branch around unrolling loop for prolog case.
760  ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
761  // 1. There are no iterations to be run in the prolog/epilog loop.
762  // OR
763  // 2. The addition computing TripCount overflowed.
764  //
765  // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
766  // the number of iterations that remain to be run in the original loop is a
767  // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
768  // explicitly check this above).
769  } else {
770  // As (BECount + 1) can potentially unsigned overflow we count
771  // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
772  Value *ModValTmp = B.CreateURem(BECount,
773  ConstantInt::get(BECount->getType(),
774  Count));
775  Value *ModValAdd = B.CreateAdd(ModValTmp,
776  ConstantInt::get(ModValTmp->getType(), 1));
777  // At that point (BECount % Count) + 1 could be equal to Count.
778  // To handle this case we need to take mod by Count one more time.
779  ModVal = B.CreateURem(ModValAdd,
780  ConstantInt::get(BECount->getType(), Count),
781  "xtraiter");
782  }
783  Value *BranchVal =
784  UseEpilogRemainder ? B.CreateICmpULT(BECount,
785  ConstantInt::get(BECount->getType(),
786  Count - 1)) :
787  B.CreateIsNotNull(ModVal, "lcmp.mod");
788  BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
789  BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
790  // Branch to either remainder (extra iterations) loop or unrolling loop.
791  B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
792  PreHeaderBR->eraseFromParent();
793  if (DT) {
794  if (UseEpilogRemainder)
795  DT->changeImmediateDominator(NewExit, PreHeader);
796  else
797  DT->changeImmediateDominator(PrologExit, PreHeader);
798  }
799  Function *F = Header->getParent();
800  // Get an ordered list of blocks in the loop to help with the ordering of the
801  // cloned blocks in the prolog/epilog code
802  LoopBlocksDFS LoopBlocks(L);
803  LoopBlocks.perform(LI);
804 
805  //
806  // For each extra loop iteration, create a copy of the loop's basic blocks
807  // and generate a condition that branches to the copy depending on the
808  // number of 'left over' iterations.
809  //
810  std::vector<BasicBlock *> NewBlocks;
811  ValueToValueMapTy VMap;
812 
813  // For unroll factor 2 remainder loop will have 1 iterations.
814  // Do not create 1 iteration loop.
815  bool CreateRemainderLoop = (Count != 2);
816 
817  // Clone all the basic blocks in the loop. If Count is 2, we don't clone
818  // the loop, otherwise we create a cloned loop to execute the extra
819  // iterations. This function adds the appropriate CFG connections.
820  BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
821  BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
822  Loop *remainderLoop = CloneLoopBlocks(
823  L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder,
824  InsertTop, InsertBot,
825  NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
826 
827  // Assign the maximum possible trip count as the back edge weight for the
828  // remainder loop if the original loop comes with a branch weight.
829  if (remainderLoop && !UnrollRemainder)
830  updateLatchBranchWeightsForRemainderLoop(L, remainderLoop, Count);
831 
832  // Insert the cloned blocks into the function.
833  F->getBasicBlockList().splice(InsertBot->getIterator(),
834  F->getBasicBlockList(),
835  NewBlocks[0]->getIterator(),
836  F->end());
837 
838  // Now the loop blocks are cloned and the other exiting blocks from the
839  // remainder are connected to the original Loop's exit blocks. The remaining
840  // work is to update the phi nodes in the original loop, and take in the
841  // values from the cloned region.
842  for (auto *BB : OtherExits) {
843  for (auto &II : *BB) {
844 
845  // Given we preserve LCSSA form, we know that the values used outside the
846  // loop will be used through these phi nodes at the exit blocks that are
847  // transformed below.
848  if (!isa<PHINode>(II))
849  break;
850  PHINode *Phi = cast<PHINode>(&II);
851  unsigned oldNumOperands = Phi->getNumIncomingValues();
852  // Add the incoming values from the remainder code to the end of the phi
853  // node.
854  for (unsigned i =0; i < oldNumOperands; i++){
855  Value *newVal = VMap.lookup(Phi->getIncomingValue(i));
856  // newVal can be a constant or derived from values outside the loop, and
857  // hence need not have a VMap value. Also, since lookup already generated
858  // a default "null" VMap entry for this value, we need to populate that
859  // VMap entry correctly, with the mapped entry being itself.
860  if (!newVal) {
861  newVal = Phi->getIncomingValue(i);
862  VMap[Phi->getIncomingValue(i)] = Phi->getIncomingValue(i);
863  }
864  Phi->addIncoming(newVal,
865  cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)]));
866  }
867  }
868 #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
869  for (BasicBlock *SuccBB : successors(BB)) {
870  assert(!(any_of(OtherExits,
871  [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) ||
872  SuccBB == LatchExit) &&
873  "Breaks the definition of dedicated exits!");
874  }
875 #endif
876  }
877 
878  // Update the immediate dominator of the exit blocks and blocks that are
879  // reachable from the exit blocks. This is needed because we now have paths
880  // from both the original loop and the remainder code reaching the exit
881  // blocks. While the IDom of these exit blocks were from the original loop,
882  // now the IDom is the preheader (which decides whether the original loop or
883  // remainder code should run).
884  if (DT && !L->getExitingBlock()) {
885  SmallVector<BasicBlock *, 16> ChildrenToUpdate;
886  // NB! We have to examine the dom children of all loop blocks, not just
887  // those which are the IDom of the exit blocks. This is because blocks
888  // reachable from the exit blocks can have their IDom as the nearest common
889  // dominator of the exit blocks.
890  for (auto *BB : L->blocks()) {
891  auto *DomNodeBB = DT->getNode(BB);
892  for (auto *DomChild : DomNodeBB->children()) {
893  auto *DomChildBB = DomChild->getBlock();
894  if (!L->contains(LI->getLoopFor(DomChildBB)))
895  ChildrenToUpdate.push_back(DomChildBB);
896  }
897  }
898  for (auto *BB : ChildrenToUpdate)
899  DT->changeImmediateDominator(BB, PreHeader);
900  }
901 
902  // Loop structure should be the following:
903  // Epilog Prolog
904  //
905  // PreHeader PreHeader
906  // NewPreHeader PrologPreHeader
907  // Header PrologHeader
908  // ... ...
909  // Latch PrologLatch
910  // NewExit PrologExit
911  // EpilogPreHeader NewPreHeader
912  // EpilogHeader Header
913  // ... ...
914  // EpilogLatch Latch
915  // LatchExit LatchExit
916 
917  // Rewrite the cloned instruction operands to use the values created when the
918  // clone is created.
919  for (BasicBlock *BB : NewBlocks) {
920  for (Instruction &I : *BB) {
921  RemapInstruction(&I, VMap,
923  }
924  }
925 
926  if (UseEpilogRemainder) {
927  // Connect the epilog code to the original loop and update the
928  // PHI functions.
929  ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader,
930  EpilogPreHeader, NewPreHeader, VMap, DT, LI,
931  PreserveLCSSA);
932 
933  // Update counter in loop for unrolling.
934  // I should be multiply of Count.
935  IRBuilder<> B2(NewPreHeader->getTerminator());
936  Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
937  BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
938  B2.SetInsertPoint(LatchBR);
939  PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
940  Header->getFirstNonPHI());
941  Value *IdxSub =
942  B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
943  NewIdx->getName() + ".nsub");
944  Value *IdxCmp;
945  if (LatchBR->getSuccessor(0) == Header)
946  IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
947  else
948  IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
949  NewIdx->addIncoming(TestVal, NewPreHeader);
950  NewIdx->addIncoming(IdxSub, Latch);
951  LatchBR->setCondition(IdxCmp);
952  } else {
953  // Connect the prolog code to the original loop and update the
954  // PHI functions.
955  ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
956  NewPreHeader, VMap, DT, LI, PreserveLCSSA);
957  }
958 
959  // If this loop is nested, then the loop unroller changes the code in the any
960  // of its parent loops, so the Scalar Evolution pass needs to be run again.
961  SE->forgetTopmostLoop(L);
962 
963  // Verify that the Dom Tree is correct.
964 #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
965  if (DT)
967 #endif
968 
969  // Canonicalize to LoopSimplifyForm both original and remainder loops. We
970  // cannot rely on the LoopUnrollPass to do this because it only does
971  // canonicalization for parent/subloops and not the sibling loops.
972  if (OtherExits.size() > 0) {
973  // Generate dedicated exit blocks for the original loop, to preserve
974  // LoopSimplifyForm.
975  formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA);
976  // Generate dedicated exit blocks for the remainder loop if one exists, to
977  // preserve LoopSimplifyForm.
978  if (remainderLoop)
979  formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA);
980  }
981 
982  auto UnrollResult = LoopUnrollResult::Unmodified;
983  if (remainderLoop && UnrollRemainder) {
984  LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
985  UnrollResult =
986  UnrollLoop(remainderLoop,
987  {/*Count*/ Count - 1, /*TripCount*/ Count - 1,
988  /*Force*/ false, /*AllowRuntime*/ false,
989  /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
990  /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
991  /*PeelCount*/ 0, /*UnrollRemainder*/ false, ForgetAllSCEV},
992  LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA);
993  }
994 
995  if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
996  *ResultLoop = remainderLoop;
997  NumRuntimeUnrolled++;
998  return true;
999 }
UnrollRuntimeMultiExit
static cl::opt< bool > UnrollRuntimeMultiExit("unroll-runtime-multi-exit", cl::init(false), cl::Hidden, cl::desc("Allow runtime unrolling for loops with multiple exits, when " "epilog is generated"))
i
i
Definition: README.txt:29
llvm::predecessors
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:127
llvm::IRBuilderBase::SetInsertPoint
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:184
llvm
Definition: AllocatorList.h:23
llvm::addClonedBlockToLoopInfo
const Loop * addClonedBlockToLoopInfo(BasicBlock *OriginalBB, BasicBlock *ClonedBB, LoopInfo *LI, NewLoopsMap &NewLoops)
Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary and adds a mapping from the o...
Definition: LoopUnroll.cpp:137
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
Metadata.h
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
ScalarEvolutionExpander.h
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:63
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::LLVMLoopUnrollFollowupRemainder
const char *const LLVMLoopUnrollFollowupRemainder
Definition: UnrollLoop.h:44
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::IRBuilder<>
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:443
llvm::PHINode::removeIncomingValue
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
Definition: Instructions.cpp:109
llvm::makeFollowupLoopID
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:328
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::SCEVExpander::expandCodeFor
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
Definition: ScalarEvolutionExpander.h:270
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
ConnectProlog
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *PrologExit, BasicBlock *OriginalLoopLatchExit, BasicBlock *PreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
Connect the unrolling prolog code to the original loop.
Definition: LoopUnrollRuntime.cpp:70
ScalarEvolution.h
Module.h
llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition: GenericDomTree.h:351
llvm::Optional
Definition: APInt.h:34
llvm::BasicBlock::getSinglePredecessor
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:264
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
llvm::DomTreeNodeBase::getIDom
DomTreeNodeBase * getIDom() const
Definition: GenericDomTree.h:89
llvm::successors
succ_range successors(Instruction *I)
Definition: CFG.h:262
llvm::JumpTable::Full
@ Full
Definition: TargetOptions.h:50
llvm::PHINode::setIncomingValue
void setIncomingValue(unsigned i, Value *V)
Definition: Instructions.h:2669
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::LoopBlocksDFS::beginRPO
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
Definition: LoopIterator.h:136
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:286
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
CommandLine.h
canSafelyUnrollMultiExitLoop
static bool canSafelyUnrollMultiExitLoop(Loop *L, BasicBlock *LatchExit, bool PreserveLCSSA, bool UseEpilogRemainder)
Returns true if we can safely unroll a multi-exit/exiting loop.
Definition: LoopUnrollRuntime.cpp:431
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::RemapInstruction
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:253
llvm::Loop::setLoopAlreadyUnrolled
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop's loop id metadata.
Definition: LoopInfo.cpp:532
llvm::SCEVExpander::isHighCostExpansion
bool isHighCostExpansion(const SCEV *Expr, Loop *L, unsigned Budget, const TargetTransformInfo *TTI, const Instruction *At)
Return true for expressions that can't be evaluated at runtime within given Budget.
Definition: ScalarEvolutionExpander.h:233
llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
Definition: Instructions.h:2666
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::MDBuilder::createBranchWeights
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:596
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2755
llvm::Instruction
Definition: Instruction.h:45
llvm::LoopBlocksDFS::endRPO
RPOIterator endRPO() const
Definition: LoopIterator.h:140
UnrollRuntimeOtherExitPredictable
static cl::opt< bool > UnrollRuntimeOtherExitPredictable("unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden, cl::desc("Assume the non latch exit block to be predictable"))
llvm::BranchInst::setCondition
void setCondition(Value *V)
Definition: Instructions.h:3091
MDBuilder.h
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:354
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:371
llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:34
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1770
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:401
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:885
LoopUtils.h
SmallPtrSet.h
llvm::SplitBlockPredecessors
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
Definition: BasicBlockUtils.cpp:1154
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:144
llvm::DominatorTreeBase::changeImmediateDominator
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
Definition: GenericDomTree.h:655
llvm::LoopBase::getExitingBlock
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:212
Utils.h
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2662
llvm::Instruction::extractProfMetadata
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
Definition: Metadata.cpp:1390
llvm::RF_NoModuleLevelChanges
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:72
LoopIterator.h
llvm::LoopUnrollResult::FullyUnrolled
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
llvm::LLVMLoopUnrollFollowupAll
const char *const LLVMLoopUnrollFollowupAll
Definition: UnrollLoop.h:41
llvm::LoopBlocksDFS::RPOIterator
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
Definition: LoopIterator.h:101
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:202
llvm::LoopBlocksDFS
Store the result of a depth first search within basic blocks contained by a single loop.
Definition: LoopIterator.h:97
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:471
llvm::CloneBasicBlock
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
Definition: CloneFunction.cpp:43
BasicBlock.h
llvm::cl::opt< bool >
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
llvm::UnrollLoop
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:284
llvm::DomTreeNodeBase::getBlock
NodeT * getBlock() const
Definition: GenericDomTree.h:88
llvm::PHINode::setIncomingBlock
void setIncomingBlock(unsigned i, BasicBlock *BB)
Definition: Instructions.h:2705
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2720
llvm::ValueMap::erase
bool erase(const KeyT &Val)
Definition: ValueMap.h:191
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:964
I
#define I(x, y, z)
Definition: MD5.cpp:59
Cloning.h
llvm::IRBuilderBase::CreateIsNotNull
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return an i1 value testing if Arg is not null.
Definition: IRBuilder.h:2487
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
llvm::PHINode::getBasicBlockIndex
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
Definition: Instructions.h:2748
llvm::SCEVCheapExpansionBudget
cl::opt< unsigned > SCEVCheapExpansionBudget
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:362
llvm::LoopBlocksDFS::perform
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopInfo.cpp:1142
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::PHINode::setIncomingValueForBlock
void setIncomingValueForBlock(const BasicBlock *BB, Value *V)
Set every incoming value(s) for block BB to V.
Definition: Instructions.h:2762
updateLatchBranchWeightsForRemainderLoop
static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop, Loop *RemainderLoop, uint64_t UnrollFactor)
Definition: LoopUnrollRuntime.cpp:520
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:163
llvm::ScalarEvolution::forgetTopmostLoop
void forgetTopmostLoop(const Loop *L)
Definition: ScalarEvolution.cpp:7221
llvm::MDNode
Metadata node.
Definition: Metadata.h:897
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:649
llvm::BranchInst::isUnconditional
bool isUnconditional() const
Definition: Instructions.h:3083
llvm::UnrollRuntimeLoopRemainder
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop=nullptr)
Insert code in the prolog/epilog code when unrolling a loop with a run-time trip-count.
Definition: LoopUnrollRuntime.cpp:582
llvm::LoopInfo
Definition: LoopInfo.h:1080
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1512
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::Instruction::setSuccessor
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
Definition: Instruction.cpp:760
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:444
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:299
llvm::ValueMap< const Value *, WeakTrackingVH >
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
CloneLoopBlocks
static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector< BasicBlock * > &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI)
Create a clone of the blocks in a loop and connect them together.
Definition: LoopUnrollRuntime.cpp:303
llvm::LoopBase::getUniqueNonLatchExitBlocks
void getUniqueNonLatchExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop except successors from Latch block are not considered...
Definition: LoopInfoImpl.h:129
llvm::DominatorTreeBase::addNewBlock
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
Definition: GenericDomTree.h:619
llvm::RF_IgnoreMissingLocals
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:90
llvm::SplitEdge
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
Definition: BasicBlockUtils.cpp:501
llvm::formDedicatedExitBlocks
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:63
llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:520
llvm::IRBuilderBase::CreateIsNull
Value * CreateIsNull(Value *Arg, const Twine &Name="")
Return an i1 value testing if Arg is null.
Definition: IRBuilder.h:2481
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2612
llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:496
canProfitablyUnrollMultiExitLoop
static bool canProfitablyUnrollMultiExitLoop(Loop *L, SmallVectorImpl< BasicBlock * > &OtherExits, BasicBlock *LatchExit, bool PreserveLCSSA, bool UseEpilogRemainder)
Returns true if we can profitably unroll the multi-exit loop L.
Definition: LoopUnrollRuntime.cpp:465
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::Loop::dump
void dump() const
Definition: LoopInfo.cpp:662
llvm::MDBuilder
Definition: MDBuilder.h:35
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:365
Dominators.h
UnrollLoop.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition: Instructions.h:2686
llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition: GenericDomTree.h:802
llvm::PHINode
Definition: Instructions.h:2572
llvm::SmallVectorImpl< BasicBlock * >
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:397
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:379
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2300
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::ValueMap::lookup
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: ValueMap.h:165
llvm::IRBuilderBase::CreateSub
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1207
llvm::cl::desc
Definition: CommandLine.h:411
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3005
llvm::LoopUnrollResult::Unmodified
@ Unmodified
The loop was not modified.
raw_ostream.h
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:820
ConnectEpilog
static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, BasicBlock *Exit, BasicBlock *PreHeader, BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
Connect the unrolling epilog code to the original loop.
Definition: LoopUnrollRuntime.cpp:186
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::ScalarEvolution::getExitCount
const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
Definition: ScalarEvolution.cpp:6968
Debug.h
llvm::Optional::getValue
constexpr const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:280
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3098