LLVM  13.0.0git
LoopUnroll.cpp
Go to the documentation of this file.
1 //===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements some loop unrolling utilities. It does not define any
10 // actual pass or policy, but provides a single function to perform loop
11 // unrolling.
12 //
13 // The process of unrolling can produce extraneous basic blocks linked with
14 // unconditional branches. This will be corrected in the future.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/Optional.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/Twine.h"
32 #include "llvm/Analysis/LoopInfo.h"
36 #include "llvm/IR/BasicBlock.h"
37 #include "llvm/IR/CFG.h"
38 #include "llvm/IR/Constants.h"
40 #include "llvm/IR/DebugLoc.h"
41 #include "llvm/IR/DiagnosticInfo.h"
42 #include "llvm/IR/Dominators.h"
43 #include "llvm/IR/Function.h"
44 #include "llvm/IR/Instruction.h"
45 #include "llvm/IR/Instructions.h"
46 #include "llvm/IR/IntrinsicInst.h"
47 #include "llvm/IR/Metadata.h"
48 #include "llvm/IR/Module.h"
49 #include "llvm/IR/Use.h"
50 #include "llvm/IR/User.h"
51 #include "llvm/IR/ValueHandle.h"
52 #include "llvm/IR/ValueMap.h"
53 #include "llvm/Support/Casting.h"
55 #include "llvm/Support/Debug.h"
68 #include <algorithm>
69 #include <assert.h>
70 #include <type_traits>
71 #include <vector>
72 
73 namespace llvm {
74 class DataLayout;
75 class Value;
76 } // namespace llvm
77 
78 using namespace llvm;
79 
80 #define DEBUG_TYPE "loop-unroll"
81 
82 // TODO: Should these be here or in LoopUnroll?
83 STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
84 STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
85 STATISTIC(NumUnrolledNotLatch, "Number of loops unrolled without a conditional "
86  "latch (completely or otherwise)");
87 
88 static cl::opt<bool>
89 UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
90  cl::desc("Allow runtime unrolled loops to be unrolled "
91  "with epilog instead of prolog."));
92 
93 static cl::opt<bool>
94 UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
95  cl::desc("Verify domtree after unrolling"),
96 #ifdef EXPENSIVE_CHECKS
97  cl::init(true)
98 #else
99  cl::init(false)
100 #endif
101  );
102 
103 /// Check if unrolling created a situation where we need to insert phi nodes to
104 /// preserve LCSSA form.
105 /// \param Blocks is a vector of basic blocks representing unrolled loop.
106 /// \param L is the outer loop.
107 /// It's possible that some of the blocks are in L, and some are not. In this
108 /// case, if there is a use is outside L, and definition is inside L, we need to
109 /// insert a phi-node, otherwise LCSSA will be broken.
110 /// The function is just a helper function for llvm::UnrollLoop that returns
111 /// true if this situation occurs, indicating that LCSSA needs to be fixed.
113  const std::vector<BasicBlock *> &Blocks,
114  LoopInfo *LI) {
115  for (BasicBlock *BB : Blocks) {
116  if (LI->getLoopFor(BB) == L)
117  continue;
118  for (Instruction &I : *BB) {
119  for (Use &U : I.operands()) {
120  if (const auto *Def = dyn_cast<Instruction>(U)) {
121  Loop *DefLoop = LI->getLoopFor(Def->getParent());
122  if (!DefLoop)
123  continue;
124  if (DefLoop->contains(L))
125  return true;
126  }
127  }
128  }
129  }
130  return false;
131 }
132 
133 /// Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary
134 /// and adds a mapping from the original loop to the new loop to NewLoops.
135 /// Returns nullptr if no new loop was created and a pointer to the
136 /// original loop OriginalBB was part of otherwise.
138  BasicBlock *ClonedBB, LoopInfo *LI,
139  NewLoopsMap &NewLoops) {
140  // Figure out which loop New is in.
141  const Loop *OldLoop = LI->getLoopFor(OriginalBB);
142  assert(OldLoop && "Should (at least) be in the loop being unrolled!");
143 
144  Loop *&NewLoop = NewLoops[OldLoop];
145  if (!NewLoop) {
146  // Found a new sub-loop.
147  assert(OriginalBB == OldLoop->getHeader() &&
148  "Header should be first in RPO");
149 
150  NewLoop = LI->AllocateLoop();
151  Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop());
152 
153  if (NewLoopParent)
154  NewLoopParent->addChildLoop(NewLoop);
155  else
156  LI->addTopLevelLoop(NewLoop);
157 
158  NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
159  return OldLoop;
160  } else {
161  NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
162  return nullptr;
163  }
164 }
165 
166 /// The function chooses which type of unroll (epilog or prolog) is more
167 /// profitabale.
168 /// Epilog unroll is more profitable when there is PHI that starts from
169 /// constant. In this case epilog will leave PHI start from constant,
170 /// but prolog will convert it to non-constant.
171 ///
172 /// loop:
173 /// PN = PHI [I, Latch], [CI, PreHeader]
174 /// I = foo(PN)
175 /// ...
176 ///
177 /// Epilog unroll case.
178 /// loop:
179 /// PN = PHI [I2, Latch], [CI, PreHeader]
180 /// I1 = foo(PN)
181 /// I2 = foo(I1)
182 /// ...
183 /// Prolog unroll case.
184 /// NewPN = PHI [PrologI, Prolog], [CI, PreHeader]
185 /// loop:
186 /// PN = PHI [I2, Latch], [NewPN, PreHeader]
187 /// I1 = foo(PN)
188 /// I2 = foo(I1)
189 /// ...
190 ///
191 static bool isEpilogProfitable(Loop *L) {
192  BasicBlock *PreHeader = L->getLoopPreheader();
193  BasicBlock *Header = L->getHeader();
194  assert(PreHeader && Header);
195  for (const PHINode &PN : Header->phis()) {
196  if (isa<ConstantInt>(PN.getIncomingValueForBlock(PreHeader)))
197  return true;
198  }
199  return false;
200 }
201 
202 /// Perform some cleanup and simplifications on loops after unrolling. It is
203 /// useful to simplify the IV's in the new loop, as well as do a quick
204 /// simplify/dce pass of the instructions.
205 void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
207  AssumptionCache *AC,
208  const TargetTransformInfo *TTI) {
209  // Simplify any new induction variables in the partially unrolled loop.
210  if (SE && SimplifyIVs) {
212  simplifyLoopIVs(L, SE, DT, LI, TTI, DeadInsts);
213 
214  // Aggressively clean up dead instructions that simplifyLoopIVs already
215  // identified. Any remaining should be cleaned up below.
217  }
218 
219  // At this point, the code is well formed. Perform constprop, instsimplify,
220  // and dce.
221  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
223  for (BasicBlock *BB : L->getBlocks()) {
224  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
225  Instruction *Inst = &*I++;
226  if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC}))
227  if (LI->replacementPreservesLCSSAForm(Inst, V))
228  Inst->replaceAllUsesWith(V);
229  if (isInstructionTriviallyDead(Inst))
230  DeadInsts.emplace_back(Inst);
231  }
232  // We can't do recursive deletion until we're done iterating, as we might
233  // have a phi which (potentially indirectly) uses instructions later in
234  // the block we're iterating through.
236  }
237 }
238 
239 /// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling
240 /// can only fail when the loop's latch block is not terminated by a conditional
241 /// branch instruction. However, if the trip count (and multiple) are not known,
242 /// loop unrolling will mostly produce more code that is no faster.
243 ///
244 /// TripCount is the upper bound of the iteration on which control exits
245 /// LatchBlock. Control may exit the loop prior to TripCount iterations either
246 /// via an early branch in other loop block or via LatchBlock terminator. This
247 /// is relaxed from the general definition of trip count which is the number of
248 /// times the loop header executes. Note that UnrollLoop assumes that the loop
249 /// counter test is in LatchBlock in order to remove unnecesssary instances of
250 /// the test. If control can exit the loop from the LatchBlock's terminator
251 /// prior to TripCount iterations, flag PreserveCondBr needs to be set.
252 ///
253 /// PreserveCondBr indicates whether the conditional branch of the LatchBlock
254 /// needs to be preserved. It is needed when we use trip count upper bound to
255 /// fully unroll the loop. If PreserveOnlyFirst is also set then only the first
256 /// conditional branch needs to be preserved.
257 ///
258 /// Similarly, TripMultiple divides the number of times that the LatchBlock may
259 /// execute without exiting the loop.
260 ///
261 /// If AllowRuntime is true then UnrollLoop will consider unrolling loops that
262 /// have a runtime (i.e. not compile time constant) trip count. Unrolling these
263 /// loops require a unroll "prologue" that runs "RuntimeTripCount % Count"
264 /// iterations before branching into the unrolled loop. UnrollLoop will not
265 /// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
266 /// AllowExpensiveTripCount is false.
267 ///
268 /// If we want to perform PGO-based loop peeling, PeelCount is set to the
269 /// number of iterations we want to peel off.
270 ///
271 /// The LoopInfo Analysis that is passed will be kept consistent.
272 ///
273 /// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
274 /// DominatorTree if they are non-null.
275 ///
276 /// If RemainderLoop is non-null, it will receive the remainder loop (if
277 /// required and not fully unrolled).
280  AssumptionCache *AC,
281  const TargetTransformInfo *TTI,
283  bool PreserveLCSSA, Loop **RemainderLoop) {
284 
285  if (!L->getLoopPreheader()) {
286  LLVM_DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
288  }
289 
290  if (!L->getLoopLatch()) {
291  LLVM_DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n");
293  }
294 
295  // Loops with indirectbr cannot be cloned.
296  if (!L->isSafeToClone()) {
297  LLVM_DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n");
299  }
300 
301  if (L->getHeader()->hasAddressTaken()) {
302  // The loop-rotate pass can be helpful to avoid this in many cases.
303  LLVM_DEBUG(
304  dbgs() << " Won't unroll loop: address of header block is taken.\n");
306  }
307 
308  if (ULO.TripCount != 0)
309  LLVM_DEBUG(dbgs() << " Trip Count = " << ULO.TripCount << "\n");
310  if (ULO.TripMultiple != 1)
311  LLVM_DEBUG(dbgs() << " Trip Multiple = " << ULO.TripMultiple << "\n");
312 
313  // Effectively "DCE" unrolled iterations that are beyond the tripcount
314  // and will never be executed.
315  if (ULO.TripCount != 0 && ULO.Count > ULO.TripCount)
316  ULO.Count = ULO.TripCount;
317 
318  // Don't enter the unroll code if there is nothing to do.
319  if (ULO.TripCount == 0 && ULO.Count < 2 && ULO.PeelCount == 0) {
320  LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
322  }
323 
324  assert(ULO.Count > 0);
325  assert(ULO.TripMultiple > 0);
326  assert(ULO.TripCount == 0 || ULO.TripCount % ULO.TripMultiple == 0);
327 
328  // Are we eliminating the loop control altogether?
329  bool CompletelyUnroll = ULO.Count == ULO.TripCount;
330 
331  // We assume a run-time trip count if the compiler cannot
332  // figure out the loop trip count and the unroll-runtime
333  // flag is specified.
334  bool RuntimeTripCount =
335  (ULO.TripCount == 0 && ULO.Count > 0 && ULO.AllowRuntime);
336 
337  assert((!RuntimeTripCount || !ULO.PeelCount) &&
338  "Did not expect runtime trip-count unrolling "
339  "and peeling for the same loop");
340 
341  bool Peeled = false;
342  if (ULO.PeelCount) {
343  Peeled = peelLoop(L, ULO.PeelCount, LI, SE, DT, AC, PreserveLCSSA);
344 
345  // Successful peeling may result in a change in the loop preheader/trip
346  // counts. If we later unroll the loop, we want these to be updated.
347  if (Peeled) {
348  // According to our guards and profitability checks the only
349  // meaningful exit should be latch block. Other exits go to deopt,
350  // so we do not worry about them.
351  BasicBlock *ExitingBlock = L->getLoopLatch();
352  assert(ExitingBlock && "Loop without exiting block?");
353  assert(L->isLoopExiting(ExitingBlock) && "Latch is not exiting?");
354  ULO.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
355  ULO.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
356  }
357  }
358 
359  // All these values should be taken only after peeling because they might have
360  // changed.
361  BasicBlock *Preheader = L->getLoopPreheader();
362  BasicBlock *Header = L->getHeader();
363  BasicBlock *LatchBlock = L->getLoopLatch();
364  SmallVector<BasicBlock *, 4> ExitBlocks;
365  L->getExitBlocks(ExitBlocks);
366  std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks();
367 
368  // Go through all exits of L and see if there are any phi-nodes there. We just
369  // conservatively assume that they're inserted to preserve LCSSA form, which
370  // means that complete unrolling might break this form. We need to either fix
371  // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For
372  // now we just recompute LCSSA for the outer loop, but it should be possible
373  // to fix it in-place.
374  bool NeedToFixLCSSA =
375  PreserveLCSSA && CompletelyUnroll &&
376  any_of(ExitBlocks,
377  [](const BasicBlock *BB) { return isa<PHINode>(BB->begin()); });
378 
379  // The current loop unroll pass can unroll loops that have
380  // (1) single latch; and
381  // (2a) latch is unconditional; or
382  // (2b) latch is conditional and is an exiting block
383  // FIXME: The implementation can be extended to work with more complicated
384  // cases, e.g. loops with multiple latches.
385  BranchInst *LatchBI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
386 
387  // A conditional branch which exits the loop, which can be optimized to an
388  // unconditional branch in the unrolled loop in some cases.
389  BranchInst *ExitingBI = nullptr;
390  bool LatchIsExiting = L->isLoopExiting(LatchBlock);
391  if (LatchIsExiting)
392  ExitingBI = LatchBI;
393  else if (BasicBlock *ExitingBlock = L->getExitingBlock())
394  ExitingBI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
395  if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) {
396  // If the peeling guard is changed this assert may be relaxed or even
397  // deleted.
398  assert(!Peeled && "Peeling guard changed!");
399  LLVM_DEBUG(
400  dbgs() << "Can't unroll; a conditional latch must exit the loop");
402  }
403  LLVM_DEBUG({
404  if (ExitingBI)
405  dbgs() << " Exiting Block = " << ExitingBI->getParent()->getName()
406  << "\n";
407  else
408  dbgs() << " No single exiting block\n";
409  });
410 
411  // Loops containing convergent instructions must have a count that divides
412  // their TripMultiple.
413  LLVM_DEBUG(
414  {
415  bool HasConvergent = false;
416  for (auto &BB : L->blocks())
417  for (auto &I : *BB)
418  if (auto *CB = dyn_cast<CallBase>(&I))
419  HasConvergent |= CB->isConvergent();
420  assert((!HasConvergent || ULO.TripMultiple % ULO.Count == 0) &&
421  "Unroll count must divide trip multiple if loop contains a "
422  "convergent operation.");
423  });
424 
425  bool EpilogProfitability =
427  : isEpilogProfitable(L);
428 
429  if (RuntimeTripCount && ULO.TripMultiple % ULO.Count != 0 &&
431  EpilogProfitability, ULO.UnrollRemainder,
432  ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI,
433  PreserveLCSSA, RemainderLoop)) {
434  if (ULO.Force)
435  RuntimeTripCount = false;
436  else {
437  LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be "
438  "generated when assuming runtime trip count\n");
440  }
441  }
442 
443  // If we know the trip count, we know the multiple...
444  unsigned BreakoutTrip = 0;
445  if (ULO.TripCount != 0) {
446  BreakoutTrip = ULO.TripCount % ULO.Count;
447  ULO.TripMultiple = 0;
448  } else {
449  // Figure out what multiple to use.
450  BreakoutTrip = ULO.TripMultiple =
451  (unsigned)GreatestCommonDivisor64(ULO.Count, ULO.TripMultiple);
452  }
453 
454  using namespace ore;
455  // Report the unrolling decision.
456  if (CompletelyUnroll) {
457  LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
458  << " with trip count " << ULO.TripCount << "!\n");
459  if (ORE)
460  ORE->emit([&]() {
461  return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
462  L->getHeader())
463  << "completely unrolled loop with "
464  << NV("UnrollCount", ULO.TripCount) << " iterations";
465  });
466  } else if (ULO.PeelCount) {
467  LLVM_DEBUG(dbgs() << "PEELING loop %" << Header->getName()
468  << " with iteration count " << ULO.PeelCount << "!\n");
469  if (ORE)
470  ORE->emit([&]() {
471  return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
472  L->getHeader())
473  << " peeled loop by " << NV("PeelCount", ULO.PeelCount)
474  << " iterations";
475  });
476  } else {
477  auto DiagBuilder = [&]() {
478  OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
479  L->getHeader());
480  return Diag << "unrolled loop by a factor of "
481  << NV("UnrollCount", ULO.Count);
482  };
483 
484  LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by "
485  << ULO.Count);
486  if (ULO.TripMultiple == 0 || BreakoutTrip != ULO.TripMultiple) {
487  LLVM_DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
488  if (ORE)
489  ORE->emit([&]() {
490  return DiagBuilder() << " with a breakout at trip "
491  << NV("BreakoutTrip", BreakoutTrip);
492  });
493  } else if (ULO.TripMultiple != 1) {
494  LLVM_DEBUG(dbgs() << " with " << ULO.TripMultiple << " trips per branch");
495  if (ORE)
496  ORE->emit([&]() {
497  return DiagBuilder()
498  << " with " << NV("TripMultiple", ULO.TripMultiple)
499  << " trips per branch";
500  });
501  } else if (RuntimeTripCount) {
502  LLVM_DEBUG(dbgs() << " with run-time trip count");
503  if (ORE)
504  ORE->emit(
505  [&]() { return DiagBuilder() << " with run-time trip count"; });
506  }
507  LLVM_DEBUG(dbgs() << "!\n");
508  }
509 
510  // We are going to make changes to this loop. SCEV may be keeping cached info
511  // about it, in particular about backedge taken count. The changes we make
512  // are guaranteed to invalidate this information for our loop. It is tempting
513  // to only invalidate the loop being unrolled, but it is incorrect as long as
514  // all exiting branches from all inner loops have impact on the outer loops,
515  // and if something changes inside them then any of outer loops may also
516  // change. When we forget outermost loop, we also forget all contained loops
517  // and this is what we need here.
518  if (SE) {
519  if (ULO.ForgetAllSCEV)
520  SE->forgetAllLoops();
521  else
522  SE->forgetTopmostLoop(L);
523  }
524 
525  if (!LatchIsExiting)
526  ++NumUnrolledNotLatch;
527  Optional<bool> ContinueOnTrue = None;
528  BasicBlock *LoopExit = nullptr;
529  if (ExitingBI) {
530  ContinueOnTrue = L->contains(ExitingBI->getSuccessor(0));
531  LoopExit = ExitingBI->getSuccessor(*ContinueOnTrue);
532  }
533 
534  // For the first iteration of the loop, we should use the precloned values for
535  // PHI nodes. Insert associations now.
536  ValueToValueMapTy LastValueMap;
537  std::vector<PHINode*> OrigPHINode;
538  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
539  OrigPHINode.push_back(cast<PHINode>(I));
540  }
541 
542  std::vector<BasicBlock *> Headers;
543  std::vector<BasicBlock *> ExitingBlocks;
544  std::vector<BasicBlock *> ExitingSucc;
545  std::vector<BasicBlock *> Latches;
546  Headers.push_back(Header);
547  Latches.push_back(LatchBlock);
548  if (ExitingBI) {
549  ExitingBlocks.push_back(ExitingBI->getParent());
550  ExitingSucc.push_back(ExitingBI->getSuccessor(!(*ContinueOnTrue)));
551  }
552 
553  // The current on-the-fly SSA update requires blocks to be processed in
554  // reverse postorder so that LastValueMap contains the correct value at each
555  // exit.
556  LoopBlocksDFS DFS(L);
557  DFS.perform(LI);
558 
559  // Stash the DFS iterators before adding blocks to the loop.
560  LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
561  LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
562 
563  std::vector<BasicBlock*> UnrolledLoopBlocks = L->getBlocks();
564 
565  // Loop Unrolling might create new loops. While we do preserve LoopInfo, we
566  // might break loop-simplified form for these loops (as they, e.g., would
567  // share the same exit blocks). We'll keep track of loops for which we can
568  // break this so that later we can re-simplify them.
569  SmallSetVector<Loop *, 4> LoopsToSimplify;
570  for (Loop *SubLoop : *L)
571  LoopsToSimplify.insert(SubLoop);
572 
573  if (Header->getParent()->isDebugInfoForProfiling())
574  for (BasicBlock *BB : L->getBlocks())
575  for (Instruction &I : *BB)
576  if (!isa<DbgInfoIntrinsic>(&I))
577  if (const DILocation *DIL = I.getDebugLoc()) {
578  auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(ULO.Count);
579  if (NewDIL)
580  I.setDebugLoc(NewDIL.getValue());
581  else
582  LLVM_DEBUG(dbgs()
583  << "Failed to create new discriminator: "
584  << DIL->getFilename() << " Line: " << DIL->getLine());
585  }
586 
587  // Identify what noalias metadata is inside the loop: if it is inside the
588  // loop, the associated metadata must be cloned for each iteration.
589  SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
590  identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
591 
592  for (unsigned It = 1; It != ULO.Count; ++It) {
595  NewLoops[L] = L;
596 
597  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
598  ValueToValueMapTy VMap;
599  BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
600  Header->getParent()->getBasicBlockList().push_back(New);
601 
602  assert((*BB != Header || LI->getLoopFor(*BB) == L) &&
603  "Header should not be in a sub-loop");
604  // Tell LI about New.
605  const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops);
606  if (OldLoop)
607  LoopsToSimplify.insert(NewLoops[OldLoop]);
608 
609  if (*BB == Header)
610  // Loop over all of the PHI nodes in the block, changing them to use
611  // the incoming values from the previous block.
612  for (PHINode *OrigPHI : OrigPHINode) {
613  PHINode *NewPHI = cast<PHINode>(VMap[OrigPHI]);
614  Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
615  if (Instruction *InValI = dyn_cast<Instruction>(InVal))
616  if (It > 1 && L->contains(InValI))
617  InVal = LastValueMap[InValI];
618  VMap[OrigPHI] = InVal;
619  New->getInstList().erase(NewPHI);
620  }
621 
622  // Update our running map of newest clones
623  LastValueMap[*BB] = New;
624  for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
625  VI != VE; ++VI)
626  LastValueMap[VI->first] = VI->second;
627 
628  // Add phi entries for newly created values to all exit blocks.
629  for (BasicBlock *Succ : successors(*BB)) {
630  if (L->contains(Succ))
631  continue;
632  for (PHINode &PHI : Succ->phis()) {
633  Value *Incoming = PHI.getIncomingValueForBlock(*BB);
634  ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
635  if (It != LastValueMap.end())
636  Incoming = It->second;
637  PHI.addIncoming(Incoming, New);
638  }
639  }
640  // Keep track of new headers and latches as we create them, so that
641  // we can insert the proper branches later.
642  if (*BB == Header)
643  Headers.push_back(New);
644  if (*BB == LatchBlock)
645  Latches.push_back(New);
646 
647  // Keep track of the exiting block and its successor block contained in
648  // the loop for the current iteration.
649  if (ExitingBI) {
650  if (*BB == ExitingBlocks[0])
651  ExitingBlocks.push_back(New);
652  if (*BB == ExitingSucc[0])
653  ExitingSucc.push_back(New);
654  }
655 
656  NewBlocks.push_back(New);
657  UnrolledLoopBlocks.push_back(New);
658 
659  // Update DomTree: since we just copy the loop body, and each copy has a
660  // dedicated entry block (copy of the header block), this header's copy
661  // dominates all copied blocks. That means, dominance relations in the
662  // copied body are the same as in the original body.
663  if (DT) {
664  if (*BB == Header)
665  DT->addNewBlock(New, Latches[It - 1]);
666  else {
667  auto BBDomNode = DT->getNode(*BB);
668  auto BBIDom = BBDomNode->getIDom();
669  BasicBlock *OriginalBBIDom = BBIDom->getBlock();
670  DT->addNewBlock(
671  New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
672  }
673  }
674  }
675 
676  // Remap all instructions in the most recent iteration
677  remapInstructionsInBlocks(NewBlocks, LastValueMap);
678  for (BasicBlock *NewBlock : NewBlocks)
679  for (Instruction &I : *NewBlock)
680  if (auto *II = dyn_cast<AssumeInst>(&I))
681  AC->registerAssumption(II);
682 
683  {
684  // Identify what other metadata depends on the cloned version. After
685  // cloning, replace the metadata with the corrected version for both
686  // memory instructions and noalias intrinsics.
687  std::string ext = (Twine("It") + Twine(It)).str();
688  cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
689  Header->getContext(), ext);
690  }
691  }
692 
693  // Loop over the PHI nodes in the original block, setting incoming values.
694  for (PHINode *PN : OrigPHINode) {
695  if (CompletelyUnroll) {
696  PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
697  Header->getInstList().erase(PN);
698  } else if (ULO.Count > 1) {
699  Value *InVal = PN->removeIncomingValue(LatchBlock, false);
700  // If this value was defined in the loop, take the value defined by the
701  // last iteration of the loop.
702  if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
703  if (L->contains(InValI))
704  InVal = LastValueMap[InVal];
705  }
706  assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
707  PN->addIncoming(InVal, Latches.back());
708  }
709  }
710 
711  auto setDest = [](BasicBlock *Src, BasicBlock *Dest, BasicBlock *BlockInLoop,
712  bool NeedConditional, Optional<bool> ContinueOnTrue,
713  bool IsDestLoopExit) {
714  auto *Term = cast<BranchInst>(Src->getTerminator());
715  if (NeedConditional) {
716  // Update the conditional branch's successor for the following
717  // iteration.
718  assert(ContinueOnTrue.hasValue() &&
719  "Expecting valid ContinueOnTrue when NeedConditional is true");
720  Term->setSuccessor(!(*ContinueOnTrue), Dest);
721  } else {
722  // Remove phi operands at this loop exit
723  if (!IsDestLoopExit) {
724  BasicBlock *BB = Src;
725  for (BasicBlock *Succ : successors(BB)) {
726  // Preserve the incoming value from BB if we are jumping to the block
727  // in the current loop.
728  if (Succ == BlockInLoop)
729  continue;
730  for (PHINode &Phi : Succ->phis())
731  Phi.removeIncomingValue(BB, false);
732  }
733  }
734  // Replace the conditional branch with an unconditional one.
735  BranchInst::Create(Dest, Term);
736  Term->eraseFromParent();
737  }
738  };
739 
740  // Connect latches of the unrolled iterations to the headers of the next
741  // iteration. If the latch is also the exiting block, the conditional branch
742  // may have to be preserved.
743  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
744  // The branch destination.
745  unsigned j = (i + 1) % e;
746  BasicBlock *Dest = Headers[j];
747  bool NeedConditional = LatchIsExiting;
748 
749  if (LatchIsExiting) {
750  if (RuntimeTripCount && j != 0)
751  NeedConditional = false;
752 
753  // For a complete unroll, make the last iteration end with a branch
754  // to the exit block.
755  if (CompletelyUnroll) {
756  if (j == 0)
757  Dest = LoopExit;
758  // If using trip count upper bound to completely unroll, we need to
759  // keep the conditional branch except the last one because the loop
760  // may exit after any iteration.
761  assert(NeedConditional &&
762  "NeedCondition cannot be modified by both complete "
763  "unrolling and runtime unrolling");
764  NeedConditional =
765  (ULO.PreserveCondBr && j && !(ULO.PreserveOnlyFirst && i != 0));
766  } else if (j != BreakoutTrip &&
767  (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) {
768  // If we know the trip count or a multiple of it, we can safely use an
769  // unconditional branch for some iterations.
770  NeedConditional = false;
771  }
772  }
773 
774  setDest(Latches[i], Dest, Headers[i], NeedConditional, ContinueOnTrue,
775  Dest == LoopExit);
776  }
777 
778  if (!LatchIsExiting) {
779  // If the latch is not exiting, we may be able to simplify the conditional
780  // branches in the unrolled exiting blocks.
781  for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
782  // The branch destination.
783  unsigned j = (i + 1) % e;
784  bool NeedConditional = true;
785 
786  if (RuntimeTripCount && j != 0)
787  NeedConditional = false;
788 
789  if (CompletelyUnroll)
790  // We cannot drop the conditional branch for the last condition, as we
791  // may have to execute the loop body depending on the condition.
792  NeedConditional = j == 0 || ULO.PreserveCondBr;
793  else if (j != BreakoutTrip &&
794  (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0))
795  // If we know the trip count or a multiple of it, we can safely use an
796  // unconditional branch for some iterations.
797  NeedConditional = false;
798 
799  // Conditional branches from non-latch exiting block have successors
800  // either in the same loop iteration or outside the loop. The branches are
801  // already correct.
802  if (NeedConditional)
803  continue;
804  setDest(ExitingBlocks[i], ExitingSucc[i], ExitingSucc[i], NeedConditional,
805  None, false);
806  }
807 
808  // When completely unrolling, the last latch becomes unreachable.
809  if (CompletelyUnroll) {
810  BranchInst *Term = cast<BranchInst>(Latches.back()->getTerminator());
811  new UnreachableInst(Term->getContext(), Term);
812  Term->eraseFromParent();
813  }
814  }
815 
816  // Update dominators of blocks we might reach through exits.
817  // Immediate dominator of such block might change, because we add more
818  // routes which can lead to the exit: we can now reach it from the copied
819  // iterations too.
820  if (DT && ULO.Count > 1) {
821  for (auto *BB : OriginalLoopBlocks) {
822  auto *BBDomNode = DT->getNode(BB);
823  SmallVector<BasicBlock *, 16> ChildrenToUpdate;
824  for (auto *ChildDomNode : BBDomNode->children()) {
825  auto *ChildBB = ChildDomNode->getBlock();
826  if (!L->contains(ChildBB))
827  ChildrenToUpdate.push_back(ChildBB);
828  }
829  BasicBlock *NewIDom;
830  if (ExitingBI && BB == ExitingBlocks[0]) {
831  // The latch is special because we emit unconditional branches in
832  // some cases where the original loop contained a conditional branch.
833  // Since the latch is always at the bottom of the loop, if the latch
834  // dominated an exit before unrolling, the new dominator of that exit
835  // must also be a latch. Specifically, the dominator is the first
836  // latch which ends in a conditional branch, or the last latch if
837  // there is no such latch.
838  // For loops exiting from non latch exiting block, we limit the
839  // branch simplification to single exiting block loops.
840  NewIDom = ExitingBlocks.back();
841  for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
842  Instruction *Term = ExitingBlocks[i]->getTerminator();
843  if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) {
844  NewIDom =
845  DT->findNearestCommonDominator(ExitingBlocks[i], Latches[i]);
846  break;
847  }
848  }
849  } else {
850  // The new idom of the block will be the nearest common dominator
851  // of all copies of the previous idom. This is equivalent to the
852  // nearest common dominator of the previous idom and the first latch,
853  // which dominates all copies of the previous idom.
854  NewIDom = DT->findNearestCommonDominator(BB, LatchBlock);
855  }
856  for (auto *ChildBB : ChildrenToUpdate)
857  DT->changeImmediateDominator(ChildBB, NewIDom);
858  }
859  }
860 
861  assert(!DT || !UnrollVerifyDomtree ||
863 
865  // Merge adjacent basic blocks, if possible.
866  for (BasicBlock *Latch : Latches) {
867  BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator());
868  assert((Term ||
869  (CompletelyUnroll && !LatchIsExiting && Latch == Latches.back())) &&
870  "Need a branch as terminator, except when fully unrolling with "
871  "unconditional latch");
872  if (Term && Term->isUnconditional()) {
873  BasicBlock *Dest = Term->getSuccessor(0);
874  BasicBlock *Fold = Dest->getUniquePredecessor();
875  if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) {
876  // Dest has been folded into Fold. Update our worklists accordingly.
877  std::replace(Latches.begin(), Latches.end(), Dest, Fold);
878  llvm::erase_value(UnrolledLoopBlocks, Dest);
879  }
880  }
881  }
882  // Apply updates to the DomTree.
883  DT = &DTU.getDomTree();
884 
885  // At this point, the code is well formed. We now simplify the unrolled loop,
886  // doing constant propagation and dead code elimination as we go.
887  simplifyLoopAfterUnroll(L, !CompletelyUnroll && (ULO.Count > 1 || Peeled), LI,
888  SE, DT, AC, TTI);
889 
890  NumCompletelyUnrolled += CompletelyUnroll;
891  ++NumUnrolled;
892 
893  Loop *OuterL = L->getParentLoop();
894  // Update LoopInfo if the loop is completely removed.
895  if (CompletelyUnroll)
896  LI->erase(L);
897 
898  // After complete unrolling most of the blocks should be contained in OuterL.
899  // However, some of them might happen to be out of OuterL (e.g. if they
900  // precede a loop exit). In this case we might need to insert PHI nodes in
901  // order to preserve LCSSA form.
902  // We don't need to check this if we already know that we need to fix LCSSA
903  // form.
904  // TODO: For now we just recompute LCSSA for the outer loop in this case, but
905  // it should be possible to fix it in-place.
906  if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA)
907  NeedToFixLCSSA |= ::needToInsertPhisForLCSSA(OuterL, UnrolledLoopBlocks, LI);
908 
909  // If we have a pass and a DominatorTree we should re-simplify impacted loops
910  // to ensure subsequent analyses can rely on this form. We want to simplify
911  // at least one layer outside of the loop that was unrolled so that any
912  // changes to the parent loop exposed by the unrolling are considered.
913  if (DT) {
914  if (OuterL) {
915  // OuterL includes all loops for which we can break loop-simplify, so
916  // it's sufficient to simplify only it (it'll recursively simplify inner
917  // loops too).
918  if (NeedToFixLCSSA) {
919  // LCSSA must be performed on the outermost affected loop. The unrolled
920  // loop's last loop latch is guaranteed to be in the outermost loop
921  // after LoopInfo's been updated by LoopInfo::erase.
922  Loop *LatchLoop = LI->getLoopFor(Latches.back());
923  Loop *FixLCSSALoop = OuterL;
924  if (!FixLCSSALoop->contains(LatchLoop))
925  while (FixLCSSALoop->getParentLoop() != LatchLoop)
926  FixLCSSALoop = FixLCSSALoop->getParentLoop();
927 
928  formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE);
929  } else if (PreserveLCSSA) {
930  assert(OuterL->isLCSSAForm(*DT) &&
931  "Loops should be in LCSSA form after loop-unroll.");
932  }
933 
934  // TODO: That potentially might be compile-time expensive. We should try
935  // to fix the loop-simplified form incrementally.
936  simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA);
937  } else {
938  // Simplify loops for which we might've broken loop-simplify form.
939  for (Loop *SubLoop : LoopsToSimplify)
940  simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA);
941  }
942  }
943 
944  return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
946 }
947 
948 /// Given an llvm.loop loop id metadata node, returns the loop hint metadata
949 /// node with the given name (for example, "llvm.loop.unroll.count"). If no
950 /// such metadata node exists, then nullptr is returned.
952  // First operand should refer to the loop id itself.
953  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
954  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
955 
956  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
957  MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
958  if (!MD)
959  continue;
960 
961  MDString *S = dyn_cast<MDString>(MD->getOperand(0));
962  if (!S)
963  continue;
964 
965  if (Name.equals(S->getString()))
966  return MD;
967  }
968  return nullptr;
969 }
i
i
Definition: README.txt:29
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:496
AssumptionCache.h
MathExtras.h
llvm
Definition: AllocatorList.h:23
llvm::DominatorTreeBase::findNearestCommonDominator
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
Definition: GenericDomTree.h:468
LoopSimplify.h
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::addClonedBlockToLoopInfo
const Loop * addClonedBlockToLoopInfo(BasicBlock *OriginalBB, BasicBlock *ClonedBB, LoopInfo *LI, NewLoopsMap &NewLoops)
Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary and adds a mapping from the o...
Definition: LoopUnroll.cpp:137
llvm::M68kBeads::Term
@ Term
Definition: M68kBaseInfo.h:71
llvm::LoopBase::getExitBlocks
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:62
llvm::AssumptionCache::registerAssumption
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
Definition: AssumptionCache.cpp:217
Optional.h
llvm::UnrollLoopOptions::AllowRuntime
bool AllowRuntime
Definition: UnrollLoop.h:71
ValueMapper.h
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::peelLoop
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA)
Peel off the first PeelCount iterations of loop L.
Definition: LoopPeel.cpp:667
Metadata.h
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
DebugInfoMetadata.h
llvm::ValueMap::end
iterator end()
Definition: ValueMap.h:136
llvm::Function::getBasicBlockList
const BasicBlockListType & getBasicBlockList() const
Get the underlying elements of the Function...
Definition: Function.h:755
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::DenseMapBase::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
StringRef.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::UnrollLoopOptions::AllowExpensiveTripCount
bool AllowExpensiveTripCount
Definition: UnrollLoop.h:72
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:633
DomTreeUpdater.h
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:443
llvm::UnrollLoopOptions::Force
bool Force
Definition: UnrollLoop.h:70
Local.h
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopUnroll.cpp:80
llvm::DILocation
Debug location.
Definition: DebugInfoMetadata.h:1558
ScalarEvolution.h
DenseMap.h
Module.h
llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition: GenericDomTree.h:351
llvm::GreatestCommonDivisor64
uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B)
Definition: MathExtras.h:630
llvm::Optional< bool >
llvm::formLCSSARecursively
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:403
UnrollRuntimeEpilog
static cl::opt< bool > UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden, cl::desc("Allow runtime unrolled loops to be unrolled " "with epilog instead of prolog."))
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:128
STLExtras.h
llvm::DomTreeNodeBase::getIDom
DomTreeNodeBase * getIDom() const
Definition: GenericDomTree.h:89
llvm::successors
succ_range successors(Instruction *I)
Definition: CFG.h:262
llvm::Sched::Fast
@ Fast
Definition: TargetLowering.h:104
replace
static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New)
Definition: ConstantMerge.cpp:116
Use.h
endif
__FakeVCSRevision h endif() endif() set(generated_files "$
Definition: CMakeLists.txt:16
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::LoopBlocksDFS::beginRPO
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
Definition: LoopIterator.h:136
llvm::DomTreeUpdater::UpdateStrategy::Lazy
@ Lazy
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1108
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Instruction.h
CommandLine.h
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::GetUnrollMetadata
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:951
Constants.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Twine.h
llvm::LoopBase::addChildLoop
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:395
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::remapInstructionsInBlocks
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock * > &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
Definition: CloneFunction.cpp:784
llvm::LoopInfoBase::addTopLevelLoop
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
Definition: LoopInfo.h:1021
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::LoopInfo::replacementPreservesLCSSAForm
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:1112
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2757
llvm::simplifyLoopAfterUnroll
void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI)
Perform some cleanup and simplifications on loops after unrolling.
Definition: LoopUnroll.cpp:205
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::Instruction
Definition: Instruction.h:45
llvm::LoopBlocksDFS::endRPO
RPOIterator endRPO() const
Definition: LoopIterator.h:140
llvm::SimplifyInstruction
Value * SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
Definition: InstructionSimplify.cpp:5816
llvm::UnrollLoopOptions::PreserveOnlyFirst
bool PreserveOnlyFirst
Definition: UnrollLoop.h:74
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:354
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
isEpilogProfitable
static bool isEpilogProfitable(Loop *L)
The function chooses which type of unroll (epilog or prolog) is more profitabale.
Definition: LoopUnroll.cpp:191
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:404
llvm::DomTreeUpdater
Definition: DomTreeUpdater.h:28
LoopUtils.h
DebugLoc.h
llvm::ValueMap::begin
iterator begin()
Definition: ValueMap.h:135
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:144
DebugLocVerifyLevel::None
@ None
llvm::DominatorTreeBase::changeImmediateDominator
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
Definition: GenericDomTree.h:655
llvm::LoopBase::getExitingBlock
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
LoopIterator.h
llvm::erase_value
void erase_value(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:1659
llvm::LoopUnrollResult::FullyUnrolled
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
CFG.h
LoopInfo.h
llvm::LoopInfoBase::AllocateLoop
LoopT * AllocateLoop(ArgsTy &&... Args)
Definition: LoopInfo.h:928
llvm::UnrollLoopOptions::TripMultiple
unsigned TripMultiple
Definition: UnrollLoop.h:75
llvm::LoopBlocksDFS::RPOIterator
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
Definition: LoopIterator.h:101
llvm::ScalarEvolution::getSmallConstantTripCount
unsigned getSmallConstantTripCount(const Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
Definition: ScalarEvolution.cpp:6944
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1102
llvm::LoopBlocksDFS
Store the result of a depth first search within basic blocks contained by a single loop.
Definition: LoopIterator.h:97
llvm::cloneAndAdaptNoAliasScopes
void cloneAndAdaptNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, ArrayRef< BasicBlock * > NewBlocks, LLVMContext &Context, StringRef Ext)
Clone the specified noalias decl scopes.
Definition: CloneFunction.cpp:988
llvm::BasicBlock::hasAddressTaken
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:448
llvm::CloneBasicBlock
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
Definition: CloneFunction.cpp:43
BasicBlock.h
llvm::cl::opt< bool >
VI
@ VI
Definition: SIInstrInfo.cpp:7476
llvm::UnrollLoop
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:278
llvm::Function::isDebugInfoForProfiling
bool isDebugInfoForProfiling() const
Returns true if we should emit debug info for profiling.
Definition: Metadata.cpp:1530
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DomTreeUpdater::getDomTree
DominatorTree & getDomTree()
Flush DomTree updates and return DomTree.
Definition: DomTreeUpdater.cpp:303
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3063
llvm::UnrollLoopOptions::TripCount
unsigned TripCount
Definition: UnrollLoop.h:69
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:964
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::LoopUnrollResult::PartiallyUnrolled
@ PartiallyUnrolled
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
Cloning.h
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::LoopUnrollResult
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:53
ArrayRef.h
llvm::LoopBlocksDFS::perform
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopInfo.cpp:1149
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::ValueMapIterator::ValueTypeProxy::second
ValueT & second
Definition: ValueMap.h:346
llvm::UnrollLoopOptions::ForgetAllSCEV
bool ForgetAllSCEV
Definition: UnrollLoop.h:78
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
iterator_range.h
llvm::LoopInfo::erase
void erase(Loop *L)
Update LoopInfo after removing the last backedge from a loop.
Definition: LoopInfo.cpp:878
llvm::ScalarEvolution::forgetTopmostLoop
void forgetTopmostLoop(const Loop *L)
Definition: ScalarEvolution.cpp:7266
llvm::MDNode
Metadata node.
Definition: Metadata.h:897
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::UnrollLoopOptions::PreserveCondBr
bool PreserveCondBr
Definition: UnrollLoop.h:73
llvm::UnrollRuntimeLoopRemainder
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop=nullptr)
Insert code in the prolog/epilog code when unrolling a loop with a run-time trip-count.
Definition: LoopUnrollRuntime.cpp:582
llvm::isInstructionTriviallyDead
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:398
llvm::LoopInfo
Definition: LoopInfo.h:1080
llvm::MergeBlockIntoPredecessor
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false)
Attempts to merge a block into its predecessor, if possible.
Definition: BasicBlockUtils.cpp:173
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1489
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::identifyNoAliasScopesToClone
void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
Definition: CloneFunction.cpp:1025
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::BasicBlock::getUniquePredecessor
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:272
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:527
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
needToInsertPhisForLCSSA
static bool needToInsertPhisForLCSSA(Loop *L, const std::vector< BasicBlock * > &Blocks, LoopInfo *LI)
Check if unrolling created a situation where we need to insert phi nodes to preserve LCSSA form.
Definition: LoopUnroll.cpp:112
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:299
llvm::ValueMap< const Value *, WeakTrackingVH >
for
this could be done in SelectionDAGISel along with other special for
Definition: README.txt:104
ValueHandle.h
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
ValueMap.h
llvm::DominatorTreeBase::addNewBlock
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
Definition: GenericDomTree.h:619
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
j
return j(j<< 16)
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
llvm::ValueMapIterator
Definition: ValueMap.h:49
llvm::Loop::isLCSSAForm
bool isLCSSAForm(const DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:461
llvm::UnrollLoopOptions::Count
unsigned Count
Definition: UnrollLoop.h:68
llvm::simplifyLoopIVs
bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl< WeakTrackingVH > &Dead)
SimplifyLoopIVs - Simplify users of induction variables within this loop.
Definition: SimplifyIndVar.cpp:973
GenericDomTree.h
llvm::UnrollLoopOptions::UnrollRemainder
bool UnrollRemainder
Definition: UnrollLoop.h:77
llvm::UnrollLoopOptions::PeelCount
unsigned PeelCount
Definition: UnrollLoop.h:76
Casting.h
DiagnosticInfo.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::ValueMap::find
iterator find(const KeyT &Val)
Definition: ValueMap.h:156
llvm::simplifyLoop
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
Definition: LoopSimplify.cpp:719
llvm::BasicBlock::back
const Instruction & back() const
Definition: BasicBlock.h:310
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:363
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:684
Instructions.h
SmallVector.h
User.h
llvm::ScalarEvolution::forgetAllLoops
void forgetAllLoops()
Definition: ScalarEvolution.cpp:7187
llvm::LoopBase::addBasicBlockToLoop
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:242
Dominators.h
UnrollLoop.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::Loop::isSafeToClone
bool isSafeToClone() const
Return true if the loop body is safe to clone in practice.
Definition: LoopInfo.cpp:485
InstructionSimplify.h
llvm::ScalarEvolution::getSmallConstantTripMultiple
unsigned getSmallConstantTripMultiple(const Loop *L)
Returns the largest constant divisor of the trip count of the loop if it is a single-exit loop and we...
Definition: ScalarEvolution.cpp:6969
llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition: GenericDomTree.h:802
llvm::PHINode
Definition: Instructions.h:2572
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:397
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::LoopBase::isLoopExiting
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Definition: LoopInfo.h:225
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::UnreachableInst
This function has undefined behavior.
Definition: Instructions.h:4652
llvm::cl::desc
Definition: CommandLine.h:414
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3007
llvm::LoopUnrollResult::Unmodified
@ Unmodified
The loop was not modified.
raw_ostream.h
LoopPeel.h
BasicBlockUtils.h
llvm::UnrollLoopOptions
Definition: UnrollLoop.h:67
llvm::MDString
A single uniqued string.
Definition: Metadata.h:611
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
ilist_iterator.h
UnrollVerifyDomtree
static cl::opt< bool > UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden, cl::desc("Verify domtree after unrolling"), cl::init(false))
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3100
SimplifyIndVar.h
SetVector.h
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908