LLVM  16.0.0git
LoopUnroll.cpp
Go to the documentation of this file.
1 //===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements some loop unrolling utilities. It does not define any
10 // actual pass or policy, but provides a single function to perform loop
11 // unrolling.
12 //
13 // The process of unrolling can produce extraneous basic blocks linked with
14 // unconditional branches. This will be corrected in the future.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/Optional.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/Twine.h"
32 #include "llvm/Analysis/LoopInfo.h"
36 #include "llvm/IR/BasicBlock.h"
37 #include "llvm/IR/CFG.h"
38 #include "llvm/IR/Constants.h"
40 #include "llvm/IR/DebugLoc.h"
41 #include "llvm/IR/DiagnosticInfo.h"
42 #include "llvm/IR/Dominators.h"
43 #include "llvm/IR/Function.h"
44 #include "llvm/IR/Instruction.h"
45 #include "llvm/IR/Instructions.h"
46 #include "llvm/IR/IntrinsicInst.h"
47 #include "llvm/IR/Metadata.h"
48 #include "llvm/IR/Module.h"
49 #include "llvm/IR/Use.h"
50 #include "llvm/IR/User.h"
51 #include "llvm/IR/ValueHandle.h"
52 #include "llvm/IR/ValueMap.h"
53 #include "llvm/Support/Casting.h"
55 #include "llvm/Support/Debug.h"
67 #include <algorithm>
68 #include <assert.h>
69 #include <numeric>
70 #include <type_traits>
71 #include <vector>
72 
73 namespace llvm {
74 class DataLayout;
75 class Value;
76 } // namespace llvm
77 
78 using namespace llvm;
79 
80 #define DEBUG_TYPE "loop-unroll"
81 
82 // TODO: Should these be here or in LoopUnroll?
83 STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
84 STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
85 STATISTIC(NumUnrolledNotLatch, "Number of loops unrolled without a conditional "
86  "latch (completely or otherwise)");
87 
88 static cl::opt<bool>
89 UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
90  cl::desc("Allow runtime unrolled loops to be unrolled "
91  "with epilog instead of prolog."));
92 
93 static cl::opt<bool>
94 UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
95  cl::desc("Verify domtree after unrolling"),
96 #ifdef EXPENSIVE_CHECKS
97  cl::init(true)
98 #else
99  cl::init(false)
100 #endif
101  );
102 
103 static cl::opt<bool>
104 UnrollVerifyLoopInfo("unroll-verify-loopinfo", cl::Hidden,
105  cl::desc("Verify loopinfo after unrolling"),
106 #ifdef EXPENSIVE_CHECKS
107  cl::init(true)
108 #else
109  cl::init(false)
110 #endif
111  );
112 
113 
114 /// Check if unrolling created a situation where we need to insert phi nodes to
115 /// preserve LCSSA form.
116 /// \param Blocks is a vector of basic blocks representing unrolled loop.
117 /// \param L is the outer loop.
118 /// It's possible that some of the blocks are in L, and some are not. In this
119 /// case, if there is a use is outside L, and definition is inside L, we need to
120 /// insert a phi-node, otherwise LCSSA will be broken.
121 /// The function is just a helper function for llvm::UnrollLoop that returns
122 /// true if this situation occurs, indicating that LCSSA needs to be fixed.
124  const std::vector<BasicBlock *> &Blocks,
125  LoopInfo *LI) {
126  for (BasicBlock *BB : Blocks) {
127  if (LI->getLoopFor(BB) == L)
128  continue;
129  for (Instruction &I : *BB) {
130  for (Use &U : I.operands()) {
131  if (const auto *Def = dyn_cast<Instruction>(U)) {
132  Loop *DefLoop = LI->getLoopFor(Def->getParent());
133  if (!DefLoop)
134  continue;
135  if (DefLoop->contains(L))
136  return true;
137  }
138  }
139  }
140  }
141  return false;
142 }
143 
144 /// Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary
145 /// and adds a mapping from the original loop to the new loop to NewLoops.
146 /// Returns nullptr if no new loop was created and a pointer to the
147 /// original loop OriginalBB was part of otherwise.
149  BasicBlock *ClonedBB, LoopInfo *LI,
150  NewLoopsMap &NewLoops) {
151  // Figure out which loop New is in.
152  const Loop *OldLoop = LI->getLoopFor(OriginalBB);
153  assert(OldLoop && "Should (at least) be in the loop being unrolled!");
154 
155  Loop *&NewLoop = NewLoops[OldLoop];
156  if (!NewLoop) {
157  // Found a new sub-loop.
158  assert(OriginalBB == OldLoop->getHeader() &&
159  "Header should be first in RPO");
160 
161  NewLoop = LI->AllocateLoop();
162  Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop());
163 
164  if (NewLoopParent)
165  NewLoopParent->addChildLoop(NewLoop);
166  else
167  LI->addTopLevelLoop(NewLoop);
168 
169  NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
170  return OldLoop;
171  } else {
172  NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
173  return nullptr;
174  }
175 }
176 
177 /// The function chooses which type of unroll (epilog or prolog) is more
178 /// profitabale.
179 /// Epilog unroll is more profitable when there is PHI that starts from
180 /// constant. In this case epilog will leave PHI start from constant,
181 /// but prolog will convert it to non-constant.
182 ///
183 /// loop:
184 /// PN = PHI [I, Latch], [CI, PreHeader]
185 /// I = foo(PN)
186 /// ...
187 ///
188 /// Epilog unroll case.
189 /// loop:
190 /// PN = PHI [I2, Latch], [CI, PreHeader]
191 /// I1 = foo(PN)
192 /// I2 = foo(I1)
193 /// ...
194 /// Prolog unroll case.
195 /// NewPN = PHI [PrologI, Prolog], [CI, PreHeader]
196 /// loop:
197 /// PN = PHI [I2, Latch], [NewPN, PreHeader]
198 /// I1 = foo(PN)
199 /// I2 = foo(I1)
200 /// ...
201 ///
202 static bool isEpilogProfitable(Loop *L) {
203  BasicBlock *PreHeader = L->getLoopPreheader();
204  BasicBlock *Header = L->getHeader();
205  assert(PreHeader && Header);
206  for (const PHINode &PN : Header->phis()) {
207  if (isa<ConstantInt>(PN.getIncomingValueForBlock(PreHeader)))
208  return true;
209  }
210  return false;
211 }
212 
213 /// Perform some cleanup and simplifications on loops after unrolling. It is
214 /// useful to simplify the IV's in the new loop, as well as do a quick
215 /// simplify/dce pass of the instructions.
216 void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
218  AssumptionCache *AC,
219  const TargetTransformInfo *TTI) {
220  // Simplify any new induction variables in the partially unrolled loop.
221  if (SE && SimplifyIVs) {
223  simplifyLoopIVs(L, SE, DT, LI, TTI, DeadInsts);
224 
225  // Aggressively clean up dead instructions that simplifyLoopIVs already
226  // identified. Any remaining should be cleaned up below.
227  while (!DeadInsts.empty()) {
228  Value *V = DeadInsts.pop_back_val();
229  if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
231  }
232  }
233 
234  // At this point, the code is well formed. Perform constprop, instsimplify,
235  // and dce.
236  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
238  for (BasicBlock *BB : L->getBlocks()) {
239  for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
240  if (Value *V = simplifyInstruction(&Inst, {DL, nullptr, DT, AC}))
241  if (LI->replacementPreservesLCSSAForm(&Inst, V))
242  Inst.replaceAllUsesWith(V);
243  if (isInstructionTriviallyDead(&Inst))
244  DeadInsts.emplace_back(&Inst);
245  }
246  // We can't do recursive deletion until we're done iterating, as we might
247  // have a phi which (potentially indirectly) uses instructions later in
248  // the block we're iterating through.
250  }
251 }
252 
253 /// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling
254 /// can only fail when the loop's latch block is not terminated by a conditional
255 /// branch instruction. However, if the trip count (and multiple) are not known,
256 /// loop unrolling will mostly produce more code that is no faster.
257 ///
258 /// If Runtime is true then UnrollLoop will try to insert a prologue or
259 /// epilogue that ensures the latch has a trip multiple of Count. UnrollLoop
260 /// will not runtime-unroll the loop if computing the run-time trip count will
261 /// be expensive and AllowExpensiveTripCount is false.
262 ///
263 /// The LoopInfo Analysis that is passed will be kept consistent.
264 ///
265 /// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
266 /// DominatorTree if they are non-null.
267 ///
268 /// If RemainderLoop is non-null, it will receive the remainder loop (if
269 /// required and not fully unrolled).
272  AssumptionCache *AC,
273  const TargetTransformInfo *TTI,
275  bool PreserveLCSSA, Loop **RemainderLoop) {
276  assert(DT && "DomTree is required");
277 
278  if (!L->getLoopPreheader()) {
279  LLVM_DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
281  }
282 
283  if (!L->getLoopLatch()) {
284  LLVM_DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n");
286  }
287 
288  // Loops with indirectbr cannot be cloned.
289  if (!L->isSafeToClone()) {
290  LLVM_DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n");
292  }
293 
294  if (L->getHeader()->hasAddressTaken()) {
295  // The loop-rotate pass can be helpful to avoid this in many cases.
296  LLVM_DEBUG(
297  dbgs() << " Won't unroll loop: address of header block is taken.\n");
299  }
300 
301  assert(ULO.Count > 0);
302 
303  // All these values should be taken only after peeling because they might have
304  // changed.
305  BasicBlock *Preheader = L->getLoopPreheader();
306  BasicBlock *Header = L->getHeader();
307  BasicBlock *LatchBlock = L->getLoopLatch();
308  SmallVector<BasicBlock *, 4> ExitBlocks;
309  L->getExitBlocks(ExitBlocks);
310  std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks();
311 
312  const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L);
313  const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
314 
315  // Effectively "DCE" unrolled iterations that are beyond the max tripcount
316  // and will never be executed.
317  if (MaxTripCount && ULO.Count > MaxTripCount)
318  ULO.Count = MaxTripCount;
319 
320  struct ExitInfo {
321  unsigned TripCount;
322  unsigned TripMultiple;
323  unsigned BreakoutTrip;
324  bool ExitOnTrue;
325  SmallVector<BasicBlock *> ExitingBlocks;
326  };
328  SmallVector<BasicBlock *, 4> ExitingBlocks;
329  L->getExitingBlocks(ExitingBlocks);
330  for (auto *ExitingBlock : ExitingBlocks) {
331  // The folding code is not prepared to deal with non-branch instructions
332  // right now.
333  auto *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
334  if (!BI)
335  continue;
336 
337  ExitInfo &Info = ExitInfos.try_emplace(ExitingBlock).first->second;
338  Info.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
339  Info.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
340  if (Info.TripCount != 0) {
341  Info.BreakoutTrip = Info.TripCount % ULO.Count;
342  Info.TripMultiple = 0;
343  } else {
344  Info.BreakoutTrip = Info.TripMultiple =
345  (unsigned)std::gcd(ULO.Count, Info.TripMultiple);
346  }
347  Info.ExitOnTrue = !L->contains(BI->getSuccessor(0));
348  Info.ExitingBlocks.push_back(ExitingBlock);
349  LLVM_DEBUG(dbgs() << " Exiting block %" << ExitingBlock->getName()
350  << ": TripCount=" << Info.TripCount
351  << ", TripMultiple=" << Info.TripMultiple
352  << ", BreakoutTrip=" << Info.BreakoutTrip << "\n");
353  }
354 
355  // Are we eliminating the loop control altogether? Note that we can know
356  // we're eliminating the backedge without knowing exactly which iteration
357  // of the unrolled body exits.
358  const bool CompletelyUnroll = ULO.Count == MaxTripCount;
359 
360  const bool PreserveOnlyFirst = CompletelyUnroll && MaxOrZero;
361 
362  // There's no point in performing runtime unrolling if this unroll count
363  // results in a full unroll.
364  if (CompletelyUnroll)
365  ULO.Runtime = false;
366 
367  // Go through all exits of L and see if there are any phi-nodes there. We just
368  // conservatively assume that they're inserted to preserve LCSSA form, which
369  // means that complete unrolling might break this form. We need to either fix
370  // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For
371  // now we just recompute LCSSA for the outer loop, but it should be possible
372  // to fix it in-place.
373  bool NeedToFixLCSSA =
374  PreserveLCSSA && CompletelyUnroll &&
375  any_of(ExitBlocks,
376  [](const BasicBlock *BB) { return isa<PHINode>(BB->begin()); });
377 
378  // The current loop unroll pass can unroll loops that have
379  // (1) single latch; and
380  // (2a) latch is unconditional; or
381  // (2b) latch is conditional and is an exiting block
382  // FIXME: The implementation can be extended to work with more complicated
383  // cases, e.g. loops with multiple latches.
384  BranchInst *LatchBI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
385 
386  // A conditional branch which exits the loop, which can be optimized to an
387  // unconditional branch in the unrolled loop in some cases.
388  bool LatchIsExiting = L->isLoopExiting(LatchBlock);
389  if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) {
390  LLVM_DEBUG(
391  dbgs() << "Can't unroll; a conditional latch must exit the loop");
393  }
394 
395  // Loops containing convergent instructions cannot use runtime unrolling,
396  // as the prologue/epilogue may add additional control-dependencies to
397  // convergent operations.
398  LLVM_DEBUG(
399  {
400  bool HasConvergent = false;
401  for (auto &BB : L->blocks())
402  for (auto &I : *BB)
403  if (auto *CB = dyn_cast<CallBase>(&I))
404  HasConvergent |= CB->isConvergent();
405  assert((!HasConvergent || !ULO.Runtime) &&
406  "Can't runtime unroll if loop contains a convergent operation.");
407  });
408 
409  bool EpilogProfitability =
411  : isEpilogProfitable(L);
412 
413  if (ULO.Runtime &&
415  EpilogProfitability, ULO.UnrollRemainder,
416  ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI,
417  PreserveLCSSA, RemainderLoop)) {
418  if (ULO.Force)
419  ULO.Runtime = false;
420  else {
421  LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be "
422  "generated when assuming runtime trip count\n");
424  }
425  }
426 
427  using namespace ore;
428  // Report the unrolling decision.
429  if (CompletelyUnroll) {
430  LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
431  << " with trip count " << ULO.Count << "!\n");
432  if (ORE)
433  ORE->emit([&]() {
434  return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
435  L->getHeader())
436  << "completely unrolled loop with "
437  << NV("UnrollCount", ULO.Count) << " iterations";
438  });
439  } else {
440  LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by "
441  << ULO.Count);
442  if (ULO.Runtime)
443  LLVM_DEBUG(dbgs() << " with run-time trip count");
444  LLVM_DEBUG(dbgs() << "!\n");
445 
446  if (ORE)
447  ORE->emit([&]() {
448  OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
449  L->getHeader());
450  Diag << "unrolled loop by a factor of " << NV("UnrollCount", ULO.Count);
451  if (ULO.Runtime)
452  Diag << " with run-time trip count";
453  return Diag;
454  });
455  }
456 
457  // We are going to make changes to this loop. SCEV may be keeping cached info
458  // about it, in particular about backedge taken count. The changes we make
459  // are guaranteed to invalidate this information for our loop. It is tempting
460  // to only invalidate the loop being unrolled, but it is incorrect as long as
461  // all exiting branches from all inner loops have impact on the outer loops,
462  // and if something changes inside them then any of outer loops may also
463  // change. When we forget outermost loop, we also forget all contained loops
464  // and this is what we need here.
465  if (SE) {
466  if (ULO.ForgetAllSCEV)
467  SE->forgetAllLoops();
468  else {
469  SE->forgetTopmostLoop(L);
471  }
472  }
473 
474  if (!LatchIsExiting)
475  ++NumUnrolledNotLatch;
476 
477  // For the first iteration of the loop, we should use the precloned values for
478  // PHI nodes. Insert associations now.
479  ValueToValueMapTy LastValueMap;
480  std::vector<PHINode*> OrigPHINode;
481  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
482  OrigPHINode.push_back(cast<PHINode>(I));
483  }
484 
485  std::vector<BasicBlock *> Headers;
486  std::vector<BasicBlock *> Latches;
487  Headers.push_back(Header);
488  Latches.push_back(LatchBlock);
489 
490  // The current on-the-fly SSA update requires blocks to be processed in
491  // reverse postorder so that LastValueMap contains the correct value at each
492  // exit.
493  LoopBlocksDFS DFS(L);
494  DFS.perform(LI);
495 
496  // Stash the DFS iterators before adding blocks to the loop.
497  LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
498  LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
499 
500  std::vector<BasicBlock*> UnrolledLoopBlocks = L->getBlocks();
501 
502  // Loop Unrolling might create new loops. While we do preserve LoopInfo, we
503  // might break loop-simplified form for these loops (as they, e.g., would
504  // share the same exit blocks). We'll keep track of loops for which we can
505  // break this so that later we can re-simplify them.
506  SmallSetVector<Loop *, 4> LoopsToSimplify;
507  for (Loop *SubLoop : *L)
508  LoopsToSimplify.insert(SubLoop);
509 
510  // When a FSDiscriminator is enabled, we don't need to add the multiply
511  // factors to the discriminators.
512  if (Header->getParent()->isDebugInfoForProfiling() && !EnableFSDiscriminator)
513  for (BasicBlock *BB : L->getBlocks())
514  for (Instruction &I : *BB)
515  if (!isa<DbgInfoIntrinsic>(&I))
516  if (const DILocation *DIL = I.getDebugLoc()) {
517  auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(ULO.Count);
518  if (NewDIL)
519  I.setDebugLoc(*NewDIL);
520  else
521  LLVM_DEBUG(dbgs()
522  << "Failed to create new discriminator: "
523  << DIL->getFilename() << " Line: " << DIL->getLine());
524  }
525 
526  // Identify what noalias metadata is inside the loop: if it is inside the
527  // loop, the associated metadata must be cloned for each iteration.
528  SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
529  identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
530 
531  // We place the unrolled iterations immediately after the original loop
532  // latch. This is a reasonable default placement if we don't have block
533  // frequencies, and if we do, well the layout will be adjusted later.
534  auto BlockInsertPt = std::next(LatchBlock->getIterator());
535  for (unsigned It = 1; It != ULO.Count; ++It) {
538  NewLoops[L] = L;
539 
540  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
541  ValueToValueMapTy VMap;
542  BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
543  Header->getParent()->getBasicBlockList().insert(BlockInsertPt, New);
544 
545  assert((*BB != Header || LI->getLoopFor(*BB) == L) &&
546  "Header should not be in a sub-loop");
547  // Tell LI about New.
548  const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops);
549  if (OldLoop)
550  LoopsToSimplify.insert(NewLoops[OldLoop]);
551 
552  if (*BB == Header)
553  // Loop over all of the PHI nodes in the block, changing them to use
554  // the incoming values from the previous block.
555  for (PHINode *OrigPHI : OrigPHINode) {
556  PHINode *NewPHI = cast<PHINode>(VMap[OrigPHI]);
557  Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
558  if (Instruction *InValI = dyn_cast<Instruction>(InVal))
559  if (It > 1 && L->contains(InValI))
560  InVal = LastValueMap[InValI];
561  VMap[OrigPHI] = InVal;
562  NewPHI->eraseFromParent();
563  }
564 
565  // Update our running map of newest clones
566  LastValueMap[*BB] = New;
567  for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
568  VI != VE; ++VI)
569  LastValueMap[VI->first] = VI->second;
570 
571  // Add phi entries for newly created values to all exit blocks.
572  for (BasicBlock *Succ : successors(*BB)) {
573  if (L->contains(Succ))
574  continue;
575  for (PHINode &PHI : Succ->phis()) {
576  Value *Incoming = PHI.getIncomingValueForBlock(*BB);
577  ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
578  if (It != LastValueMap.end())
579  Incoming = It->second;
580  PHI.addIncoming(Incoming, New);
581  SE->forgetValue(&PHI);
582  }
583  }
584  // Keep track of new headers and latches as we create them, so that
585  // we can insert the proper branches later.
586  if (*BB == Header)
587  Headers.push_back(New);
588  if (*BB == LatchBlock)
589  Latches.push_back(New);
590 
591  // Keep track of the exiting block and its successor block contained in
592  // the loop for the current iteration.
593  auto ExitInfoIt = ExitInfos.find(*BB);
594  if (ExitInfoIt != ExitInfos.end())
595  ExitInfoIt->second.ExitingBlocks.push_back(New);
596 
597  NewBlocks.push_back(New);
598  UnrolledLoopBlocks.push_back(New);
599 
600  // Update DomTree: since we just copy the loop body, and each copy has a
601  // dedicated entry block (copy of the header block), this header's copy
602  // dominates all copied blocks. That means, dominance relations in the
603  // copied body are the same as in the original body.
604  if (*BB == Header)
605  DT->addNewBlock(New, Latches[It - 1]);
606  else {
607  auto BBDomNode = DT->getNode(*BB);
608  auto BBIDom = BBDomNode->getIDom();
609  BasicBlock *OriginalBBIDom = BBIDom->getBlock();
610  DT->addNewBlock(
611  New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
612  }
613  }
614 
615  // Remap all instructions in the most recent iteration
616  remapInstructionsInBlocks(NewBlocks, LastValueMap);
617  for (BasicBlock *NewBlock : NewBlocks)
618  for (Instruction &I : *NewBlock)
619  if (auto *II = dyn_cast<AssumeInst>(&I))
620  AC->registerAssumption(II);
621 
622  {
623  // Identify what other metadata depends on the cloned version. After
624  // cloning, replace the metadata with the corrected version for both
625  // memory instructions and noalias intrinsics.
626  std::string ext = (Twine("It") + Twine(It)).str();
627  cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
628  Header->getContext(), ext);
629  }
630  }
631 
632  // Loop over the PHI nodes in the original block, setting incoming values.
633  for (PHINode *PN : OrigPHINode) {
634  if (CompletelyUnroll) {
635  PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
636  PN->eraseFromParent();
637  } else if (ULO.Count > 1) {
638  Value *InVal = PN->removeIncomingValue(LatchBlock, false);
639  // If this value was defined in the loop, take the value defined by the
640  // last iteration of the loop.
641  if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
642  if (L->contains(InValI))
643  InVal = LastValueMap[InVal];
644  }
645  assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
646  PN->addIncoming(InVal, Latches.back());
647  }
648  }
649 
650  // Connect latches of the unrolled iterations to the headers of the next
651  // iteration. Currently they point to the header of the same iteration.
652  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
653  unsigned j = (i + 1) % e;
654  Latches[i]->getTerminator()->replaceSuccessorWith(Headers[i], Headers[j]);
655  }
656 
657  // Update dominators of blocks we might reach through exits.
658  // Immediate dominator of such block might change, because we add more
659  // routes which can lead to the exit: we can now reach it from the copied
660  // iterations too.
661  if (ULO.Count > 1) {
662  for (auto *BB : OriginalLoopBlocks) {
663  auto *BBDomNode = DT->getNode(BB);
664  SmallVector<BasicBlock *, 16> ChildrenToUpdate;
665  for (auto *ChildDomNode : BBDomNode->children()) {
666  auto *ChildBB = ChildDomNode->getBlock();
667  if (!L->contains(ChildBB))
668  ChildrenToUpdate.push_back(ChildBB);
669  }
670  // The new idom of the block will be the nearest common dominator
671  // of all copies of the previous idom. This is equivalent to the
672  // nearest common dominator of the previous idom and the first latch,
673  // which dominates all copies of the previous idom.
674  BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, LatchBlock);
675  for (auto *ChildBB : ChildrenToUpdate)
676  DT->changeImmediateDominator(ChildBB, NewIDom);
677  }
678  }
679 
682 
684 
685  auto SetDest = [&](BasicBlock *Src, bool WillExit, bool ExitOnTrue) {
686  auto *Term = cast<BranchInst>(Src->getTerminator());
687  const unsigned Idx = ExitOnTrue ^ WillExit;
688  BasicBlock *Dest = Term->getSuccessor(Idx);
689  BasicBlock *DeadSucc = Term->getSuccessor(1-Idx);
690 
691  // Remove predecessors from all non-Dest successors.
692  DeadSucc->removePredecessor(Src, /* KeepOneInputPHIs */ true);
693 
694  // Replace the conditional branch with an unconditional one.
695  BranchInst::Create(Dest, Term);
696  Term->eraseFromParent();
697 
698  DTU.applyUpdates({{DominatorTree::Delete, Src, DeadSucc}});
699  };
700 
701  auto WillExit = [&](const ExitInfo &Info, unsigned i, unsigned j,
702  bool IsLatch) -> Optional<bool> {
703  if (CompletelyUnroll) {
704  if (PreserveOnlyFirst) {
705  if (i == 0)
706  return std::nullopt;
707  return j == 0;
708  }
709  // Complete (but possibly inexact) unrolling
710  if (j == 0)
711  return true;
712  if (Info.TripCount && j != Info.TripCount)
713  return false;
714  return std::nullopt;
715  }
716 
717  if (ULO.Runtime) {
718  // If runtime unrolling inserts a prologue, information about non-latch
719  // exits may be stale.
720  if (IsLatch && j != 0)
721  return false;
722  return std::nullopt;
723  }
724 
725  if (j != Info.BreakoutTrip &&
726  (Info.TripMultiple == 0 || j % Info.TripMultiple != 0)) {
727  // If we know the trip count or a multiple of it, we can safely use an
728  // unconditional branch for some iterations.
729  return false;
730  }
731  return std::nullopt;
732  };
733 
734  // Fold branches for iterations where we know that they will exit or not
735  // exit.
736  for (const auto &Pair : ExitInfos) {
737  const ExitInfo &Info = Pair.second;
738  for (unsigned i = 0, e = Info.ExitingBlocks.size(); i != e; ++i) {
739  // The branch destination.
740  unsigned j = (i + 1) % e;
741  bool IsLatch = Pair.first == LatchBlock;
742  Optional<bool> KnownWillExit = WillExit(Info, i, j, IsLatch);
743  if (!KnownWillExit)
744  continue;
745 
746  // We don't fold known-exiting branches for non-latch exits here,
747  // because this ensures that both all loop blocks and all exit blocks
748  // remain reachable in the CFG.
749  // TODO: We could fold these branches, but it would require much more
750  // sophisticated updates to LoopInfo.
751  if (*KnownWillExit && !IsLatch)
752  continue;
753 
754  SetDest(Info.ExitingBlocks[i], *KnownWillExit, Info.ExitOnTrue);
755  }
756  }
757 
758  // When completely unrolling, the last latch becomes unreachable.
759  if (!LatchIsExiting && CompletelyUnroll)
760  changeToUnreachable(Latches.back()->getTerminator(), PreserveLCSSA, &DTU);
761 
762  // Merge adjacent basic blocks, if possible.
763  for (BasicBlock *Latch : Latches) {
764  BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator());
765  assert((Term ||
766  (CompletelyUnroll && !LatchIsExiting && Latch == Latches.back())) &&
767  "Need a branch as terminator, except when fully unrolling with "
768  "unconditional latch");
769  if (Term && Term->isUnconditional()) {
770  BasicBlock *Dest = Term->getSuccessor(0);
771  BasicBlock *Fold = Dest->getUniquePredecessor();
772  if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) {
773  // Dest has been folded into Fold. Update our worklists accordingly.
774  std::replace(Latches.begin(), Latches.end(), Dest, Fold);
775  llvm::erase_value(UnrolledLoopBlocks, Dest);
776  }
777  }
778  }
779  // Apply updates to the DomTree.
780  DT = &DTU.getDomTree();
781 
784 
785  // At this point, the code is well formed. We now simplify the unrolled loop,
786  // doing constant propagation and dead code elimination as we go.
787  simplifyLoopAfterUnroll(L, !CompletelyUnroll && ULO.Count > 1, LI, SE, DT, AC,
788  TTI);
789 
790  NumCompletelyUnrolled += CompletelyUnroll;
791  ++NumUnrolled;
792 
793  Loop *OuterL = L->getParentLoop();
794  // Update LoopInfo if the loop is completely removed.
795  if (CompletelyUnroll)
796  LI->erase(L);
797 
798  // LoopInfo should not be valid, confirm that.
800  LI->verify(*DT);
801 
802  // After complete unrolling most of the blocks should be contained in OuterL.
803  // However, some of them might happen to be out of OuterL (e.g. if they
804  // precede a loop exit). In this case we might need to insert PHI nodes in
805  // order to preserve LCSSA form.
806  // We don't need to check this if we already know that we need to fix LCSSA
807  // form.
808  // TODO: For now we just recompute LCSSA for the outer loop in this case, but
809  // it should be possible to fix it in-place.
810  if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA)
811  NeedToFixLCSSA |= ::needToInsertPhisForLCSSA(OuterL, UnrolledLoopBlocks, LI);
812 
813  // Make sure that loop-simplify form is preserved. We want to simplify
814  // at least one layer outside of the loop that was unrolled so that any
815  // changes to the parent loop exposed by the unrolling are considered.
816  if (OuterL) {
817  // OuterL includes all loops for which we can break loop-simplify, so
818  // it's sufficient to simplify only it (it'll recursively simplify inner
819  // loops too).
820  if (NeedToFixLCSSA) {
821  // LCSSA must be performed on the outermost affected loop. The unrolled
822  // loop's last loop latch is guaranteed to be in the outermost loop
823  // after LoopInfo's been updated by LoopInfo::erase.
824  Loop *LatchLoop = LI->getLoopFor(Latches.back());
825  Loop *FixLCSSALoop = OuterL;
826  if (!FixLCSSALoop->contains(LatchLoop))
827  while (FixLCSSALoop->getParentLoop() != LatchLoop)
828  FixLCSSALoop = FixLCSSALoop->getParentLoop();
829 
830  formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE);
831  } else if (PreserveLCSSA) {
832  assert(OuterL->isLCSSAForm(*DT) &&
833  "Loops should be in LCSSA form after loop-unroll.");
834  }
835 
836  // TODO: That potentially might be compile-time expensive. We should try
837  // to fix the loop-simplified form incrementally.
838  simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA);
839  } else {
840  // Simplify loops for which we might've broken loop-simplify form.
841  for (Loop *SubLoop : LoopsToSimplify)
842  simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA);
843  }
844 
845  return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
847 }
848 
849 /// Given an llvm.loop loop id metadata node, returns the loop hint metadata
850 /// node with the given name (for example, "llvm.loop.unroll.count"). If no
851 /// such metadata node exists, then nullptr is returned.
853  // First operand should refer to the loop id itself.
854  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
855  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
856 
857  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
858  MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
859  if (!MD)
860  continue;
861 
862  MDString *S = dyn_cast<MDString>(MD->getOperand(0));
863  if (!S)
864  continue;
865 
866  if (Name.equals(S->getString()))
867  return MD;
868  }
869  return nullptr;
870 }
i
i
Definition: README.txt:29
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:520
AssumptionCache.h
MathExtras.h
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::DominatorTreeBase::findNearestCommonDominator
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
Definition: GenericDomTree.h:468
LoopSimplify.h
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::addClonedBlockToLoopInfo
const Loop * addClonedBlockToLoopInfo(BasicBlock *OriginalBB, BasicBlock *ClonedBB, LoopInfo *LI, NewLoopsMap &NewLoops)
Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary and adds a mapping from the o...
Definition: LoopUnroll.cpp:148
llvm::LoopBase::getExitBlocks
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:64
llvm::AssumptionCache::registerAssumption
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
Definition: AssumptionCache.cpp:224
UnrollVerifyLoopInfo
static cl::opt< bool > UnrollVerifyLoopInfo("unroll-verify-loopinfo", cl::Hidden, cl::desc("Verify loopinfo after unrolling"), cl::init(false))
Optional.h
ValueMapper.h
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
Metadata.h
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
PHI
Rewrite undef for PHI
Definition: AMDGPURewriteUndefForPHI.cpp:101
IntrinsicInst.h
DebugInfoMetadata.h
llvm::ValueMap::end
iterator end()
Definition: ValueMap.h:136
llvm::DomTreeUpdater::applyUpdates
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
Definition: DomTreeUpdater.cpp:230
llvm::ScalarEvolution::getSmallConstantTripMultiple
unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
Definition: ScalarEvolution.cpp:8172
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
llvm::DenseMapBase::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
StringRef.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:139
llvm::UnrollLoopOptions::AllowExpensiveTripCount
bool AllowExpensiveTripCount
Definition: UnrollLoop.h:72
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::LoopInfoBase::verify
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:699
Statistic.h
llvm::UnrollLoopOptions::Runtime
bool Runtime
Definition: UnrollLoop.h:71
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:173
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:631
DomTreeUpdater.h
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::UnrollLoopOptions::Force
bool Force
Definition: UnrollLoop.h:70
Local.h
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::EnableFSDiscriminator
cl::opt< bool > EnableFSDiscriminator
Definition: TargetPassConfig.cpp:391
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopUnroll.cpp:80
llvm::DILocation
Debug location.
Definition: DebugInfoMetadata.h:1600
ScalarEvolution.h
DenseMap.h
Module.h
llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition: GenericDomTree.h:351
llvm::Optional< bool >
llvm::formLCSSARecursively
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:410
UnrollRuntimeEpilog
static cl::opt< bool > UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden, cl::desc("Allow runtime unrolled loops to be unrolled " "with epilog instead of prolog."))
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:29
STLExtras.h
llvm::DomTreeNodeBase::getIDom
DomTreeNodeBase * getIDom() const
Definition: GenericDomTree.h:89
llvm::Sched::Fast
@ Fast
Definition: TargetLowering.h:104
replace
static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New)
Definition: ConstantMerge.cpp:116
Use.h
endif
__FakeVCSRevision h endif() endif() set(generated_files "$
Definition: CMakeLists.txt:16
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::LoopBlocksDFS::beginRPO
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
Definition: LoopIterator.h:136
llvm::DomTreeUpdater::UpdateStrategy::Lazy
@ Lazy
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1298
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Instruction.h
CommandLine.h
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:114
llvm::GetUnrollMetadata
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:852
Constants.h
Twine.h
llvm::LoopBase::addChildLoop
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:412
llvm::remapInstructionsInBlocks
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock * > &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
Definition: CloneFunction.cpp:926
llvm::LoopInfoBase::addTopLevelLoop
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
Definition: LoopInfo.h:1049
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:195
llvm::LoopInfo::replacementPreservesLCSSAForm
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:1140
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2882
llvm::simplifyLoopAfterUnroll
void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI)
Perform some cleanup and simplifications on loops after unrolling.
Definition: LoopUnroll.cpp:216
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:188
llvm::Instruction
Definition: Instruction.h:42
llvm::LoopBlocksDFS::endRPO
RPOIterator endRPO() const
Definition: LoopIterator.h:140
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:33
isEpilogProfitable
static bool isEpilogProfitable(Loop *L)
The function chooses which type of unroll (epilog or prolog) is more profitabale.
Definition: LoopUnroll.cpp:202
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:403
llvm::DomTreeUpdater
Definition: DomTreeUpdater.h:28
LoopUtils.h
DebugLoc.h
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::ValueMap::begin
iterator begin()
Definition: ValueMap.h:135
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:147
llvm::DominatorTreeBase::changeImmediateDominator
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
Definition: GenericDomTree.h:655
LoopIterator.h
llvm::erase_value
void erase_value(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2005
llvm::LoopUnrollResult::FullyUnrolled
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
CFG.h
gcd
APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2)
Definition: ScalarEvolution.cpp:3539
LoopInfo.h
llvm::LoopInfoBase::AllocateLoop
LoopT * AllocateLoop(ArgsTy &&... Args)
Definition: LoopInfo.h:956
llvm::LoopBlocksDFS::RPOIterator
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
Definition: LoopIterator.h:101
llvm::ScalarEvolution::getSmallConstantTripCount
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
Definition: ScalarEvolution.cpp:8016
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1292
llvm::LoopBlocksDFS
Store the result of a depth first search within basic blocks contained by a single loop.
Definition: LoopIterator.h:97
llvm::cloneAndAdaptNoAliasScopes
void cloneAndAdaptNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, ArrayRef< BasicBlock * > NewBlocks, LLVMContext &Context, StringRef Ext)
Clone the specified noalias decl scopes.
Definition: CloneFunction.cpp:1130
llvm::BasicBlock::hasAddressTaken
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:484
llvm::CloneBasicBlock
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
Definition: CloneFunction.cpp:42
BasicBlock.h
llvm::cl::opt< bool >
llvm::ScalarEvolution::forgetBlockAndLoopDispositions
void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
Definition: ScalarEvolution.cpp:8461
VI
@ VI
Definition: SIInstrInfo.cpp:7967
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:81
llvm::UnrollLoop
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:270
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:53
llvm::DomTreeUpdater::getDomTree
DominatorTree & getDomTree()
Flush DomTree updates and return DomTree.
Definition: DomTreeUpdater.cpp:303
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3188
llvm::DenseMap
Definition: DenseMap.h:714
llvm::ScalarEvolution::getSmallConstantMaxTripCount
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
Definition: ScalarEvolution.cpp:8032
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:992
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::LoopUnrollResult::PartiallyUnrolled
@ PartiallyUnrolled
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
Cloning.h
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:716
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:183
llvm::LoopUnrollResult
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:54
llvm::simplifyInstruction
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
Definition: InstructionSimplify.cpp:6600
ArrayRef.h
llvm::ScalarEvolution::forgetValue
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
Definition: ScalarEvolution.cpp:8432
llvm::LoopBlocksDFS::perform
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopInfo.cpp:1222
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:232
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::ValueMapIterator::ValueTypeProxy::second
ValueT & second
Definition: ValueMap.h:346
llvm::UnrollLoopOptions::ForgetAllSCEV
bool ForgetAllSCEV
Definition: UnrollLoop.h:74
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
iterator_range.h
llvm::LoopInfo::erase
void erase(Loop *L)
Update LoopInfo after removing the last backedge from a loop.
Definition: LoopInfo.cpp:876
llvm::ScalarEvolution::forgetTopmostLoop
void forgetTopmostLoop(const Loop *L)
Definition: ScalarEvolution.cpp:8428
llvm::MDNode
Metadata node.
Definition: Metadata.h:944
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::UnrollRuntimeLoopRemainder
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop=nullptr)
Insert code in the prolog/epilog code when unrolling a loop with a run-time trip-count.
Definition: LoopUnrollRuntime.cpp:563
llvm::isInstructionTriviallyDead
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:396
llvm::LoopInfo
Definition: LoopInfo.h:1108
llvm::MergeBlockIntoPredecessor
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false)
Attempts to merge a block into its predecessor, if possible.
Definition: BasicBlockUtils.cpp:179
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1741
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::identifyNoAliasScopesToClone
void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
Definition: CloneFunction.cpp:1167
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::BasicBlock::getUniquePredecessor
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:293
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
needToInsertPhisForLCSSA
static bool needToInsertPhisForLCSSA(Loop *L, const std::vector< BasicBlock * > &Blocks, LoopInfo *LI)
Check if unrolling created a situation where we need to insert phi nodes to preserve LCSSA form.
Definition: LoopUnroll.cpp:123
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::M68kBeads::Term
@ Term
Definition: M68kBaseInfo.h:71
llvm::ValueMap< const Value *, WeakTrackingVH >
for
this could be done in SelectionDAGISel along with other special for
Definition: README.txt:104
ValueHandle.h
ValueMap.h
llvm::DominatorTreeBase::addNewBlock
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
Definition: GenericDomTree.h:619
j
return j(j<< 16)
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
llvm::ValueMapIterator
Definition: ValueMap.h:49
llvm::UnrollLoopOptions::Count
unsigned Count
Definition: UnrollLoop.h:69
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:84
llvm::simplifyLoopIVs
bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl< WeakTrackingVH > &Dead)
SimplifyLoopIVs - Simplify users of induction variables within this loop.
Definition: SimplifyIndVar.cpp:973
GenericDomTree.h
llvm::UnrollLoopOptions::UnrollRemainder
bool UnrollRemainder
Definition: UnrollLoop.h:73
llvm::ScalarEvolution::isBackedgeTakenCountMaxOrZero
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getConstantMaxBackedgeTakenCo...
Definition: ScalarEvolution.cpp:8255
Casting.h
DiagnosticInfo.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:105
llvm::ValueMap::find
iterator find(const KeyT &Val)
Definition: ValueMap.h:156
llvm::simplifyLoop
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
Definition: LoopSimplify.cpp:708
llvm::changeToUnreachable
unsigned changeToUnreachable(Instruction *I, bool PreserveLCSSA=false, DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Insert an unreachable instruction before the specified instruction, making it and the rest of the cod...
Definition: Local.cpp:2217
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:689
Instructions.h
SmallVector.h
User.h
llvm::ScalarEvolution::forgetAllLoops
void forgetAllLoops()
Definition: ScalarEvolution.cpp:8348
llvm::LoopBase::addBasicBlockToLoop
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:258
Dominators.h
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::try_emplace
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:222
UnrollLoop.h
llvm::Loop::isSafeToClone
bool isSafeToClone() const
Return true if the loop body is safe to clone in practice.
Definition: LoopInfo.cpp:486
InstructionSimplify.h
llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition: SmallVector.h:677
llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition: GenericDomTree.h:802
llvm::PHINode
Definition: Instructions.h:2697
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::BasicBlock::removePredecessor
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:342
llvm::Loop::isLCSSAForm
bool isLCSSAForm(const DominatorTree &DT, bool IgnoreTokens=true) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:462
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:399
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::LoopBase::isLoopExiting
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Definition: LoopInfo.h:242
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::cl::desc
Definition: CommandLine.h:413
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3132
llvm::LoopUnrollResult::Unmodified
@ Unmodified
The loop was not modified.
raw_ostream.h
BasicBlockUtils.h
llvm::UnrollLoopOptions
Definition: UnrollLoop.h:68
llvm::MDString
A single uniqued string.
Definition: Metadata.h:612
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
ilist_iterator.h
UnrollVerifyDomtree
static cl::opt< bool > UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden, cl::desc("Verify domtree after unrolling"), cl::init(false))
SimplifyIndVar.h
SetVector.h
llvm::DominatorTreeBase< BasicBlock, false >::Delete
static constexpr UpdateKind Delete
Definition: GenericDomTree.h:243
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941