Go to the documentation of this file.
69 #include <type_traits>
79 #define DEBUG_TYPE "loop-unroll"
82 STATISTIC(NumCompletelyUnrolled,
"Number of loops completely unrolled");
83 STATISTIC(NumUnrolled,
"Number of loops unrolled (completely or otherwise)");
84 STATISTIC(NumUnrolledNotLatch,
"Number of loops unrolled without a conditional "
85 "latch (completely or otherwise)");
89 cl::desc(
"Allow runtime unrolled loops to be unrolled "
90 "with epilog instead of prolog."));
94 cl::desc(
"Verify domtree after unrolling"),
95 #ifdef EXPENSIVE_CHECKS
104 cl::desc(
"Verify loopinfo after unrolling"),
105 #ifdef EXPENSIVE_CHECKS
123 const std::vector<BasicBlock *> &Blocks,
129 for (
Use &U :
I.operands()) {
130 if (
const auto *
Def = dyn_cast<Instruction>(U)) {
152 assert(OldLoop &&
"Should (at least) be in the loop being unrolled!");
154 Loop *&NewLoop = NewLoops[OldLoop];
158 "Header should be first in RPO");
204 assert(PreHeader && Header);
206 if (isa<ConstantInt>(PN.getIncomingValueForBlock(PreHeader)))
220 if (SE && SimplifyIVs) {
226 while (!DeadInsts.empty()) {
228 if (
Instruction *Inst = dyn_cast_or_null<Instruction>(V))
241 Inst.replaceAllUsesWith(V);
274 bool PreserveLCSSA,
Loop **RemainderLoop) {
275 assert(DT &&
"DomTree is required");
278 LLVM_DEBUG(
dbgs() <<
" Can't unroll; loop preheader-insertion failed.\n");
283 LLVM_DEBUG(
dbgs() <<
" Can't unroll; loop exit-block-insertion failed.\n");
289 LLVM_DEBUG(
dbgs() <<
" Can't unroll; Loop body cannot be cloned.\n");
296 dbgs() <<
" Won't unroll loop: address of header block is taken.\n");
309 std::vector<BasicBlock *> OriginalLoopBlocks = L->
getBlocks();
316 if (MaxTripCount && ULO.
Count > MaxTripCount)
317 ULO.
Count = MaxTripCount;
321 unsigned TripMultiple;
322 unsigned BreakoutTrip;
329 for (
auto *ExitingBlock : ExitingBlocks) {
332 auto *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
339 if (
Info.TripCount != 0) {
341 Info.TripMultiple = 0;
343 Info.BreakoutTrip =
Info.TripMultiple =
347 Info.ExitingBlocks.push_back(ExitingBlock);
348 LLVM_DEBUG(
dbgs() <<
" Exiting block %" << ExitingBlock->getName()
349 <<
": TripCount=" <<
Info.TripCount
350 <<
", TripMultiple=" <<
Info.TripMultiple
351 <<
", BreakoutTrip=" <<
Info.BreakoutTrip <<
"\n");
357 const bool CompletelyUnroll = ULO.
Count == MaxTripCount;
359 const bool PreserveOnlyFirst = CompletelyUnroll && MaxOrZero;
363 if (CompletelyUnroll)
372 bool NeedToFixLCSSA =
373 PreserveLCSSA && CompletelyUnroll &&
388 if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) {
390 dbgs() <<
"Can't unroll; a conditional latch must exit the loop");
399 bool HasConvergent =
false;
402 if (
auto *CB = dyn_cast<CallBase>(&
I))
403 HasConvergent |= CB->isConvergent();
405 "Can't runtime unroll if loop contains a convergent operation.");
408 bool EpilogProfitability =
416 PreserveLCSSA, RemainderLoop)) {
421 "generated when assuming runtime trip count\n");
428 if (CompletelyUnroll) {
430 <<
" with trip count " << ULO.
Count <<
"!\n");
435 <<
"completely unrolled loop with "
436 <<
NV(
"UnrollCount", ULO.
Count) <<
" iterations";
449 Diag <<
"unrolled loop by a factor of " <<
NV(
"UnrollCount", ULO.
Count);
451 Diag <<
" with run-time trip count";
472 ++NumUnrolledNotLatch;
477 std::vector<PHINode*> OrigPHINode;
479 OrigPHINode.push_back(cast<PHINode>(
I));
482 std::vector<BasicBlock *> Headers;
483 std::vector<BasicBlock *> Latches;
484 Headers.push_back(Header);
485 Latches.push_back(LatchBlock);
497 std::vector<BasicBlock*> UnrolledLoopBlocks = L->
getBlocks();
504 for (
Loop *SubLoop : *L)
505 LoopsToSimplify.
insert(SubLoop);
512 if (!isa<DbgInfoIntrinsic>(&
I))
514 auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(ULO.
Count);
516 I.setDebugLoc(*NewDIL);
519 <<
"Failed to create new discriminator: "
520 << DIL->getFilename() <<
" Line: " << DIL->getLine());
531 auto BlockInsertPt = std::next(LatchBlock->
getIterator());
532 for (
unsigned It = 1; It != ULO.
Count; ++It) {
543 "Header should not be in a sub-loop");
547 LoopsToSimplify.
insert(NewLoops[OldLoop]);
552 for (
PHINode *OrigPHI : OrigPHINode) {
553 PHINode *NewPHI = cast<PHINode>(VMap[OrigPHI]);
555 if (
Instruction *InValI = dyn_cast<Instruction>(InVal))
556 if (It > 1 && L->contains(InValI))
557 InVal = LastValueMap[InValI];
558 VMap[OrigPHI] = InVal;
559 New->getInstList().erase(NewPHI);
563 LastValueMap[*
BB] = New;
566 LastValueMap[
VI->first] =
VI->second;
570 if (L->contains(Succ))
572 for (
PHINode &PHI : Succ->phis()) {
573 Value *Incoming = PHI.getIncomingValueForBlock(*
BB);
575 if (It != LastValueMap.
end())
577 PHI.addIncoming(Incoming, New);
583 Headers.push_back(New);
584 if (*
BB == LatchBlock)
585 Latches.push_back(New);
589 auto ExitInfoIt = ExitInfos.
find(*
BB);
590 if (ExitInfoIt != ExitInfos.
end())
591 ExitInfoIt->second.ExitingBlocks.push_back(New);
593 NewBlocks.push_back(New);
594 UnrolledLoopBlocks.push_back(New);
604 auto BBIDom = BBDomNode->
getIDom();
605 BasicBlock *OriginalBBIDom = BBIDom->getBlock();
607 New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
615 if (
auto *II = dyn_cast<AssumeInst>(&
I))
622 std::string ext = (
Twine(
"It") +
Twine(It)).str();
629 for (
PHINode *PN : OrigPHINode) {
630 if (CompletelyUnroll) {
631 PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
633 }
else if (ULO.
Count > 1) {
634 Value *InVal = PN->removeIncomingValue(LatchBlock,
false);
637 if (
Instruction *InValI = dyn_cast<Instruction>(InVal)) {
638 if (L->contains(InValI))
639 InVal = LastValueMap[InVal];
641 assert(Latches.back() == LastValueMap[LatchBlock] &&
"bad last latch");
642 PN->addIncoming(InVal, Latches.back());
648 for (
unsigned i = 0,
e = Latches.size();
i !=
e; ++
i) {
649 unsigned j = (
i + 1) %
e;
650 Latches[
i]->getTerminator()->replaceSuccessorWith(Headers[
i], Headers[
j]);
658 for (
auto *
BB : OriginalLoopBlocks) {
661 for (
auto *ChildDomNode : BBDomNode->children()) {
662 auto *ChildBB = ChildDomNode->getBlock();
663 if (!L->contains(ChildBB))
664 ChildrenToUpdate.push_back(ChildBB);
671 for (
auto *ChildBB : ChildrenToUpdate)
681 auto SetDest = [&](
BasicBlock *Src,
bool WillExit,
bool ExitOnTrue) {
682 auto *
Term = cast<BranchInst>(Src->getTerminator());
683 const unsigned Idx = ExitOnTrue ^ WillExit;
692 Term->eraseFromParent();
697 auto WillExit = [&](
const ExitInfo &
Info,
unsigned i,
unsigned j,
699 if (CompletelyUnroll) {
700 if (PreserveOnlyFirst) {
708 if (
Info.TripCount &&
j !=
Info.TripCount)
716 if (IsLatch &&
j != 0)
721 if (
j !=
Info.BreakoutTrip &&
722 (
Info.TripMultiple == 0 ||
j %
Info.TripMultiple != 0)) {
732 for (
const auto &Pair : ExitInfos) {
733 const ExitInfo &
Info = Pair.second;
734 for (
unsigned i = 0,
e =
Info.ExitingBlocks.size();
i !=
e; ++
i) {
736 unsigned j = (
i + 1) %
e;
737 bool IsLatch = Pair.first == LatchBlock;
747 if (*KnownWillExit && !IsLatch)
750 SetDest(
Info.ExitingBlocks[
i], *KnownWillExit,
Info.ExitOnTrue);
755 if (!LatchIsExiting && CompletelyUnroll)
762 (CompletelyUnroll && !LatchIsExiting && Latch == Latches.back())) &&
763 "Need a branch as terminator, except when fully unrolling with "
764 "unconditional latch");
765 if (
Term &&
Term->isUnconditional()) {
770 std::replace(Latches.begin(), Latches.end(), Dest, Fold);
786 NumCompletelyUnrolled += CompletelyUnroll;
791 if (CompletelyUnroll)
806 if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA)
816 if (NeedToFixLCSSA) {
821 Loop *FixLCSSALoop = OuterL;
822 if (!FixLCSSALoop->
contains(LatchLoop))
827 }
else if (PreserveLCSSA) {
829 "Loops should be in LCSSA form after loop-unroll.");
834 simplifyLoop(OuterL, DT, LI, SE, AC,
nullptr, PreserveLCSSA);
837 for (
Loop *SubLoop : LoopsToSimplify)
838 simplifyLoop(SubLoop, DT, LI, SE, AC,
nullptr, PreserveLCSSA);
862 if (
Name.equals(
S->getString()))
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
This is an optimization pass for GlobalISel generic memory operations.
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
const Loop * addClonedBlockToLoopInfo(BasicBlock *OriginalBB, BasicBlock *ClonedBB, LoopInfo *LI, NewLoopsMap &NewLoops)
Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary and adds a mapping from the o...
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
static cl::opt< bool > UnrollVerifyLoopInfo("unroll-verify-loopinfo", cl::Hidden, cl::desc("Verify loopinfo after unrolling"), cl::init(false))
A parsed version of the target data layout string in and methods for querying it.
InstListType::iterator iterator
Instruction iterators...
const Function * getParent() const
Return the enclosing method, or null if none.
const BasicBlockListType & getBasicBlockList() const
Get the underlying elements of the Function...
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
Represents a single loop in the control flow graph.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool AllowExpensiveTripCount
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
The main scalar evolution driver.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
cl::opt< bool > EnableFSDiscriminator
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B)
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
static cl::opt< bool > UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden, cl::desc("Allow runtime unrolled loops to be unrolled " "with epilog instead of prolog."))
DiagnosticInfoOptimizationBase::Argument NV
auto successors(MachineBasicBlock *BB)
LLVM_NODISCARD T pop_back_val()
DomTreeNodeBase * getIDom() const
static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New)
__FakeVCSRevision h endif() endif() set(generated_files "$
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
LLVM Basic Block Representation.
unsigned getNumOperands() const
Return number of MDNode operands.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
iterator begin()
Instruction iterator methods.
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock * > &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
iterator_range< block_iterator > blocks() const
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI)
Perform some cleanup and simplifications on loops after unrolling.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
RPOIterator endRPO() const
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
STATISTIC(NumFunctions, "Total number of functions")
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
static bool isEpilogProfitable(Loop *L)
The function chooses which type of unroll (epilog or prolog) is more profitabale.
int getNumOccurrences() const
Analysis containing CSE Info
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
void erase_value(Container &C, ValueType V)
Wrapper function to remove a value from a container:
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
LoopT * AllocateLoop(ArgsTy &&... Args)
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
const MDOperand & getOperand(unsigned I) const
Store the result of a depth first search within basic blocks contained by a single loop.
void cloneAndAdaptNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, ArrayRef< BasicBlock * > NewBlocks, LLVMContext &Context, StringRef Ext)
Clone the specified noalias decl scopes.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
bool isDebugInfoForProfiling() const
Returns true if we should emit debug info for profiling.
DominatorTree & getDomTree()
Flush DomTree updates and return DomTree.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
@ PartiallyUnrolled
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
initializer< Ty > init(const Ty &Val)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
iterator find(const_arg_type_t< KeyT > Val)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void erase(Loop *L)
Update LoopInfo after removing the last backedge from a loop.
void forgetTopmostLoop(const Loop *L)
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop=nullptr)
Insert code in the prolog/epilog code when unrolling a loop with a run-time trip-count.
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false)
Attempts to merge a block into its predecessor, if possible.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
StringRef - Represent a constant reference to a string, i.e.
void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
A cache of @llvm.assume calls within a function.
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
self_iterator getIterator()
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool needToInsertPhisForLCSSA(Loop *L, const std::vector< BasicBlock * > &Blocks, LoopInfo *LI)
Check if unrolling created a situation where we need to insert phi nodes to preserve LCSSA form.
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
StringRef getName() const
Return a constant reference to the value's name.
this could be done in SelectionDAGISel along with other special for
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
LLVMContext & getContext() const
Get the context in which this basic block lives.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
bool isLCSSAForm(const DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl< WeakTrackingVH > &Dead)
SimplifyLoopIVs - Simplify users of induction variables within this loop.
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getConstantMaxBackedgeTakenCo...
BlockT * getHeader() const
iterator find(const KeyT &Val)
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
unsigned changeToUnreachable(Instruction *I, bool PreserveLCSSA=false, DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Insert an unreachable instruction before the specified instruction, making it and the rest of the cod...
const InstListType & getInstList() const
Return the underlying instruction list container.
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
bool isSafeToClone() const
Return true if the loop body is safe to clone in practice.
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
A SetVector that performs no allocations if smaller than a certain size.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Conditional or Unconditional Branch instruction.
@ Unmodified
The loop was not modified.
LLVM Value Representation.
static cl::opt< bool > UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden, cl::desc("Verify domtree after unrolling"), cl::init(false))
static constexpr UpdateKind Delete
A Use represents the edge between a Value definition and its users.
reference emplace_back(ArgTypes &&... Args)