38#define DEBUG_TYPE "loop-rotate"
41 "Number of loops not rotated due to the header size");
43 "Number of instructions hoisted into loop preheader");
45 "Number of instructions cloned into loop preheader");
50 cl::desc(
"Allow loop rotation multiple times in order to reach "
51 "a better latch exit"));
56 const unsigned MaxHeaderSize;
69 LoopRotate(
unsigned MaxHeaderSize,
LoopInfo *LI,
74 : MaxHeaderSize(MaxHeaderSize), LI(LI),
TTI(
TTI), AC(AC), DT(DT), SE(SE),
75 MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
76 IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {}
77 bool processLoop(
Loop *L);
80 bool rotateLoop(
Loop *L,
bool SimplifiedLatch);
81 bool simplifyLoopLatch(
Loop *L);
88 bool Inserted = VM.
insert({K, V}).second;
104 PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
109 for (
I = OrigHeader->
begin();
I !=
E; ++
I) {
110 Value *OrigHeaderVal = &*
I;
126 SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
127 SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
133 Instruction *UserInst = cast<Instruction>(U.getUser());
134 if (!isa<PHINode>(UserInst)) {
139 if (UserBB == OrigHeader)
144 if (UserBB == OrigPreheader) {
145 U = OrigPreHeaderVal;
162 if (UserBB == OrigHeader)
170 if (UserBB == OrigPreheader)
171 NewVal = OrigPreHeaderVal;
172 else if (
SSA.HasValueForBlock(UserBB))
173 NewVal =
SSA.GetValueInMiddleOfBlock(UserBB);
176 DbgValue->replaceVariableLocationOp(OrigHeaderVal, NewVal);
186 BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator());
189 if (L->contains(HeaderExit))
192 for (
auto &Phi : Header->phis()) {
195 return cast<Instruction>(U)->getParent() != HeaderExit;
211 assert(Latch &&
"need latch");
218 if (L->contains(Exit))
226 L->getUniqueExitBlocks(Exits);
227 if (!Exits.
empty()) {
260bool LoopRotate::rotateLoop(
Loop *L,
bool SimplifiedLatch) {
262 if (
L->getBlocks().size() == 1)
265 bool Rotated =
false;
277 if (!
L->isLoopExiting(OrigHeader))
287 if (
L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode ==
false &&
299 Metrics.analyzeBasicBlock(OrigHeader, *
TTI, EphValues, PrepareForLTO);
302 dbgs() <<
"LoopRotation: NOT rotating - contains non-duplicatable"
303 <<
" instructions: ";
308 LLVM_DEBUG(
dbgs() <<
"LoopRotation: NOT rotating - contains convergent "
313 if (!
Metrics.NumInsts.isValid()) {
314 LLVM_DEBUG(
dbgs() <<
"LoopRotation: NOT rotating - contains instructions"
315 " with invalid cost: ";
319 if (
Metrics.NumInsts > MaxHeaderSize) {
322 <<
" instructions, which is more than the threshold ("
323 << MaxHeaderSize <<
" instructions): ";
325 ++NumNotRotatedDueToHeaderSize;
331 if (PrepareForLTO &&
Metrics.NumInlineCandidates > 0)
340 if (!OrigPreheader || !
L->hasDedicatedExits())
349 SE->forgetTopmostLoop(L);
354 SE->forgetBlockAndLoopDispositions();
359 MSSAU->getMemorySSA()->verifyMemorySSA();
366 if (
L->contains(Exit))
368 assert(NewHeader &&
"Unable to determine new loop header");
369 assert(
L->contains(NewHeader) && !
L->contains(Exit) &&
370 "Unable to determine loop header and exit blocks");
375 "New header doesn't have one pred!");
385 for (;
PHINode *PN = dyn_cast<PHINode>(
I); ++
I)
395 using DbgIntrinsicHash =
396 std::pair<std::pair<hash_code, DILocalVariable *>,
DIExpression *>;
398 auto VarLocOps =
D->location_ops();
405 if (
auto *DII = dyn_cast<DbgVariableIntrinsic>(&
I))
406 DbgIntrinsics.
insert(makeHash(DII));
416 if (
auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&
I))
430 !isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) {
438 ++NumInstrsDuplicated;
445 if (
auto *DII = dyn_cast<DbgVariableIntrinsic>(
C))
446 if (DbgIntrinsics.
count(makeHash(DII))) {
455 if (V && LI->replacementPreservesLCSSAForm(
C, V)) {
459 if (!
C->mayHaveSideEffects()) {
469 C->insertBefore(LoopEntryBranch);
471 if (
auto *II = dyn_cast<AssumeInst>(
C))
472 AC->registerAssumption(II);
480 if (!NoAliasDeclInstructions.
empty()) {
505 LLVM_DEBUG(
dbgs() <<
" Cloning llvm.experimental.noalias.scope.decl:"
518 NoAliasDeclScopes.
push_back(NAD->getScopeList());
532 cast<Instruction>(
ValueMap[*NoAliasDeclInstructions.begin()]);
533 auto *LastInst = &OrigPreheader->
back();
548 PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
560 MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader,
573 if (!InsertedPHIs.
empty())
577 L->moveToHeader(NewHeader);
578 assert(
L->getHeader() == NewHeader &&
"Latch block is our new header");
585 Updates.
push_back({DominatorTree::Insert, OrigPreheader, Exit});
586 Updates.
push_back({DominatorTree::Insert, OrigPreheader, NewHeader});
587 Updates.
push_back({DominatorTree::Delete, OrigPreheader, OrigHeader});
590 MSSAU->applyUpdates(Updates, *DT,
true);
592 MSSAU->getMemorySSA()->verifyMemorySSA();
594 DT->applyUpdates(Updates);
617 OrigPreheader, NewHeader,
626 bool SplitLatchEdge =
false;
629 Loop *PredLoop = LI->getLoopFor(ExitPred);
630 if (!PredLoop || PredLoop->
contains(Exit) ||
631 isa<IndirectBrInst>(ExitPred->getTerminator()))
633 SplitLatchEdge |=
L->getLoopLatch() == ExitPred;
640 "Despite splitting all preds, failed to split latch exit?");
641 (void)SplitLatchEdge;
651 if (DT) DT->deleteEdge(OrigPreheader, Exit);
655 MSSAU->removeEdge(OrigPreheader, Exit);
658 assert(
L->getLoopPreheader() &&
"Invalid loop preheader after loop rotation");
659 assert(
L->getLoopLatch() &&
"Invalid loop latch after loop rotation");
662 MSSAU->getMemorySSA()->verifyMemorySSA();
675 MSSAU->getMemorySSA()->verifyMemorySSA();
682 SimplifiedLatch =
false;
700 bool seenIncrement =
false;
701 bool MultiExitLoop =
false;
703 if (!L->getExitingBlock())
704 MultiExitLoop =
true;
711 if (isa<DbgInfoIntrinsic>(
I))
714 switch (
I->getOpcode()) {
717 case Instruction::GetElementPtr:
719 if (!cast<GEPOperator>(
I)->hasAllConstantIndices())
723 case Instruction::Add:
724 case Instruction::Sub:
725 case Instruction::And:
726 case Instruction::Or:
727 case Instruction::Xor:
728 case Instruction::Shl:
729 case Instruction::LShr:
730 case Instruction::AShr: {
732 !isa<Constant>(
I->getOperand(0))
734 : !isa<Constant>(
I->getOperand(1)) ?
I->getOperand(1) :
nullptr;
742 auto *UserInst = cast<Instruction>(UseI);
743 if (!L->contains(UserInst))
750 seenIncrement =
true;
753 case Instruction::Trunc:
754 case Instruction::ZExt:
755 case Instruction::SExt:
771bool LoopRotate::simplifyLoopLatch(
Loop *L) {
781 if (!LastExit || !
L->isLoopExiting(LastExit))
792 << LastExit->
getName() <<
"\n");
800 SE->forgetBlockAndLoopDispositions();
804 MSSAU->getMemorySSA()->verifyMemorySSA();
810bool LoopRotate::processLoop(
Loop *L) {
812 MDNode *LoopMD =
L->getLoopID();
814 bool SimplifiedLatch =
false;
820 SimplifiedLatch = simplifyLoopLatch(L);
822 bool MadeChange = rotateLoop(L, SimplifiedLatch);
823 assert((!MadeChange ||
L->isLoopExiting(
L->getLoopLatch())) &&
824 "Loop latch should be exiting after loop-rotate.");
828 if ((MadeChange || SimplifiedLatch) && LoopMD)
829 L->setLoopID(LoopMD);
831 return MadeChange || SimplifiedLatch;
840 unsigned Threshold =
unsigned(-1),
841 bool IsUtilMode =
true,
bool PrepareForLTO) {
842 LoopRotate LR(Threshold, LI,
TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
843 IsUtilMode, PrepareForLTO);
844 return LR.processLoop(L);
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static bool canRotateDeoptimizingLatchExit(Loop *L)
static bool shouldSpeculateInstrs(BasicBlock::iterator Begin, BasicBlock::iterator End, Loop *L)
Determine whether the instructions in this range may be safely and cheaply speculated.
static cl::opt< bool > MultiRotate("loop-rotate-multi", cl::init(false), cl::Hidden, cl::desc("Allow loop rotation multiple times in order to reach " "a better latch exit"))
static bool profitableToRotateLoopExitingLatch(Loop *L)
static void InsertNewValueIntoMap(ValueToValueMapTy &VM, Value *K, Value *V)
Insert (K, V) pair into the ValueToValueMap, and verify the key did not previously exist in the map,...
static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, BasicBlock *OrigPreheader, ValueToValueMapTy &ValueMap, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *InsertedPHIs)
RewriteUsesOfClonedInstructions - We just cloned the instructions from the old header into the prehea...
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Class recording the (high level) value of a variable.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
InstListType::iterator iterator
Instruction iterators...
LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
const CallInst * getPostdominatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize that is present either in current ...
const Instruction & back() const
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
This is the common base class for debug info intrinsics for variables.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const BasicBlock * getParent() const
bool isTerminator() const
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
Represents a single loop in the control flow graph.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Helper class for SSA formation on a set of values defined in multiple blocks.
The main scalar evolution driver.
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may affect its v...
Implements a dense probed hash-table based set with some number of buckets stored inline.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void setName(const Twine &Name)
Change the name of the value.
iterator_range< user_iterator > users()
iterator_range< use_iterator > uses()
StringRef getName() const
Return a constant reference to the value's name.
void dump() const
Support for debugging, callable in GDB: V->dump()
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
auto successors(const MachineBasicBlock *BB)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
void insertDebugValuesForPHIs(BasicBlock *BB, SmallVectorImpl< PHINode * > &InsertedPHIs)
Propagate dbg.value intrinsics through the newly inserted PHIs.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void findDbgValues(SmallVectorImpl< DbgValueInst * > &DbgValues, Value *V)
Finds the llvm.dbg.value intrinsics describing a value.
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
bool VerifyMemorySSA
Enables verification of MemorySSA.
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
void cloneAndAdaptNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, ArrayRef< BasicBlock * > NewBlocks, LLVMContext &Context, StringRef Ext)
Clone the specified noalias decl scopes.
bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto predecessors(const MachineBasicBlock *BB)
bool LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC, DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ, bool RotationOnly, unsigned Threshold, bool IsUtilMode, bool PrepareForLTO=false)
Convert a loop into a loop with bottom test.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Utility to calculate the size and a few similar metrics for a set of basic blocks.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Option class for critical edge splitting.