Go to the documentation of this file.
38 #define DEBUG_TYPE "loop-rotate"
41 "Number of loops not rotated due to the header size");
43 "Number of instructions hoisted into loop preheader");
45 "Number of instructions cloned into loop preheader");
46 STATISTIC(NumRotated,
"Number of loops rotated");
50 cl::desc(
"Allow loop rotation multiple times in order to reach "
51 "a better latch exit"));
56 const unsigned MaxHeaderSize;
69 LoopRotate(
unsigned MaxHeaderSize,
LoopInfo *LI,
74 : MaxHeaderSize(MaxHeaderSize), LI(LI),
TTI(
TTI), AC(AC), DT(DT), SE(SE),
75 MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
76 IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {}
77 bool processLoop(
Loop *L);
80 bool rotateLoop(
Loop *L,
bool SimplifiedLatch);
81 bool simplifyLoopLatch(
Loop *L);
88 bool Inserted = VM.
insert({K, V}).second;
104 PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
109 for (
I = OrigHeader->
begin();
I !=
E; ++
I) {
110 Value *OrigHeaderVal = &*
I;
126 SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
127 SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
133 Instruction *UserInst = cast<Instruction>(U.getUser());
134 if (!isa<PHINode>(UserInst)) {
139 if (UserBB == OrigHeader)
144 if (UserBB == OrigPreheader) {
145 U = OrigPreHeaderVal;
162 if (UserBB == OrigHeader)
170 if (UserBB == OrigPreheader)
171 NewVal = OrigPreHeaderVal;
172 else if (
SSA.HasValueForBlock(UserBB))
173 NewVal =
SSA.GetValueInMiddleOfBlock(UserBB);
176 DbgValue->replaceVariableLocationOp(OrigHeaderVal, NewVal);
192 for (
auto &Phi : Header->
phis()) {
195 return cast<Instruction>(U)->getParent() != HeaderExit;
211 assert(Latch &&
"need latch");
227 if (!Exits.empty()) {
241 return !
BB->getPostdominatingDeoptimizeCall();
260 bool LoopRotate::rotateLoop(
Loop *L,
bool SimplifiedLatch) {
265 bool Rotated =
false;
287 if (L->
isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode ==
false &&
299 Metrics.analyzeBasicBlock(OrigHeader, *
TTI, EphValues, PrepareForLTO);
302 dbgs() <<
"LoopRotation: NOT rotating - contains non-duplicatable"
303 <<
" instructions: ";
308 LLVM_DEBUG(
dbgs() <<
"LoopRotation: NOT rotating - contains convergent "
313 if (!
Metrics.NumInsts.isValid()) {
314 LLVM_DEBUG(
dbgs() <<
"LoopRotation: NOT rotating - contains instructions"
315 " with invalid cost: ";
319 if (*
Metrics.NumInsts.getValue() > MaxHeaderSize) {
322 <<
" instructions, which is more than the threshold ("
323 << MaxHeaderSize <<
" instructions): ";
325 ++NumNotRotatedDueToHeaderSize;
331 if (PrepareForLTO &&
Metrics.NumInlineCandidates > 0)
349 SE->forgetTopmostLoop(L);
353 MSSAU->getMemorySSA()->verifyMemorySSA();
362 assert(NewHeader &&
"Unable to determine new loop header");
364 "Unable to determine loop header and exit blocks");
369 "New header doesn't have one pred!");
379 for (;
PHINode *PN = dyn_cast<PHINode>(
I); ++
I)
389 using DbgIntrinsicHash =
390 std::pair<std::pair<hash_code, DILocalVariable *>,
DIExpression *>;
392 auto VarLocOps =
D->location_ops();
399 if (
auto *DII = dyn_cast<DbgVariableIntrinsic>(&
I))
400 DbgIntrinsics.
insert(makeHash(DII));
410 if (
auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&
I))
411 NoAliasDeclInstructions.push_back(Decl);
424 !isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) {
432 ++NumInstrsDuplicated;
439 if (
auto *DII = dyn_cast<DbgVariableIntrinsic>(
C))
440 if (DbgIntrinsics.
count(makeHash(DII))) {
449 if (V && LI->replacementPreservesLCSSAForm(
C, V)) {
453 if (!
C->mayHaveSideEffects()) {
463 C->insertBefore(LoopEntryBranch);
465 if (
auto *II = dyn_cast<AssumeInst>(
C))
466 AC->registerAssumption(II);
474 if (!NoAliasDeclInstructions.empty()) {
499 LLVM_DEBUG(
dbgs() <<
" Cloning llvm.experimental.noalias.scope.decl:"
512 NoAliasDeclScopes.push_back(NAD->getScopeList());
526 cast<Instruction>(
ValueMap[*NoAliasDeclInstructions.begin()]);
527 auto *LastInst = &OrigPreheader->
back();
542 PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
554 MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader,
567 if (!InsertedPHIs.empty())
572 assert(L->
getHeader() == NewHeader &&
"Latch block is our new header");
584 MSSAU->applyUpdates(Updates, *DT,
true);
586 MSSAU->getMemorySSA()->verifyMemorySSA();
588 DT->applyUpdates(Updates);
611 OrigPreheader, NewHeader,
620 bool SplitLatchEdge =
false;
623 Loop *PredLoop = LI->getLoopFor(ExitPred);
624 if (!PredLoop || PredLoop->
contains(Exit) ||
625 ExitPred->getTerminator()->isIndirectTerminator())
634 "Despite splitting all preds, failed to split latch exit?");
635 (void)SplitLatchEdge;
645 if (DT) DT->deleteEdge(OrigPreheader, Exit);
649 MSSAU->removeEdge(OrigPreheader, Exit);
656 MSSAU->getMemorySSA()->verifyMemorySSA();
669 MSSAU->getMemorySSA()->verifyMemorySSA();
676 SimplifiedLatch =
false;
694 bool seenIncrement =
false;
695 bool MultiExitLoop =
false;
698 MultiExitLoop =
true;
705 if (isa<DbgInfoIntrinsic>(
I))
708 switch (
I->getOpcode()) {
711 case Instruction::GetElementPtr:
713 if (!cast<GEPOperator>(
I)->hasAllConstantIndices())
718 case Instruction::Sub:
719 case Instruction::And:
720 case Instruction::Or:
721 case Instruction::Xor:
722 case Instruction::Shl:
723 case Instruction::LShr:
724 case Instruction::AShr: {
726 !isa<Constant>(
I->getOperand(0))
728 : !isa<Constant>(
I->getOperand(1)) ?
I->getOperand(1) :
nullptr;
736 auto *UserInst = cast<Instruction>(UseI);
744 seenIncrement =
true;
747 case Instruction::Trunc:
748 case Instruction::ZExt:
749 case Instruction::SExt:
765 bool LoopRotate::simplifyLoopLatch(
Loop *L) {
786 << LastExit->
getName() <<
"\n");
793 MSSAU->getMemorySSA()->verifyMemorySSA();
799 bool LoopRotate::processLoop(
Loop *L) {
803 bool SimplifiedLatch =
false;
809 SimplifiedLatch = simplifyLoopLatch(L);
811 bool MadeChange = rotateLoop(L, SimplifiedLatch);
813 "Loop latch should be exiting after loop-rotate.");
817 if ((MadeChange || SimplifiedLatch) && LoopMD)
820 return MadeChange || SimplifiedLatch;
829 unsigned Threshold =
unsigned(-1),
830 bool IsUtilMode =
true,
bool PrepareForLTO) {
831 LoopRotate LR(Threshold, LI,
TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
832 IsUtilMode, PrepareForLTO);
833 return LR.processLoop(L);
bool isTerminator() const
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
InstListType::iterator iterator
Instruction iterators...
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
static bool shouldSpeculateInstrs(BasicBlock::iterator Begin, BasicBlock::iterator End, Loop *L)
Determine whether the instructions in this range may be safely and cheaply speculated.
Represents a single loop in the control flow graph.
void dump() const
Support for debugging, callable in GDB: V->dump()
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Implements a dense probed hash-table based set with some number of buckets stored inline.
The main scalar evolution driver.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static bool profitableToRotateLoopExitingLatch(Loop *L)
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
static constexpr UpdateKind Insert
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
auto successors(MachineBasicBlock *BB)
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
static cl::opt< bool > MultiRotate("loop-rotate-multi", cl::init(false), cl::Hidden, cl::desc("Allow loop rotation multiple times in order to reach " "a better latch exit"))
LLVM Basic Block Representation.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
(vector float) vec_cmpeq(*A, *B) C
iterator begin()
Instruction iterator methods.
Option class for critical edge splitting.
iterator_range< use_iterator > uses()
Value * getIncomingValueForBlock(const BasicBlock *BB) const
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
STATISTIC(NumFunctions, "Total number of functions")
auto predecessors(MachineBasicBlock *BB)
void setName(const Twine &Name)
Change the name of the value.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
const CallInst * getPostdominatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize that is present either in current ...
Value * getCondition() const
void cloneAndAdaptNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, ArrayRef< BasicBlock * > NewBlocks, LLVMContext &Context, StringRef Ext)
Clone the specified noalias decl scopes.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
This is the common base class for debug info intrinsics for variables.
Class recording the (high level) value of a variable.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void moveToHeader(BlockT *BB)
This method is used to move BB (which must be part of this loop) to be the loop header of the loop (t...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
initializer< Ty > init(const Ty &Val)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, BasicBlock *OrigPreheader, ValueToValueMapTy &ValueMap, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *InsertedPHIs)
RewriteUsesOfClonedInstructions - We just cloned the instructions from the old header into the prehea...
void insertDebugValuesForPHIs(BasicBlock *BB, SmallVectorImpl< PHINode * > &InsertedPHIs)
Propagate dbg.value intrinsics through the newly inserted PHIs.
void findDbgValues(SmallVectorImpl< DbgValueInst * > &DbgValues, Value *V)
Finds the llvm.dbg.value intrinsics describing a value.
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool hasDedicatedExits() const
Return true if no exit block for the loop has a predecessor that is outside the loop.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
static bool canRotateDeoptimizingLatchExit(Loop *L)
bool isUnconditional() const
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false)
Attempts to merge a block into its predecessor, if possible.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
A cache of @llvm.assume calls within a function.
Type * getType() const
All values are typed, get the type of this value.
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
self_iterator getIterator()
bool mayReadFromMemory() const
Return true if this instruction may read memory.
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
StringRef getName() const
Return a constant reference to the value's name.
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
LLVMContext & getContext() const
Get the context in which this basic block lives.
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
bool LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC, DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ, bool RotationOnly, unsigned Threshold, bool IsUtilMode, bool PrepareForLTO=false)
Convert a loop into a loop with bottom test.
BlockT * getHeader() const
bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
const Instruction & back() const
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
const BasicBlock * getParent() const
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
static void InsertNewValueIntoMap(ValueToValueMapTy &VM, Value *K, Value *V)
Insert (K, V) pair into the ValueToValueMap, and verify the key did not previously exist in the map,...
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
bool VerifyMemorySSA
Enables verification of MemorySSA.
Conditional or Unconditional Branch instruction.
Helper class for SSA formation on a set of values defined in multiple blocks.
LLVM Value Representation.
iterator_range< user_iterator > users()
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
static constexpr UpdateKind Delete
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
A Use represents the edge between a Value definition and its users.