Go to the documentation of this file.
52 #define DEBUG_TYPE "loop-flatten"
59 cl::desc(
"Limit on the cost of instructions that can be repeated due to "
65 cl::desc(
"Assume that the product of the two iteration "
66 "limits will never overflow"));
71 cl::desc(
"Widen the loop induction variables, if possible, so "
72 "overflow checks won't reject flattening"));
120 IterationInstructions.
insert(BackBranch);
127 InductionPHI =
nullptr;
150 if (!
Compare || !IsValidPredicate(
Compare->getUnsignedPredicate()) ||
162 Increment = dyn_cast<BinaryOperator>(
Compare->getOperand(0));
163 Limit =
Compare->getOperand(1);
167 Increment = dyn_cast<BinaryOperator>(
Compare->getOperand(1));
168 Limit =
Compare->getOperand(0);
170 if (!Increment || Increment->hasNUsesOrMore(3)) {
174 IterationInstructions.
insert(Increment);
182 dbgs() <<
"Incoming value from latch is not the increment inst\n");
186 auto *CI = dyn_cast<ConstantInt>(
188 if (!CI || !CI->isZero()) {
225 assert(InnerPHI.getNumIncomingValues() == 2);
226 Value *PreHeaderValue =
234 PHINode *OuterPHI = dyn_cast<PHINode>(PreHeaderValue);
244 PHINode *LCSSAPHI = dyn_cast<PHINode>(
255 dbgs() <<
"LCSSA PHI incoming value does not match latch value\n");
262 SafeOuterPHIs.
insert(OuterPHI);
267 if (!SafeOuterPHIs.
count(&OuterPHI)) {
268 LLVM_DEBUG(
dbgs() <<
"found unsafe PHI in outer loop: "; OuterPHI.dump());
292 if (!isa<PHINode>(&
I) && !
I.isTerminator() &&
294 LLVM_DEBUG(
dbgs() <<
"Cannot flatten because instruction may have "
303 if (IterationInstructions.
count(&
I))
319 RepeatedInstrCost += Cost;
323 LLVM_DEBUG(
dbgs() <<
"Cost of instructions that will be repeated: "
324 << RepeatedInstrCost <<
"\n");
328 LLVM_DEBUG(
dbgs() <<
"checkOuterLoopInsts: not profitable, bailing.\n");
347 (isa<SExtInst>(InnerLimit) || isa<ZExtInst>(InnerLimit)))
348 InnerLimit = cast<Instruction>(InnerLimit)->getOperand(0);
359 if (isa<TruncInst>(U)) {
362 U = *U->user_begin();
365 LLVM_DEBUG(
dbgs() <<
"Found use of inner induction variable: "; U->dump());
368 Value *MatchedItCount;
382 if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerLimit) {
384 ValidOuterPHIUses.
insert(MatchedMul);
387 LLVM_DEBUG(
dbgs() <<
"Did not match expected pattern, bailing\n");
398 auto IsValidOuterPHIUses = [&] (
User *U) ->
bool {
399 LLVM_DEBUG(
dbgs() <<
"Found use of outer induction variable: "; U->dump());
400 if (!ValidOuterPHIUses.
count(U)) {
401 LLVM_DEBUG(
dbgs() <<
"Did not match expected pattern, bailing\n");
408 if (
auto *V = dyn_cast<TruncInst>(U)) {
409 for (
auto *K : V->users()) {
410 if (!IsValidOuterPHIUses(K))
416 if (!IsValidOuterPHIUses(U))
422 <<
" value(s) that can be replaced:\n";
451 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(U)) {
456 if (
GEP->isInBounds() &&
458 DL.getPointerTypeSizeInBits(
GEP->getType())) {
460 dbgs() <<
"use of linear IV would be UB if overflow occurred: ";
519 LLVM_DEBUG(
dbgs() <<
"Checks all passed, doing the transformation\n");
525 Remark <<
"Flattened into outer loop";
529 Value *NewTripCount =
533 NewTripCount->
dump());
565 dbgs() <<
"with: "; OuterValue->
dump());
587 auto &
DL =
M->getDataLayout();
590 unsigned MaxLegalSize =
DL.getLargestLegalIntTypeSizeInBits();
591 auto *MaxLegalType =
DL.getLargestLegalIntType(
M->getContext());
596 if (InnerType != OuterType ||
597 InnerType->getScalarSizeInBits() >= MaxLegalSize ||
598 MaxLegalType->getScalarSizeInBits() < InnerType->getScalarSizeInBits() * 2) {
608 unsigned ElimExt = 0;
609 unsigned Widened = 0;
611 for (
const auto &WideIV : WideIVs) {
613 ElimExt, Widened,
true ,
618 LLVM_DEBUG(
dbgs() <<
"Deleting old phi: "; WideIV.NarrowIV->dump());
622 assert(Widened &&
"Widened IV expected");
631 dbgs() <<
"Loop flattening running on outer loop "
650 LLVM_DEBUG(
dbgs() <<
"Multiply would always overflow, so not profitable\n");
653 LLVM_DEBUG(
dbgs() <<
"Multiply might overflow, not flattening\n");
657 LLVM_DEBUG(
dbgs() <<
"Multiply cannot overflow, modifying loop in-place\n");
663 bool Changed =
false;
665 auto *OuterLoop = InnerLoop->getParentLoop();
720 ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
721 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
722 auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
724 auto &TTIP = getAnalysis<TargetTransformInfoWrapperPass>();
725 auto *
TTI = &TTIP.getTTI(
F);
726 auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
static bool checkOuterLoopInsts(FlattenInfo &FI, SmallPtrSetImpl< Instruction * > &IterationInstructions, const TargetTransformInfo *TTI)
INITIALIZE_PASS_BEGIN(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops", false, false) INITIALIZE_PASS_END(LoopFlattenLegacyPass
A set of analyses that are preserved following a run of a transformation pass.
Analysis pass providing the TargetTransformInfo.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Analysis pass that exposes the ScalarEvolution for a function.
PHINode * createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, unsigned &NumElimExt, unsigned &NumWidened, bool HasGuards, bool UsePostIncrementRanges)
Widen Induction Variables - Extend the width of an IV to cover its widest uses.
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
@ NeverOverflows
Never overflows.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
A parsed version of the target data layout string in and methods for querying it.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
OverflowResult computeOverflowForUnsignedMul(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo=true)
const Function * getParent() const
Return the enclosing method, or null if none.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
static cl::opt< bool > AssumeNoOverflow("loop-flatten-assume-no-overflow", cl::Hidden, cl::init(false), cl::desc("Assume that the product of the two iteration " "limits will never overflow"))
Represents a single loop in the control flow graph.
void dump() const
Support for debugging, callable in GDB: V->dump()
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
This class uses information about analyze scalars to rewrite expressions in canonical form.
static bool DoFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
The main scalar evolution driver.
PHINode * InnerInductionPHI
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
BinaryOperator * OuterIncrement
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
PHINode * OuterInductionPHI
SmallPtrSet< Value *, 4 > LinearIVUses
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is an effectively dead PHI node, due to being a def-use chain of single-use no...
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
LLVM Basic Block Representation.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
A struct for saving information about induction variables.
SmallVector< LoopT *, 4 > getLoopsInPreorder()
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Virtual Register Rewriter
bool match(Val *V, const Pattern &P)
@ MayOverflow
May or may not overflow.
Represent the analysis usage information of a pass.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Value * getIncomingValueForBlock(const BasicBlock *BB) const
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
StringRef getName() const
unsigned getNumIncomingValues() const
Return the number of incoming edges.
unsigned getIntegerBitWidth() const
Value * getCondition() const
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Value * hasConstantValue() const
If the specified PHI node always merges together the same value, return the value,...
This instruction compares its operands according to the predicate given to the constructor.
static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI)
Module * getParent()
Get the module that this global value is contained inside of...
static cl::opt< unsigned > RepeatedInstructionThreshold("loop-flatten-cost-threshold", cl::Hidden, cl::init(2), cl::desc("Limit on the cost of instructions that can be repeated due to " "loop flattening"))
A function analysis which provides an AssumptionCache.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
initializer< Ty > init(const Ty &Val)
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
static bool checkIVUsers(FlattenInfo &FI)
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
BinaryOperator * InnerIncrement
void erase(Loop *L)
Update LoopInfo after removing the last backedge from a loop.
A Module instance is used to store all the information related to an LLVM module.
FlattenInfo(Loop *OL, Loop *IL)
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool isUnconditional() const
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
QP Compare Ordered outs ins xscmpudp No builtin are required Or llvm fcmp order unorder compare DP QP Compare builtin are required DP Compare
static OverflowResult checkOverflow(FlattenInfo &FI, DominatorTree *DT, AssumptionCache *AC)
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
@ ICMP_ULT
unsigned less than
Type * getType() const
All values are typed, get the type of this value.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool findLoopComponents(Loop *L, SmallPtrSetImpl< Instruction * > &IterationInstructions, PHINode *&InductionPHI, Value *&Limit, BinaryOperator *&Increment, BranchInst *&BackBranch, ScalarEvolution *SE)
StringRef getName() const
Return a constant reference to the value's name.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
static bool runOnFunction(Function &F, bool PostInlining)
static cl::opt< bool > WidenIV("loop-flatten-widen-iv", cl::Hidden, cl::init(true), cl::desc("Widen the loop induction variables, if possible, so " "overflow checks won't reject flattening"))
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
SmallPtrSet< PHINode *, 4 > InnerPHIsToTransform
void initializeLoopFlattenLegacyPassPass(PassRegistry &)
BlockT * getHeader() const
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
static bool CanFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI)
Analysis pass which computes a DominatorTree.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI)
static BinaryOperator * CreateMul(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
const BasicBlock * getParent() const
FunctionPass * createLoopFlattenPass()
A container for analyses that lazily runs them and caches their results.
FunctionPass class - This class is used to implement most global optimizations.
AnalysisUsage & addRequired()
bool Flatten(DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, TargetTransformInfo *TTI)
Conditional or Unconditional Branch instruction.
CastClass_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
LLVM Value Representation.
iterator_range< user_iterator > users()
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Analysis pass that exposes the LoopInfo for a function.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.