Go to the documentation of this file.
60 #define DEBUG_TYPE "loop-unroll-and-jam"
65 "llvm.loop.unroll_and_jam.followup_all";
67 "llvm.loop.unroll_and_jam.followup_inner";
69 "llvm.loop.unroll_and_jam.followup_outer";
71 "llvm.loop.unroll_and_jam.followup_remainder_inner";
73 "llvm.loop.unroll_and_jam.followup_remainder_outer";
78 cl::desc(
"Allows loops to be unroll-and-jammed."));
82 cl::desc(
"Use this unroll count for all loops including those with "
83 "unroll_and_jam_count pragma values, for testing purposes"));
87 cl::desc(
"Threshold to use for inner loop when doing unroll and jam."));
91 cl::desc(
"Unrolled size limit for loops with an unroll_and_jam(full) or "
92 "unroll_count pragma."));
108 assert(LoopID->getNumOperands() > 0 &&
"requires at least one operand");
109 assert(LoopID->getOperand(0) == LoopID &&
"invalid loop id");
111 for (
unsigned I = 1,
E = LoopID->getNumOperands();
I <
E; ++
I) {
112 MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(
I));
120 if (
S->getString().startswith(
Prefix))
138 "Unroll count hint metadata should have two operands.");
140 mdconst::extract<ConstantInt>(MD->
getOperand(1))->getZExtValue();
141 assert(Count >= 1 &&
"Unroll count must be positive.");
151 assert(LoopSize >= UP.
BEInsns &&
"LoopSize should not be less than BEInsns!");
162 unsigned OuterTripMultiple,
unsigned OuterLoopSize,
unsigned InnerTripCount,
170 unsigned MaxTripCount = 0;
171 bool UseUpperBound =
false;
173 L,
TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
174 false, OuterTripMultiple, OuterLoopSize, UP, PP,
176 if (ExplicitUnroll || UseUpperBound) {
179 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; explicit count set by "
180 "computeUnrollCount\n");
187 if (UserUnrollCount) {
199 if (PragmaCount > 0) {
200 UP.
Count = PragmaCount;
203 if ((UP.
AllowRemainder || (OuterTripMultiple % PragmaCount == 0)) &&
211 bool ExplicitUnrollAndJamCount = PragmaCount > 0 || UserUnrollCount;
212 bool ExplicitUnrollAndJam = PragmaEnableUnroll || ExplicitUnrollAndJamCount;
216 if (ExplicitUnrollAndJam)
221 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; can't create remainder and "
222 "inner loop too large\n");
238 if (ExplicitUnrollAndJam)
243 if (InnerTripCount && InnerLoopSize * InnerTripCount < UP.
Threshold) {
244 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; small inner loop count is "
245 "being left for the unroller\n");
254 dbgs() <<
"Won't unroll-and-jam; More than one inner loop block\n");
262 unsigned NumInvariant = 0;
265 if (
auto *Ld = dyn_cast<LoadInst>(&
I)) {
266 Value *V = Ld->getPointerOperand();
273 if (NumInvariant == 0) {
274 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; No loop invariant loads\n");
327 unsigned NumInlineCandidates;
328 bool NotDuplicatable;
339 LLVM_DEBUG(
dbgs() <<
" Outer Loop Size: " << OuterLoopSizeIC <<
"\n");
340 LLVM_DEBUG(
dbgs() <<
" Inner Loop Size: " << InnerLoopSizeIC <<
"\n");
343 LLVM_DEBUG(
dbgs() <<
" Not unrolling loop which contains instructions"
344 <<
" with invalid cost.\n");
347 unsigned InnerLoopSize = *InnerLoopSizeIC.
getValue();
348 unsigned OuterLoopSize = *OuterLoopSizeIC.
getValue();
350 if (NotDuplicatable) {
351 LLVM_DEBUG(
dbgs() <<
" Not unrolling loop which contains non-duplicatable "
355 if (NumInlineCandidates != 0) {
356 LLVM_DEBUG(
dbgs() <<
" Not unrolling loop with inlinable calls.\n");
361 dbgs() <<
" Not unrolling loop with convergent instructions.\n");
375 if (NewInnerEpilogueLoopID)
387 L, SubLoop,
TTI, DT, LI, SE, EphValues, &ORE, OuterTripCount,
388 OuterTripMultiple, OuterLoopSize, InnerTripCount, InnerLoopSize, UP, PP);
392 if (OuterTripCount && UP.
Count > OuterTripCount)
393 UP.
Count = OuterTripCount;
395 Loop *EpilogueOuterLoop =
nullptr;
398 &SE, &DT, &AC, &
TTI, &ORE, &EpilogueOuterLoop);
401 if (EpilogueOuterLoop) {
405 if (NewOuterEpilogueLoopID)
421 if (NewOuterLoopID) {
443 bool DidSomething =
false;
451 while (!Worklist.
empty()) {
453 std::string LoopName = std::string(L->
getName());
467 class LoopUnrollAndJam :
public LoopPass {
472 LoopUnrollAndJam(
int OptLevel = 2) :
LoopPass(
ID), OptLevel(OptLevel) {
481 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
482 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
483 auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
484 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
485 auto &
TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*
F);
486 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
487 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*
F);
517 "Unroll and Jam loops",
false,
false)
532 return new LoopUnrollAndJam(OptLevel);
A set of analyses that are preserved following a run of a transformation pass.
This is an optimization pass for GlobalISel generic memory operations.
Optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
const Function * getParent() const
Return the enclosing method, or null if none.
static bool hasUnrollAndJamEnablePragma(const Loop *L)
static cl::opt< unsigned > UnrollAndJamCount("unroll-and-jam-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_and_jam_count pragma values, for testing purposes"))
Legacy pass manager pass to access dependence information.
unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
Represents a single loop in the control flow graph.
LoopUnrollResult UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop=nullptr)
static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner
static const char *const LLVMLoopUnrollAndJamFollowupOuter
static bool computeUnrollAndJamCount(Loop *L, Loop *SubLoop, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned OuterTripCount, unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount, unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP)
The main scalar evolution driver.
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
The legacy pass manager's analysis pass to compute loop information.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
static cl::opt< bool > AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden, cl::desc("Allows loops to be unroll-and-jammed."))
We currently generate a but we really shouldn eax ecx xorl edx divl ecx eax divl ecx movl eax ret A similar code sequence works for division We currently compile i32 v2 eax eax jo LBB1_2 and
static MDNode * getUnrollMetadataForLoop(const Loop *L, StringRef Name)
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
LLVM Basic Block Representation.
unsigned getNumOperands() const
Return number of MDNode operands.
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
InstructionCost ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop's loop id metadata.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static const char *const LLVMLoopUnrollAndJamFollowupInner
Represent the analysis usage information of a pass.
LLVM_NODISCARD T pop_back_val()
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
static LoopUnrollResult tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, DependenceInfo &DI, OptimizationRemarkEmitter &ORE, int OptLevel)
Legacy analysis pass which computes a DominatorTree.
int getNumOccurrences() const
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
StringRef getName() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This analysis provides information for a loop nest.
loop unroll and Unroll and Jam loops
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
const MDOperand & getOperand(unsigned I) const
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
This class represents an analyzed expression in the program.
DependenceInfo - This class is the main dependence-analysis driver.
TransformationMode hasUnrollAndJamTransformation(const Loop *L)
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
@ PartiallyUnrolled
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
initializer< Ty > init(const Ty &Val)
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
bool empty() const
Determine if the PriorityWorklist is empty or not.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
static uint64_t getUnrollAndJammedLoopSize(unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
@ TM_Disable
The transformation should not be applied.
void initializeLoopUnrollAndJamPass(PassRegistry &)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
constexpr const T & getValue() const &
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
An immutable pass that tracks lazily created AssumptionCache objects.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
StringRef - Represent a constant reference to a string, i.e.
A cache of @llvm.assume calls within a function.
@ TM_ForcedByUser
The transformation was directed by the user, e.g.
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
static cl::opt< unsigned > UnrollAndJamThreshold("unroll-and-jam-threshold", cl::init(60), cl::Hidden, cl::desc("Threshold to use for inner loop when doing unroll and jam."))
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Function * getParent() const
Return the function to which the loop-nest belongs.
static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter
static bool hasAnyUnrollPragma(const Loop *L, StringRef Prefix)
StringRef getName() const
Return a constant reference to the value's name.
bool isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT, DependenceInfo &DI, LoopInfo &LI)
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
void markLoopAsDeleted(Loop &L)
TargetTransformInfo & TTI
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
static const char *const LLVMLoopUnrollAndJamFollowupAll
BlockT * getHeader() const
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
static cl::opt< unsigned > PragmaUnrollAndJamThreshold("pragma-unroll-and-jam-threshold", cl::init(1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or " "unroll_count pragma."))
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Pass interface - Implemented by all 'passes'.
static unsigned unrollAndJamCountPragmaValue(const Loop *L)
A container for analyses that lazily runs them and caches their results.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
AnalysisUsage & addRequired()
@ Unmodified
The loop was not modified.
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
This class represents a loop nest and can be used to query its properties.
LLVM Value Representation.
TransformationMode
The mode sets how eager a transformation should be applied.
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, Optional< bool > UserAllowPeeling, Optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam", "Unroll and Jam loops", false, false) INITIALIZE_PASS_END(LoopUnrollAndJam
Pass * createLoopUnrollAndJamPass(int OptLevel=2)