54#define DEBUG_TYPE "loop-unroll-and-jam"
59 "llvm.loop.unroll_and_jam.followup_all";
61 "llvm.loop.unroll_and_jam.followup_inner";
63 "llvm.loop.unroll_and_jam.followup_outer";
65 "llvm.loop.unroll_and_jam.followup_remainder_inner";
67 "llvm.loop.unroll_and_jam.followup_remainder_outer";
72 cl::desc(
"Allows loops to be unroll-and-jammed."));
76 cl::desc(
"Use this unroll count for all loops including those with "
77 "unroll_and_jam_count pragma values, for testing purposes"));
81 cl::desc(
"Threshold to use for inner loop when doing unroll and jam."));
85 cl::desc(
"Unrolled size limit for loops with an unroll_and_jam(full) or "
86 "unroll_count pragma."));
92 if (
MDNode *LoopID = L->getLoopID())
100 if (
MDNode *LoopID = L->getLoopID()) {
102 assert(LoopID->getNumOperands() > 0 &&
"requires at least one operand");
103 assert(LoopID->getOperand(0) == LoopID &&
"invalid loop id");
105 for (
unsigned I = 1,
E = LoopID->getNumOperands();
I <
E; ++
I) {
106 MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(
I));
132 "Unroll count hint metadata should have two operands.");
134 mdconst::extract<ConstantInt>(MD->
getOperand(1))->getZExtValue();
135 assert(Count >= 1 &&
"Unroll count must be positive.");
145 assert(LoopSize >= UP.
BEInsns &&
"LoopSize should not be less than BEInsns!");
156 unsigned OuterTripMultiple,
unsigned OuterLoopSize,
unsigned InnerTripCount,
164 unsigned MaxTripCount = 0;
165 bool UseUpperBound =
false;
167 L,
TTI, DT, LI, AC, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
168 false, OuterTripMultiple, OuterLoopSize, UP, PP,
170 if (ExplicitUnroll || UseUpperBound) {
173 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; explicit count set by "
174 "computeUnrollCount\n");
181 if (UserUnrollCount) {
193 if (PragmaCount > 0) {
194 UP.
Count = PragmaCount;
197 if ((UP.
AllowRemainder || (OuterTripMultiple % PragmaCount == 0)) &&
205 bool ExplicitUnrollAndJamCount = PragmaCount > 0 || UserUnrollCount;
206 bool ExplicitUnrollAndJam = PragmaEnableUnroll || ExplicitUnrollAndJamCount;
210 if (ExplicitUnrollAndJam)
215 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; can't create remainder and "
216 "inner loop too large\n");
232 if (ExplicitUnrollAndJam)
237 if (InnerTripCount && InnerLoopSize * InnerTripCount < UP.
Threshold) {
238 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; small inner loop count is "
239 "being left for the unroller\n");
248 dbgs() <<
"Won't unroll-and-jam; More than one inner loop block\n");
256 unsigned NumInvariant = 0;
259 if (
auto *Ld = dyn_cast<LoadInst>(&
I)) {
260 Value *V = Ld->getPointerOperand();
267 if (NumInvariant == 0) {
268 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; No loop invariant loads\n");
282 L, SE,
TTI,
nullptr,
nullptr, ORE, OptLevel, std::nullopt, std::nullopt,
283 std::nullopt, std::nullopt, std::nullopt, std::nullopt);
302 << L->getHeader()->getParent()->getName() <<
"] Loop %"
303 << L->getHeader()->getName() <<
"\n");
321 unsigned NumInlineCandidates;
322 bool NotDuplicatable;
326 Loop *SubLoop = L->getSubLoops()[0];
333 LLVM_DEBUG(
dbgs() <<
" Outer Loop Size: " << OuterLoopSizeIC <<
"\n");
334 LLVM_DEBUG(
dbgs() <<
" Inner Loop Size: " << InnerLoopSizeIC <<
"\n");
337 LLVM_DEBUG(
dbgs() <<
" Not unrolling loop which contains instructions"
338 <<
" with invalid cost.\n");
341 unsigned InnerLoopSize = *InnerLoopSizeIC.
getValue();
342 unsigned OuterLoopSize = *OuterLoopSizeIC.
getValue();
344 if (NotDuplicatable) {
345 LLVM_DEBUG(
dbgs() <<
" Not unrolling loop which contains non-duplicatable "
349 if (NumInlineCandidates != 0) {
350 LLVM_DEBUG(
dbgs() <<
" Not unrolling loop with inlinable calls.\n");
355 dbgs() <<
" Not unrolling loop with convergent instructions.\n");
360 MDNode *OrigOuterLoopID = L->getLoopID();
369 if (NewInnerEpilogueLoopID)
370 SubLoop->
setLoopID(*NewInnerEpilogueLoopID);
381 L, SubLoop,
TTI, DT, LI, &AC, SE, EphValues, &ORE, OuterTripCount,
382 OuterTripMultiple, OuterLoopSize, InnerTripCount, InnerLoopSize, UP, PP);
386 if (OuterTripCount && UP.
Count > OuterTripCount)
387 UP.
Count = OuterTripCount;
389 Loop *EpilogueOuterLoop =
nullptr;
392 &SE, &DT, &AC, &
TTI, &ORE, &EpilogueOuterLoop);
395 if (EpilogueOuterLoop) {
399 if (NewOuterEpilogueLoopID)
400 EpilogueOuterLoop->
setLoopID(*NewOuterEpilogueLoopID);
403 std::optional<MDNode *> NewInnerLoopID =
415 if (NewOuterLoopID) {
416 L->setLoopID(*NewOuterLoopID);
426 L->setLoopAlreadyUnrolled();
437 bool DidSomething =
false;
445 while (!Worklist.
empty()) {
447 std::string LoopName = std::string(L->getName());
453 U.markLoopAsDeleted(*L, LoopName);
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
This header provides classes for managing per-loop analyses.
This file defines the interface for the loop nest analysis.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
static const char *const LLVMLoopUnrollAndJamFollowupInner
static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner
static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter
static MDNode * getUnrollMetadataForLoop(const Loop *L, StringRef Name)
static const char *const LLVMLoopUnrollAndJamFollowupOuter
static cl::opt< bool > AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden, cl::desc("Allows loops to be unroll-and-jammed."))
static uint64_t getUnrollAndJammedLoopSize(unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
static bool computeUnrollAndJamCount(Loop *L, Loop *SubLoop, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned OuterTripCount, unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount, unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP)
static cl::opt< unsigned > UnrollAndJamCount("unroll-and-jam-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_and_jam_count pragma values, for testing purposes"))
static LoopUnrollResult tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, DependenceInfo &DI, OptimizationRemarkEmitter &ORE, int OptLevel)
static bool hasAnyUnrollPragma(const Loop *L, StringRef Prefix)
static cl::opt< unsigned > PragmaUnrollAndJamThreshold("pragma-unroll-and-jam-threshold", cl::init(1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or " "unroll_count pragma."))
static cl::opt< unsigned > UnrollAndJamThreshold("unroll-and-jam-threshold", cl::init(60), cl::Hidden, cl::desc("Threshold to use for inner loop when doing unroll and jam."))
static unsigned unrollAndJamCountPragmaValue(const Loop *L)
static bool hasUnrollAndJamEnablePragma(const Loop *L)
static const char *const LLVMLoopUnrollAndJamFollowupAll
This header defines various interfaces for pass management in LLVM.
This file provides a priority worklist.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
A container for analyses that lazily runs them and caches their results.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
DependenceInfo - This class is the main dependence-analysis driver.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full range of operator support required for arithmetic and comparisons.
This class provides an interface for updating the loop pass manager based on mutations to the loop nest.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
This analysis provides information for a loop nest.
This class represents a loop nest and can be used to query its properties.
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Function * getParent() const
Return the function to which the loop-nest belongs.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
const MDOperand & getOperand(unsigned I) const
unsigned getNumOperands() const
Return number of MDNode operands.
StringRef getString() const
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
bool empty() const
Determine if the PriorityWorklist is empty or not.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value, if possible.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
A version of PriorityWorklist that selects small size optimized data structures for the vector and map.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across all small sizes.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
bool startswith(StringRef Prefix) const
LLVM Value Representation.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT, DependenceInfo &DI, LoopInfo &LI)
std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
TransformationMode hasUnrollAndJamTransformation(const Loop *L)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
@ PartiallyUnrolled
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
@ Unmodified
The loop was not modified.
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
TransformationMode
The mode sets how eager a transformation should be applied.
@ TM_ForcedByUser
The transformation was directed by the user, e.g. by a #pragma in the source code or a compiler option.
@ TM_Disable
The transformation should not be applied.
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user specified parameters.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
InstructionCost ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
LoopUnrollResult UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop=nullptr)
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (for example, "llvm.loop.unroll.count").
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
The adaptor from a function pass to a loop pass computes these analyses and makes them available to the loop passes "for free".
TargetTransformInfo & TTI