43#define DEBUG_TYPE "loop-cache-cost"
47 cl::desc(
"Use this to specify the default trip count of a loop"));
54 cl::desc(
"Use this to specify the max. distance between array elements "
55 "accessed in a loop so that the elements are classified to have "
63 assert(!
Loops.empty() &&
"Expecting a non-empy loop vector");
68 if (ParentLoop ==
nullptr) {
69 assert(
Loops.size() == 1 &&
"Expecting a single loop");
103 return StepRec == &ElemSize;
119 <<
" could not be computed, using DefaultTripCount\n");
131 OS << R.StoreOrLoadInst;
132 OS <<
", IsValid=false.";
136 OS << *R.BasePointer;
137 for (
const SCEV *Subscript : R.Subscripts)
138 OS <<
"[" << *Subscript <<
"]";
142 OS <<
"[" << *
Size <<
"]";
149 : StoreOrLoadInst(StoreOrLoadInst), SE(SE) {
151 "Expecting a load or store instruction");
153 IsValid = delinearize(LI);
162 assert(IsValid &&
"Expecting a valid reference");
164 if (BasePointer !=
Other.getBasePointer() && !isAliased(
Other,
AA)) {
166 <<
"No spacial reuse: different base pointers\n");
171 if (NumSubscripts !=
Other.getNumSubscripts()) {
173 <<
"No spacial reuse: different number of subscripts\n");
182 << *
Other.getSubscript(SubNum) <<
"\n");
190 const SCEV *OtherLastSubscript =
Other.getLastSubscript();
192 SE.getMinusSCEV(LastSubscript, OtherLastSubscript));
194 if (Diff ==
nullptr) {
196 <<
"No spacial reuse, difference between subscript:\n\t"
197 << *LastSubscript <<
"\n\t" << OtherLastSubscript
198 <<
"\nis not constant.\n");
202 bool InSameCacheLine = (Diff->
getValue()->getSExtValue() < CLS);
211 return InSameCacheLine;
216 unsigned MaxDistance,
const Loop &L,
218 assert(IsValid &&
"Expecting a valid reference");
220 if (BasePointer !=
Other.getBasePointer() && !isAliased(
Other,
AA)) {
222 <<
"No temporal reuse: different base pointer\n");
226 std::unique_ptr<Dependence>
D =
234 if (
D->isLoopIndependent()) {
242 int LoopDepth = L.getLoopDepth();
243 int Levels =
D->getLevels();
244 for (
int Level = 1; Level <= Levels; ++Level) {
245 const SCEV *Distance =
D->getDistance(Level);
248 if (SCEVConst ==
nullptr) {
254 if (Level != LoopDepth && !CI.
isZero()) {
256 <<
"No temporal reuse: distance is not zero at depth=" << Level
259 }
else if (Level == LoopDepth && CI.
getSExtValue() > MaxDistance) {
262 <<
"No temporal reuse: distance is greater than MaxDistance at depth="
273 unsigned CLS)
const {
274 assert(IsValid &&
"Expecting a valid reference");
276 dbgs().
indent(2) <<
"Computing cache cost for:\n";
281 if (isLoopInvariant(L)) {
287 assert(TripCount &&
"Expecting valid TripCount");
290 const SCEV *RefCost =
nullptr;
291 const SCEV *Stride =
nullptr;
292 if (isConsecutive(L, Stride, CLS)) {
295 assert(Stride !=
nullptr &&
296 "Stride should not be null for consecutive access!");
299 Stride = SE.getNoopOrAnyExtend(Stride, WiderType);
300 TripCount = SE.getNoopOrZeroExtend(TripCount, WiderType);
301 const SCEV *Numerator = SE.getMulExpr(Stride, TripCount);
310 <<
"Access is consecutive: RefCost=(TripCount*Stride)/CLS="
311 << *RefCost <<
"\n");
322 int Index = getSubscriptIndex(L);
323 assert(Index >= 0 &&
"Could not locate a valid Index");
328 const SCEV *TripCount =
333 RefCost = SE.getMulExpr(SE.getNoopOrZeroExtend(RefCost, WiderType),
334 SE.getNoopOrZeroExtend(TripCount, WiderType));
338 <<
"Access is not consecutive: RefCost=" << *RefCost <<
"\n");
340 assert(RefCost &&
"Expecting a valid RefCost");
347 return ConstantCost->getValue()->getLimitedValue(
348 std::numeric_limits<int64_t>::max());
351 <<
"RefCost is not a constant! Setting to RefCost=InvalidCost "
352 "(invalid value).\n");
357bool IndexedReference::tryDelinearizeFixedSize(
367 SE.
getConstant(Subscripts[Idx]->getType(), ArraySizes[Idx - 1]));
370 dbgs() <<
"Delinearized subscripts of fixed-size array\n"
377bool IndexedReference::delinearize(
const LoopInfo &LI) {
378 assert(Subscripts.empty() &&
"Subscripts should be empty");
379 assert(Sizes.empty() &&
"Sizes should be empty");
380 assert(!IsValid &&
"Should be called once from the constructor");
381 LLVM_DEBUG(
dbgs() <<
"Delinearizing: " << StoreOrLoadInst <<
"\n");
383 const SCEV *ElemSize = SE.getElementSize(&StoreOrLoadInst);
384 const BasicBlock *BB = StoreOrLoadInst.getParent();
387 const SCEV *AccessFn =
391 if (BasePointer ==
nullptr) {
394 <<
"ERROR: failed to delinearize, can't identify base pointer\n");
398 bool IsFixedSize =
false;
400 if (tryDelinearizeFixedSize(AccessFn, Subscripts)) {
403 Sizes.push_back(ElemSize);
405 <<
"', AccessFn: " << *AccessFn <<
"\n");
408 AccessFn = SE.getMinusSCEV(AccessFn, BasePointer);
413 <<
"', AccessFn: " << *AccessFn <<
"\n");
415 SE.getElementSize(&StoreOrLoadInst));
418 if (Subscripts.empty() || Sizes.empty() ||
419 Subscripts.size() != Sizes.size()) {
424 <<
"ERROR: failed to delinearize reference\n");
438 if (StepRec && SE.isKnownNegative(StepRec))
439 AccessFn = SE.getAddRecExpr(AccessFnAR->
getStart(),
440 SE.getNegativeSCEV(StepRec),
443 const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize);
444 Subscripts.push_back(Div);
445 Sizes.push_back(ElemSize);
448 return all_of(Subscripts, [&](
const SCEV *Subscript) {
449 return isSimpleAddRecurrence(*Subscript, *L);
456bool IndexedReference::isLoopInvariant(
const Loop &L)
const {
458 assert(Addr !=
nullptr &&
"Expecting either a load or a store instruction");
459 assert(SE.isSCEVable(Addr->
getType()) &&
"Addr should be SCEVable");
461 if (SE.isLoopInvariant(SE.getSCEV(Addr), &L))
466 bool allCoeffForLoopAreZero =
all_of(Subscripts, [&](
const SCEV *Subscript) {
467 return isCoeffForLoopZeroOrInvariant(*Subscript, L);
470 return allCoeffForLoopAreZero;
473bool IndexedReference::isConsecutive(
const Loop &L,
const SCEV *&Stride,
474 unsigned CLS)
const {
477 const SCEV *LastSubscript = Subscripts.back();
478 for (
const SCEV *Subscript : Subscripts) {
479 if (Subscript == LastSubscript)
481 if (!isCoeffForLoopZeroOrInvariant(*Subscript, L))
486 const SCEV *Coeff = getLastCoefficient();
487 const SCEV *ElemSize = Sizes.back();
499 Stride = SE.getMulExpr(SE.getNoopOrSignExtend(Coeff, WiderType),
500 SE.getNoopOrSignExtend(ElemSize, WiderType));
503 Stride = SE.isKnownNegative(Stride) ? SE.getNegativeSCEV(Stride) : Stride;
507int IndexedReference::getSubscriptIndex(
const Loop &L)
const {
510 if (AR && AR->
getLoop() == &L) {
517const SCEV *IndexedReference::getLastCoefficient()
const {
523bool IndexedReference::isCoeffForLoopZeroOrInvariant(
const SCEV &Subscript,
524 const Loop &L)
const {
526 return (AR !=
nullptr) ? AR->
getLoop() != &
L
527 : SE.isLoopInvariant(&Subscript, &L);
530bool IndexedReference::isSimpleAddRecurrence(
const SCEV &Subscript,
531 const Loop &L)
const {
544 if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
561 for (
const auto &LC : CC.LoopCosts) {
562 const Loop *L = LC.first;
563 OS <<
"Loop '" << L->getName() <<
"' has cost = " << LC.second <<
"\n";
571 std::optional<unsigned> TRT)
573 TTI(TTI), AA(AA), DI(DI) {
574 assert(!Loops.empty() &&
"Expecting a non-empty loop vector.");
576 for (
const Loop *L : Loops) {
577 unsigned TripCount = SE.getSmallConstantTripCount(L);
579 TripCounts.push_back({L, TripCount});
582 calculateCacheFootprint();
585std::unique_ptr<CacheCost>
589 LLVM_DEBUG(
dbgs() <<
"Expecting the outermost loop in a loop nest\n");
597 LLVM_DEBUG(
dbgs() <<
"Cannot compute cache cost of loop nest with more "
598 "than one innermost loop\n");
602 return std::make_unique<CacheCost>(Loops, AR.
LI, AR.
SE, AR.
TTI, AR.
AA, DI, TRT);
605void CacheCost::calculateCacheFootprint() {
608 if (!populateReferenceGroups(RefGroups))
615 [L](
const LoopCacheCostTy &LCC) {
return LCC.first == L; }) &&
616 "Should not add duplicate element");
617 CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups);
618 LoopCosts.
push_back(std::make_pair(L, LoopCost));
626 assert(RefGroups.
empty() &&
"Reference groups should be empty");
628 unsigned CLS = TTI.getCacheLineSize();
630 assert(InnerMostLoop !=
nullptr &&
"Expecting a valid innermost loop");
632 for (BasicBlock *BB : InnerMostLoop->
getBlocks()) {
633 for (Instruction &
I : *BB) {
637 std::unique_ptr<IndexedReference>
R(
new IndexedReference(
I, LI, SE));
643 const IndexedReference &Representative = *RefGroup.front();
645 dbgs() <<
"References:\n";
662 std::optional<bool> HasTemporalReuse =
663 R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA);
664 std::optional<bool> HasSpacialReuse =
665 R->hasSpacialReuse(Representative, CLS, AA);
667 if ((HasTemporalReuse && *HasTemporalReuse) ||
668 (HasSpacialReuse && *HasSpacialReuse)) {
669 RefGroup.push_back(std::move(R));
678 RefGroups.push_back(std::move(RG));
683 if (RefGroups.empty())
687 dbgs() <<
"\nIDENTIFIED REFERENCE GROUPS:\n";
690 dbgs().
indent(2) <<
"RefGroup " << n <<
":\n";
691 for (
const auto &
IR : RG)
702CacheCost::computeLoopCacheCost(
const Loop &L,
704 if (!
L.isLoopSimplifyForm())
708 <<
"' as innermost loop.\n");
712 for (
const auto &TC : TripCounts) {
715 TripCountsProduct *= TC.second;
720 CacheCostTy RefGroupCost = computeRefGroupCacheCost(RG, L);
721 LoopCost += RefGroupCost * TripCountsProduct;
725 <<
"' has cost=" << LoopCost <<
"\n");
731 const Loop &L)
const {
732 assert(!RG.
empty() &&
"Reference group should have at least one member.");
734 const IndexedReference *Representative = RG.
front().get();
744 Function *
F = L.getHeader()->getParent();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file builds on the ADT/GraphTraits.h file to build a generic breadth first graph iterator.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Legalize the Machine IR a function s Machine IR
static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize, const Loop &L, ScalarEvolution &SE)
static cl::opt< unsigned > TemporalReuseThreshold("temporal-reuse-threshold", cl::init(2), cl::Hidden, cl::desc("Use this to specify the max. distance between array elements " "accessed in a loop so that the elements are classified to have " "temporal reuse"))
static const SCEV * computeTripCount(const Loop &L, const SCEV &ElemSize, ScalarEvolution &SE)
Compute the trip count for the given loop L or assume a default value if it is not a compile time con...
static Loop * getInnerMostLoop(const LoopVectorTy &Loops)
Retrieve the innermost loop in the given loop nest Loops.
static cl::opt< unsigned > DefaultTripCount("default-trip-count", cl::init(100), cl::Hidden, cl::desc("Use this to specify the default trip count of a loop"))
This file defines the interface for the loop cache analysis.
Provides some synthesis utilities to produce sequences of values.
This file defines the SmallVector class.
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are must-alias.
CacheCost represents the estimated cost of a inner loop as the number of cache lines used by the memo...
CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, TargetTransformInfo &TTI, AAResults &AA, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Construct a CacheCost object for the loop nest described by Loops.
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
@ ICMP_ULT
unsigned less than
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
DependenceInfo - This class is the main dependence-analysis driver.
LLVM_ABI std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool UnderRuntimeAssumptions=false)
depends - Tests for a dependence between the Src and Dst instructions.
Represents a memory reference as a base pointer and a set of indexing operations.
CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const
Compute the cost of the reference w.r.t.
const SCEV * getSubscript(unsigned SubNum) const
std::optional< bool > hasSpacialReuse(const IndexedReference &Other, unsigned CLS, AAResults &AA) const
Return true/false if the current object and the indexed reference Other are/aren't in the same cache ...
std::optional< bool > hasTemporalReuse(const IndexedReference &Other, unsigned MaxDistance, const Loop &L, DependenceInfo &DI, AAResults &AA) const
Return true if the current object and the indexed reference Other have distance smaller than MaxDista...
IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI, ScalarEvolution &SE)
Construct an indexed reference given a StoreOrLoadInst instruction.
const SCEV * getLastSubscript() const
size_t getNumSubscripts() const
static InstructionCost getInvalid(CostType Val=0)
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
unsigned getLoopDepth() const
Return the nesting level of this loop.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
This class represents a constant integer value.
ConstantInt * getValue() const
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI Type * getExtendedType() const
Given scalar/vector integer type, returns a type with elements twice as wide as in the original type.
Type * getType() const
All values are typed, get the type of this value.
This class implements an extremely fast bulk output stream that can only output to a stream.
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
Abstract Attribute helper functions.
@ BasicBlock
Various leaf nodes.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
InstructionCost CacheCostTy
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
SmallVector< std::unique_ptr< IndexedReference >, 8 > ReferenceGroupTy
A reference group represents a set of memory references that exhibit temporal or spacial reuse.
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
auto dyn_cast_or_null(const Y &Val)
SmallVector< Loop *, 8 > LoopVectorTy
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
iterator_range< bf_iterator< T > > breadth_first(const T &G)
bool tryDelinearizeFixedSizeImpl(ScalarEvolution *SE, Instruction *Inst, const SCEV *AccessFn, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< int > &Sizes)
Implementation of fixed size array delinearization.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
void delinearize(ScalarEvolution &SE, const SCEV *Expr, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< const SCEV * > &Sizes, const SCEV *ElementSize)
Split this SCEVAddRecExpr into two vectors of SCEVs representing the subscripts and sizes of an array...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
SmallVector< ReferenceGroupTy, 8 > ReferenceGroupsTy
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI