LLVM 22.0.0git
LoopCacheAnalysis.cpp
Go to the documentation of this file.
1//===- LoopCacheAnalysis.cpp - Loop Cache Analysis -------------------------==//
2//
3// The LLVM Compiler Infrastructure
4//
5// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6// See https://llvm.org/LICENSE.txt for license information.
7// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8//
9//===----------------------------------------------------------------------===//
10///
11/// \file
12/// This file defines the implementation for the loop cache analysis.
13/// The implementation is largely based on the following paper:
14///
15/// Compiler Optimizations for Improving Data Locality
16/// By: Steve Carr, Katherine S. McKinley, Chau-Wen Tseng
17/// http://www.cs.utexas.edu/users/mckinley/papers/asplos-1994.pdf
18///
19/// The general approach taken to estimate the number of cache lines used by the
20/// memory references in an inner loop is:
21/// 1. Partition memory references that exhibit temporal or spacial reuse
22/// into reference groups.
23/// 2. For each loop L in the a loop nest LN:
24/// a. Compute the cost of the reference group
25/// b. Compute the loop cost by summing up the reference groups costs
26//===----------------------------------------------------------------------===//
27
30#include "llvm/ADT/Sequence.h"
39#include "llvm/Support/Debug.h"
40
41using namespace llvm;
42
43#define DEBUG_TYPE "loop-cache-cost"
44
46 "default-trip-count", cl::init(100), cl::Hidden,
47 cl::desc("Use this to specify the default trip count of a loop"));
48
49// In this analysis two array references are considered to exhibit temporal
50// reuse if they access either the same memory location, or a memory location
51// with distance smaller than a configurable threshold.
53 "temporal-reuse-threshold", cl::init(2), cl::Hidden,
54 cl::desc("Use this to specify the max. distance between array elements "
55 "accessed in a loop so that the elements are classified to have "
56 "temporal reuse"));
57
58/// Retrieve the innermost loop in the given loop nest \p Loops. It returns a
59/// nullptr if any loops in the loop vector supplied has more than one sibling.
60/// The loop vector is expected to contain loops collected in breadth-first
61/// order.
63 assert(!Loops.empty() && "Expecting a non-empy loop vector");
64
65 Loop *LastLoop = Loops.back();
66 Loop *ParentLoop = LastLoop->getParentLoop();
67
68 if (ParentLoop == nullptr) {
69 assert(Loops.size() == 1 && "Expecting a single loop");
70 return LastLoop;
71 }
72
73 return (llvm::is_sorted(Loops,
74 [](const Loop *L1, const Loop *L2) {
75 return L1->getLoopDepth() < L2->getLoopDepth();
76 }))
77 ? LastLoop
78 : nullptr;
79}
80
81static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize,
82 const Loop &L, ScalarEvolution &SE) {
83 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&AccessFn);
84 if (!AR || !AR->isAffine())
85 return false;
86
87 assert(AR->getLoop() && "AR should have a loop");
88
89 // Check that start and increment are not add recurrences.
90 const SCEV *Start = AR->getStart();
91 const SCEV *Step = AR->getStepRecurrence(SE);
92 if (isa<SCEVAddRecExpr>(Start) || isa<SCEVAddRecExpr>(Step))
93 return false;
94
95 // Check that start and increment are both invariant in the loop.
96 if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
97 return false;
98
99 const SCEV *StepRec = AR->getStepRecurrence(SE);
100 if (StepRec && SE.isKnownNegative(StepRec))
101 StepRec = SE.getNegativeSCEV(StepRec);
102
103 return StepRec == &ElemSize;
104}
105
106/// Compute the trip count for the given loop \p L or assume a default value if
107/// it is not a compile time constant. Return the SCEV expression for the trip
108/// count.
109static const SCEV *computeTripCount(const Loop &L, const SCEV &ElemSize,
110 ScalarEvolution &SE) {
111 const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L);
112 const SCEV *TripCount = (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
113 isa<SCEVConstant>(BackedgeTakenCount))
114 ? SE.getTripCountFromExitCount(BackedgeTakenCount)
115 : nullptr;
116
117 if (!TripCount) {
118 LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName()
119 << " could not be computed, using DefaultTripCount\n");
120 TripCount = SE.getConstant(ElemSize.getType(), DefaultTripCount);
121 }
122
123 return TripCount;
124}
125
126//===----------------------------------------------------------------------===//
127// IndexedReference implementation
128//
130 if (!R.IsValid) {
131 OS << R.StoreOrLoadInst;
132 OS << ", IsValid=false.";
133 return OS;
134 }
135
136 OS << *R.BasePointer;
137 for (const SCEV *Subscript : R.Subscripts)
138 OS << "[" << *Subscript << "]";
139
140 OS << ", Sizes: ";
141 for (const SCEV *Size : R.Sizes)
142 OS << "[" << *Size << "]";
143
144 return OS;
145}
146
148 const LoopInfo &LI, ScalarEvolution &SE)
149 : StoreOrLoadInst(StoreOrLoadInst), SE(SE) {
150 assert((isa<StoreInst>(StoreOrLoadInst) || isa<LoadInst>(StoreOrLoadInst)) &&
151 "Expecting a load or store instruction");
152
153 IsValid = delinearize(LI);
154 if (IsValid)
155 LLVM_DEBUG(dbgs().indent(2) << "Succesfully delinearized: " << *this
156 << "\n");
157}
158
159std::optional<bool>
161 AAResults &AA) const {
162 assert(IsValid && "Expecting a valid reference");
163
164 if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
166 << "No spacial reuse: different base pointers\n");
167 return false;
168 }
169
170 unsigned NumSubscripts = getNumSubscripts();
171 if (NumSubscripts != Other.getNumSubscripts()) {
173 << "No spacial reuse: different number of subscripts\n");
174 return false;
175 }
176
177 // all subscripts must be equal, except the leftmost one (the last one).
178 for (auto SubNum : seq<unsigned>(0, NumSubscripts - 1)) {
179 if (getSubscript(SubNum) != Other.getSubscript(SubNum)) {
180 LLVM_DEBUG(dbgs().indent(2) << "No spacial reuse, different subscripts: "
181 << "\n\t" << *getSubscript(SubNum) << "\n\t"
182 << *Other.getSubscript(SubNum) << "\n");
183 return false;
184 }
185 }
186
187 // the difference between the last subscripts must be less than the cache line
188 // size.
189 const SCEV *LastSubscript = getLastSubscript();
190 const SCEV *OtherLastSubscript = Other.getLastSubscript();
192 SE.getMinusSCEV(LastSubscript, OtherLastSubscript));
193
194 if (Diff == nullptr) {
196 << "No spacial reuse, difference between subscript:\n\t"
197 << *LastSubscript << "\n\t" << OtherLastSubscript
198 << "\nis not constant.\n");
199 return std::nullopt;
200 }
201
202 bool InSameCacheLine = (Diff->getValue()->getSExtValue() < CLS);
203
204 LLVM_DEBUG({
205 if (InSameCacheLine)
206 dbgs().indent(2) << "Found spacial reuse.\n";
207 else
208 dbgs().indent(2) << "No spacial reuse.\n";
209 });
210
211 return InSameCacheLine;
212}
213
214std::optional<bool>
216 unsigned MaxDistance, const Loop &L,
217 DependenceInfo &DI, AAResults &AA) const {
218 assert(IsValid && "Expecting a valid reference");
219
220 if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
222 << "No temporal reuse: different base pointer\n");
223 return false;
224 }
225
226 std::unique_ptr<Dependence> D =
227 DI.depends(&StoreOrLoadInst, &Other.StoreOrLoadInst);
228
229 if (D == nullptr) {
230 LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: no dependence\n");
231 return false;
232 }
233
234 if (D->isLoopIndependent()) {
235 LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
236 return true;
237 }
238
239 // Check the dependence distance at every loop level. There is temporal reuse
240 // if the distance at the given loop's depth is small (|d| <= MaxDistance) and
241 // it is zero at every other loop level.
242 int LoopDepth = L.getLoopDepth();
243 int Levels = D->getLevels();
244 for (int Level = 1; Level <= Levels; ++Level) {
245 const SCEV *Distance = D->getDistance(Level);
246 const SCEVConstant *SCEVConst = dyn_cast_or_null<SCEVConstant>(Distance);
247
248 if (SCEVConst == nullptr) {
249 LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n");
250 return std::nullopt;
251 }
252
253 const ConstantInt &CI = *SCEVConst->getValue();
254 if (Level != LoopDepth && !CI.isZero()) {
256 << "No temporal reuse: distance is not zero at depth=" << Level
257 << "\n");
258 return false;
259 } else if (Level == LoopDepth && CI.getSExtValue() > MaxDistance) {
261 dbgs().indent(2)
262 << "No temporal reuse: distance is greater than MaxDistance at depth="
263 << Level << "\n");
264 return false;
265 }
266 }
267
268 LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
269 return true;
270}
271
273 unsigned CLS) const {
274 assert(IsValid && "Expecting a valid reference");
275 LLVM_DEBUG({
276 dbgs().indent(2) << "Computing cache cost for:\n";
277 dbgs().indent(4) << *this << "\n";
278 });
279
280 // If the indexed reference is loop invariant the cost is one.
281 if (isLoopInvariant(L)) {
282 LLVM_DEBUG(dbgs().indent(4) << "Reference is loop invariant: RefCost=1\n");
283 return 1;
284 }
285
286 const SCEV *TripCount = computeTripCount(L, *Sizes.back(), SE);
287 assert(TripCount && "Expecting valid TripCount");
288 LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n");
289
290 const SCEV *RefCost = nullptr;
291 const SCEV *Stride = nullptr;
292 if (isConsecutive(L, Stride, CLS)) {
293 // If the indexed reference is 'consecutive' the cost is
294 // (TripCount*Stride)/CLS.
295 assert(Stride != nullptr &&
296 "Stride should not be null for consecutive access!");
297 Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType());
298 const SCEV *CacheLineSize = SE.getConstant(WiderType, CLS);
299 Stride = SE.getNoopOrAnyExtend(Stride, WiderType);
300 TripCount = SE.getNoopOrZeroExtend(TripCount, WiderType);
301 const SCEV *Numerator = SE.getMulExpr(Stride, TripCount);
302 // Round the fractional cost up to the nearest integer number.
303 // The impact is the most significant when cost is calculated
304 // to be a number less than one, because it makes more sense
305 // to say one cache line is used rather than zero cache line
306 // is used.
307 RefCost = SE.getUDivCeilSCEV(Numerator, CacheLineSize);
308
310 << "Access is consecutive: RefCost=(TripCount*Stride)/CLS="
311 << *RefCost << "\n");
312 } else {
313 // If the indexed reference is not 'consecutive' the cost is proportional to
314 // the trip count and the depth of the dimension which the subject loop
315 // subscript is accessing. We try to estimate this by multiplying the cost
316 // by the trip counts of loops corresponding to the inner dimensions. For
317 // example, given the indexed reference 'A[i][j][k]', and assuming the
318 // i-loop is in the innermost position, the cost would be equal to the
319 // iterations of the i-loop multiplied by iterations of the j-loop.
320 RefCost = TripCount;
321
322 int Index = getSubscriptIndex(L);
323 assert(Index >= 0 && "Could not locate a valid Index");
324
325 for (unsigned I = Index + 1; I < getNumSubscripts() - 1; ++I) {
327 assert(AR && AR->getLoop() && "Expecting valid loop");
328 const SCEV *TripCount =
329 computeTripCount(*AR->getLoop(), *Sizes.back(), SE);
330 Type *WiderType = SE.getWiderType(RefCost->getType(), TripCount->getType());
331 // For the multiplication result to fit, request a type twice as wide.
332 WiderType = WiderType->getExtendedType();
333 RefCost = SE.getMulExpr(SE.getNoopOrZeroExtend(RefCost, WiderType),
334 SE.getNoopOrZeroExtend(TripCount, WiderType));
335 }
336
338 << "Access is not consecutive: RefCost=" << *RefCost << "\n");
339 }
340 assert(RefCost && "Expecting a valid RefCost");
341
342 // Attempt to fold RefCost into a constant.
343 // CacheCostTy is a signed integer, but the tripcount value can be large
344 // and may not fit, so saturate/limit the value to the maximum signed
345 // integer value.
346 if (auto ConstantCost = dyn_cast<SCEVConstant>(RefCost))
347 return ConstantCost->getValue()->getLimitedValue(
348 std::numeric_limits<int64_t>::max());
349
351 << "RefCost is not a constant! Setting to RefCost=InvalidCost "
352 "(invalid value).\n");
353
355}
356
357bool IndexedReference::tryDelinearizeFixedSize(
358 const SCEV *AccessFn, SmallVectorImpl<const SCEV *> &Subscripts) {
359 SmallVector<int, 4> ArraySizes;
360 if (!tryDelinearizeFixedSizeImpl(&SE, &StoreOrLoadInst, AccessFn, Subscripts,
361 ArraySizes))
362 return false;
363
364 // Populate Sizes with scev expressions to be used in calculations later.
365 for (auto Idx : seq<unsigned>(1, Subscripts.size()))
366 Sizes.push_back(
367 SE.getConstant(Subscripts[Idx]->getType(), ArraySizes[Idx - 1]));
368
369 LLVM_DEBUG({
370 dbgs() << "Delinearized subscripts of fixed-size array\n"
371 << "GEP:" << *getLoadStorePointerOperand(&StoreOrLoadInst)
372 << "\n";
373 });
374 return true;
375}
376
377bool IndexedReference::delinearize(const LoopInfo &LI) {
378 assert(Subscripts.empty() && "Subscripts should be empty");
379 assert(Sizes.empty() && "Sizes should be empty");
380 assert(!IsValid && "Should be called once from the constructor");
381 LLVM_DEBUG(dbgs() << "Delinearizing: " << StoreOrLoadInst << "\n");
382
383 const SCEV *ElemSize = SE.getElementSize(&StoreOrLoadInst);
384 const BasicBlock *BB = StoreOrLoadInst.getParent();
385
386 if (Loop *L = LI.getLoopFor(BB)) {
387 const SCEV *AccessFn =
388 SE.getSCEVAtScope(getPointerOperand(&StoreOrLoadInst), L);
389
390 BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFn));
391 if (BasePointer == nullptr) {
393 dbgs().indent(2)
394 << "ERROR: failed to delinearize, can't identify base pointer\n");
395 return false;
396 }
397
398 bool IsFixedSize = false;
399 // Try to delinearize fixed-size arrays.
400 if (tryDelinearizeFixedSize(AccessFn, Subscripts)) {
401 IsFixedSize = true;
402 // The last element of Sizes is the element size.
403 Sizes.push_back(ElemSize);
404 LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
405 << "', AccessFn: " << *AccessFn << "\n");
406 }
407
408 AccessFn = SE.getMinusSCEV(AccessFn, BasePointer);
409
410 // Try to delinearize parametric-size arrays.
411 if (!IsFixedSize) {
412 LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
413 << "', AccessFn: " << *AccessFn << "\n");
414 llvm::delinearize(SE, AccessFn, Subscripts, Sizes,
415 SE.getElementSize(&StoreOrLoadInst));
416 }
417
418 if (Subscripts.empty() || Sizes.empty() ||
419 Subscripts.size() != Sizes.size()) {
420 // Attempt to determine whether we have a single dimensional array access.
421 // before giving up.
422 if (!isOneDimensionalArray(*AccessFn, *ElemSize, *L, SE)) {
423 LLVM_DEBUG(dbgs().indent(2)
424 << "ERROR: failed to delinearize reference\n");
425 Subscripts.clear();
426 Sizes.clear();
427 return false;
428 }
429
430 // The array may be accessed in reverse, for example:
431 // for (i = N; i > 0; i--)
432 // A[i] = 0;
433 // In this case, reconstruct the access function using the absolute value
434 // of the step recurrence.
435 const SCEVAddRecExpr *AccessFnAR = dyn_cast<SCEVAddRecExpr>(AccessFn);
436 const SCEV *StepRec = AccessFnAR ? AccessFnAR->getStepRecurrence(SE) : nullptr;
437
438 if (StepRec && SE.isKnownNegative(StepRec))
439 AccessFn = SE.getAddRecExpr(AccessFnAR->getStart(),
440 SE.getNegativeSCEV(StepRec),
441 AccessFnAR->getLoop(),
442 AccessFnAR->getNoWrapFlags());
443 const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize);
444 Subscripts.push_back(Div);
445 Sizes.push_back(ElemSize);
446 }
447
448 return all_of(Subscripts, [&](const SCEV *Subscript) {
449 return isSimpleAddRecurrence(*Subscript, *L);
450 });
451 }
452
453 return false;
454}
455
456bool IndexedReference::isLoopInvariant(const Loop &L) const {
457 Value *Addr = getPointerOperand(&StoreOrLoadInst);
458 assert(Addr != nullptr && "Expecting either a load or a store instruction");
459 assert(SE.isSCEVable(Addr->getType()) && "Addr should be SCEVable");
460
461 if (SE.isLoopInvariant(SE.getSCEV(Addr), &L))
462 return true;
463
464 // The indexed reference is loop invariant if none of the coefficients use
465 // the loop induction variable.
466 bool allCoeffForLoopAreZero = all_of(Subscripts, [&](const SCEV *Subscript) {
467 return isCoeffForLoopZeroOrInvariant(*Subscript, L);
468 });
469
470 return allCoeffForLoopAreZero;
471}
472
473bool IndexedReference::isConsecutive(const Loop &L, const SCEV *&Stride,
474 unsigned CLS) const {
475 // The indexed reference is 'consecutive' if the only coefficient that uses
476 // the loop induction variable is the last one...
477 const SCEV *LastSubscript = Subscripts.back();
478 for (const SCEV *Subscript : Subscripts) {
479 if (Subscript == LastSubscript)
480 continue;
481 if (!isCoeffForLoopZeroOrInvariant(*Subscript, L))
482 return false;
483 }
484
485 // ...and the access stride is less than the cache line size.
486 const SCEV *Coeff = getLastCoefficient();
487 const SCEV *ElemSize = Sizes.back();
488 Type *WiderType = SE.getWiderType(Coeff->getType(), ElemSize->getType());
489 // FIXME: This assumes that all values are signed integers which may
490 // be incorrect in unusual codes and incorrectly use sext instead of zext.
491 // for (uint32_t i = 0; i < 512; ++i) {
492 // uint8_t trunc = i;
493 // A[trunc] = 42;
494 // }
495 // This consecutively iterates twice over A. If `trunc` is sign-extended,
496 // we would conclude that this may iterate backwards over the array.
497 // However, LoopCacheAnalysis is heuristic anyway and transformations must
498 // not result in wrong optimizations if the heuristic was incorrect.
499 Stride = SE.getMulExpr(SE.getNoopOrSignExtend(Coeff, WiderType),
500 SE.getNoopOrSignExtend(ElemSize, WiderType));
501 const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
502
503 Stride = SE.isKnownNegative(Stride) ? SE.getNegativeSCEV(Stride) : Stride;
504 return SE.isKnownPredicate(ICmpInst::ICMP_ULT, Stride, CacheLineSize);
505}
506
507int IndexedReference::getSubscriptIndex(const Loop &L) const {
508 for (auto Idx : seq<int>(0, getNumSubscripts())) {
509 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(getSubscript(Idx));
510 if (AR && AR->getLoop() == &L) {
511 return Idx;
512 }
513 }
514 return -1;
515}
516
517const SCEV *IndexedReference::getLastCoefficient() const {
518 const SCEV *LastSubscript = getLastSubscript();
519 auto *AR = cast<SCEVAddRecExpr>(LastSubscript);
520 return AR->getStepRecurrence(SE);
521}
522
523bool IndexedReference::isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
524 const Loop &L) const {
525 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&Subscript);
526 return (AR != nullptr) ? AR->getLoop() != &L
527 : SE.isLoopInvariant(&Subscript, &L);
528}
529
530bool IndexedReference::isSimpleAddRecurrence(const SCEV &Subscript,
531 const Loop &L) const {
532 if (!isa<SCEVAddRecExpr>(Subscript))
533 return false;
534
535 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(&Subscript);
536 assert(AR->getLoop() && "AR should have a loop");
537
538 if (!AR->isAffine())
539 return false;
540
541 const SCEV *Start = AR->getStart();
542 const SCEV *Step = AR->getStepRecurrence(SE);
543
544 if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
545 return false;
546
547 return true;
548}
549
550bool IndexedReference::isAliased(const IndexedReference &Other,
551 AAResults &AA) const {
552 const auto &Loc1 = MemoryLocation::get(&StoreOrLoadInst);
553 const auto &Loc2 = MemoryLocation::get(&Other.StoreOrLoadInst);
554 return AA.isMustAlias(Loc1, Loc2);
555}
556
557//===----------------------------------------------------------------------===//
558// CacheCost implementation
559//
561 for (const auto &LC : CC.LoopCosts) {
562 const Loop *L = LC.first;
563 OS << "Loop '" << L->getName() << "' has cost = " << LC.second << "\n";
564 }
565 return OS;
566}
567
570 AAResults &AA, DependenceInfo &DI,
571 std::optional<unsigned> TRT)
572 : Loops(Loops), TRT(TRT.value_or(TemporalReuseThreshold)), LI(LI), SE(SE),
573 TTI(TTI), AA(AA), DI(DI) {
574 assert(!Loops.empty() && "Expecting a non-empty loop vector.");
575
576 for (const Loop *L : Loops) {
577 unsigned TripCount = SE.getSmallConstantTripCount(L);
578 TripCount = (TripCount == 0) ? DefaultTripCount : TripCount;
579 TripCounts.push_back({L, TripCount});
580 }
581
582 calculateCacheFootprint();
583}
584
585std::unique_ptr<CacheCost>
587 DependenceInfo &DI, std::optional<unsigned> TRT) {
588 if (!Root.isOutermost()) {
589 LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n");
590 return nullptr;
591 }
592
593 LoopVectorTy Loops;
594 append_range(Loops, breadth_first(&Root));
595
596 if (!getInnerMostLoop(Loops)) {
597 LLVM_DEBUG(dbgs() << "Cannot compute cache cost of loop nest with more "
598 "than one innermost loop\n");
599 return nullptr;
600 }
601
602 return std::make_unique<CacheCost>(Loops, AR.LI, AR.SE, AR.TTI, AR.AA, DI, TRT);
603}
604
605void CacheCost::calculateCacheFootprint() {
606 LLVM_DEBUG(dbgs() << "POPULATING REFERENCE GROUPS\n");
607 ReferenceGroupsTy RefGroups;
608 if (!populateReferenceGroups(RefGroups))
609 return;
610
611 LLVM_DEBUG(dbgs() << "COMPUTING LOOP CACHE COSTS\n");
612 for (const Loop *L : Loops) {
614 LoopCosts,
615 [L](const LoopCacheCostTy &LCC) { return LCC.first == L; }) &&
616 "Should not add duplicate element");
617 CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups);
618 LoopCosts.push_back(std::make_pair(L, LoopCost));
619 }
620
621 sortLoopCosts();
622 RefGroups.clear();
623}
624
625bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const {
626 assert(RefGroups.empty() && "Reference groups should be empty");
627
628 unsigned CLS = TTI.getCacheLineSize();
629 Loop *InnerMostLoop = getInnerMostLoop(Loops);
630 assert(InnerMostLoop != nullptr && "Expecting a valid innermost loop");
631
632 for (BasicBlock *BB : InnerMostLoop->getBlocks()) {
633 for (Instruction &I : *BB) {
634 if (!isa<StoreInst>(I) && !isa<LoadInst>(I))
635 continue;
636
637 std::unique_ptr<IndexedReference> R(new IndexedReference(I, LI, SE));
638 if (!R->isValid())
639 continue;
640
641 bool Added = false;
642 for (ReferenceGroupTy &RefGroup : RefGroups) {
643 const IndexedReference &Representative = *RefGroup.front();
644 LLVM_DEBUG({
645 dbgs() << "References:\n";
646 dbgs().indent(2) << *R << "\n";
647 dbgs().indent(2) << Representative << "\n";
648 });
649
650
651 // FIXME: Both positive and negative access functions will be placed
652 // into the same reference group, resulting in a bi-directional array
653 // access such as:
654 // for (i = N; i > 0; i--)
655 // A[i] = A[N - i];
656 // having the same cost calculation as a single dimention access pattern
657 // for (i = 0; i < N; i++)
658 // A[i] = A[i];
659 // when in actuality, depending on the array size, the first example
660 // should have a cost closer to 2x the second due to the two cache
661 // access per iteration from opposite ends of the array
662 std::optional<bool> HasTemporalReuse =
663 R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA);
664 std::optional<bool> HasSpacialReuse =
665 R->hasSpacialReuse(Representative, CLS, AA);
666
667 if ((HasTemporalReuse && *HasTemporalReuse) ||
668 (HasSpacialReuse && *HasSpacialReuse)) {
669 RefGroup.push_back(std::move(R));
670 Added = true;
671 break;
672 }
673 }
674
675 if (!Added) {
677 RG.push_back(std::move(R));
678 RefGroups.push_back(std::move(RG));
679 }
680 }
681 }
682
683 if (RefGroups.empty())
684 return false;
685
686 LLVM_DEBUG({
687 dbgs() << "\nIDENTIFIED REFERENCE GROUPS:\n";
688 int n = 1;
689 for (const ReferenceGroupTy &RG : RefGroups) {
690 dbgs().indent(2) << "RefGroup " << n << ":\n";
691 for (const auto &IR : RG)
692 dbgs().indent(4) << *IR << "\n";
693 n++;
694 }
695 dbgs() << "\n";
696 });
697
698 return true;
699}
700
702CacheCost::computeLoopCacheCost(const Loop &L,
703 const ReferenceGroupsTy &RefGroups) const {
704 if (!L.isLoopSimplifyForm())
706
707 LLVM_DEBUG(dbgs() << "Considering loop '" << L.getName()
708 << "' as innermost loop.\n");
709
710 // Compute the product of the trip counts of each other loop in the nest.
711 CacheCostTy TripCountsProduct = 1;
712 for (const auto &TC : TripCounts) {
713 if (TC.first == &L)
714 continue;
715 TripCountsProduct *= TC.second;
716 }
717
718 CacheCostTy LoopCost = 0;
719 for (const ReferenceGroupTy &RG : RefGroups) {
720 CacheCostTy RefGroupCost = computeRefGroupCacheCost(RG, L);
721 LoopCost += RefGroupCost * TripCountsProduct;
722 }
723
724 LLVM_DEBUG(dbgs().indent(2) << "Loop '" << L.getName()
725 << "' has cost=" << LoopCost << "\n");
726
727 return LoopCost;
728}
729
730CacheCostTy CacheCost::computeRefGroupCacheCost(const ReferenceGroupTy &RG,
731 const Loop &L) const {
732 assert(!RG.empty() && "Reference group should have at least one member.");
733
734 const IndexedReference *Representative = RG.front().get();
735 return Representative->computeRefCost(L, TTI.getCacheLineSize());
736}
737
738//===----------------------------------------------------------------------===//
739// LoopCachePrinterPass implementation
740//
743 LPMUpdater &U) {
744 Function *F = L.getHeader()->getParent();
745 DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
746
747 if (auto CC = CacheCost::getCacheCost(L, AR, DI))
748 OS << *CC;
749
750 return PreservedAnalyses::all();
751}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file builds on the ADT/GraphTraits.h file to build a generic breadth first graph iterator.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Hexagon Hardware Loops
Legalize the Machine IR a function s Machine IR
Definition Legalizer.cpp:80
static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize, const Loop &L, ScalarEvolution &SE)
static cl::opt< unsigned > TemporalReuseThreshold("temporal-reuse-threshold", cl::init(2), cl::Hidden, cl::desc("Use this to specify the max. distance between array elements " "accessed in a loop so that the elements are classified to have " "temporal reuse"))
static const SCEV * computeTripCount(const Loop &L, const SCEV &ElemSize, ScalarEvolution &SE)
Compute the trip count for the given loop L or assume a default value if it is not a compile time con...
static Loop * getInnerMostLoop(const LoopVectorTy &Loops)
Retrieve the innermost loop in the given loop nest Loops.
static cl::opt< unsigned > DefaultTripCount("default-trip-count", cl::init(100), cl::Hidden, cl::desc("Use this to specify the default trip count of a loop"))
This file defines the interface for the loop cache analysis.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Provides some synthesis utilities to produce sequences of values.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static cl::opt< unsigned > CacheLineSize("cache-line-size", cl::init(0), cl::Hidden, cl::desc("Use this to override the target cache line size when " "specified by the user."))
This pass exposes codegen information to IR-level passes.
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are must-alias.
CacheCost represents the estimated cost of a inner loop as the number of cache lines used by the memo...
CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, TargetTransformInfo &TTI, AAResults &AA, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Construct a CacheCost object for the loop nest described by Loops.
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:703
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:169
DependenceInfo - This class is the main dependence-analysis driver.
LLVM_ABI std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool UnderRuntimeAssumptions=false)
depends - Tests for a dependence between the Src and Dst instructions.
Represents a memory reference as a base pointer and a set of indexing operations.
CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const
Compute the cost of the reference w.r.t.
const SCEV * getSubscript(unsigned SubNum) const
std::optional< bool > hasSpacialReuse(const IndexedReference &Other, unsigned CLS, AAResults &AA) const
Return true/false if the current object and the indexed reference Other are/aren't in the same cache ...
std::optional< bool > hasTemporalReuse(const IndexedReference &Other, unsigned MaxDistance, const Loop &L, DependenceInfo &DI, AAResults &AA) const
Return true if the current object and the indexed reference Other have distance smaller than MaxDista...
IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI, ScalarEvolution &SE)
Construct an indexed reference given a StoreOrLoadInst instruction.
const SCEV * getLastSubscript() const
size_t getNumSubscripts() const
static InstructionCost getInvalid(CostType Val=0)
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
unsigned getLoopDepth() const
Return the nesting level of this loop.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents a constant integer value.
ConstantInt * getValue() const
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI Type * getExtendedType() const
Given scalar/vector integer type, returns a type with elements twice as wide as in the original type.
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
Abstract Attribute helper functions.
Definition Attributor.h:165
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
InstructionCost CacheCostTy
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2116
SmallVector< std::unique_ptr< IndexedReference >, 8 > ReferenceGroupTy
A reference group represents a set of memory references that exhibit temporal or spacial reuse.
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
SmallVector< Loop *, 8 > LoopVectorTy
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1719
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition STLExtras.h:1900
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
iterator_range< bf_iterator< T > > breadth_first(const T &G)
@ Other
Any other memory.
Definition ModRef.h:68
bool tryDelinearizeFixedSizeImpl(ScalarEvolution *SE, Instruction *Inst, const SCEV *AccessFn, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< int > &Sizes)
Implementation of fixed size array delinearization.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
void delinearize(ScalarEvolution &SE, const SCEV *Expr, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< const SCEV * > &Sizes, const SCEV *ElementSize)
Split this SCEVAddRecExpr into two vectors of SCEVs representing the subscripts and sizes of an array...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
SmallVector< ReferenceGroupTy, 8 > ReferenceGroupsTy
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...