LLVM  14.0.0git
LoopCacheAnalysis.cpp
Go to the documentation of this file.
1 //===- LoopCacheAnalysis.cpp - Loop Cache Analysis -------------------------==//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6 // See https://llvm.org/LICENSE.txt for license information.
7 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 //
9 //===----------------------------------------------------------------------===//
10 ///
11 /// \file
12 /// This file defines the implementation for the loop cache analysis.
13 /// The implementation is largely based on the following paper:
14 ///
15 /// Compiler Optimizations for Improving Data Locality
16 /// By: Steve Carr, Katherine S. McKinley, Chau-Wen Tseng
17 /// http://www.cs.utexas.edu/users/mckinley/papers/asplos-1994.pdf
18 ///
19 /// The general approach taken to estimate the number of cache lines used by the
20 /// memory references in an inner loop is:
21 /// 1. Partition memory references that exhibit temporal or spacial reuse
22 /// into reference groups.
23 /// 2. For each loop L in the a loop nest LN:
24 /// a. Compute the cost of the reference group
25 /// b. Compute the loop cost by summing up the reference groups costs
26 //===----------------------------------------------------------------------===//
27 
30 #include "llvm/ADT/Sequence.h"
31 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/Analysis/LoopInfo.h"
39 #include "llvm/Support/Debug.h"
40 
41 using namespace llvm;
42 
43 #define DEBUG_TYPE "loop-cache-cost"
44 
46  "default-trip-count", cl::init(100), cl::Hidden,
47  cl::desc("Use this to specify the default trip count of a loop"));
48 
49 // In this analysis two array references are considered to exhibit temporal
50 // reuse if they access either the same memory location, or a memory location
51 // with distance smaller than a configurable threshold.
53  "temporal-reuse-threshold", cl::init(2), cl::Hidden,
54  cl::desc("Use this to specify the max. distance between array elements "
55  "accessed in a loop so that the elements are classified to have "
56  "temporal reuse"));
57 
58 /// Retrieve the innermost loop in the given loop nest \p Loops. It returns a
59 /// nullptr if any loops in the loop vector supplied has more than one sibling.
60 /// The loop vector is expected to contain loops collected in breadth-first
61 /// order.
63  assert(!Loops.empty() && "Expecting a non-empy loop vector");
64 
65  Loop *LastLoop = Loops.back();
66  Loop *ParentLoop = LastLoop->getParentLoop();
67 
68  if (ParentLoop == nullptr) {
69  assert(Loops.size() == 1 && "Expecting a single loop");
70  return LastLoop;
71  }
72 
73  return (llvm::is_sorted(Loops,
74  [](const Loop *L1, const Loop *L2) {
75  return L1->getLoopDepth() < L2->getLoopDepth();
76  }))
77  ? LastLoop
78  : nullptr;
79 }
80 
81 static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize,
82  const Loop &L, ScalarEvolution &SE) {
83  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&AccessFn);
84  if (!AR || !AR->isAffine())
85  return false;
86 
87  assert(AR->getLoop() && "AR should have a loop");
88 
89  // Check that start and increment are not add recurrences.
90  const SCEV *Start = AR->getStart();
91  const SCEV *Step = AR->getStepRecurrence(SE);
92  if (isa<SCEVAddRecExpr>(Start) || isa<SCEVAddRecExpr>(Step))
93  return false;
94 
95  // Check that start and increment are both invariant in the loop.
96  if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
97  return false;
98 
99  const SCEV *StepRec = AR->getStepRecurrence(SE);
100  if (StepRec && SE.isKnownNegative(StepRec))
101  StepRec = SE.getNegativeSCEV(StepRec);
102 
103  return StepRec == &ElemSize;
104 }
105 
106 /// Compute the trip count for the given loop \p L. Return the SCEV expression
107 /// for the trip count or nullptr if it cannot be computed.
108 static const SCEV *computeTripCount(const Loop &L, ScalarEvolution &SE) {
109  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L);
110  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
111  !isa<SCEVConstant>(BackedgeTakenCount))
112  return nullptr;
113  return SE.getTripCountFromExitCount(BackedgeTakenCount);
114 }
115 
116 //===----------------------------------------------------------------------===//
117 // IndexedReference implementation
118 //
120  if (!R.IsValid) {
121  OS << R.StoreOrLoadInst;
122  OS << ", IsValid=false.";
123  return OS;
124  }
125 
126  OS << *R.BasePointer;
127  for (const SCEV *Subscript : R.Subscripts)
128  OS << "[" << *Subscript << "]";
129 
130  OS << ", Sizes: ";
131  for (const SCEV *Size : R.Sizes)
132  OS << "[" << *Size << "]";
133 
134  return OS;
135 }
136 
138  const LoopInfo &LI, ScalarEvolution &SE)
139  : StoreOrLoadInst(StoreOrLoadInst), SE(SE) {
140  assert((isa<StoreInst>(StoreOrLoadInst) || isa<LoadInst>(StoreOrLoadInst)) &&
141  "Expecting a load or store instruction");
142 
143  IsValid = delinearize(LI);
144  if (IsValid)
145  LLVM_DEBUG(dbgs().indent(2) << "Succesfully delinearized: " << *this
146  << "\n");
147 }
148 
150  unsigned CLS,
151  AAResults &AA) const {
152  assert(IsValid && "Expecting a valid reference");
153 
154  if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
155  LLVM_DEBUG(dbgs().indent(2)
156  << "No spacial reuse: different base pointers\n");
157  return false;
158  }
159 
160  unsigned NumSubscripts = getNumSubscripts();
161  if (NumSubscripts != Other.getNumSubscripts()) {
162  LLVM_DEBUG(dbgs().indent(2)
163  << "No spacial reuse: different number of subscripts\n");
164  return false;
165  }
166 
167  // all subscripts must be equal, except the leftmost one (the last one).
168  for (auto SubNum : seq<unsigned>(0, NumSubscripts - 1)) {
169  if (getSubscript(SubNum) != Other.getSubscript(SubNum)) {
170  LLVM_DEBUG(dbgs().indent(2) << "No spacial reuse, different subscripts: "
171  << "\n\t" << *getSubscript(SubNum) << "\n\t"
172  << *Other.getSubscript(SubNum) << "\n");
173  return false;
174  }
175  }
176 
177  // the difference between the last subscripts must be less than the cache line
178  // size.
179  const SCEV *LastSubscript = getLastSubscript();
180  const SCEV *OtherLastSubscript = Other.getLastSubscript();
181  const SCEVConstant *Diff = dyn_cast<SCEVConstant>(
182  SE.getMinusSCEV(LastSubscript, OtherLastSubscript));
183 
184  if (Diff == nullptr) {
185  LLVM_DEBUG(dbgs().indent(2)
186  << "No spacial reuse, difference between subscript:\n\t"
187  << *LastSubscript << "\n\t" << OtherLastSubscript
188  << "\nis not constant.\n");
189  return None;
190  }
191 
192  bool InSameCacheLine = (Diff->getValue()->getSExtValue() < CLS);
193 
194  LLVM_DEBUG({
195  if (InSameCacheLine)
196  dbgs().indent(2) << "Found spacial reuse.\n";
197  else
198  dbgs().indent(2) << "No spacial reuse.\n";
199  });
200 
201  return InSameCacheLine;
202 }
203 
205  unsigned MaxDistance,
206  const Loop &L,
207  DependenceInfo &DI,
208  AAResults &AA) const {
209  assert(IsValid && "Expecting a valid reference");
210 
211  if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
212  LLVM_DEBUG(dbgs().indent(2)
213  << "No temporal reuse: different base pointer\n");
214  return false;
215  }
216 
217  std::unique_ptr<Dependence> D =
218  DI.depends(&StoreOrLoadInst, &Other.StoreOrLoadInst, true);
219 
220  if (D == nullptr) {
221  LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: no dependence\n");
222  return false;
223  }
224 
225  if (D->isLoopIndependent()) {
226  LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
227  return true;
228  }
229 
230  // Check the dependence distance at every loop level. There is temporal reuse
231  // if the distance at the given loop's depth is small (|d| <= MaxDistance) and
232  // it is zero at every other loop level.
233  int LoopDepth = L.getLoopDepth();
234  int Levels = D->getLevels();
235  for (int Level = 1; Level <= Levels; ++Level) {
236  const SCEV *Distance = D->getDistance(Level);
237  const SCEVConstant *SCEVConst = dyn_cast_or_null<SCEVConstant>(Distance);
238 
239  if (SCEVConst == nullptr) {
240  LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n");
241  return None;
242  }
243 
244  const ConstantInt &CI = *SCEVConst->getValue();
245  if (Level != LoopDepth && !CI.isZero()) {
246  LLVM_DEBUG(dbgs().indent(2)
247  << "No temporal reuse: distance is not zero at depth=" << Level
248  << "\n");
249  return false;
250  } else if (Level == LoopDepth && CI.getSExtValue() > MaxDistance) {
251  LLVM_DEBUG(
252  dbgs().indent(2)
253  << "No temporal reuse: distance is greater than MaxDistance at depth="
254  << Level << "\n");
255  return false;
256  }
257  }
258 
259  LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
260  return true;
261 }
262 
264  unsigned CLS) const {
265  assert(IsValid && "Expecting a valid reference");
266  LLVM_DEBUG({
267  dbgs().indent(2) << "Computing cache cost for:\n";
268  dbgs().indent(4) << *this << "\n";
269  });
270 
271  // If the indexed reference is loop invariant the cost is one.
272  if (isLoopInvariant(L)) {
273  LLVM_DEBUG(dbgs().indent(4) << "Reference is loop invariant: RefCost=1\n");
274  return 1;
275  }
276 
277  const SCEV *TripCount = computeTripCount(L, SE);
278  if (!TripCount) {
279  LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName()
280  << " could not be computed, using DefaultTripCount\n");
281  const SCEV *ElemSize = Sizes.back();
282  TripCount = SE.getConstant(ElemSize->getType(), DefaultTripCount);
283  }
284  LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n");
285 
286  // If the indexed reference is 'consecutive' the cost is
287  // (TripCount*Stride)/CLS, otherwise the cost is TripCount.
288  const SCEV *RefCost = TripCount;
289 
290  if (isConsecutive(L, CLS)) {
291  const SCEV *Coeff = getLastCoefficient();
292  const SCEV *ElemSize = Sizes.back();
293  const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
294  Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType());
295  const SCEV *CacheLineSize = SE.getConstant(WiderType, CLS);
296  if (SE.isKnownNegative(Stride))
297  Stride = SE.getNegativeSCEV(Stride);
298  Stride = SE.getNoopOrAnyExtend(Stride, WiderType);
299  TripCount = SE.getNoopOrAnyExtend(TripCount, WiderType);
300  const SCEV *Numerator = SE.getMulExpr(Stride, TripCount);
301  RefCost = SE.getUDivExpr(Numerator, CacheLineSize);
302 
303  LLVM_DEBUG(dbgs().indent(4)
304  << "Access is consecutive: RefCost=(TripCount*Stride)/CLS="
305  << *RefCost << "\n");
306  } else
307  LLVM_DEBUG(dbgs().indent(4)
308  << "Access is not consecutive: RefCost=TripCount=" << *RefCost
309  << "\n");
310 
311  // Attempt to fold RefCost into a constant.
312  if (auto ConstantCost = dyn_cast<SCEVConstant>(RefCost))
313  return ConstantCost->getValue()->getSExtValue();
314 
315  LLVM_DEBUG(dbgs().indent(4)
316  << "RefCost is not a constant! Setting to RefCost=InvalidCost "
317  "(invalid value).\n");
318 
319  return CacheCost::InvalidCost;
320 }
321 
322 bool IndexedReference::delinearize(const LoopInfo &LI) {
323  assert(Subscripts.empty() && "Subscripts should be empty");
324  assert(Sizes.empty() && "Sizes should be empty");
325  assert(!IsValid && "Should be called once from the constructor");
326  LLVM_DEBUG(dbgs() << "Delinearizing: " << StoreOrLoadInst << "\n");
327 
328  const SCEV *ElemSize = SE.getElementSize(&StoreOrLoadInst);
329  const BasicBlock *BB = StoreOrLoadInst.getParent();
330 
331  if (Loop *L = LI.getLoopFor(BB)) {
332  const SCEV *AccessFn =
333  SE.getSCEVAtScope(getPointerOperand(&StoreOrLoadInst), L);
334 
335  BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFn));
336  if (BasePointer == nullptr) {
337  LLVM_DEBUG(
338  dbgs().indent(2)
339  << "ERROR: failed to delinearize, can't identify base pointer\n");
340  return false;
341  }
342 
343  AccessFn = SE.getMinusSCEV(AccessFn, BasePointer);
344 
345  LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
346  << "', AccessFn: " << *AccessFn << "\n");
347 
348  llvm::delinearize(SE, AccessFn, Subscripts, Sizes,
349  SE.getElementSize(&StoreOrLoadInst));
350 
351  if (Subscripts.empty() || Sizes.empty() ||
352  Subscripts.size() != Sizes.size()) {
353  // Attempt to determine whether we have a single dimensional array access.
354  // before giving up.
355  if (!isOneDimensionalArray(*AccessFn, *ElemSize, *L, SE)) {
356  LLVM_DEBUG(dbgs().indent(2)
357  << "ERROR: failed to delinearize reference\n");
358  Subscripts.clear();
359  Sizes.clear();
360  return false;
361  }
362 
363  // The array may be accessed in reverse, for example:
364  // for (i = N; i > 0; i--)
365  // A[i] = 0;
366  // In this case, reconstruct the access function using the absolute value
367  // of the step recurrence.
368  const SCEVAddRecExpr *AccessFnAR = dyn_cast<SCEVAddRecExpr>(AccessFn);
369  const SCEV *StepRec = AccessFnAR ? AccessFnAR->getStepRecurrence(SE) : nullptr;
370 
371  if (StepRec && SE.isKnownNegative(StepRec))
372  AccessFn = SE.getAddRecExpr(AccessFnAR->getStart(),
373  SE.getNegativeSCEV(StepRec),
374  AccessFnAR->getLoop(),
375  AccessFnAR->getNoWrapFlags());
376  const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize);
377  Subscripts.push_back(Div);
378  Sizes.push_back(ElemSize);
379  }
380 
381  return all_of(Subscripts, [&](const SCEV *Subscript) {
382  return isSimpleAddRecurrence(*Subscript, *L);
383  });
384  }
385 
386  return false;
387 }
388 
389 bool IndexedReference::isLoopInvariant(const Loop &L) const {
390  Value *Addr = getPointerOperand(&StoreOrLoadInst);
391  assert(Addr != nullptr && "Expecting either a load or a store instruction");
392  assert(SE.isSCEVable(Addr->getType()) && "Addr should be SCEVable");
393 
394  if (SE.isLoopInvariant(SE.getSCEV(Addr), &L))
395  return true;
396 
397  // The indexed reference is loop invariant if none of the coefficients use
398  // the loop induction variable.
399  bool allCoeffForLoopAreZero = all_of(Subscripts, [&](const SCEV *Subscript) {
400  return isCoeffForLoopZeroOrInvariant(*Subscript, L);
401  });
402 
403  return allCoeffForLoopAreZero;
404 }
405 
406 bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const {
407  // The indexed reference is 'consecutive' if the only coefficient that uses
408  // the loop induction variable is the last one...
409  const SCEV *LastSubscript = Subscripts.back();
410  for (const SCEV *Subscript : Subscripts) {
411  if (Subscript == LastSubscript)
412  continue;
413  if (!isCoeffForLoopZeroOrInvariant(*Subscript, L))
414  return false;
415  }
416 
417  // ...and the access stride is less than the cache line size.
418  const SCEV *Coeff = getLastCoefficient();
419  const SCEV *ElemSize = Sizes.back();
420  const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
421  const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
422 
423  Stride = SE.isKnownNegative(Stride) ? SE.getNegativeSCEV(Stride) : Stride;
425 }
426 
427 const SCEV *IndexedReference::getLastCoefficient() const {
428  const SCEV *LastSubscript = getLastSubscript();
429  assert(isa<SCEVAddRecExpr>(LastSubscript) &&
430  "Expecting a SCEV add recurrence expression");
431  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LastSubscript);
432  return AR->getStepRecurrence(SE);
433 }
434 
435 bool IndexedReference::isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
436  const Loop &L) const {
437  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&Subscript);
438  return (AR != nullptr) ? AR->getLoop() != &L
439  : SE.isLoopInvariant(&Subscript, &L);
440 }
441 
442 bool IndexedReference::isSimpleAddRecurrence(const SCEV &Subscript,
443  const Loop &L) const {
444  if (!isa<SCEVAddRecExpr>(Subscript))
445  return false;
446 
447  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(&Subscript);
448  assert(AR->getLoop() && "AR should have a loop");
449 
450  if (!AR->isAffine())
451  return false;
452 
453  const SCEV *Start = AR->getStart();
454  const SCEV *Step = AR->getStepRecurrence(SE);
455 
456  if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
457  return false;
458 
459  return true;
460 }
461 
462 bool IndexedReference::isAliased(const IndexedReference &Other,
463  AAResults &AA) const {
464  const auto &Loc1 = MemoryLocation::get(&StoreOrLoadInst);
465  const auto &Loc2 = MemoryLocation::get(&Other.StoreOrLoadInst);
466  return AA.isMustAlias(Loc1, Loc2);
467 }
468 
469 //===----------------------------------------------------------------------===//
470 // CacheCost implementation
471 //
473  for (const auto &LC : CC.LoopCosts) {
474  const Loop *L = LC.first;
475  OS << "Loop '" << L->getName() << "' has cost = " << LC.second << "\n";
476  }
477  return OS;
478 }
479 
482  AAResults &AA, DependenceInfo &DI,
483  Optional<unsigned> TRT)
484  : Loops(Loops), TripCounts(), LoopCosts(),
485  TRT((TRT == None) ? Optional<unsigned>(TemporalReuseThreshold) : TRT),
486  LI(LI), SE(SE), TTI(TTI), AA(AA), DI(DI) {
487  assert(!Loops.empty() && "Expecting a non-empty loop vector.");
488 
489  for (const Loop *L : Loops) {
490  unsigned TripCount = SE.getSmallConstantTripCount(L);
491  TripCount = (TripCount == 0) ? DefaultTripCount : TripCount;
492  TripCounts.push_back({L, TripCount});
493  }
494 
495  calculateCacheFootprint();
496 }
497 
498 std::unique_ptr<CacheCost>
501  if (!Root.isOutermost()) {
502  LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n");
503  return nullptr;
504  }
505 
506  LoopVectorTy Loops;
508 
509  if (!getInnerMostLoop(Loops)) {
510  LLVM_DEBUG(dbgs() << "Cannot compute cache cost of loop nest with more "
511  "than one innermost loop\n");
512  return nullptr;
513  }
514 
515  return std::make_unique<CacheCost>(Loops, AR.LI, AR.SE, AR.TTI, AR.AA, DI, TRT);
516 }
517 
518 void CacheCost::calculateCacheFootprint() {
519  LLVM_DEBUG(dbgs() << "POPULATING REFERENCE GROUPS\n");
520  ReferenceGroupsTy RefGroups;
521  if (!populateReferenceGroups(RefGroups))
522  return;
523 
524  LLVM_DEBUG(dbgs() << "COMPUTING LOOP CACHE COSTS\n");
525  for (const Loop *L : Loops) {
526  assert((std::find_if(LoopCosts.begin(), LoopCosts.end(),
527  [L](const LoopCacheCostTy &LCC) {
528  return LCC.first == L;
529  }) == LoopCosts.end()) &&
530  "Should not add duplicate element");
531  CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups);
532  LoopCosts.push_back(std::make_pair(L, LoopCost));
533  }
534 
535  sortLoopCosts();
536  RefGroups.clear();
537 }
538 
539 bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const {
540  assert(RefGroups.empty() && "Reference groups should be empty");
541 
542  unsigned CLS = TTI.getCacheLineSize();
543  Loop *InnerMostLoop = getInnerMostLoop(Loops);
544  assert(InnerMostLoop != nullptr && "Expecting a valid innermost loop");
545 
546  for (BasicBlock *BB : InnerMostLoop->getBlocks()) {
547  for (Instruction &I : *BB) {
548  if (!isa<StoreInst>(I) && !isa<LoadInst>(I))
549  continue;
550 
551  std::unique_ptr<IndexedReference> R(new IndexedReference(I, LI, SE));
552  if (!R->isValid())
553  continue;
554 
555  bool Added = false;
556  for (ReferenceGroupTy &RefGroup : RefGroups) {
557  const IndexedReference &Representative = *RefGroup.front().get();
558  LLVM_DEBUG({
559  dbgs() << "References:\n";
560  dbgs().indent(2) << *R << "\n";
561  dbgs().indent(2) << Representative << "\n";
562  });
563 
564 
565  // FIXME: Both positive and negative access functions will be placed
566  // into the same reference group, resulting in a bi-directional array
567  // access such as:
568  // for (i = N; i > 0; i--)
569  // A[i] = A[N - i];
570  // having the same cost calculation as a single dimention access pattern
571  // for (i = 0; i < N; i++)
572  // A[i] = A[i];
573  // when in actuality, depending on the array size, the first example
574  // should have a cost closer to 2x the second due to the two cache
575  // access per iteration from opposite ends of the array
576  Optional<bool> HasTemporalReuse =
577  R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA);
578  Optional<bool> HasSpacialReuse =
579  R->hasSpacialReuse(Representative, CLS, AA);
580 
581  if ((HasTemporalReuse.hasValue() && *HasTemporalReuse) ||
582  (HasSpacialReuse.hasValue() && *HasSpacialReuse)) {
583  RefGroup.push_back(std::move(R));
584  Added = true;
585  break;
586  }
587  }
588 
589  if (!Added) {
590  ReferenceGroupTy RG;
591  RG.push_back(std::move(R));
592  RefGroups.push_back(std::move(RG));
593  }
594  }
595  }
596 
597  if (RefGroups.empty())
598  return false;
599 
600  LLVM_DEBUG({
601  dbgs() << "\nIDENTIFIED REFERENCE GROUPS:\n";
602  int n = 1;
603  for (const ReferenceGroupTy &RG : RefGroups) {
604  dbgs().indent(2) << "RefGroup " << n << ":\n";
605  for (const auto &IR : RG)
606  dbgs().indent(4) << *IR << "\n";
607  n++;
608  }
609  dbgs() << "\n";
610  });
611 
612  return true;
613 }
614 
616 CacheCost::computeLoopCacheCost(const Loop &L,
617  const ReferenceGroupsTy &RefGroups) const {
618  if (!L.isLoopSimplifyForm())
619  return InvalidCost;
620 
621  LLVM_DEBUG(dbgs() << "Considering loop '" << L.getName()
622  << "' as innermost loop.\n");
623 
624  // Compute the product of the trip counts of each other loop in the nest.
625  CacheCostTy TripCountsProduct = 1;
626  for (const auto &TC : TripCounts) {
627  if (TC.first == &L)
628  continue;
629  TripCountsProduct *= TC.second;
630  }
631 
632  CacheCostTy LoopCost = 0;
633  for (const ReferenceGroupTy &RG : RefGroups) {
634  CacheCostTy RefGroupCost = computeRefGroupCacheCost(RG, L);
635  LoopCost += RefGroupCost * TripCountsProduct;
636  }
637 
638  LLVM_DEBUG(dbgs().indent(2) << "Loop '" << L.getName()
639  << "' has cost=" << LoopCost << "\n");
640 
641  return LoopCost;
642 }
643 
644 CacheCostTy CacheCost::computeRefGroupCacheCost(const ReferenceGroupTy &RG,
645  const Loop &L) const {
646  assert(!RG.empty() && "Reference group should have at least one member.");
647 
648  const IndexedReference *Representative = RG.front().get();
649  return Representative->computeRefCost(L, TTI.getCacheLineSize());
650 }
651 
652 //===----------------------------------------------------------------------===//
653 // LoopCachePrinterPass implementation
654 //
657  LPMUpdater &U) {
658  Function *F = L.getHeader()->getParent();
659  DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
660 
661  if (auto CC = CacheCost::getCacheCost(L, AR, DI))
662  OS << *CC;
663 
664  return PreservedAnalyses::all();
665 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
TemporalReuseThreshold
static cl::opt< unsigned > TemporalReuseThreshold("temporal-reuse-threshold", cl::init(2), cl::Hidden, cl::desc("Use this to specify the max. distance between array elements " "accessed in a loop so that the elements are classified to have " "temporal reuse"))
isOneDimensionalArray
static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize, const Loop &L, ScalarEvolution &SE)
Definition: LoopCacheAnalysis.cpp:81
llvm::MemoryLocation::get
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Definition: MemoryLocation.cpp:37
llvm::ScalarEvolution::getNegativeSCEV
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
Definition: ScalarEvolution.cpp:4113
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
BreadthFirstIterator.h
llvm::IndexedReference::hasSpacialReuse
Optional< bool > hasSpacialReuse(const IndexedReference &Other, unsigned CLS, AAResults &AA) const
Return true/false if the current object and the indexed reference Other are/aren't in the same cache ...
Definition: LoopCacheAnalysis.cpp:149
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:379
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::SCEVAddRecExpr::getStart
const SCEV * getStart() const
Definition: ScalarEvolutionExpressions.h:363
llvm::IndexedReference::getSubscript
const SCEV * getSubscript(unsigned SubNum) const
Definition: LoopCacheAnalysis.h:56
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
Loops
Hexagon Hardware Loops
Definition: HexagonHardwareLoops.cpp:372
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::ScalarEvolution::getAddRecExpr
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
Definition: ScalarEvolution.cpp:3510
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:461
llvm::ScalarEvolution::getNoopOrAnyExtend
const SCEV * getNoopOrAnyExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
Definition: ScalarEvolution.cpp:4298
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
getInnerMostLoop
static Loop * getInnerMostLoop(const LoopVectorTy &Loops)
Retrieve the innermost loop in the given loop nest Loops.
Definition: LoopCacheAnalysis.cpp:62
llvm::Optional< bool >
llvm::ScalarEvolution::getUDivExactExpr
const SCEV * getUDivExactExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:3456
llvm::ScalarEvolution::getWiderType
Type * getWiderType(Type *Ty1, Type *Ty2) const
Definition: ScalarEvolution.cpp:3960
llvm::SCEVConstant::getValue
ConstantInt * getValue() const
Definition: ScalarEvolutionExpressions.h:56
Sequence.h
llvm::ScalarEvolution::getPointerBase
const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
Definition: ScalarEvolution.cpp:4365
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:193
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
AliasAnalysis.h
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:288
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ScalarEvolution::getMulExpr
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2986
CommandLine.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1551
llvm::CacheCost::getCacheCost
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, Optional< unsigned > TRT=None)
Create a CacheCost for the loop nest rooted by Root.
Definition: LoopCacheAnalysis.cpp:499
L2
add sub stmia L5 ldr L2
Definition: README.txt:201
llvm::AAResults
Definition: AliasAnalysis.h:456
DefaultTripCount
static cl::opt< unsigned > DefaultTripCount("default-trip-count", cl::init(100), cl::Hidden, cl::desc("Use this to specify the default trip count of a loop"))
llvm::ScalarEvolution::getUDivExpr
const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:3280
llvm::IndexedReference::getNumSubscripts
size_t getNumSubscripts() const
Definition: LoopCacheAnalysis.h:55
llvm::breadth_first
iterator_range< bf_iterator< T > > breadth_first(const T &G)
Definition: BreadthFirstIterator.h:157
llvm::LoopCachePrinterPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopCacheAnalysis.cpp:655
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::ScalarEvolution::isKnownPredicate
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
Definition: ScalarEvolution.cpp:9905
llvm::Instruction
Definition: Instruction.h:45
llvm::IndexedReference::computeRefCost
CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const
Compute the cost of the reference w.r.t.
Definition: LoopCacheAnalysis.cpp:263
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::IndexedReference::hasTemporalReuse
Optional< bool > hasTemporalReuse(const IndexedReference &Other, unsigned MaxDistance, const Loop &L, DependenceInfo &DI, AAResults &AA) const
Return true if the current object and the indexed reference Other have distance smaller than MaxDista...
Definition: LoopCacheAnalysis.cpp:204
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:230
IR
Statically lint checks LLVM IR
Definition: Lint.cpp:744
llvm::IndexedReference::IndexedReference
IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI, ScalarEvolution &SE)
Construct an indexed reference given a StoreOrLoadInst instruction.
Definition: LoopCacheAnalysis.cpp:137
llvm::Loop::getName
StringRef getName() const
Definition: LoopInfo.h:866
llvm::None
const NoneType None
Definition: None.h:23
LoopInfo.h
computeTripCount
static const SCEV * computeTripCount(const Loop &L, ScalarEvolution &SE)
Compute the trip count for the given loop L.
Definition: LoopCacheAnalysis.cpp:108
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4066
llvm::ScalarEvolution::getSmallConstantTripCount
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
Definition: ScalarEvolution.cpp:7179
CacheLineSize
static cl::opt< unsigned > CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64), cl::desc("The loop prefetch cache line size"))
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:478
llvm::cl::opt
Definition: CommandLine.h:1434
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::DependenceInfo
DependenceInfo - This class is the main dependence-analysis driver.
Definition: DependenceAnalysis.h:272
llvm::getPointerOperand
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
Definition: Instructions.h:5315
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:249
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:967
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::IndexedReference
Represents a memory reference as a base pointer and a set of indexing operations.
Definition: LoopCacheAnalysis.h:45
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:47
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::LoopBase::getLoopDepth
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition: LoopInfo.h:96
llvm::IndexedReference::getLastSubscript
const SCEV * getLastSubscript() const
Definition: LoopCacheAnalysis.h:64
llvm::SCEVNAryExpr::getNoWrapFlags
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
Definition: ScalarEvolutionExpressions.h:213
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:446
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:745
llvm::ConstantInt::isZero
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:194
llvm::LoopBase::isOutermost
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
Definition: LoopInfo.h:168
llvm::ScalarEvolution::getElementSize
const SCEV * getElementSize(Instruction *Inst)
Return the size of an element read or written by Inst.
Definition: ScalarEvolution.cpp:12218
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1748
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:56
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::ConstantInt::getSExtValue
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:148
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:12619
llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1578
llvm::TargetTransformInfo::getCacheLineSize
unsigned getCacheLineSize() const
Definition: TargetTransformInfo.cpp:627
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:364
llvm::delinearize
void delinearize(ScalarEvolution &SE, const SCEV *Expr, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< const SCEV * > &Sizes, const SCEV *ElementSize)
Split this SCEVAddRecExpr into two vectors of SCEVs representing the subscripts and sizes of an array...
Definition: Delinearization.cpp:452
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
Definition: ScalarEvolution.cpp:4199
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:59
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:352
llvm::ScalarEvolution::getTripCountFromExitCount
const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
Convert from an "exit count" (i.e.
Definition: ScalarEvolution.cpp:7159
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::ScalarEvolution::isSCEVable
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition: ScalarEvolution.cpp:3932
llvm::is_sorted
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition: STLExtras.h:1622
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:57
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:53
ScalarEvolutionExpressions.h
llvm::CacheCostTy
int64_t CacheCostTy
Definition: LoopCacheAnalysis.h:31
llvm::DependenceInfo::depends
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
Definition: DependenceAnalysis.cpp:3525
SmallVector.h
llvm::raw_ostream::indent
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
Definition: raw_ostream.cpp:497
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::CacheCost
CacheCost represents the estimated cost of a inner loop as the number of cache lines used by the memo...
Definition: LoopCacheAnalysis.h:174
TargetTransformInfo.h
Delinearization.h
llvm::AAResults::isMustAlias
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are must-alias.
Definition: AliasAnalysis.h:528
LoopCacheAnalysis.h
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:379
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
DependenceAnalysis.h
llvm::cl::desc
Definition: CommandLine.h:414
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
llvm::ScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
Definition: ScalarEvolution.cpp:7284
llvm::CacheCost::InvalidCost
static constexpr CacheCostTy InvalidCost
Definition: LoopCacheAnalysis.h:180
llvm::ScalarEvolution::isKnownNegative
bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
Definition: ScalarEvolution.cpp:9822
llvm::ScalarEvolution::getSCEVAtScope
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
Definition: ScalarEvolution.cpp:8727
llvm::CacheCost::CacheCost
CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, TargetTransformInfo &TTI, AAResults &AA, DependenceInfo &DI, Optional< unsigned > TRT=None)
Construct a CacheCost object for the loop nest described by Loops.
Definition: LoopCacheAnalysis.cpp:480
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:370
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1172