LLVM  13.0.0git
LoopCacheAnalysis.cpp
Go to the documentation of this file.
1 //===- LoopCacheAnalysis.cpp - Loop Cache Analysis -------------------------==//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6 // See https://llvm.org/LICENSE.txt for license information.
7 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 //
9 //===----------------------------------------------------------------------===//
10 ///
11 /// \file
12 /// This file defines the implementation for the loop cache analysis.
13 /// The implementation is largely based on the following paper:
14 ///
15 /// Compiler Optimizations for Improving Data Locality
16 /// By: Steve Carr, Katherine S. McKinley, Chau-Wen Tseng
17 /// http://www.cs.utexas.edu/users/mckinley/papers/asplos-1994.pdf
18 ///
19 /// The general approach taken to estimate the number of cache lines used by the
20 /// memory references in an inner loop is:
21 /// 1. Partition memory references that exhibit temporal or spacial reuse
22 /// into reference groups.
23 /// 2. For each loop L in the a loop nest LN:
24 /// a. Compute the cost of the reference group
25 /// b. Compute the loop cost by summing up the reference groups costs
26 //===----------------------------------------------------------------------===//
27 
30 #include "llvm/ADT/Sequence.h"
31 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/Analysis/LoopInfo.h"
38 #include "llvm/Support/Debug.h"
39 
40 using namespace llvm;
41 
42 #define DEBUG_TYPE "loop-cache-cost"
43 
45  "default-trip-count", cl::init(100), cl::Hidden,
46  cl::desc("Use this to specify the default trip count of a loop"));
47 
48 // In this analysis two array references are considered to exhibit temporal
49 // reuse if they access either the same memory location, or a memory location
50 // with distance smaller than a configurable threshold.
52  "temporal-reuse-threshold", cl::init(2), cl::Hidden,
53  cl::desc("Use this to specify the max. distance between array elements "
54  "accessed in a loop so that the elements are classified to have "
55  "temporal reuse"));
56 
57 /// Retrieve the innermost loop in the given loop nest \p Loops. It returns a
58 /// nullptr if any loops in the loop vector supplied has more than one sibling.
59 /// The loop vector is expected to contain loops collected in breadth-first
60 /// order.
62  assert(!Loops.empty() && "Expecting a non-empy loop vector");
63 
64  Loop *LastLoop = Loops.back();
65  Loop *ParentLoop = LastLoop->getParentLoop();
66 
67  if (ParentLoop == nullptr) {
68  assert(Loops.size() == 1 && "Expecting a single loop");
69  return LastLoop;
70  }
71 
72  return (llvm::is_sorted(Loops,
73  [](const Loop *L1, const Loop *L2) {
74  return L1->getLoopDepth() < L2->getLoopDepth();
75  }))
76  ? LastLoop
77  : nullptr;
78 }
79 
80 static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize,
81  const Loop &L, ScalarEvolution &SE) {
82  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&AccessFn);
83  if (!AR || !AR->isAffine())
84  return false;
85 
86  assert(AR->getLoop() && "AR should have a loop");
87 
88  // Check that start and increment are not add recurrences.
89  const SCEV *Start = AR->getStart();
90  const SCEV *Step = AR->getStepRecurrence(SE);
91  if (isa<SCEVAddRecExpr>(Start) || isa<SCEVAddRecExpr>(Step))
92  return false;
93 
94  // Check that start and increment are both invariant in the loop.
95  if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
96  return false;
97 
98  const SCEV *StepRec = AR->getStepRecurrence(SE);
99  if (StepRec && SE.isKnownNegative(StepRec))
100  StepRec = SE.getNegativeSCEV(StepRec);
101 
102  return StepRec == &ElemSize;
103 }
104 
105 /// Compute the trip count for the given loop \p L. Return the SCEV expression
106 /// for the trip count or nullptr if it cannot be computed.
107 static const SCEV *computeTripCount(const Loop &L, ScalarEvolution &SE) {
108  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L);
109  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
110  !isa<SCEVConstant>(BackedgeTakenCount))
111  return nullptr;
112  return SE.getTripCountFromExitCount(BackedgeTakenCount);
113 }
114 
115 //===----------------------------------------------------------------------===//
116 // IndexedReference implementation
117 //
119  if (!R.IsValid) {
120  OS << R.StoreOrLoadInst;
121  OS << ", IsValid=false.";
122  return OS;
123  }
124 
125  OS << *R.BasePointer;
126  for (const SCEV *Subscript : R.Subscripts)
127  OS << "[" << *Subscript << "]";
128 
129  OS << ", Sizes: ";
130  for (const SCEV *Size : R.Sizes)
131  OS << "[" << *Size << "]";
132 
133  return OS;
134 }
135 
137  const LoopInfo &LI, ScalarEvolution &SE)
138  : StoreOrLoadInst(StoreOrLoadInst), SE(SE) {
139  assert((isa<StoreInst>(StoreOrLoadInst) || isa<LoadInst>(StoreOrLoadInst)) &&
140  "Expecting a load or store instruction");
141 
142  IsValid = delinearize(LI);
143  if (IsValid)
144  LLVM_DEBUG(dbgs().indent(2) << "Succesfully delinearized: " << *this
145  << "\n");
146 }
147 
149  unsigned CLS,
150  AAResults &AA) const {
151  assert(IsValid && "Expecting a valid reference");
152 
153  if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
154  LLVM_DEBUG(dbgs().indent(2)
155  << "No spacial reuse: different base pointers\n");
156  return false;
157  }
158 
159  unsigned NumSubscripts = getNumSubscripts();
160  if (NumSubscripts != Other.getNumSubscripts()) {
161  LLVM_DEBUG(dbgs().indent(2)
162  << "No spacial reuse: different number of subscripts\n");
163  return false;
164  }
165 
166  // all subscripts must be equal, except the leftmost one (the last one).
167  for (auto SubNum : seq<unsigned>(0, NumSubscripts - 1)) {
168  if (getSubscript(SubNum) != Other.getSubscript(SubNum)) {
169  LLVM_DEBUG(dbgs().indent(2) << "No spacial reuse, different subscripts: "
170  << "\n\t" << *getSubscript(SubNum) << "\n\t"
171  << *Other.getSubscript(SubNum) << "\n");
172  return false;
173  }
174  }
175 
176  // the difference between the last subscripts must be less than the cache line
177  // size.
178  const SCEV *LastSubscript = getLastSubscript();
179  const SCEV *OtherLastSubscript = Other.getLastSubscript();
180  const SCEVConstant *Diff = dyn_cast<SCEVConstant>(
181  SE.getMinusSCEV(LastSubscript, OtherLastSubscript));
182 
183  if (Diff == nullptr) {
184  LLVM_DEBUG(dbgs().indent(2)
185  << "No spacial reuse, difference between subscript:\n\t"
186  << *LastSubscript << "\n\t" << OtherLastSubscript
187  << "\nis not constant.\n");
188  return None;
189  }
190 
191  bool InSameCacheLine = (Diff->getValue()->getSExtValue() < CLS);
192 
193  LLVM_DEBUG({
194  if (InSameCacheLine)
195  dbgs().indent(2) << "Found spacial reuse.\n";
196  else
197  dbgs().indent(2) << "No spacial reuse.\n";
198  });
199 
200  return InSameCacheLine;
201 }
202 
204  unsigned MaxDistance,
205  const Loop &L,
206  DependenceInfo &DI,
207  AAResults &AA) const {
208  assert(IsValid && "Expecting a valid reference");
209 
210  if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
211  LLVM_DEBUG(dbgs().indent(2)
212  << "No temporal reuse: different base pointer\n");
213  return false;
214  }
215 
216  std::unique_ptr<Dependence> D =
217  DI.depends(&StoreOrLoadInst, &Other.StoreOrLoadInst, true);
218 
219  if (D == nullptr) {
220  LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: no dependence\n");
221  return false;
222  }
223 
224  if (D->isLoopIndependent()) {
225  LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
226  return true;
227  }
228 
229  // Check the dependence distance at every loop level. There is temporal reuse
230  // if the distance at the given loop's depth is small (|d| <= MaxDistance) and
231  // it is zero at every other loop level.
232  int LoopDepth = L.getLoopDepth();
233  int Levels = D->getLevels();
234  for (int Level = 1; Level <= Levels; ++Level) {
235  const SCEV *Distance = D->getDistance(Level);
236  const SCEVConstant *SCEVConst = dyn_cast_or_null<SCEVConstant>(Distance);
237 
238  if (SCEVConst == nullptr) {
239  LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n");
240  return None;
241  }
242 
243  const ConstantInt &CI = *SCEVConst->getValue();
244  if (Level != LoopDepth && !CI.isZero()) {
245  LLVM_DEBUG(dbgs().indent(2)
246  << "No temporal reuse: distance is not zero at depth=" << Level
247  << "\n");
248  return false;
249  } else if (Level == LoopDepth && CI.getSExtValue() > MaxDistance) {
250  LLVM_DEBUG(
251  dbgs().indent(2)
252  << "No temporal reuse: distance is greater than MaxDistance at depth="
253  << Level << "\n");
254  return false;
255  }
256  }
257 
258  LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
259  return true;
260 }
261 
263  unsigned CLS) const {
264  assert(IsValid && "Expecting a valid reference");
265  LLVM_DEBUG({
266  dbgs().indent(2) << "Computing cache cost for:\n";
267  dbgs().indent(4) << *this << "\n";
268  });
269 
270  // If the indexed reference is loop invariant the cost is one.
271  if (isLoopInvariant(L)) {
272  LLVM_DEBUG(dbgs().indent(4) << "Reference is loop invariant: RefCost=1\n");
273  return 1;
274  }
275 
276  const SCEV *TripCount = computeTripCount(L, SE);
277  if (!TripCount) {
278  LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName()
279  << " could not be computed, using DefaultTripCount\n");
280  const SCEV *ElemSize = Sizes.back();
281  TripCount = SE.getConstant(ElemSize->getType(), DefaultTripCount);
282  }
283  LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n");
284 
285  // If the indexed reference is 'consecutive' the cost is
286  // (TripCount*Stride)/CLS, otherwise the cost is TripCount.
287  const SCEV *RefCost = TripCount;
288 
289  if (isConsecutive(L, CLS)) {
290  const SCEV *Coeff = getLastCoefficient();
291  const SCEV *ElemSize = Sizes.back();
292  const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
293  const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
294  Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType());
295  if (SE.isKnownNegative(Stride))
296  Stride = SE.getNegativeSCEV(Stride);
297  Stride = SE.getNoopOrAnyExtend(Stride, WiderType);
298  TripCount = SE.getNoopOrAnyExtend(TripCount, WiderType);
299  const SCEV *Numerator = SE.getMulExpr(Stride, TripCount);
300  RefCost = SE.getUDivExpr(Numerator, CacheLineSize);
301 
302  LLVM_DEBUG(dbgs().indent(4)
303  << "Access is consecutive: RefCost=(TripCount*Stride)/CLS="
304  << *RefCost << "\n");
305  } else
306  LLVM_DEBUG(dbgs().indent(4)
307  << "Access is not consecutive: RefCost=TripCount=" << *RefCost
308  << "\n");
309 
310  // Attempt to fold RefCost into a constant.
311  if (auto ConstantCost = dyn_cast<SCEVConstant>(RefCost))
312  return ConstantCost->getValue()->getSExtValue();
313 
314  LLVM_DEBUG(dbgs().indent(4)
315  << "RefCost is not a constant! Setting to RefCost=InvalidCost "
316  "(invalid value).\n");
317 
318  return CacheCost::InvalidCost;
319 }
320 
321 bool IndexedReference::delinearize(const LoopInfo &LI) {
322  assert(Subscripts.empty() && "Subscripts should be empty");
323  assert(Sizes.empty() && "Sizes should be empty");
324  assert(!IsValid && "Should be called once from the constructor");
325  LLVM_DEBUG(dbgs() << "Delinearizing: " << StoreOrLoadInst << "\n");
326 
327  const SCEV *ElemSize = SE.getElementSize(&StoreOrLoadInst);
328  const BasicBlock *BB = StoreOrLoadInst.getParent();
329 
330  if (Loop *L = LI.getLoopFor(BB)) {
331  const SCEV *AccessFn =
332  SE.getSCEVAtScope(getPointerOperand(&StoreOrLoadInst), L);
333 
334  BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFn));
335  if (BasePointer == nullptr) {
336  LLVM_DEBUG(
337  dbgs().indent(2)
338  << "ERROR: failed to delinearize, can't identify base pointer\n");
339  return false;
340  }
341 
342  AccessFn = SE.getMinusSCEV(AccessFn, BasePointer);
343 
344  LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
345  << "', AccessFn: " << *AccessFn << "\n");
346 
347  SE.delinearize(AccessFn, Subscripts, Sizes,
348  SE.getElementSize(&StoreOrLoadInst));
349 
350  if (Subscripts.empty() || Sizes.empty() ||
351  Subscripts.size() != Sizes.size()) {
352  // Attempt to determine whether we have a single dimensional array access.
353  // before giving up.
354  if (!isOneDimensionalArray(*AccessFn, *ElemSize, *L, SE)) {
355  LLVM_DEBUG(dbgs().indent(2)
356  << "ERROR: failed to delinearize reference\n");
357  Subscripts.clear();
358  Sizes.clear();
359  return false;
360  }
361 
362  // The array may be accessed in reverse, for example:
363  // for (i = N; i > 0; i--)
364  // A[i] = 0;
365  // In this case, reconstruct the access function using the absolute value
366  // of the step recurrence.
367  const SCEVAddRecExpr *AccessFnAR = dyn_cast<SCEVAddRecExpr>(AccessFn);
368  const SCEV *StepRec = AccessFnAR ? AccessFnAR->getStepRecurrence(SE) : nullptr;
369 
370  if (StepRec && SE.isKnownNegative(StepRec))
371  AccessFn = SE.getAddRecExpr(AccessFnAR->getStart(),
372  SE.getNegativeSCEV(StepRec),
373  AccessFnAR->getLoop(),
374  AccessFnAR->getNoWrapFlags());
375  const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize);
376  Subscripts.push_back(Div);
377  Sizes.push_back(ElemSize);
378  }
379 
380  return all_of(Subscripts, [&](const SCEV *Subscript) {
381  return isSimpleAddRecurrence(*Subscript, *L);
382  });
383  }
384 
385  return false;
386 }
387 
388 bool IndexedReference::isLoopInvariant(const Loop &L) const {
389  Value *Addr = getPointerOperand(&StoreOrLoadInst);
390  assert(Addr != nullptr && "Expecting either a load or a store instruction");
391  assert(SE.isSCEVable(Addr->getType()) && "Addr should be SCEVable");
392 
393  if (SE.isLoopInvariant(SE.getSCEV(Addr), &L))
394  return true;
395 
396  // The indexed reference is loop invariant if none of the coefficients use
397  // the loop induction variable.
398  bool allCoeffForLoopAreZero = all_of(Subscripts, [&](const SCEV *Subscript) {
399  return isCoeffForLoopZeroOrInvariant(*Subscript, L);
400  });
401 
402  return allCoeffForLoopAreZero;
403 }
404 
405 bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const {
406  // The indexed reference is 'consecutive' if the only coefficient that uses
407  // the loop induction variable is the last one...
408  const SCEV *LastSubscript = Subscripts.back();
409  for (const SCEV *Subscript : Subscripts) {
410  if (Subscript == LastSubscript)
411  continue;
412  if (!isCoeffForLoopZeroOrInvariant(*Subscript, L))
413  return false;
414  }
415 
416  // ...and the access stride is less than the cache line size.
417  const SCEV *Coeff = getLastCoefficient();
418  const SCEV *ElemSize = Sizes.back();
419  const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
420  const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
421 
422  Stride = SE.isKnownNegative(Stride) ? SE.getNegativeSCEV(Stride) : Stride;
424 }
425 
426 const SCEV *IndexedReference::getLastCoefficient() const {
427  const SCEV *LastSubscript = getLastSubscript();
428  assert(isa<SCEVAddRecExpr>(LastSubscript) &&
429  "Expecting a SCEV add recurrence expression");
430  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LastSubscript);
431  return AR->getStepRecurrence(SE);
432 }
433 
434 bool IndexedReference::isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
435  const Loop &L) const {
436  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&Subscript);
437  return (AR != nullptr) ? AR->getLoop() != &L
438  : SE.isLoopInvariant(&Subscript, &L);
439 }
440 
441 bool IndexedReference::isSimpleAddRecurrence(const SCEV &Subscript,
442  const Loop &L) const {
443  if (!isa<SCEVAddRecExpr>(Subscript))
444  return false;
445 
446  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(&Subscript);
447  assert(AR->getLoop() && "AR should have a loop");
448 
449  if (!AR->isAffine())
450  return false;
451 
452  const SCEV *Start = AR->getStart();
453  const SCEV *Step = AR->getStepRecurrence(SE);
454 
455  if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
456  return false;
457 
458  return true;
459 }
460 
461 bool IndexedReference::isAliased(const IndexedReference &Other,
462  AAResults &AA) const {
463  const auto &Loc1 = MemoryLocation::get(&StoreOrLoadInst);
464  const auto &Loc2 = MemoryLocation::get(&Other.StoreOrLoadInst);
465  return AA.isMustAlias(Loc1, Loc2);
466 }
467 
468 //===----------------------------------------------------------------------===//
469 // CacheCost implementation
470 //
472  for (const auto &LC : CC.LoopCosts) {
473  const Loop *L = LC.first;
474  OS << "Loop '" << L->getName() << "' has cost = " << LC.second << "\n";
475  }
476  return OS;
477 }
478 
481  AAResults &AA, DependenceInfo &DI,
482  Optional<unsigned> TRT)
483  : Loops(Loops), TripCounts(), LoopCosts(),
484  TRT((TRT == None) ? Optional<unsigned>(TemporalReuseThreshold) : TRT),
485  LI(LI), SE(SE), TTI(TTI), AA(AA), DI(DI) {
486  assert(!Loops.empty() && "Expecting a non-empty loop vector.");
487 
488  for (const Loop *L : Loops) {
489  unsigned TripCount = SE.getSmallConstantTripCount(L);
490  TripCount = (TripCount == 0) ? DefaultTripCount : TripCount;
491  TripCounts.push_back({L, TripCount});
492  }
493 
494  calculateCacheFootprint();
495 }
496 
497 std::unique_ptr<CacheCost>
500  if (!Root.isOutermost()) {
501  LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n");
502  return nullptr;
503  }
504 
505  LoopVectorTy Loops;
507 
508  if (!getInnerMostLoop(Loops)) {
509  LLVM_DEBUG(dbgs() << "Cannot compute cache cost of loop nest with more "
510  "than one innermost loop\n");
511  return nullptr;
512  }
513 
514  return std::make_unique<CacheCost>(Loops, AR.LI, AR.SE, AR.TTI, AR.AA, DI, TRT);
515 }
516 
517 void CacheCost::calculateCacheFootprint() {
518  LLVM_DEBUG(dbgs() << "POPULATING REFERENCE GROUPS\n");
519  ReferenceGroupsTy RefGroups;
520  if (!populateReferenceGroups(RefGroups))
521  return;
522 
523  LLVM_DEBUG(dbgs() << "COMPUTING LOOP CACHE COSTS\n");
524  for (const Loop *L : Loops) {
525  assert((std::find_if(LoopCosts.begin(), LoopCosts.end(),
526  [L](const LoopCacheCostTy &LCC) {
527  return LCC.first == L;
528  }) == LoopCosts.end()) &&
529  "Should not add duplicate element");
530  CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups);
531  LoopCosts.push_back(std::make_pair(L, LoopCost));
532  }
533 
534  sortLoopCosts();
535  RefGroups.clear();
536 }
537 
538 bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const {
539  assert(RefGroups.empty() && "Reference groups should be empty");
540 
541  unsigned CLS = TTI.getCacheLineSize();
542  Loop *InnerMostLoop = getInnerMostLoop(Loops);
543  assert(InnerMostLoop != nullptr && "Expecting a valid innermost loop");
544 
545  for (BasicBlock *BB : InnerMostLoop->getBlocks()) {
546  for (Instruction &I : *BB) {
547  if (!isa<StoreInst>(I) && !isa<LoadInst>(I))
548  continue;
549 
550  std::unique_ptr<IndexedReference> R(new IndexedReference(I, LI, SE));
551  if (!R->isValid())
552  continue;
553 
554  bool Added = false;
555  for (ReferenceGroupTy &RefGroup : RefGroups) {
556  const IndexedReference &Representative = *RefGroup.front().get();
557  LLVM_DEBUG({
558  dbgs() << "References:\n";
559  dbgs().indent(2) << *R << "\n";
560  dbgs().indent(2) << Representative << "\n";
561  });
562 
563 
564  // FIXME: Both positive and negative access functions will be placed
565  // into the same reference group, resulting in a bi-directional array
566  // access such as:
567  // for (i = N; i > 0; i--)
568  // A[i] = A[N - i];
569  // having the same cost calculation as a single dimention access pattern
570  // for (i = 0; i < N; i++)
571  // A[i] = A[i];
572  // when in actuality, depending on the array size, the first example
573  // should have a cost closer to 2x the second due to the two cache
574  // access per iteration from opposite ends of the array
575  Optional<bool> HasTemporalReuse =
576  R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA);
577  Optional<bool> HasSpacialReuse =
578  R->hasSpacialReuse(Representative, CLS, AA);
579 
580  if ((HasTemporalReuse.hasValue() && *HasTemporalReuse) ||
581  (HasSpacialReuse.hasValue() && *HasSpacialReuse)) {
582  RefGroup.push_back(std::move(R));
583  Added = true;
584  break;
585  }
586  }
587 
588  if (!Added) {
589  ReferenceGroupTy RG;
590  RG.push_back(std::move(R));
591  RefGroups.push_back(std::move(RG));
592  }
593  }
594  }
595 
596  if (RefGroups.empty())
597  return false;
598 
599  LLVM_DEBUG({
600  dbgs() << "\nIDENTIFIED REFERENCE GROUPS:\n";
601  int n = 1;
602  for (const ReferenceGroupTy &RG : RefGroups) {
603  dbgs().indent(2) << "RefGroup " << n << ":\n";
604  for (const auto &IR : RG)
605  dbgs().indent(4) << *IR << "\n";
606  n++;
607  }
608  dbgs() << "\n";
609  });
610 
611  return true;
612 }
613 
615 CacheCost::computeLoopCacheCost(const Loop &L,
616  const ReferenceGroupsTy &RefGroups) const {
617  if (!L.isLoopSimplifyForm())
618  return InvalidCost;
619 
620  LLVM_DEBUG(dbgs() << "Considering loop '" << L.getName()
621  << "' as innermost loop.\n");
622 
623  // Compute the product of the trip counts of each other loop in the nest.
624  CacheCostTy TripCountsProduct = 1;
625  for (const auto &TC : TripCounts) {
626  if (TC.first == &L)
627  continue;
628  TripCountsProduct *= TC.second;
629  }
630 
631  CacheCostTy LoopCost = 0;
632  for (const ReferenceGroupTy &RG : RefGroups) {
633  CacheCostTy RefGroupCost = computeRefGroupCacheCost(RG, L);
634  LoopCost += RefGroupCost * TripCountsProduct;
635  }
636 
637  LLVM_DEBUG(dbgs().indent(2) << "Loop '" << L.getName()
638  << "' has cost=" << LoopCost << "\n");
639 
640  return LoopCost;
641 }
642 
643 CacheCostTy CacheCost::computeRefGroupCacheCost(const ReferenceGroupTy &RG,
644  const Loop &L) const {
645  assert(!RG.empty() && "Reference group should have at least one member.");
646 
647  const IndexedReference *Representative = RG.front().get();
648  return Representative->computeRefCost(L, TTI.getCacheLineSize());
649 }
650 
651 //===----------------------------------------------------------------------===//
652 // LoopCachePrinterPass implementation
653 //
656  LPMUpdater &U) {
657  Function *F = L.getHeader()->getParent();
658  DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
659 
660  if (auto CC = CacheCost::getCacheCost(L, AR, DI))
661  OS << *CC;
662 
663  return PreservedAnalyses::all();
664 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
TemporalReuseThreshold
static cl::opt< unsigned > TemporalReuseThreshold("temporal-reuse-threshold", cl::init(2), cl::Hidden, cl::desc("Use this to specify the max. distance between array elements " "accessed in a loop so that the elements are classified to have " "temporal reuse"))
isOneDimensionalArray
static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize, const Loop &L, ScalarEvolution &SE)
Definition: LoopCacheAnalysis.cpp:80
llvm::MemoryLocation::get
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Definition: MemoryLocation.cpp:37
llvm::ScalarEvolution::getNegativeSCEV
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
Definition: ScalarEvolution.cpp:4022
llvm
Definition: AllocatorList.h:23
BreadthFirstIterator.h
llvm::IndexedReference::hasSpacialReuse
Optional< bool > hasSpacialReuse(const IndexedReference &Other, unsigned CLS, AAResults &AA) const
Return true/false if the current object and the indexed reference Other are/aren't in the same cache ...
Definition: LoopCacheAnalysis.cpp:148
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:378
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::SCEVAddRecExpr::getStart
const SCEV * getStart() const
Definition: ScalarEvolutionExpressions.h:362
llvm::IndexedReference::getSubscript
const SCEV * getSubscript(unsigned SubNum) const
Definition: LoopCacheAnalysis.h:56
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
Loops
Hexagon Hardware Loops
Definition: HexagonHardwareLoops.cpp:372
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1167
llvm::ScalarEvolution::getAddRecExpr
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
Definition: ScalarEvolution.cpp:3428
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:443
llvm::ScalarEvolution::getNoopOrAnyExtend
const SCEV * getNoopOrAnyExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
Definition: ScalarEvolution.cpp:4164
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
getInnerMostLoop
static Loop * getInnerMostLoop(const LoopVectorTy &Loops)
Retrieve the innermost loop in the given loop nest Loops.
Definition: LoopCacheAnalysis.cpp:61
llvm::Optional< bool >
llvm::ScalarEvolution::getUDivExactExpr
const SCEV * getUDivExactExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:3374
llvm::ScalarEvolution::getWiderType
Type * getWiderType(Type *Ty1, Type *Ty2) const
Definition: ScalarEvolution.cpp:3869
llvm::SCEVConstant::getValue
ConstantInt * getValue() const
Definition: ScalarEvolutionExpressions.h:56
Sequence.h
llvm::ScalarEvolution::getPointerBase
const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
Definition: ScalarEvolution.cpp:4231
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:180
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
AliasAnalysis.h
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:288
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::ScalarEvolution::getMulExpr
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2911
CommandLine.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1482
llvm::CacheCost::getCacheCost
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, Optional< unsigned > TRT=None)
Create a CacheCost for the loop nest rooted by Root.
Definition: LoopCacheAnalysis.cpp:498
L2
add sub stmia L5 ldr L2
Definition: README.txt:201
llvm::AAResults
Definition: AliasAnalysis.h:456
DefaultTripCount
static cl::opt< unsigned > DefaultTripCount("default-trip-count", cl::init(100), cl::Hidden, cl::desc("Use this to specify the default trip count of a loop"))
llvm::ScalarEvolution::getUDivExpr
const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:3204
llvm::IndexedReference::getNumSubscripts
size_t getNumSubscripts() const
Definition: LoopCacheAnalysis.h:55
llvm::breadth_first
iterator_range< bf_iterator< T > > breadth_first(const T &G)
Definition: BreadthFirstIterator.h:157
llvm::LoopCachePrinterPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopCacheAnalysis.cpp:654
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::ScalarEvolution::isKnownPredicate
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
Definition: ScalarEvolution.cpp:9749
llvm::Instruction
Definition: Instruction.h:45
llvm::IndexedReference::computeRefCost
CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const
Compute the cost of the reference w.r.t.
Definition: LoopCacheAnalysis.cpp:262
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:50
llvm::IndexedReference::hasTemporalReuse
Optional< bool > hasTemporalReuse(const IndexedReference &Other, unsigned MaxDistance, const Loop &L, DependenceInfo &DI, AAResults &AA) const
Return true if the current object and the indexed reference Other have distance smaller than MaxDista...
Definition: LoopCacheAnalysis.cpp:203
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:230
IR
Statically lint checks LLVM IR
Definition: Lint.cpp:744
llvm::IndexedReference::IndexedReference
IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI, ScalarEvolution &SE)
Construct an indexed reference given a StoreOrLoadInst instruction.
Definition: LoopCacheAnalysis.cpp:136
llvm::Loop::getName
StringRef getName() const
Definition: LoopInfo.h:863
llvm::None
const NoneType None
Definition: None.h:23
LoopInfo.h
computeTripCount
static const SCEV * computeTripCount(const Loop &L, ScalarEvolution &SE)
Compute the trip count for the given loop L.
Definition: LoopCacheAnalysis.cpp:107
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:3975
llvm::ScalarEvolution::getSmallConstantTripCount
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
Definition: ScalarEvolution.cpp:7043
CacheLineSize
static cl::opt< unsigned > CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64), cl::desc("The loop prefetch cache line size"))
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:478
llvm::cl::opt
Definition: CommandLine.h:1422
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::DependenceInfo
DependenceInfo - This class is the main dependence-analysis driver.
Definition: DependenceAnalysis.h:272
llvm::getPointerOperand
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
Definition: Instructions.h:5281
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:240
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:964
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::IndexedReference
Represents a memory reference as a base pointer and a set of indexing operations.
Definition: LoopCacheAnalysis.h:45
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:47
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::LoopBase::getLoopDepth
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition: LoopInfo.h:96
llvm::IndexedReference::getLastSubscript
const SCEV * getLastSubscript() const
Definition: LoopCacheAnalysis.h:64
llvm::ScalarEvolution::delinearize
void delinearize(const SCEV *Expr, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< const SCEV * > &Sizes, const SCEV *ElementSize)
Split this SCEVAddRecExpr into two vectors of SCEVs representing the subscripts and sizes of an array...
Definition: ScalarEvolution.cpp:12183
llvm::SCEVNAryExpr::getNoWrapFlags
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
Definition: ScalarEvolutionExpressions.h:215
llvm::LoopInfo
Definition: LoopInfo.h:1080
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:444
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:747
llvm::ConstantInt::isZero
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:192
llvm::LoopBase::isOutermost
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
Definition: LoopInfo.h:168
llvm::ScalarEvolution::getElementSize
const SCEV * getElementSize(Instruction *Inst)
Return the size of an element read or written by Inst.
Definition: ScalarEvolution.cpp:12001
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1672
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:56
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::ConstantInt::getSExtValue
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:146
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:12652
llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1509
llvm::TargetTransformInfo::getCacheLineSize
unsigned getCacheLineSize() const
Definition: TargetTransformInfo.cpp:621
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:363
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
Definition: ScalarEvolution.cpp:4076
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:59
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:352
llvm::ScalarEvolution::getTripCountFromExitCount
const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
Convert from an "exit count" (i.e.
Definition: ScalarEvolution.cpp:7023
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::ScalarEvolution::isSCEVable
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition: ScalarEvolution.cpp:3841
llvm::is_sorted
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition: STLExtras.h:1553
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:584
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:57
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:53
ScalarEvolutionExpressions.h
llvm::CacheCostTy
int64_t CacheCostTy
Definition: LoopCacheAnalysis.h:31
llvm::DependenceInfo::depends
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
Definition: DependenceAnalysis.cpp:3506
SmallVector.h
llvm::raw_ostream::indent
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
Definition: raw_ostream.cpp:497
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::CacheCost
CacheCost represents the estimated cost of a inner loop as the number of cache lines used by the memo...
Definition: LoopCacheAnalysis.h:174
TargetTransformInfo.h
llvm::AAResults::isMustAlias
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are must-alias.
Definition: AliasAnalysis.h:528
LoopCacheAnalysis.h
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:379
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
DependenceAnalysis.h
llvm::cl::desc
Definition: CommandLine.h:414
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
llvm::ScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
Definition: ScalarEvolution.cpp:7148
llvm::CacheCost::InvalidCost
static constexpr CacheCostTy InvalidCost
Definition: LoopCacheAnalysis.h:180
llvm::ScalarEvolution::isKnownNegative
bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
Definition: ScalarEvolution.cpp:9666
llvm::ScalarEvolution::getSCEVAtScope
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
Definition: ScalarEvolution.cpp:8570
llvm::CacheCost::CacheCost
CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, TargetTransformInfo &TTI, AAResults &AA, DependenceInfo &DI, Optional< unsigned > TRT=None)
Construct a CacheCost object for the loop nest described by Loops.
Definition: LoopCacheAnalysis.cpp:479
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:369
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1169