LLVM  13.0.0git
LoopDataPrefetch.cpp
Go to the documentation of this file.
1 //===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a Loop Data Prefetching Pass.
10 //
11 //===----------------------------------------------------------------------===//
12 
14 #include "llvm/InitializePasses.h"
15 
17 #include "llvm/ADT/Statistic.h"
20 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/IR/CFG.h"
26 #include "llvm/IR/Dominators.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/Module.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Transforms/Scalar.h"
35 
36 #define DEBUG_TYPE "loop-data-prefetch"
37 
38 using namespace llvm;
39 
40 // Command-line knobs for this pass. When MinPrefetchStride or PrefetchDistance
41 // is given explicitly, it takes precedence over the TargetTransformInfo value.
42 static cl::opt<bool>
43 PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false),
44  cl::desc("Prefetch write addresses"));
45 
46 static cl::opt<unsigned>
47  PrefetchDistance("prefetch-distance",
48  cl::desc("Number of instructions to prefetch ahead"),
49  cl::Hidden);
50 
51 static cl::opt<unsigned>
52  MinPrefetchStride("min-prefetch-stride",
53  cl::desc("Min stride to add prefetches"), cl::Hidden);
54 
56  "max-prefetch-iters-ahead",
57  cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden);
58 
59 STATISTIC(NumPrefetches, "Number of prefetches inserted");
60 
61 namespace {
62 
63 /// Loop prefetch implementation class.
64 class LoopDataPrefetch {
65 public:
66  LoopDataPrefetch(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
69  : AC(AC), DT(DT), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {}
70 
71  bool run();
72 
73 private:
74  bool runOnLoop(Loop *L);
75 
76  /// Check if the stride of the accesses is large enough to
77  /// warrant a prefetch.
78  bool isStrideLargeEnough(const SCEVAddRecExpr *AR, unsigned TargetMinStride);
79 
80  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
81  unsigned NumStridedMemAccesses,
82  unsigned NumPrefetches,
83  bool HasCall) {
84  if (MinPrefetchStride.getNumOccurrences() > 0)
85  return MinPrefetchStride;
86  return TTI->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
87  NumPrefetches, HasCall);
88  }
89 
90  unsigned getPrefetchDistance() {
91  if (PrefetchDistance.getNumOccurrences() > 0)
92  return PrefetchDistance;
93  return TTI->getPrefetchDistance();
94  }
95 
96  unsigned getMaxPrefetchIterationsAhead() {
97  if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0)
100  }
101 
102  bool doPrefetchWrites() {
104  return PrefetchWrites;
105  return TTI->enableWritePrefetching();
106  }
107 
108  AssumptionCache *AC;
109  DominatorTree *DT;
110  LoopInfo *LI;
111  ScalarEvolution *SE;
112  const TargetTransformInfo *TTI;
114 };
115 
116 /// Legacy class for inserting loop data prefetches.
117 class LoopDataPrefetchLegacyPass : public FunctionPass {
118 public:
119  static char ID; // Pass ID, replacement for typeid
120  LoopDataPrefetchLegacyPass() : FunctionPass(ID) {
122  }
123 
124  void getAnalysisUsage(AnalysisUsage &AU) const override {
134  }
135 
136  bool runOnFunction(Function &F) override;
137  };
138 }
139 
141 INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
142  "Loop Data Prefetch", false, false)
148 INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
150 
152  return new LoopDataPrefetchLegacyPass();
153 }
154 
155 bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR,
156  unsigned TargetMinStride) {
157  // No need to check if any stride goes.
158  if (TargetMinStride <= 1)
159  return true;
160 
161  const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
162  // If MinStride is set, don't prefetch unless we can ensure that stride is
163  // larger.
164  if (!ConstStride)
165  return false;
166 
167  unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
168  return TargetMinStride <= AbsStride;
169 }
170 
174  LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
180 
181  LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
182  bool Changed = LDP.run();
183 
184  if (Changed) {
187  PA.preserve<LoopAnalysis>();
188  return PA;
189  }
190 
191  return PreservedAnalyses::all();
192 }
193 
195  if (skipFunction(F))
196  return false;
197 
198  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
199  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
200  ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
201  AssumptionCache *AC =
202  &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
204  &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
205  const TargetTransformInfo *TTI =
206  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
207 
208  LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
209  return LDP.run();
210 }
211 
212 bool LoopDataPrefetch::run() {
213  // If PrefetchDistance is not set, don't run the pass. This gives an
214  // opportunity for targets to run this pass for selected subtargets only
215  // (whose TTI sets PrefetchDistance).
216  if (getPrefetchDistance() == 0)
217  return false;
218  assert(TTI->getCacheLineSize() && "Cache line size is not set for target");
219 
220  bool MadeChange = false;
221 
222  for (Loop *I : *LI)
223  for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L)
224  MadeChange |= runOnLoop(*L);
225 
226  return MadeChange;
227 }
228 
229 /// A record for a potential prefetch made during the initial scan of the
230 /// loop. This is used to let a single prefetch target multiple memory accesses.
231 struct Prefetch {
232  /// The address formula for this prefetch as returned by ScalarEvolution.
234  /// The point of insertion for the prefetch instruction.
236  /// True if targeting a write memory access.
237  bool Writes;
238  /// The (first seen) prefetched instruction.
240 
241  /// Constructor to create a new Prefetch for \p I.
243  : LSCEVAddRec(L), InsertPt(nullptr), Writes(false), MemI(nullptr) {
244  addInstruction(I);
245  };
246 
247  /// Add the instruction \param I to this prefetch. If it's not the first
248  /// one, 'InsertPt' and 'Writes' will be updated as required.
249  /// \param PtrDiff the known constant address difference to the first added
250  /// instruction.
252  int64_t PtrDiff = 0) {
253  if (!InsertPt) {
254  MemI = I;
255  InsertPt = I;
256  Writes = isa<StoreInst>(I);
257  } else {
258  BasicBlock *PrefBB = InsertPt->getParent();
259  BasicBlock *InsBB = I->getParent();
260  if (PrefBB != InsBB) {
261  BasicBlock *DomBB = DT->findNearestCommonDominator(PrefBB, InsBB);
262  if (DomBB != PrefBB)
263  InsertPt = DomBB->getTerminator();
264  }
265 
266  if (isa<StoreInst>(I) && PtrDiff == 0)
267  Writes = true;
268  }
269  }
270 };
271 
272 bool LoopDataPrefetch::runOnLoop(Loop *L) {
273  bool MadeChange = false;
274 
275  // Only prefetch in the inner-most loop
276  if (!L->isInnermost())
277  return MadeChange;
278 
280  CodeMetrics::collectEphemeralValues(L, AC, EphValues);
281 
282  // Calculate the number of iterations ahead to prefetch
284  bool HasCall = false;
285  for (const auto BB : L->blocks()) {
286  // If the loop already has prefetches, then assume that the user knows
287  // what they are doing and don't add any more.
288  for (auto &I : *BB) {
289  if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
290  if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
291  if (F->getIntrinsicID() == Intrinsic::prefetch)
292  return MadeChange;
293  if (TTI->isLoweredToCall(F))
294  HasCall = true;
295  } else { // indirect call.
296  HasCall = true;
297  }
298  }
299  }
300  Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
301  }
302  unsigned LoopSize = Metrics.NumInsts;
303  if (!LoopSize)
304  LoopSize = 1;
305 
306  unsigned ItersAhead = getPrefetchDistance() / LoopSize;
307  if (!ItersAhead)
308  ItersAhead = 1;
309 
310  if (ItersAhead > getMaxPrefetchIterationsAhead())
311  return MadeChange;
312 
313  unsigned ConstantMaxTripCount = SE->getSmallConstantMaxTripCount(L);
314  if (ConstantMaxTripCount && ConstantMaxTripCount < ItersAhead + 1)
315  return MadeChange;
316 
317  unsigned NumMemAccesses = 0;
318  unsigned NumStridedMemAccesses = 0;
319  SmallVector<Prefetch, 16> Prefetches;
320  for (const auto BB : L->blocks())
321  for (auto &I : *BB) {
322  Value *PtrValue;
323  Instruction *MemI;
324 
325  if (LoadInst *LMemI = dyn_cast<LoadInst>(&I)) {
326  MemI = LMemI;
327  PtrValue = LMemI->getPointerOperand();
328  } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&I)) {
329  if (!doPrefetchWrites()) continue;
330  MemI = SMemI;
331  PtrValue = SMemI->getPointerOperand();
332  } else continue;
333 
334  unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
335  if (PtrAddrSpace)
336  continue;
337  NumMemAccesses++;
338  if (L->isLoopInvariant(PtrValue))
339  continue;
340 
341  const SCEV *LSCEV = SE->getSCEV(PtrValue);
342  const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
343  if (!LSCEVAddRec)
344  continue;
345  NumStridedMemAccesses++;
346 
347  // We don't want to double prefetch individual cache lines. If this
348  // access is known to be within one cache line of some other one that
349  // has already been prefetched, then don't prefetch this one as well.
350  bool DupPref = false;
351  for (auto &Pref : Prefetches) {
352  const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, Pref.LSCEVAddRec);
353  if (const SCEVConstant *ConstPtrDiff =
354  dyn_cast<SCEVConstant>(PtrDiff)) {
355  int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
356  if (PD < (int64_t) TTI->getCacheLineSize()) {
357  Pref.addInstruction(MemI, DT, PD);
358  DupPref = true;
359  break;
360  }
361  }
362  }
363  if (!DupPref)
364  Prefetches.push_back(Prefetch(LSCEVAddRec, MemI));
365  }
366 
367  unsigned TargetMinStride =
368  getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
369  Prefetches.size(), HasCall);
370 
371  LLVM_DEBUG(dbgs() << "Prefetching " << ItersAhead
372  << " iterations ahead (loop size: " << LoopSize << ") in "
373  << L->getHeader()->getParent()->getName() << ": " << *L);
374  LLVM_DEBUG(dbgs() << "Loop has: "
375  << NumMemAccesses << " memory accesses, "
376  << NumStridedMemAccesses << " strided memory accesses, "
377  << Prefetches.size() << " potential prefetch(es), "
378  << "a minimum stride of " << TargetMinStride << ", "
379  << (HasCall ? "calls" : "no calls") << ".\n");
380 
381  for (auto &P : Prefetches) {
382  // Check if the stride of the accesses is large enough to warrant a
383  // prefetch.
384  if (!isStrideLargeEnough(P.LSCEVAddRec, TargetMinStride))
385  continue;
386 
387  const SCEV *NextLSCEV = SE->getAddExpr(P.LSCEVAddRec, SE->getMulExpr(
388  SE->getConstant(P.LSCEVAddRec->getType(), ItersAhead),
389  P.LSCEVAddRec->getStepRecurrence(*SE)));
390  if (!isSafeToExpand(NextLSCEV, *SE))
391  continue;
392 
393  BasicBlock *BB = P.InsertPt->getParent();
394  Type *I8Ptr = Type::getInt8PtrTy(BB->getContext(), 0/*PtrAddrSpace*/);
395  SCEVExpander SCEVE(*SE, BB->getModule()->getDataLayout(), "prefaddr");
396  Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, P.InsertPt);
397 
398  IRBuilder<> Builder(P.InsertPt);
399  Module *M = BB->getParent()->getParent();
400  Type *I32 = Type::getInt32Ty(BB->getContext());
401  Function *PrefetchFunc = Intrinsic::getDeclaration(
402  M, Intrinsic::prefetch, PrefPtrValue->getType());
403  Builder.CreateCall(
404  PrefetchFunc,
405  {PrefPtrValue,
406  ConstantInt::get(I32, P.Writes),
407  ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});
408  ++NumPrefetches;
409  LLVM_DEBUG(dbgs() << " Access: "
410  << *P.MemI->getOperand(isa<LoadInst>(P.MemI) ? 0 : 1)
411  << ", SCEV: " << *P.LSCEVAddRec << "\n");
412  ORE->emit([&]() {
413  return OptimizationRemark(DEBUG_TYPE, "Prefetched", P.MemI)
414  << "prefetched memory access";
415  });
416 
417  MadeChange = true;
418  }
419 
420  return MadeChange;
421 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
AssumptionCache.h
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2320
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:64
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2105
llvm
Definition: AllocatorList.h:23
llvm::DominatorTreeBase::findNearestCommonDominator
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
Definition: GenericDomTree.h:468
llvm::wasm::ValType::I32
@ I32
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
ValueMapper.h
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1329
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:249
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
ScalarEvolutionExpander.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:769
Scalar.h
MinPrefetchStride
static cl::opt< unsigned > MinPrefetchStride("min-prefetch-stride", cl::desc("Min stride to add prefetches"), cl::Hidden)
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::X86II::PD
@ PD
Definition: X86BaseInfo.h:782
llvm::SCEVExpander
This class uses information about analyzed scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:63
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1167
Statistic.h
llvm::CodeMetrics
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:30
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:728
llvm::IRBuilder<>
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:443
llvm::df_end
df_iterator< T > df_end(const T &G)
Definition: DepthFirstIterator.h:223
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch", "Loop Data Prefetch", false, false) INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
Module.h
llvm::AArch64ISD::LDP
@ LDP
Definition: AArch64ISelLowering.h:439
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1258
llvm::initializeLoopDataPrefetchLegacyPassPass
void initializeLoopDataPrefetchLegacyPassPass(PassRegistry &)
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
Prefetch
loop data Loop Data Prefetch
Definition: LoopDataPrefetch.cpp:149
llvm::Data
@ Data
Definition: SIMachineScheduler.h:56
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:197
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
DepthFirstIterator.h
F
#define F(x, y, z)
Definition: MD5.cpp:56
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
MaxPrefetchIterationsAhead
static cl::opt< unsigned > MaxPrefetchIterationsAhead("max-prefetch-iters-ahead", cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden)
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::ScalarEvolution::getMulExpr
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2911
llvm::isSafeToExpand
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
Definition: ScalarEvolutionExpander.cpp:2699
CommandLine.h
CodeMetrics.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
Prefetch::MemI
Instruction * MemI
The (first seen) prefetched instruction.
Definition: LoopDataPrefetch.cpp:239
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
Prefetch
A record for a potential prefetch made during the initial scan of the loop.
Definition: LoopDataPrefetch.cpp:231
false
Definition: StackSlotColoring.cpp:142
llvm::Instruction
Definition: Instruction.h:45
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:281
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::AArch64CC::LE
@ LE
Definition: AArch64BaseInfo.h:249
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:404
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:898
llvm::TargetTransformInfo::getPrefetchDistance
unsigned getPrefetchDistance() const
Definition: TargetTransformInfo.cpp:635
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2135
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
CFG.h
LoopInfo.h
getCalledFunction
static const Function * getCalledFunction(const Value *V, bool LookThroughBitCast, bool &IsNoBuiltin)
Definition: MemoryBuiltins.cpp:117
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:3975
PrefetchDistance
static cl::opt< unsigned > PrefetchDistance("prefetch-distance", cl::desc("Number of instructions to prefetch ahead"), cl::Hidden)
llvm::cl::opt< bool >
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:303
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopDataPrefetch.cpp:36
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2376
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:169
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:176
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
PrefetchWrites
static cl::opt< bool > PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false), cl::desc("Prefetch write addresses"))
llvm::ScalarEvolution::getSmallConstantMaxTripCount
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
Definition: ScalarEvolution.cpp:7059
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
Metrics
Machine Trace Metrics
Definition: MachineTraceMetrics.cpp:53
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:47
llvm::df_begin
df_iterator< T > df_begin(const T &G)
Definition: DepthFirstIterator.h:218
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Prefetch::InsertPt
Instruction * InsertPt
The point of insertion for the prefetch instruction.
Definition: LoopDataPrefetch.cpp:235
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::LoopDataPrefetchPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
Definition: LoopDataPrefetch.cpp:171
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:162
llvm::TargetTransformInfo::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
Definition: TargetTransformInfo.cpp:275
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
prefetch
loop data prefetch
Definition: LoopDataPrefetch.cpp:148
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:200
llvm::LoopInfo
Definition: LoopInfo.h:1080
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
Prefetch::Writes
bool Writes
True if targeting a write memory access.
Definition: LoopDataPrefetch.cpp:237
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:444
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
Prefetch::LSCEVAddRec
const SCEVAddRecExpr * LSCEVAddRec
The address formula for this prefetch as returned by ScalarEvolution.
Definition: LoopDataPrefetch.cpp:233
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::TargetTransformInfo::getMaxPrefetchIterationsAhead
unsigned getMaxPrefetchIterationsAhead() const
Definition: TargetTransformInfo.cpp:646
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:294
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
Prefetch::Prefetch
Prefetch(const SCEVAddRecExpr *L, Instruction *I)
Constructor to create a new Prefetch for I.
Definition: LoopDataPrefetch.cpp:242
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
llvm::TargetTransformInfo::getCacheLineSize
unsigned getCacheLineSize() const
Definition: TargetTransformInfo.cpp:621
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
Definition: ScalarEvolution.cpp:4076
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:352
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
ScalarEvolutionExpressions.h
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:684
Prefetch::addInstruction
void addInstruction(Instruction *I, DominatorTree *DT=nullptr, int64_t PtrDiff=0)
Add the instruction.
Definition: LoopDataPrefetch.cpp:251
Dominators.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::TargetTransformInfo::getMinPrefetchStride
unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Some HW prefetchers can handle accesses up to a certain constant stride.
Definition: TargetTransformInfo.cpp:639
TargetTransformInfo.h
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2395
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::cl::desc
Definition: CommandLine.h:414
BasicBlockUtils.h
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1284
InitializePasses.h
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::TargetTransformInfo::enableWritePrefetching
bool enableWritePrefetching() const
Definition: TargetTransformInfo.cpp:650
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
LoopDataPrefetch.h
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:369
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1233
llvm::createLoopDataPrefetchPass
FunctionPass * createLoopDataPrefetchPass()
Definition: LoopDataPrefetch.cpp:151
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38