LLVM  15.0.0git
LoopDataPrefetch.cpp
Go to the documentation of this file.
1 //===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a Loop Data Prefetching Pass.
10 //
11 //===----------------------------------------------------------------------===//
12 
14 #include "llvm/InitializePasses.h"
15 
17 #include "llvm/ADT/Statistic.h"
20 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/IR/Dominators.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/IR/Module.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Transforms/Scalar.h"
31 #include "llvm/Transforms/Utils.h"
33 
34 #define DEBUG_TYPE "loop-data-prefetch"
35 
36 using namespace llvm;
37 
38 // Command-line overrides for the target's prefetching parameters. When one of
39 // these options is given explicitly, it takes precedence over the TTI value.
40 static cl::opt<bool>
41 PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false),
42  cl::desc("Prefetch write addresses"));
43 
44 static cl::opt<unsigned>
45  PrefetchDistance("prefetch-distance",
46  cl::desc("Number of instructions to prefetch ahead"),
47  cl::Hidden);
48 
49 static cl::opt<unsigned>
50  MinPrefetchStride("min-prefetch-stride",
51  cl::desc("Min stride to add prefetches"), cl::Hidden);
52 
54  "max-prefetch-iters-ahead",
55  cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden);
56 
57 STATISTIC(NumPrefetches, "Number of prefetches inserted");
58 
59 namespace {
60 
61 /// Loop prefetch implementation class.
62 class LoopDataPrefetch {
63 public:
64  LoopDataPrefetch(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
67  : AC(AC), DT(DT), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {}
68 
69  bool run();
70 
71 private:
72  bool runOnLoop(Loop *L);
73 
74  /// Check if the stride of the accesses is large enough to
75  /// warrant a prefetch.
76  bool isStrideLargeEnough(const SCEVAddRecExpr *AR, unsigned TargetMinStride);
77 
78  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
79  unsigned NumStridedMemAccesses,
80  unsigned NumPrefetches,
81  bool HasCall) {
82  if (MinPrefetchStride.getNumOccurrences() > 0)
83  return MinPrefetchStride;
84  return TTI->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
85  NumPrefetches, HasCall);
86  }
87 
88  unsigned getPrefetchDistance() {
89  if (PrefetchDistance.getNumOccurrences() > 0)
90  return PrefetchDistance;
91  return TTI->getPrefetchDistance();
92  }
93 
94  unsigned getMaxPrefetchIterationsAhead() {
95  if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0)
98  }
99 
100  bool doPrefetchWrites() {
102  return PrefetchWrites;
103  return TTI->enableWritePrefetching();
104  }
105 
106  AssumptionCache *AC;
107  DominatorTree *DT;
108  LoopInfo *LI;
109  ScalarEvolution *SE;
110  const TargetTransformInfo *TTI;
112 };
113 
114 /// Legacy class for inserting loop data prefetches.
115 class LoopDataPrefetchLegacyPass : public FunctionPass {
116 public:
117  static char ID; // Pass ID, replacement for typeid
118  LoopDataPrefetchLegacyPass() : FunctionPass(ID) {
120  }
121 
122  void getAnalysisUsage(AnalysisUsage &AU) const override {
134  }
135 
136  bool runOnFunction(Function &F) override;
137  };
138 }
139 
141 INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
142  "Loop Data Prefetch", false, false)
146 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
149 INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
151 
153  return new LoopDataPrefetchLegacyPass();
154 }
155 
156 bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR,
157  unsigned TargetMinStride) {
158  // No need to check if any stride goes.
159  if (TargetMinStride <= 1)
160  return true;
161 
162  const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
163  // If MinStride is set, don't prefetch unless we can ensure that stride is
164  // larger.
165  if (!ConstStride)
166  return false;
167 
168  unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
169  return TargetMinStride <= AbsStride;
170 }
171 
175  LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
181 
182  LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
183  bool Changed = LDP.run();
184 
185  if (Changed) {
188  PA.preserve<LoopAnalysis>();
189  return PA;
190  }
191 
192  return PreservedAnalyses::all();
193 }
194 
196  if (skipFunction(F))
197  return false;
198 
199  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
200  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
201  ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
202  AssumptionCache *AC =
203  &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
205  &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
206  const TargetTransformInfo *TTI =
207  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
208 
209  LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
210  return LDP.run();
211 }
212 
213 bool LoopDataPrefetch::run() {
214  // If PrefetchDistance is not set, don't run the pass. This gives an
215  // opportunity for targets to run this pass for selected subtargets only
216  // (whose TTI sets PrefetchDistance).
217  if (getPrefetchDistance() == 0)
218  return false;
219  assert(TTI->getCacheLineSize() && "Cache line size is not set for target");
220 
221  bool MadeChange = false;
222 
223  for (Loop *I : *LI)
224  for (Loop *L : depth_first(I))
225  MadeChange |= runOnLoop(L);
226 
227  return MadeChange;
228 }
229 
230 /// A record for a potential prefetch made during the initial scan of the
231 /// loop. This is used to let a single prefetch target multiple memory accesses.
232 struct Prefetch {
233  /// The address formula for this prefetch as returned by ScalarEvolution.
235  /// The point of insertion for the prefetch instruction.
236  Instruction *InsertPt = nullptr;
237  /// True if targeting a write memory access.
238  bool Writes = false;
239  /// The (first seen) prefetched instruction.
240  Instruction *MemI = nullptr;
241 
242  /// Constructor to create a new Prefetch for \p I.
243  Prefetch(const SCEVAddRecExpr *L, Instruction *I) : LSCEVAddRec(L) {
244  addInstruction(I);
245  };
246 
247  /// Add the instruction \param I to this prefetch. If it's not the first
248  /// one, 'InsertPt' and 'Writes' will be updated as required.
249  /// \param PtrDiff the known constant address difference to the first added
250  /// instruction.
252  int64_t PtrDiff = 0) {
253  if (!InsertPt) {
254  MemI = I;
255  InsertPt = I;
256  Writes = isa<StoreInst>(I);
257  } else {
258  BasicBlock *PrefBB = InsertPt->getParent();
259  BasicBlock *InsBB = I->getParent();
260  if (PrefBB != InsBB) {
261  BasicBlock *DomBB = DT->findNearestCommonDominator(PrefBB, InsBB);
262  if (DomBB != PrefBB)
263  InsertPt = DomBB->getTerminator();
264  }
265 
266  if (isa<StoreInst>(I) && PtrDiff == 0)
267  Writes = true;
268  }
269  }
270 };
271 
272 bool LoopDataPrefetch::runOnLoop(Loop *L) {
273  bool MadeChange = false;
274 
275  // Only prefetch in the inner-most loop
276  if (!L->isInnermost())
277  return MadeChange;
278 
280  CodeMetrics::collectEphemeralValues(L, AC, EphValues);
281 
282  // Calculate the number of iterations ahead to prefetch
284  bool HasCall = false;
285  for (const auto BB : L->blocks()) {
286  // If the loop already has prefetches, then assume that the user knows
287  // what they are doing and don't add any more.
288  for (auto &I : *BB) {
289  if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
290  if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
291  if (F->getIntrinsicID() == Intrinsic::prefetch)
292  return MadeChange;
293  if (TTI->isLoweredToCall(F))
294  HasCall = true;
295  } else { // indirect call.
296  HasCall = true;
297  }
298  }
299  }
300  Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
301  }
302 
303  if (!Metrics.NumInsts.isValid())
304  return MadeChange;
305 
306  unsigned LoopSize = *Metrics.NumInsts.getValue();
307  if (!LoopSize)
308  LoopSize = 1;
309 
310  unsigned ItersAhead = getPrefetchDistance() / LoopSize;
311  if (!ItersAhead)
312  ItersAhead = 1;
313 
314  if (ItersAhead > getMaxPrefetchIterationsAhead())
315  return MadeChange;
316 
317  unsigned ConstantMaxTripCount = SE->getSmallConstantMaxTripCount(L);
318  if (ConstantMaxTripCount && ConstantMaxTripCount < ItersAhead + 1)
319  return MadeChange;
320 
321  unsigned NumMemAccesses = 0;
322  unsigned NumStridedMemAccesses = 0;
323  SmallVector<Prefetch, 16> Prefetches;
324  for (const auto BB : L->blocks())
325  for (auto &I : *BB) {
326  Value *PtrValue;
327  Instruction *MemI;
328 
329  if (LoadInst *LMemI = dyn_cast<LoadInst>(&I)) {
330  MemI = LMemI;
331  PtrValue = LMemI->getPointerOperand();
332  } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&I)) {
333  if (!doPrefetchWrites()) continue;
334  MemI = SMemI;
335  PtrValue = SMemI->getPointerOperand();
336  } else continue;
337 
338  unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
339  if (PtrAddrSpace)
340  continue;
341  NumMemAccesses++;
342  if (L->isLoopInvariant(PtrValue))
343  continue;
344 
345  const SCEV *LSCEV = SE->getSCEV(PtrValue);
346  const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
347  if (!LSCEVAddRec)
348  continue;
349  NumStridedMemAccesses++;
350 
351  // We don't want to double prefetch individual cache lines. If this
352  // access is known to be within one cache line of some other one that
353  // has already been prefetched, then don't prefetch this one as well.
354  bool DupPref = false;
355  for (auto &Pref : Prefetches) {
356  const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, Pref.LSCEVAddRec);
357  if (const SCEVConstant *ConstPtrDiff =
358  dyn_cast<SCEVConstant>(PtrDiff)) {
359  int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
360  if (PD < (int64_t) TTI->getCacheLineSize()) {
361  Pref.addInstruction(MemI, DT, PD);
362  DupPref = true;
363  break;
364  }
365  }
366  }
367  if (!DupPref)
368  Prefetches.push_back(Prefetch(LSCEVAddRec, MemI));
369  }
370 
371  unsigned TargetMinStride =
372  getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
373  Prefetches.size(), HasCall);
374 
375  LLVM_DEBUG(dbgs() << "Prefetching " << ItersAhead
376  << " iterations ahead (loop size: " << LoopSize << ") in "
377  << L->getHeader()->getParent()->getName() << ": " << *L);
378  LLVM_DEBUG(dbgs() << "Loop has: "
379  << NumMemAccesses << " memory accesses, "
380  << NumStridedMemAccesses << " strided memory accesses, "
381  << Prefetches.size() << " potential prefetch(es), "
382  << "a minimum stride of " << TargetMinStride << ", "
383  << (HasCall ? "calls" : "no calls") << ".\n");
384 
385  for (auto &P : Prefetches) {
386  // Check if the stride of the accesses is large enough to warrant a
387  // prefetch.
388  if (!isStrideLargeEnough(P.LSCEVAddRec, TargetMinStride))
389  continue;
390 
391  const SCEV *NextLSCEV = SE->getAddExpr(P.LSCEVAddRec, SE->getMulExpr(
392  SE->getConstant(P.LSCEVAddRec->getType(), ItersAhead),
393  P.LSCEVAddRec->getStepRecurrence(*SE)));
394  if (!isSafeToExpand(NextLSCEV, *SE))
395  continue;
396 
397  BasicBlock *BB = P.InsertPt->getParent();
398  Type *I8Ptr = Type::getInt8PtrTy(BB->getContext(), 0/*PtrAddrSpace*/);
399  SCEVExpander SCEVE(*SE, BB->getModule()->getDataLayout(), "prefaddr");
400  Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, P.InsertPt);
401 
402  IRBuilder<> Builder(P.InsertPt);
403  Module *M = BB->getParent()->getParent();
404  Type *I32 = Type::getInt32Ty(BB->getContext());
405  Function *PrefetchFunc = Intrinsic::getDeclaration(
406  M, Intrinsic::prefetch, PrefPtrValue->getType());
407  Builder.CreateCall(
408  PrefetchFunc,
409  {PrefPtrValue,
410  ConstantInt::get(I32, P.Writes),
411  ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});
412  ++NumPrefetches;
413  LLVM_DEBUG(dbgs() << " Access: "
414  << *P.MemI->getOperand(isa<LoadInst>(P.MemI) ? 0 : 1)
415  << ", SCEV: " << *P.LSCEVAddRec << "\n");
416  ORE->emit([&]() {
417  return OptimizationRemark(DEBUG_TYPE, "Prefetched", P.MemI)
418  << "prefetched memory access";
419  });
420 
421  MadeChange = true;
422  }
423 
424  return MadeChange;
425 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
AssumptionCache.h
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2479
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2115
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::DominatorTreeBase::findNearestCommonDominator
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
Definition: GenericDomTree.h:468
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::isSafeToExpand
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE, bool CanonicalMode=true)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
Definition: ScalarEvolutionExpander.cpp:2616
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1418
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:291
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
ScalarEvolutionExpander.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:780
Scalar.h
MinPrefetchStride
static cl::opt< unsigned > MinPrefetchStride("min-prefetch-stride", cl::desc("Min stride to add prefetches"), cl::Hidden)
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:546
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:63
llvm::X86II::PD
@ PD
Definition: X86BaseInfo.h:787
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Statistic.h
llvm::CodeMetrics
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:31
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:729
llvm::IRBuilder<>
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch", "Loop Data Prefetch", false, false) INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Module.h
llvm::AArch64ISD::LDP
@ LDP
Definition: AArch64ISelLowering.h:450
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1287
llvm::initializeLoopDataPrefetchLegacyPassPass
void initializeLoopDataPrefetchLegacyPassPass(PassRegistry &)
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
Prefetch
loop data Loop Data Prefetch
Definition: LoopDataPrefetch.cpp:150
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
DepthFirstIterator.h
F
#define F(x, y, z)
Definition: MD5.cpp:55
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
MaxPrefetchIterationsAhead
static cl::opt< unsigned > MaxPrefetchIterationsAhead("max-prefetch-iters-ahead", cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden)
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ScalarEvolution::getMulExpr
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:3050
CommandLine.h
CodeMetrics.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:194
Prefetch
A record for a potential prefetch made during the initial scan of the loop.
Definition: LoopDataPrefetch.cpp:232
false
Definition: StackSlotColoring.cpp:141
llvm::Instruction
Definition: Instruction.h:42
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:395
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:928
llvm::TargetTransformInfo::getPrefetchDistance
unsigned getPrefetchDistance() const
Definition: TargetTransformInfo.cpp:682
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2145
Utils.h
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::dxil::PointerTypeAnalysis::run
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Definition: PointerTypeAnalysis.cpp:101
LoopInfo.h
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4406
PrefetchDistance
static cl::opt< unsigned > PrefetchDistance("prefetch-distance", cl::desc("Number of instructions to prefetch ahead"), cl::Hidden)
llvm::cl::opt< bool >
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:297
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopDataPrefetch.cpp:34
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2535
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:173
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
PrefetchWrites
static cl::opt< bool > PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false), cl::desc("Prefetch write addresses"))
llvm::ScalarEvolution::getSmallConstantMaxTripCount
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
Definition: ScalarEvolution.cpp:7692
I
#define I(x, y, z)
Definition: MD5.cpp:58
getCalledFunction
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
Definition: MemoryBuiltins.cpp:160
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
Metrics
Machine Trace Metrics
Definition: MachineTraceMetrics.cpp:53
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:60
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::LoopDataPrefetchPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
Definition: LoopDataPrefetch.cpp:172
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:163
llvm::TargetTransformInfo::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
Definition: TargetTransformInfo.cpp:284
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
prefetch
loop data prefetch
Definition: LoopDataPrefetch.cpp:149
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
llvm::LoopInfo
Definition: LoopInfo.h:1102
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::AnalysisUsage::addPreservedID
AnalysisUsage & addPreservedID(const void *ID)
Definition: PassAnalysisSupport.h:88
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:461
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Prefetch::LSCEVAddRec
const SCEVAddRecExpr * LSCEVAddRec
The address formula for this prefetch as returned by ScalarEvolution.
Definition: LoopDataPrefetch.cpp:234
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::TargetTransformInfo::getMaxPrefetchIterationsAhead
unsigned getMaxPrefetchIterationsAhead() const
Definition: TargetTransformInfo.cpp:693
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:173
I32
@ I32
Definition: DXILOpLowering.cpp:40
llvm::depth_first
iterator_range< df_iterator< T > > depth_first(const T &G)
Definition: DepthFirstIterator.h:230
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
Prefetch::Prefetch
Prefetch(const SCEVAddRecExpr *L, Instruction *I)
Constructor to create a new Prefetch for I.
Definition: LoopDataPrefetch.cpp:243
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:181
llvm::TargetTransformInfo::getCacheLineSize
unsigned getCacheLineSize() const
Definition: TargetTransformInfo.cpp:667
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
llvm::LoopSimplifyID
char & LoopSimplifyID
Definition: LoopSimplify.cpp:794
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
Definition: ScalarEvolution.cpp:4523
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:342
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
ScalarEvolutionExpressions.h
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:690
Prefetch::addInstruction
void addInstruction(Instruction *I, DominatorTree *DT=nullptr, int64_t PtrDiff=0)
Add the instruction.
Definition: LoopDataPrefetch.cpp:251
Dominators.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::TargetTransformInfo::getMinPrefetchStride
unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Some HW prefetchers can handle accesses up to a certain constant stride.
Definition: TargetTransformInfo.cpp:686
TargetTransformInfo.h
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2453
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::AnalysisUsage::addRequiredID
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:277
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::cl::desc
Definition: CommandLine.h:405
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1282
InitializePasses.h
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::TargetTransformInfo::enableWritePrefetching
bool enableWritePrefetching() const
Definition: TargetTransformInfo.cpp:697
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
LoopDataPrefetch.h
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:360
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1262
llvm::createLoopDataPrefetchPass
FunctionPass * createLoopDataPrefetchPass()
Definition: LoopDataPrefetch.cpp:152
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38