LLVM  16.0.0git
LoopDataPrefetch.cpp
Go to the documentation of this file.
1 //===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a Loop Data Prefetching Pass.
10 //
11 //===----------------------------------------------------------------------===//
12 
14 #include "llvm/InitializePasses.h"
15 
17 #include "llvm/ADT/Statistic.h"
20 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/IR/Dominators.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/IR/Module.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Transforms/Scalar.h"
31 #include "llvm/Transforms/Utils.h"
33 
34 #define DEBUG_TYPE "loop-data-prefetch"
35 
36 using namespace llvm;
37 
38 // Command-line overrides for the loop data prefetch pass. When one of these is
39 // given explicitly, the accessors in LoopDataPrefetch return it instead of
// querying TargetTransformInfo.

// Whether store addresses are considered for prefetching in addition to loads.
40 static cl::opt<bool>
41 PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false),
42  cl::desc("Prefetch write addresses"));
43 
// Look-ahead distance in instructions. runOnLoop divides it by the loop's
// instruction count to obtain the number of iterations to prefetch ahead.
44 static cl::opt<unsigned>
45  PrefetchDistance("prefetch-distance",
46  cl::desc("Number of instructions to prefetch ahead"),
47  cl::Hidden);
48 
// Minimum absolute constant stride of an address recurrence for which a
// prefetch is still added (see isStrideLargeEnough).
49 static cl::opt<unsigned>
50  MinPrefetchStride("min-prefetch-stride",
51  cl::desc("Min stride to add prefetches"), cl::Hidden);
52 
54  "max-prefetch-iters-ahead",
55  cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden);
56 
// Incremented once for every prefetch call that runOnLoop creates.
57 STATISTIC(NumPrefetches, "Number of prefetches inserted");
58 
59 namespace {
60 
61 /// Loop prefetch implementation class. It holds the analyses the transform
/// needs and is shared by the new-pass-manager and legacy entry points, which
/// both construct it and call run().
62 class LoopDataPrefetch {
63 public:
    /// Captures the analysis pointers used by the transform.
64  LoopDataPrefetch(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
67  : AC(AC), DT(DT), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {}
68 
    /// Visit the loops recorded in LoopInfo; returns true if any was changed.
69  bool run();
70 
71 private:
    /// Try to insert prefetches into the single loop \p L; returns true if
    /// at least one prefetch was inserted.
72  bool runOnLoop(Loop *L);
73 
74  /// Check if the stride of the accesses is large enough to
75  /// warrant a prefetch.
76  bool isStrideLargeEnough(const SCEVAddRecExpr *AR, unsigned TargetMinStride);
77 
    /// Minimum stride for which prefetches are added. An explicit
    /// -min-prefetch-stride wins; otherwise the target is asked, passing it
    /// the loop statistics given here.
78  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
79  unsigned NumStridedMemAccesses,
80  unsigned NumPrefetches,
81  bool HasCall) {
82  if (MinPrefetchStride.getNumOccurrences() > 0)
83  return MinPrefetchStride;
84  return TTI->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
85  NumPrefetches, HasCall);
86  }
87 
    /// Prefetch distance: an explicit -prefetch-distance wins, otherwise the
    /// target's value is used.
88  unsigned getPrefetchDistance() {
89  if (PrefetchDistance.getNumOccurrences() > 0)
90  return PrefetchDistance;
91  return TTI->getPrefetchDistance();
92  }
93 
    /// Upper bound on the number of iterations to prefetch ahead; runOnLoop
    /// gives up on a loop that would need more than this.
94  unsigned getMaxPrefetchIterationsAhead() {
95  if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0)
98  }
99 
    /// Whether store addresses are prefetched too; consults the
    /// PrefetchWrites option and the target's enableWritePrefetching().
100  bool doPrefetchWrites() {
102  return PrefetchWrites;
103  return TTI->enableWritePrefetching();
104  }
105 
    // Analyses handed in through the constructor.
106  AssumptionCache *AC;
107  DominatorTree *DT;
108  LoopInfo *LI;
109  ScalarEvolution *SE;
110  const TargetTransformInfo *TTI;
112 };
113 
114 /// Legacy class for inserting loop data prefetches. It is a FunctionPass
/// whose runOnFunction gathers the required analyses and delegates to
/// LoopDataPrefetch.
115 class LoopDataPrefetchLegacyPass : public FunctionPass {
116 public:
117  static char ID; // Pass ID, replacement for typeid
118  LoopDataPrefetchLegacyPass() : FunctionPass(ID) {
120  }
121 
     /// Declares the analyses this pass uses to the legacy pass manager.
122  void getAnalysisUsage(AnalysisUsage &AU) const override {
134  }
135 
     /// Runs the prefetch transform on \p F; returns true if \p F was changed.
136  bool runOnFunction(Function &F) override;
137  };
138 }
139 
// Legacy pass-manager registration of LoopDataPrefetchLegacyPass under the
// command-line name "loop-data-prefetch", followed by the factory that returns
// a newly allocated instance of the legacy pass.
141 INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
142  "Loop Data Prefetch", false, false)
146 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
149 INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
151 
153  return new LoopDataPrefetchLegacyPass();
154 }
155 
156 bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR,
157  unsigned TargetMinStride) {
158  // No need to check if any stride goes.
159  if (TargetMinStride <= 1)
160  return true;
161 
162  const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
163  // If MinStride is set, don't prefetch unless we can ensure that stride is
164  // larger.
165  if (!ConstStride)
166  return false;
167 
168  unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
169  return TargetMinStride <= AbsStride;
170 }
171 
     // Fetch the analyses from the analysis manager and hand them to the
     // implementation class.
175  LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
181 
182  LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
183  bool Changed = LDP.run();
184 
     // If prefetches were inserted, report the preserved set built in PA
     // (LoopAnalysis is marked preserved below); otherwise nothing was touched
     // and every analysis stays valid.
185  if (Changed) {
188  PA.preserve<LoopAnalysis>();
189  return PA;
190  }
191 
192  return PreservedAnalyses::all();
193 }
194 
     // Honour the pass manager's request to skip this function.
196  if (skipFunction(F))
197  return false;
198 
     // Collect the analyses from the legacy wrapper passes and delegate to the
     // shared implementation class.
199  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
200  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
201  ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
202  AssumptionCache *AC =
203  &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
205  &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
206  const TargetTransformInfo *TTI =
207  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
208 
209  LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
     // The result of run() doubles as the "function was modified" flag.
210  return LDP.run();
211 }
212 
213 bool LoopDataPrefetch::run() {
214  // If PrefetchDistance is not set, don't run the pass. This gives an
215  // opportunity for targets to run this pass for selected subtargets only
216  // (whose TTI sets PrefetchDistance and CacheLineSize).
217  if (getPrefetchDistance() == 0 || TTI->getCacheLineSize() == 0) {
218  LLVM_DEBUG(dbgs() << "Please set both PrefetchDistance and CacheLineSize "
219  "for loop data prefetch.\n");
220  return false;
221  }
222 
223  bool MadeChange = false;
224 
225  for (Loop *I : *LI)
226  for (Loop *L : depth_first(I))
227  MadeChange |= runOnLoop(L);
228 
229  return MadeChange;
230 }
231 
232 /// A record for a potential prefetch made during the initial scan of the
233 /// loop. This is used to let a single prefetch target multiple memory accesses.
/// runOnLoop merges an access into an existing record when its address is a
/// known constant distance of less than one cache line from that record.
234 struct Prefetch {
235  /// The address formula for this prefetch as returned by ScalarEvolution.
237  /// The point of insertion for the prefetch instruction.
238  Instruction *InsertPt = nullptr;
239  /// True if targeting a write memory access.
240  bool Writes = false;
241  /// The (first seen) prefetched instruction.
242  Instruction *MemI = nullptr;
243 
244  /// Constructor to create a new Prefetch for \p I.
     /// \p L is the address recurrence of \p I; \p I becomes both the first
     /// seen instruction and the initial insertion point.
245  Prefetch(const SCEVAddRecExpr *L, Instruction *I) : LSCEVAddRec(L) {
246  addInstruction(I);
247  };
248 
249  /// Add the instruction \p I to this prefetch. If it's not the first
250  /// one, 'InsertPt' and 'Writes' will be updated as required.
251  /// \param PtrDiff the known constant address difference to the first added
252  /// instruction.
254  int64_t PtrDiff = 0) {
255  if (!InsertPt) {
     // First instruction: it defines the record.
256  MemI = I;
257  InsertPt = I;
258  Writes = isa<StoreInst>(I);
259  } else {
     // Later instruction in a different block: the prefetch has to be placed
     // where it dominates both accesses, so move the insertion point to the
     // terminator of the nearest common dominator unless that dominator is the
     // block that already holds the insertion point.
260  BasicBlock *PrefBB = InsertPt->getParent();
261  BasicBlock *InsBB = I->getParent();
262  if (PrefBB != InsBB) {
263  BasicBlock *DomBB = DT->findNearestCommonDominator(PrefBB, InsBB);
264  if (DomBB != PrefBB)
265  InsertPt = DomBB->getTerminator();
266  }
267 
     // A store only turns this into a write prefetch when it accesses the very
     // same address as the first instruction (PtrDiff == 0).
268  if (isa<StoreInst>(I) && PtrDiff == 0)
269  Writes = true;
270  }
271  }
272 };
273 
/// Scan the loop \p L for loads (and, when write prefetching is enabled,
/// stores) whose address is an add-recurrence, and insert an llvm.prefetch
/// call for each such address as it will be a number of iterations ahead.
///
/// Only innermost loops are handled. Nothing is inserted if the loop already
/// contains a prefetch intrinsic, if its size cannot be measured, if the
/// required look-ahead exceeds the configured maximum, or if the loop is
/// known to run too few iterations.
///
/// \returns true if at least one prefetch was inserted.
274 bool LoopDataPrefetch::runOnLoop(Loop *L) {
275  bool MadeChange = false;
276 
277  // Only prefetch in the inner-most loop
278  if (!L->isInnermost())
279  return MadeChange;
280 
282  CodeMetrics::collectEphemeralValues(L, AC, EphValues);
283 
284  // Calculate the number of iterations ahead to prefetch
     // First pass over the loop: measure its size and note whether it contains
     // anything that is lowered to a real call.
286  bool HasCall = false;
287  for (const auto BB : L->blocks()) {
288  // If the loop already has prefetches, then assume that the user knows
289  // what they are doing and don't add any more.
290  for (auto &I : *BB) {
291  if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
292  if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
293  if (F->getIntrinsicID() == Intrinsic::prefetch)
294  return MadeChange;
295  if (TTI->isLoweredToCall(F))
296  HasCall = true;
297  } else { // indirect call.
298  HasCall = true;
299  }
300  }
301  }
302  Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
303  }
304 
     // Without a valid instruction count the look-ahead cannot be computed.
305  if (!Metrics.NumInsts.isValid())
306  return MadeChange;
307 
     // Clamp to 1 so the division below is well defined.
308  unsigned LoopSize = *Metrics.NumInsts.getValue();
309  if (!LoopSize)
310  LoopSize = 1;
311 
     // Convert the distance (in instructions) into whole iterations, at least 1.
312  unsigned ItersAhead = getPrefetchDistance() / LoopSize;
313  if (!ItersAhead)
314  ItersAhead = 1;
315 
316  if (ItersAhead > getMaxPrefetchIterationsAhead())
317  return MadeChange;
318 
     // A non-zero max trip count of at most ItersAhead means the prefetched
     // address would never be accessed by this loop, so give up.
319  unsigned ConstantMaxTripCount = SE->getSmallConstantMaxTripCount(L);
320  if (ConstantMaxTripCount && ConstantMaxTripCount < ItersAhead + 1)
321  return MadeChange;
322 
     // Second pass: collect the candidate accesses, merging those that fall
     // into the same cache line into a single Prefetch record.
323  unsigned NumMemAccesses = 0;
324  unsigned NumStridedMemAccesses = 0;
325  SmallVector<Prefetch, 16> Prefetches;
326  for (const auto BB : L->blocks())
327  for (auto &I : *BB) {
328  Value *PtrValue;
329  Instruction *MemI;
330 
331  if (LoadInst *LMemI = dyn_cast<LoadInst>(&I)) {
332  MemI = LMemI;
333  PtrValue = LMemI->getPointerOperand();
334  } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&I)) {
335  if (!doPrefetchWrites()) continue;
336  MemI = SMemI;
337  PtrValue = SMemI->getPointerOperand();
338  } else continue;
339 
340  unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
341  if (!TTI->shouldPrefetchAddressSpace(PtrAddrSpace))
342  continue;
     // Loop-invariant addresses still count as memory accesses, but they are
     // never prefetch candidates.
343  NumMemAccesses++;
344  if (L->isLoopInvariant(PtrValue))
345  continue;
346 
     // Only addresses that ScalarEvolution models as an add-recurrence count
     // as strided accesses.
347  const SCEV *LSCEV = SE->getSCEV(PtrValue);
348  const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
349  if (!LSCEVAddRec)
350  continue;
351  NumStridedMemAccesses++;
352 
353  // We don't want to double prefetch individual cache lines. If this
354  // access is known to be within one cache line of some other one that
355  // has already been prefetched, then don't prefetch this one as well.
356  bool DupPref = false;
357  for (auto &Pref : Prefetches) {
358  const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, Pref.LSCEVAddRec);
359  if (const SCEVConstant *ConstPtrDiff =
360  dyn_cast<SCEVConstant>(PtrDiff)) {
361  int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
362  if (PD < (int64_t) TTI->getCacheLineSize()) {
363  Pref.addInstruction(MemI, DT, PD);
364  DupPref = true;
365  break;
366  }
367  }
368  }
369  if (!DupPref)
370  Prefetches.push_back(Prefetch(LSCEVAddRec, MemI));
371  }
372 
     // The minimum stride may depend on the statistics gathered above.
373  unsigned TargetMinStride =
374  getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
375  Prefetches.size(), HasCall);
376 
377  LLVM_DEBUG(dbgs() << "Prefetching " << ItersAhead
378  << " iterations ahead (loop size: " << LoopSize << ") in "
379  << L->getHeader()->getParent()->getName() << ": " << *L);
380  LLVM_DEBUG(dbgs() << "Loop has: "
381  << NumMemAccesses << " memory accesses, "
382  << NumStridedMemAccesses << " strided memory accesses, "
383  << Prefetches.size() << " potential prefetch(es), "
384  << "a minimum stride of " << TargetMinStride << ", "
385  << (HasCall ? "calls" : "no calls") << ".\n");
386 
     // Final pass: materialise a prefetch for every record that qualifies.
387  for (auto &P : Prefetches) {
388  // Check if the stride of the accesses is large enough to warrant a
389  // prefetch.
390  if (!isStrideLargeEnough(P.LSCEVAddRec, TargetMinStride))
391  continue;
392 
     // Address to prefetch = current address + ItersAhead * step.
393  BasicBlock *BB = P.InsertPt->getParent();
394  SCEVExpander SCEVE(*SE, BB->getModule()->getDataLayout(), "prefaddr");
395  const SCEV *NextLSCEV = SE->getAddExpr(P.LSCEVAddRec, SE->getMulExpr(
396  SE->getConstant(P.LSCEVAddRec->getType(), ItersAhead),
397  P.LSCEVAddRec->getStepRecurrence(*SE)));
398  if (!SCEVE.isSafeToExpand(NextLSCEV))
399  continue;
400 
     // Expand the address as an i8 pointer in the access's address space, at
     // the record's insertion point.
401  unsigned PtrAddrSpace = NextLSCEV->getType()->getPointerAddressSpace();
402  Type *I8Ptr = Type::getInt8PtrTy(BB->getContext(), PtrAddrSpace);
403  Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, P.InsertPt);
404 
405  IRBuilder<> Builder(P.InsertPt);
406  Module *M = BB->getParent()->getParent();
407  Type *I32 = Type::getInt32Ty(BB->getContext());
408  Function *PrefetchFunc = Intrinsic::getDeclaration(
409  M, Intrinsic::prefetch, PrefPtrValue->getType());
     // Operands after the address: P.Writes, then the constants 3 and 1.
     // NOTE(review): presumably the rw, locality and cache-type operands of
     // llvm.prefetch - confirm against the LangRef.
410  Builder.CreateCall(
411  PrefetchFunc,
412  {PrefPtrValue,
413  ConstantInt::get(I32, P.Writes),
414  ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});
415  ++NumPrefetches;
416  LLVM_DEBUG(dbgs() << " Access: "
417  << *P.MemI->getOperand(isa<LoadInst>(P.MemI) ? 0 : 1)
418  << ", SCEV: " << *P.LSCEVAddRec << "\n");
419  ORE->emit([&]() {
420  return OptimizationRemark(DEBUG_TYPE, "Prefetched", P.MemI)
421  << "prefetched memory access";
422  });
423 
424  MadeChange = true;
425  }
426 
427  return MadeChange;
428 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
AssumptionCache.h
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2570
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2143
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::DominatorTreeBase::findNearestCommonDominator
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
Definition: GenericDomTree.h:468
llvm::wasm::ValType::I32
@ I32
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1421
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:291
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
ScalarEvolutionExpander.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
Scalar.h
MinPrefetchStride
static cl::opt< unsigned > MinPrefetchStride("min-prefetch-stride", cl::desc("Min stride to add prefetches"), cl::Hidden)
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:546
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:50
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1181
Statistic.h
llvm::CodeMetrics
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:31
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:172
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:729
llvm::IRBuilder<>
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch", "Loop Data Prefetch", false, false) INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Module.h
llvm::AArch64ISD::LDP
@ LDP
Definition: AArch64ISelLowering.h:465
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1290
llvm::initializeLoopDataPrefetchLegacyPassPass
void initializeLoopDataPrefetchLegacyPassPass(PassRegistry &)
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
Prefetch
loop data Loop Data Prefetch
Definition: LoopDataPrefetch.cpp:150
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
DepthFirstIterator.h
F
#define F(x, y, z)
Definition: MD5.cpp:55
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
MaxPrefetchIterationsAhead
static cl::opt< unsigned > MaxPrefetchIterationsAhead("max-prefetch-iters-ahead", cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden)
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ScalarEvolution::getMulExpr
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:3084
CommandLine.h
CodeMetrics.h
llvm::X86II::PD
@ PD
Definition: X86BaseInfo.h:787
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:194
Prefetch
A record for a potential prefetch made during the initial scan of the loop.
Definition: LoopDataPrefetch.cpp:234
false
Definition: StackSlotColoring.cpp:141
llvm::Instruction
Definition: Instruction.h:42
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:403
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
llvm::TargetTransformInfo::getPrefetchDistance
unsigned getPrefetchDistance() const
Definition: TargetTransformInfo.cpp:697
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2173
Utils.h
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::dxil::PointerTypeAnalysis::run
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Definition: PointerTypeAnalysis.cpp:189
LoopInfo.h
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4436
PrefetchDistance
static cl::opt< unsigned > PrefetchDistance("prefetch-distance", cl::desc("Number of instructions to prefetch ahead"), cl::Hidden)
llvm::cl::opt< bool >
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:298
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopDataPrefetch.cpp:34
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2626
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:173
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
PrefetchWrites
static cl::opt< bool > PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false), cl::desc("Prefetch write addresses"))
llvm::ScalarEvolution::getSmallConstantMaxTripCount
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
Definition: ScalarEvolution.cpp:7959
I
#define I(x, y, z)
Definition: MD5.cpp:58
getCalledFunction
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
Definition: MemoryBuiltins.cpp:154
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
Metrics
Machine Trace Metrics
Definition: MachineTraceMetrics.cpp:53
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:60
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::LoopDataPrefetchPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
Definition: LoopDataPrefetch.cpp:172
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:167
llvm::TargetTransformInfo::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
Definition: TargetTransformInfo.cpp:286
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
prefetch
loop data prefetch
Definition: LoopDataPrefetch.cpp:149
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
llvm::LoopInfo
Definition: LoopInfo.h:1105
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::AnalysisUsage::addPreservedID
AnalysisUsage & addPreservedID(const void *ID)
Definition: PassAnalysisSupport.h:88
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:466
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Prefetch::LSCEVAddRec
const SCEVAddRecExpr * LSCEVAddRec
The address formula for this prefetch as returned by ScalarEvolution.
Definition: LoopDataPrefetch.cpp:236
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::TargetTransformInfo::shouldPrefetchAddressSpace
bool shouldPrefetchAddressSpace(unsigned AS) const
Definition: TargetTransformInfo.cpp:716
llvm::TargetTransformInfo::getMaxPrefetchIterationsAhead
unsigned getMaxPrefetchIterationsAhead() const
Definition: TargetTransformInfo.cpp:708
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
llvm::depth_first
iterator_range< df_iterator< T > > depth_first(const T &G)
Definition: DepthFirstIterator.h:230
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
Prefetch::Prefetch
Prefetch(const SCEVAddRecExpr *L, Instruction *I)
Constructor to create a new Prefetch for I.
Definition: LoopDataPrefetch.cpp:245
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:181
llvm::TargetTransformInfo::getCacheLineSize
unsigned getCacheLineSize() const
Definition: TargetTransformInfo.cpp:682
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
llvm::LoopSimplifyID
char & LoopSimplifyID
Definition: LoopSimplify.cpp:794
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
Definition: ScalarEvolution.cpp:4544
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:342
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
ScalarEvolutionExpressions.h
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:690
Prefetch::addInstruction
void addInstruction(Instruction *I, DominatorTree *DT=nullptr, int64_t PtrDiff=0)
Add the instruction.
Definition: LoopDataPrefetch.cpp:253
Dominators.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::TargetTransformInfo::getMinPrefetchStride
unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Some HW prefetchers can handle accesses up to a certain constant stride.
Definition: TargetTransformInfo.cpp:701
TargetTransformInfo.h
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:397
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2487
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::AnalysisUsage::addRequiredID
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:277
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::cl::desc
Definition: CommandLine.h:413
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1288
InitializePasses.h
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::TargetTransformInfo::enableWritePrefetching
bool enableWritePrefetching() const
Definition: TargetTransformInfo.cpp:712
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
LoopDataPrefetch.h
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:360
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1265
llvm::createLoopDataPrefetchPass
FunctionPass * createLoopDataPrefetchPass()
Definition: LoopDataPrefetch.cpp:152
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38