Go to the documentation of this file.
34 #define DEBUG_TYPE "loop-data-prefetch"
42 cl::desc(
"Prefetch write addresses"));
46 cl::desc(
"Number of instructions to prefetch ahead"),
54 "max-prefetch-iters-ahead",
57 STATISTIC(NumPrefetches,
"Number of prefetches inserted");
62 class LoopDataPrefetch {
67 : AC(AC), DT(DT), LI(LI), SE(SE),
TTI(
TTI), ORE(ORE) {}
72 bool runOnLoop(
Loop *L);
76 bool isStrideLargeEnough(
const SCEVAddRecExpr *AR,
unsigned TargetMinStride);
78 unsigned getMinPrefetchStride(
unsigned NumMemAccesses,
79 unsigned NumStridedMemAccesses,
80 unsigned NumPrefetches,
85 NumPrefetches, HasCall);
88 unsigned getPrefetchDistance() {
94 unsigned getMaxPrefetchIterationsAhead() {
100 bool doPrefetchWrites() {
115 class LoopDataPrefetchLegacyPass :
public FunctionPass {
142 "Loop Data Prefetch",
false,
false)
153 return new LoopDataPrefetchLegacyPass();
156 bool LoopDataPrefetch::isStrideLargeEnough(
const SCEVAddRecExpr *AR,
157 unsigned TargetMinStride) {
159 if (TargetMinStride <= 1)
168 unsigned AbsStride =
std::abs(ConstStride->getAPInt().getSExtValue());
169 return TargetMinStride <= AbsStride;
182 LoopDataPrefetch
LDP(AC, DT, LI, SE,
TTI, ORE);
183 bool Changed =
LDP.run();
199 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
200 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
201 ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
203 &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
205 &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
207 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
209 LoopDataPrefetch
LDP(AC, DT, LI, SE,
TTI, ORE);
218 LLVM_DEBUG(
dbgs() <<
"Please set both PrefetchDistance and CacheLineSize "
219 "for loop data prefetch.\n");
223 bool MadeChange =
false;
227 MadeChange |= runOnLoop(L);
254 int64_t PtrDiff = 0) {
258 Writes = isa<StoreInst>(
I);
262 if (PrefBB != InsBB) {
268 if (isa<StoreInst>(
I) && PtrDiff == 0)
274 bool LoopDataPrefetch::runOnLoop(
Loop *L) {
275 bool MadeChange =
false;
286 bool HasCall =
false;
290 for (
auto &
I : *
BB) {
291 if (isa<CallInst>(&
I) || isa<InvokeInst>(&
I)) {
305 if (!
Metrics.NumInsts.isValid())
308 unsigned LoopSize = *
Metrics.NumInsts.getValue();
312 unsigned ItersAhead = getPrefetchDistance() / LoopSize;
316 if (ItersAhead > getMaxPrefetchIterationsAhead())
320 if (ConstantMaxTripCount && ConstantMaxTripCount < ItersAhead + 1)
323 unsigned NumMemAccesses = 0;
324 unsigned NumStridedMemAccesses = 0;
327 for (
auto &
I : *
BB) {
331 if (
LoadInst *LMemI = dyn_cast<LoadInst>(&
I)) {
333 PtrValue = LMemI->getPointerOperand();
334 }
else if (
StoreInst *SMemI = dyn_cast<StoreInst>(&
I)) {
335 if (!doPrefetchWrites())
continue;
337 PtrValue = SMemI->getPointerOperand();
348 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
351 NumStridedMemAccesses++;
356 bool DupPref =
false;
357 for (
auto &Pref : Prefetches) {
360 dyn_cast<SCEVConstant>(PtrDiff)) {
361 int64_t
PD =
std::abs(ConstPtrDiff->getValue()->getSExtValue());
363 Pref.addInstruction(MemI, DT,
PD);
370 Prefetches.push_back(
Prefetch(LSCEVAddRec, MemI));
373 unsigned TargetMinStride =
374 getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
375 Prefetches.size(), HasCall);
378 <<
" iterations ahead (loop size: " << LoopSize <<
") in "
381 << NumMemAccesses <<
" memory accesses, "
382 << NumStridedMemAccesses <<
" strided memory accesses, "
383 << Prefetches.size() <<
" potential prefetch(es), "
384 <<
"a minimum stride of " << TargetMinStride <<
", "
385 << (HasCall ?
"calls" :
"no calls") <<
".\n");
387 for (
auto &
P : Prefetches) {
390 if (!isStrideLargeEnough(
P.LSCEVAddRec, TargetMinStride))
394 SCEVExpander SCEVE(*SE,
BB->getModule()->getDataLayout(),
"prefaddr");
397 P.LSCEVAddRec->getStepRecurrence(*SE)));
398 if (!SCEVE.isSafeToExpand(NextLSCEV))
403 Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr,
P.InsertPt);
406 Module *
M =
BB->getParent()->getParent();
417 << *
P.MemI->getOperand(isa<LoadInst>(
P.MemI) ? 0 : 1)
418 <<
", SCEV: " << *
P.LSCEVAddRec <<
"\n");
421 <<
"prefetched memory access";
A set of analyses that are preserved following a run of a transformation pass.
Analysis pass providing the TargetTransformInfo.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Analysis pass that exposes the ScalarEvolution for a function.
This is an optimization pass for GlobalISel generic memory operations.
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
const Function * getParent() const
Return the enclosing method, or null if none.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
static cl::opt< unsigned > MinPrefetchStride("min-prefetch-stride", cl::desc("Min stride to add prefetches"), cl::Hidden)
Represents a single loop in the control flow graph.
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
This class uses information about analyze scalars to rewrite expressions in canonical form.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Utility to calculate the size and a few similar metrics for a set of basic blocks.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
The main scalar evolution driver.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch", "Loop Data Prefetch", false, false) INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass
The instances of the Type class are immutable: once they are created, they are never changed.
The legacy pass manager's analysis pass to compute loop information.
void initializeLoopDataPrefetchLegacyPassPass(PassRegistry &)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
loop data Loop Data Prefetch
static IntegerType * getInt32Ty(LLVMContext &C)
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
static cl::opt< unsigned > MaxPrefetchIterationsAhead("max-prefetch-iters-ahead", cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden)
LLVM Basic Block Representation.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Represent the analysis usage information of a pass.
iterator_range< block_iterator > blocks() const
A record for a potential prefetch made during the initial scan of the loop.
Legacy analysis pass which computes a DominatorTree.
STATISTIC(NumFunctions, "Total number of functions")
int getNumOccurrences() const
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
static cl::opt< unsigned > PrefetchDistance("prefetch-distance", cl::desc("Number of instructions to prefetch ahead"), cl::Hidden)
This class represents an analyzed expression in the program.
An instruction for storing to memory.
A function analysis which provides an AssumptionCache.
void preserve()
Mark an analysis as preserved.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
static cl::opt< bool > PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false), cl::desc("Prefetch write addresses"))
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
initializer< Ty > init(const Ty &Val)
This class represents a constant integer value.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
A Module instance is used to store all the information related to an LLVM module.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
AnalysisUsage & addPreservedID(const void *ID)
const SCEV * getConstant(ConstantInt *V)
Type * getType() const
All values are typed, get the type of this value.
const SCEVAddRecExpr * LSCEVAddRec
The address formula for this prefetch as returned by ScalarEvolution.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
StringRef getName() const
Return a constant reference to the value's name.
An instruction for reading from memory.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
iterator_range< df_iterator< T > > depth_first(const T &G)
static bool runOnFunction(Function &F, bool PostInlining)
Prefetch(const SCEVAddRecExpr *L, Instruction *I)
Constructor to create a new Prefetch for I.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents a polynomial recurrence on the trip count of the specified loop.
BlockT * getHeader() const
Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
Analysis pass which computes a DominatorTree.
void addInstruction(Instruction *I, DominatorTree *DT=nullptr, int64_t PtrDiff=0)
Add the instruction.
const BasicBlock * getParent() const
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Type * getType() const
Return the LLVM type of this SCEV expression.
A container for analyses that lazily runs them and caches their results.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
FunctionPass class - This class is used to implement most global optimizations.
AnalysisUsage & addRequiredID(const void *ID)
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
AnalysisUsage & addRequired()
APFloat abs(APFloat X)
Returns the absolute value of the argument.
LLVM Value Representation.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Analysis pass that exposes the LoopInfo for a function.
FunctionPass * createLoopDataPrefetchPass()