AMDGPUPerfHintAnalysis.cpp
//===- AMDGPUPerfHintAnalysis.cpp - analysis of functions memory traffic --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Analyzes if a function is potentially memory bound and if a kernel
/// may benefit from limiting the number of waves to reduce cache thrashing.
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-perf-hint"

static cl::opt<unsigned>
    MemBoundThresh("amdgpu-membound-threshold", cl::init(50), cl::Hidden,
                   cl::desc("Function mem bound threshold in %"));

static cl::opt<unsigned>
    LimitWaveThresh("amdgpu-limit-wave-threshold", cl::init(50), cl::Hidden,
                    cl::desc("Kernel limit wave threshold in %"));

static cl::opt<unsigned>
    IAWeight("amdgpu-indirect-access-weight", cl::init(1000), cl::Hidden,
             cl::desc("Indirect access memory instruction weight"));

static cl::opt<unsigned>
    LSWeight("amdgpu-large-stride-weight", cl::init(1000), cl::Hidden,
             cl::desc("Large stride memory access weight"));

static cl::opt<unsigned>
    LargeStrideThresh("amdgpu-large-stride-threshold", cl::init(64), cl::Hidden,
                      cl::desc("Large stride memory access threshold"));

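// Illustrative usage (this comment is not in the upstream file): hidden
// cl::opts like these can be tuned from any tool that links the AMDGPU
// backend, e.g.
//   llc -mtriple=amdgcn -amdgpu-membound-threshold=60 \
//       -amdgpu-limit-wave-threshold=40 kernel.ll
// The flag names above are real; the threshold values are made up.
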
STATISTIC(NumMemBound, "Number of functions marked as memory bound");
STATISTIC(NumLimitWave, "Number of functions marked as needing limit wave");

char llvm::AMDGPUPerfHintAnalysis::ID = 0;
char &llvm::AMDGPUPerfHintAnalysisID = AMDGPUPerfHintAnalysis::ID;

INITIALIZE_PASS(AMDGPUPerfHintAnalysis, DEBUG_TYPE,
                "Analysis if a function is memory bound", true, true)

namespace {

struct AMDGPUPerfHint {
  friend AMDGPUPerfHintAnalysis;

public:
  AMDGPUPerfHint(AMDGPUPerfHintAnalysis::FuncInfoMap &FIM_,
                 const TargetLowering *TLI_)
      : FIM(FIM_), DL(nullptr), TLI(TLI_) {}

  bool runOnFunction(Function &F);

private:
  struct MemAccessInfo {
    const Value *V;
    const Value *Base;
    int64_t Offset;
    MemAccessInfo() : V(nullptr), Base(nullptr), Offset(0) {}
    bool isLargeStride(MemAccessInfo &Reference) const;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    Printable print() const {
      return Printable([this](raw_ostream &OS) {
        OS << "Value: " << *V << '\n'
           << "Base: " << *Base << " Offset: " << Offset << '\n';
      });
    }
#endif
  };

  MemAccessInfo makeMemAccessInfo(Instruction *) const;

  MemAccessInfo LastAccess; // Last memory access info

  AMDGPUPerfHintAnalysis::FuncInfoMap &FIM;

  const DataLayout *DL;

  const TargetLowering *TLI;

  AMDGPUPerfHintAnalysis::FuncInfo *visit(const Function &F);
  static bool isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &F);
  static bool needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &F);

  bool isIndirectAccess(const Instruction *Inst) const;

  /// Check if the instruction is large stride.
  /// The purpose is to identify memory access pattern like:
  /// x = a[i];
  /// y = a[i+1000];
  /// z = a[i+2000];
  /// In the above example, the second and third memory access will be marked
  /// as large stride memory access.
  bool isLargeStride(const Instruction *Inst);

  bool isGlobalAddr(const Value *V) const;
  bool isLocalAddr(const Value *V) const;
  bool isConstantAddr(const Value *V) const;
};

static const Value *getMemoryInstrPtr(const Instruction *Inst) {
  if (auto LI = dyn_cast<LoadInst>(Inst)) {
    return LI->getPointerOperand();
  }
  if (auto SI = dyn_cast<StoreInst>(Inst)) {
    return SI->getPointerOperand();
  }
  if (auto AI = dyn_cast<AtomicCmpXchgInst>(Inst)) {
    return AI->getPointerOperand();
  }
  if (auto AI = dyn_cast<AtomicRMWInst>(Inst)) {
    return AI->getPointerOperand();
  }
  if (auto MI = dyn_cast<AnyMemIntrinsic>(Inst)) {
    return MI->getRawDest();
  }

  return nullptr;
}
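
// Example IR (invented for illustration; %p, %q, %v are made-up names) for
// which getMemoryInstrPtr returns the pointer operand:
//   store i32 %v, i32 addrspace(1)* %p          ; returns %p
//   %old = atomicrmw add i32* %q, i32 1 seq_cst ; returns %q
// Non-memory instructions return nullptr.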

bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const {
  LLVM_DEBUG(dbgs() << "[isIndirectAccess] " << *Inst << '\n');
  SmallSet<const Value *, 32> WorkSet;
  SmallSet<const Value *, 32> Visited;
  if (const Value *MO = getMemoryInstrPtr(Inst)) {
    if (isGlobalAddr(MO))
      WorkSet.insert(MO);
  }

  while (!WorkSet.empty()) {
    const Value *V = *WorkSet.begin();
    WorkSet.erase(*WorkSet.begin());
    if (!Visited.insert(V).second)
      continue;
    LLVM_DEBUG(dbgs() << "  check: " << *V << '\n');

    if (auto LD = dyn_cast<LoadInst>(V)) {
      auto M = LD->getPointerOperand();
      if (isGlobalAddr(M) || isLocalAddr(M) || isConstantAddr(M)) {
        LLVM_DEBUG(dbgs() << "    is IA\n");
        return true;
      }
      continue;
    }

    if (auto GEP = dyn_cast<GetElementPtrInst>(V)) {
      auto P = GEP->getPointerOperand();
      WorkSet.insert(P);
      for (unsigned I = 1, E = GEP->getNumIndices() + 1; I != E; ++I)
        WorkSet.insert(GEP->getOperand(I));
      continue;
    }

    if (auto U = dyn_cast<UnaryInstruction>(V)) {
      WorkSet.insert(U->getOperand(0));
      continue;
    }

    if (auto BO = dyn_cast<BinaryOperator>(V)) {
      WorkSet.insert(BO->getOperand(0));
      WorkSet.insert(BO->getOperand(1));
      continue;
    }

    if (auto S = dyn_cast<SelectInst>(V)) {
      WorkSet.insert(S->getFalseValue());
      WorkSet.insert(S->getTrueValue());
      continue;
    }

    if (auto E = dyn_cast<ExtractElementInst>(V)) {
      WorkSet.insert(E->getVectorOperand());
      continue;
    }

    LLVM_DEBUG(dbgs() << "    dropped\n");
  }

  LLVM_DEBUG(dbgs() << "  is not IA\n");
  return false;
}
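
// A sketch of what the walk above flags (IR invented for this comment, not
// taken from a test): a load whose address was itself loaded from
// global/local/constant memory, i.e. a pointer chase:
//   %p = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %pp
//   %v = load i32, i32 addrspace(1)* %p   ; counted as an indirect access
// The address computation feeding %p may also go through GEPs, casts, binary
// ops, selects, or extractelement, which is why those are traversed too.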

AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) {
  AMDGPUPerfHintAnalysis::FuncInfo &FI = FIM[&F];

  LLVM_DEBUG(dbgs() << "[AMDGPUPerfHint] process " << F.getName() << '\n');

  for (auto &B : F) {
    LastAccess = MemAccessInfo();
    for (auto &I : B) {
      if (getMemoryInstrPtr(&I)) {
        if (isIndirectAccess(&I))
          ++FI.IAMInstCount;
        if (isLargeStride(&I))
          ++FI.LSMInstCount;
        ++FI.MemInstCount;
        ++FI.InstCount;
        continue;
      }
      if (auto *CB = dyn_cast<CallBase>(&I)) {
        Function *Callee = CB->getCalledFunction();
        if (!Callee || Callee->isDeclaration()) {
          ++FI.InstCount;
          continue;
        }
        if (&F == Callee) // Handle immediate recursion
          continue;

        auto Loc = FIM.find(Callee);
        if (Loc == FIM.end())
          continue;

        FI.MemInstCount += Loc->second.MemInstCount;
        FI.InstCount += Loc->second.InstCount;
        FI.IAMInstCount += Loc->second.IAMInstCount;
        FI.LSMInstCount += Loc->second.LSMInstCount;
      } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
        TargetLoweringBase::AddrMode AM;
        auto *Ptr = GetPointerBaseWithConstantOffset(GEP, AM.BaseOffs, *DL);
        AM.BaseGV = dyn_cast_or_null<GlobalValue>(const_cast<Value *>(Ptr));
        AM.HasBaseReg = !AM.BaseGV;
        if (TLI->isLegalAddressingMode(*DL, AM, GEP->getResultElementType(),
                                       GEP->getPointerAddressSpace()))
          // Offset will likely be folded into load or store
          continue;
        ++FI.InstCount;
      } else {
        ++FI.InstCount;
      }
    }
  }

  return &FI;
}

bool AMDGPUPerfHint::runOnFunction(Function &F) {
  const Module &M = *F.getParent();
  DL = &M.getDataLayout();

  if (F.hasFnAttribute("amdgpu-wave-limiter") &&
      F.hasFnAttribute("amdgpu-memory-bound"))
    return false;

  const AMDGPUPerfHintAnalysis::FuncInfo *Info = visit(F);

  LLVM_DEBUG(dbgs() << F.getName() << " MemInst: " << Info->MemInstCount
                    << '\n'
                    << " IAMInst: " << Info->IAMInstCount << '\n'
                    << " LSMInst: " << Info->LSMInstCount << '\n'
                    << " TotalInst: " << Info->InstCount << '\n');

  if (isMemBound(*Info)) {
    LLVM_DEBUG(dbgs() << F.getName() << " is memory bound\n");
    NumMemBound++;
    F.addFnAttr("amdgpu-memory-bound", "true");
  }

  if (AMDGPU::isEntryFunctionCC(F.getCallingConv()) && needLimitWave(*Info)) {
    LLVM_DEBUG(dbgs() << F.getName() << " needs limit wave\n");
    NumLimitWave++;
    F.addFnAttr("amdgpu-wave-limiter", "true");
  }

  return true;
}
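
// For illustration only (this comment is not in the upstream file): a kernel
// that trips both heuristics ends up carrying string attributes in the IR,
// roughly:
//   define amdgpu_kernel void @k(...) #0 { ... }
//   attributes #0 = { "amdgpu-memory-bound"="true" "amdgpu-wave-limiter"="true" }
// The early exit above skips functions already carrying both attributes.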

bool AMDGPUPerfHint::isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
  return FI.MemInstCount * 100 / FI.InstCount > MemBoundThresh;
}

bool AMDGPUPerfHint::needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
  return ((FI.MemInstCount + FI.IAMInstCount * IAWeight +
           FI.LSMInstCount * LSWeight) *
          100 / FI.InstCount) > LimitWaveThresh;
}
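
// Worked example with the default thresholds (numbers invented): a function
// with InstCount = 100, MemInstCount = 10, IAMInstCount = 1, LSMInstCount = 0
// is not memory bound (10 * 100 / 100 = 10 <= 50), but needLimitWave sees
// (10 + 1 * 1000 + 0 * 1000) * 100 / 100 = 1010 > 50, so a single indirect
// access in a small kernel is enough to trip the wave limiter.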

bool AMDGPUPerfHint::isGlobalAddr(const Value *V) const {
  if (auto PT = dyn_cast<PointerType>(V->getType())) {
    unsigned As = PT->getAddressSpace();
    // Flat likely points to global too.
    return As == AMDGPUAS::GLOBAL_ADDRESS || As == AMDGPUAS::FLAT_ADDRESS;
  }
  return false;
}

bool AMDGPUPerfHint::isLocalAddr(const Value *V) const {
  if (auto PT = dyn_cast<PointerType>(V->getType()))
    return PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
  return false;
}

bool AMDGPUPerfHint::isLargeStride(const Instruction *Inst) {
  LLVM_DEBUG(dbgs() << "[isLargeStride] " << *Inst << '\n');

  MemAccessInfo MAI = makeMemAccessInfo(const_cast<Instruction *>(Inst));
  bool IsLargeStride = MAI.isLargeStride(LastAccess);
  if (MAI.Base)
    LastAccess = std::move(MAI);

  return IsLargeStride;
}

AMDGPUPerfHint::MemAccessInfo
AMDGPUPerfHint::makeMemAccessInfo(Instruction *Inst) const {
  MemAccessInfo MAI;
  const Value *MO = getMemoryInstrPtr(Inst);

  LLVM_DEBUG(dbgs() << "[isLargeStride] MO: " << *MO << '\n');
  // Do not treat local-addr memory access as large stride.
  if (isLocalAddr(MO))
    return MAI;

  MAI.V = MO;
  MAI.Base = GetPointerBaseWithConstantOffset(MO, MAI.Offset, *DL);
  return MAI;
}

bool AMDGPUPerfHint::isConstantAddr(const Value *V) const {
  if (auto PT = dyn_cast<PointerType>(V->getType())) {
    unsigned As = PT->getAddressSpace();
    return As == AMDGPUAS::CONSTANT_ADDRESS ||
           As == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
  }
  return false;
}

bool AMDGPUPerfHint::MemAccessInfo::isLargeStride(
    MemAccessInfo &Reference) const {

  if (!Base || !Reference.Base || Base != Reference.Base)
    return false;

  uint64_t Diff = Offset > Reference.Offset ? Offset - Reference.Offset
                                            : Reference.Offset - Offset;
  bool Result = Diff > LargeStrideThresh;
  LLVM_DEBUG(dbgs() << "[isLargeStride compare]\n"
                    << print() << "<=>\n"
                    << Reference.print() << "Result:" << Result << '\n');
  return Result;
}
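
// Illustrative numbers (not from the source): with the default
// LargeStrideThresh of 64, two accesses off the same base at offsets 0 and
// 4000 give Diff = 4000 > 64, so the second counts as large stride; offsets
// 0 and 16 (Diff = 16) do not.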
} // namespace

bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  const TargetMachine &TM = TPC->getTM<TargetMachine>();

  bool Changed = false;
  for (CallGraphNode *I : SCC) {
    Function *F = I->getFunction();
    if (!F || F->isDeclaration())
      continue;

    const TargetSubtargetInfo *ST = TM.getSubtargetImpl(*F);
    AMDGPUPerfHint Analyzer(FIM, ST->getTargetLowering());

    if (Analyzer.runOnFunction(*F))
      Changed = true;
  }

  return Changed;
}

bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {
  auto FI = FIM.find(F);
  if (FI == FIM.end())
    return false;

  return AMDGPUPerfHint::isMemBound(FI->second);
}

bool AMDGPUPerfHintAnalysis::needsWaveLimiter(const Function *F) const {
  auto FI = FIM.find(F);
  if (FI == FIM.end())
    return false;

  return AMDGPUPerfHint::needLimitWave(FI->second);
}