LLVM  14.0.0git
AMDGPUResourceUsageAnalysis.cpp
Go to the documentation of this file.
1 //===- AMDGPUResourceUsageAnalysis.h ---- analysis of resources -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief Analyzes how many registers and other resources are used by
11 /// functions.
12 ///
13 /// The results of this analysis are used to fill the register usage, flat
14 /// usage, etc. into hardware registers.
15 ///
16 /// The analysis takes callees into account. E.g. if a function A that needs 10
17 /// VGPRs calls a function B that needs 20 VGPRs, querying the VGPR usage of A
18 /// will return 20.
19 /// It is assumed that an indirect call can go into any function except
20 /// hardware-entrypoints. Therefore the register usage of functions with
21 /// indirect calls is estimated as the maximum of all non-entrypoint functions
22 /// in the module.
23 ///
24 //===----------------------------------------------------------------------===//
25 
27 #include "AMDGPU.h"
28 #include "GCNSubtarget.h"
29 #include "SIMachineFunctionInfo.h"
33 
34 using namespace llvm;
35 using namespace llvm::AMDGPU;
36 
37 #define DEBUG_TYPE "amdgpu-resource-usage"
38 
41 
42 // We need to tell the runtime some amount ahead of time if we don't know the
43 // true stack size. Assume a smaller number if this is only due to dynamic /
44 // non-entry block allocas.
46  "amdgpu-assume-external-call-stack-size",
47  cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden,
48  cl::init(16384));
49 
51  "amdgpu-assume-dynamic-stack-object-size",
52  cl::desc("Assumed extra stack use if there are any "
53  "variable sized objects (in bytes)"),
54  cl::Hidden, cl::init(4096));
55 
57  "Function register usage analysis", true, true)
58 
59 static const Function *getCalleeFunction(const MachineOperand &Op) {
60  if (Op.isImm()) {
61  assert(Op.getImm() == 0);
62  return nullptr;
63  }
64 
65  return cast<Function>(Op.getGlobal());
66 }
67 
69  const SIInstrInfo &TII, unsigned Reg) {
70  for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
71  if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
72  return true;
73  }
74 
75  return false;
76 }
77 
79  const GCNSubtarget &ST) const {
80  return NumExplicitSGPR +
81  IsaInfo::getNumExtraSGPRs(&ST, UsesVCC, UsesFlatScratch,
82  ST.getTargetID().isXnackOnOrAny());
83 }
84 
86  const GCNSubtarget &ST) const {
87  if (ST.hasGFX90AInsts() && NumAGPR)
88  return alignTo(NumVGPR, 4) + NumAGPR;
89  return std::max(NumVGPR, NumAGPR);
90 }
91 
93  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
94  if (!TPC)
95  return false;
96 
97  const TargetMachine &TM = TPC->getTM<TargetMachine>();
98  bool HasIndirectCall = false;
99 
100  for (CallGraphNode *I : SCC) {
101  Function *F = I->getFunction();
102  if (!F || F->isDeclaration())
103  continue;
104 
105  MachineModuleInfo &MMI =
106  getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
108 
109  auto CI = CallGraphResourceInfo.insert(
110  std::make_pair(&MF.getFunction(), SIFunctionResourceInfo()));
111  SIFunctionResourceInfo &Info = CI.first->second;
112  assert(CI.second && "should only be called once per function");
113  Info = analyzeResourceUsage(MF, TM);
114  HasIndirectCall |= Info.HasIndirectCall;
115  }
116 
117  if (HasIndirectCall)
118  propagateIndirectCallRegisterUsage();
119 
120  return false;
121 }
122 
124 AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
125  const MachineFunction &MF, const TargetMachine &TM) const {
126  SIFunctionResourceInfo Info;
127 
129  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
130  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
131  const MachineRegisterInfo &MRI = MF.getRegInfo();
132  const SIInstrInfo *TII = ST.getInstrInfo();
133  const SIRegisterInfo &TRI = TII->getRegisterInfo();
134 
135  Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
136  MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI) ||
139 
140  // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
141  // instructions aren't used to access the scratch buffer. Inline assembly may
142  // need it though.
143  //
144  // If we only have implicit uses of flat_scr on flat instructions, it is not
145  // really needed.
146  if (Info.UsesFlatScratch && !MFI->hasFlatScratchInit() &&
147  (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
148  !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
149  !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
150  Info.UsesFlatScratch = false;
151  }
152 
153  Info.PrivateSegmentSize = FrameInfo.getStackSize();
154 
155  // Assume a big number if there are any unknown sized objects.
156  Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
157  if (Info.HasDynamicallySizedStack)
158  Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;
159 
160  if (MFI->isStackRealigned())
161  Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();
162 
163  Info.UsesVCC =
164  MRI.isPhysRegUsed(AMDGPU::VCC_LO) || MRI.isPhysRegUsed(AMDGPU::VCC_HI);
165 
166  // If there are no calls, MachineRegisterInfo can tell us the used register
167  // count easily.
168  // A tail call isn't considered a call for MachineFrameInfo's purposes.
169  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
170  MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
171  for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
172  if (MRI.isPhysRegUsed(Reg)) {
173  HighestVGPRReg = Reg;
174  break;
175  }
176  }
177 
178  if (ST.hasMAIInsts()) {
179  MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
180  for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) {
181  if (MRI.isPhysRegUsed(Reg)) {
182  HighestAGPRReg = Reg;
183  break;
184  }
185  }
186  Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister
187  ? 0
188  : TRI.getHWRegIndex(HighestAGPRReg) + 1;
189  }
190 
191  MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
192  for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
193  if (MRI.isPhysRegUsed(Reg)) {
194  HighestSGPRReg = Reg;
195  break;
196  }
197  }
198 
199  // We found the maximum register index. They start at 0, so add one to get
200  // the number of registers.
201  Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister
202  ? 0
203  : TRI.getHWRegIndex(HighestVGPRReg) + 1;
204  Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister
205  ? 0
206  : TRI.getHWRegIndex(HighestSGPRReg) + 1;
207 
208  return Info;
209  }
210 
211  int32_t MaxVGPR = -1;
212  int32_t MaxAGPR = -1;
213  int32_t MaxSGPR = -1;
214  uint64_t CalleeFrameSize = 0;
215 
216  for (const MachineBasicBlock &MBB : MF) {
217  for (const MachineInstr &MI : MBB) {
218  // TODO: Check regmasks? Do they occur anywhere except calls?
219  for (const MachineOperand &MO : MI.operands()) {
220  unsigned Width = 0;
221  bool IsSGPR = false;
222  bool IsAGPR = false;
223 
224  if (!MO.isReg())
225  continue;
226 
227  Register Reg = MO.getReg();
228  switch (Reg) {
229  case AMDGPU::EXEC:
230  case AMDGPU::EXEC_LO:
231  case AMDGPU::EXEC_HI:
232  case AMDGPU::SCC:
233  case AMDGPU::M0:
234  case AMDGPU::M0_LO16:
235  case AMDGPU::M0_HI16:
236  case AMDGPU::SRC_SHARED_BASE:
237  case AMDGPU::SRC_SHARED_LIMIT:
238  case AMDGPU::SRC_PRIVATE_BASE:
239  case AMDGPU::SRC_PRIVATE_LIMIT:
240  case AMDGPU::SGPR_NULL:
241  case AMDGPU::MODE:
242  continue;
243 
244  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
245  llvm_unreachable("src_pops_exiting_wave_id should not be used");
246 
247  case AMDGPU::NoRegister:
248  assert(MI.isDebugInstr() &&
249  "Instruction uses invalid noreg register");
250  continue;
251 
252  case AMDGPU::VCC:
253  case AMDGPU::VCC_LO:
254  case AMDGPU::VCC_HI:
255  case AMDGPU::VCC_LO_LO16:
256  case AMDGPU::VCC_LO_HI16:
257  case AMDGPU::VCC_HI_LO16:
258  case AMDGPU::VCC_HI_HI16:
259  Info.UsesVCC = true;
260  continue;
261 
262  case AMDGPU::FLAT_SCR:
263  case AMDGPU::FLAT_SCR_LO:
264  case AMDGPU::FLAT_SCR_HI:
265  continue;
266 
267  case AMDGPU::XNACK_MASK:
268  case AMDGPU::XNACK_MASK_LO:
269  case AMDGPU::XNACK_MASK_HI:
270  llvm_unreachable("xnack_mask registers should not be used");
271 
272  case AMDGPU::LDS_DIRECT:
273  llvm_unreachable("lds_direct register should not be used");
274 
275  case AMDGPU::TBA:
276  case AMDGPU::TBA_LO:
277  case AMDGPU::TBA_HI:
278  case AMDGPU::TMA:
279  case AMDGPU::TMA_LO:
280  case AMDGPU::TMA_HI:
281  llvm_unreachable("trap handler registers should not be used");
282 
283  case AMDGPU::SRC_VCCZ:
284  llvm_unreachable("src_vccz register should not be used");
285 
286  case AMDGPU::SRC_EXECZ:
287  llvm_unreachable("src_execz register should not be used");
288 
289  case AMDGPU::SRC_SCC:
290  llvm_unreachable("src_scc register should not be used");
291 
292  default:
293  break;
294  }
295 
296  if (AMDGPU::SReg_32RegClass.contains(Reg) ||
297  AMDGPU::SReg_LO16RegClass.contains(Reg) ||
298  AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
299  assert(!AMDGPU::TTMP_32RegClass.contains(Reg) &&
300  "trap handler registers should not be used");
301  IsSGPR = true;
302  Width = 1;
303  } else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
304  AMDGPU::VGPR_LO16RegClass.contains(Reg) ||
305  AMDGPU::VGPR_HI16RegClass.contains(Reg)) {
306  IsSGPR = false;
307  Width = 1;
308  } else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
309  AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
310  IsSGPR = false;
311  IsAGPR = true;
312  Width = 1;
313  } else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
314  assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
315  "trap handler registers should not be used");
316  IsSGPR = true;
317  Width = 2;
318  } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
319  IsSGPR = false;
320  Width = 2;
321  } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
322  IsSGPR = false;
323  IsAGPR = true;
324  Width = 2;
325  } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
326  IsSGPR = false;
327  Width = 3;
328  } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
329  IsSGPR = true;
330  Width = 3;
331  } else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
332  IsSGPR = false;
333  IsAGPR = true;
334  Width = 3;
335  } else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
336  assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
337  "trap handler registers should not be used");
338  IsSGPR = true;
339  Width = 4;
340  } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
341  IsSGPR = false;
342  Width = 4;
343  } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
344  IsSGPR = false;
345  IsAGPR = true;
346  Width = 4;
347  } else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
348  IsSGPR = false;
349  Width = 5;
350  } else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
351  IsSGPR = true;
352  Width = 5;
353  } else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
354  IsSGPR = false;
355  IsAGPR = true;
356  Width = 5;
357  } else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
358  IsSGPR = false;
359  Width = 6;
360  } else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
361  IsSGPR = true;
362  Width = 6;
363  } else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
364  IsSGPR = false;
365  IsAGPR = true;
366  Width = 6;
367  } else if (AMDGPU::VReg_224RegClass.contains(Reg)) {
368  IsSGPR = false;
369  Width = 7;
370  } else if (AMDGPU::SReg_224RegClass.contains(Reg)) {
371  IsSGPR = true;
372  Width = 7;
373  } else if (AMDGPU::AReg_224RegClass.contains(Reg)) {
374  IsSGPR = false;
375  IsAGPR = true;
376  Width = 7;
377  } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
378  assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
379  "trap handler registers should not be used");
380  IsSGPR = true;
381  Width = 8;
382  } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
383  IsSGPR = false;
384  Width = 8;
385  } else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
386  IsSGPR = false;
387  IsAGPR = true;
388  Width = 8;
389  } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
390  assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
391  "trap handler registers should not be used");
392  IsSGPR = true;
393  Width = 16;
394  } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
395  IsSGPR = false;
396  Width = 16;
397  } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
398  IsSGPR = false;
399  IsAGPR = true;
400  Width = 16;
401  } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
402  IsSGPR = true;
403  Width = 32;
404  } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
405  IsSGPR = false;
406  Width = 32;
407  } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
408  IsSGPR = false;
409  IsAGPR = true;
410  Width = 32;
411  } else {
412  llvm_unreachable("Unknown register class");
413  }
414  unsigned HWReg = TRI.getHWRegIndex(Reg);
415  int MaxUsed = HWReg + Width - 1;
416  if (IsSGPR) {
417  MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
418  } else if (IsAGPR) {
419  MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
420  } else {
421  MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
422  }
423  }
424 
425  if (MI.isCall()) {
426  // Pseudo used just to encode the underlying global. Is there a better
427  // way to track this?
428 
429  const MachineOperand *CalleeOp =
430  TII->getNamedOperand(MI, AMDGPU::OpName::callee);
431 
432  const Function *Callee = getCalleeFunction(*CalleeOp);
434  CallGraphResourceInfo.end();
435 
436  // Avoid crashing on undefined behavior with an illegal call to a
437  // kernel. If a callsite's calling convention doesn't match the
438  // function's, it's undefined behavior. If the callsite calling
439  // convention does match, that would have errored earlier.
440  if (Callee && AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
441  report_fatal_error("invalid call to entry function");
442 
443  bool IsIndirect = !Callee || Callee->isDeclaration();
444  if (!IsIndirect)
445  I = CallGraphResourceInfo.find(Callee);
446 
447  if (IsIndirect || I == CallGraphResourceInfo.end()) {
448  CalleeFrameSize =
449  std::max(CalleeFrameSize,
451 
452  // Register usage of indirect calls gets handled later
453  Info.UsesVCC = true;
454  Info.UsesFlatScratch = ST.hasFlatAddressSpace();
455  Info.HasDynamicallySizedStack = true;
456  Info.HasIndirectCall = true;
457  } else {
458  // We force CodeGen to run in SCC order, so the callee's register
459  // usage etc. should be the cumulative usage of all callees.
460  MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
461  MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
462  MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
463  CalleeFrameSize =
464  std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
465  Info.UsesVCC |= I->second.UsesVCC;
466  Info.UsesFlatScratch |= I->second.UsesFlatScratch;
467  Info.HasDynamicallySizedStack |= I->second.HasDynamicallySizedStack;
468  Info.HasRecursion |= I->second.HasRecursion;
469  Info.HasIndirectCall |= I->second.HasIndirectCall;
470  }
471 
472  // FIXME: Call site could have norecurse on it
473  if (!Callee || !Callee->doesNotRecurse())
474  Info.HasRecursion = true;
475  }
476  }
477  }
478 
479  Info.NumExplicitSGPR = MaxSGPR + 1;
480  Info.NumVGPR = MaxVGPR + 1;
481  Info.NumAGPR = MaxAGPR + 1;
482  Info.PrivateSegmentSize += CalleeFrameSize;
483 
484  return Info;
485 }
486 
487 void AMDGPUResourceUsageAnalysis::propagateIndirectCallRegisterUsage() {
488  // Collect the maximum number of registers from non-hardware-entrypoints.
489  // All these functions are potential targets for indirect calls.
490  int32_t NonKernelMaxSGPRs = 0;
491  int32_t NonKernelMaxVGPRs = 0;
492  int32_t NonKernelMaxAGPRs = 0;
493 
494  for (const auto &I : CallGraphResourceInfo) {
495  if (!AMDGPU::isEntryFunctionCC(I.getFirst()->getCallingConv())) {
496  auto &Info = I.getSecond();
497  NonKernelMaxSGPRs = std::max(NonKernelMaxSGPRs, Info.NumExplicitSGPR);
498  NonKernelMaxVGPRs = std::max(NonKernelMaxVGPRs, Info.NumVGPR);
499  NonKernelMaxAGPRs = std::max(NonKernelMaxAGPRs, Info.NumAGPR);
500  }
501  }
502 
503  // Add register usage for functions with indirect calls.
504  // For calls to unknown functions, we assume the maximum register usage of
505  // all non-hardware-entrypoints in the current module.
506  for (auto &I : CallGraphResourceInfo) {
507  auto &Info = I.getSecond();
508  if (Info.HasIndirectCall) {
509  Info.NumExplicitSGPR = std::max(Info.NumExplicitSGPR, NonKernelMaxSGPRs);
510  Info.NumVGPR = std::max(Info.NumVGPR, NonKernelMaxVGPRs);
511  Info.NumAGPR = std::max(Info.NumAGPR, NonKernelMaxAGPRs);
512  }
513  }
514 }
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:148
llvm::AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT
@ FLAT_SCRATCH_INIT
Definition: AMDGPUArgumentUsageInfo.h:105
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:103
llvm
----------------------- PointerInfo -----------------------------------
Definition: AllocatorList.h:23
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::MachineRegisterInfo::isPhysRegUsed
bool isPhysRegUsed(MCRegister PhysReg, bool SkipRegMaskTest=false) const
Return true if the specified register is modified or read in this function.
Definition: MachineRegisterInfo.cpp:585
llvm::SIMachineFunctionInfo::getPreloadedReg
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
Definition: SIMachineFunctionInfo.h:704
SIMachineFunctionInfo.h
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::Function
Definition: Function.h:61
contains
return AArch64::GPR64RegClass contains(Reg)
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:329
llvm::AMDGPU::IsaInfo::getNumExtraSGPRs
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
Definition: AMDGPUBaseInfo.cpp:657
llvm::MachineFunction::insert
void insert(iterator MBBI, MachineBasicBlock *MBB)
Definition: MachineFunction.h:827
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::AMDGPUResourceUsageAnalysisID
char & AMDGPUResourceUsageAnalysisID
Definition: AMDGPUResourceUsageAnalysis.cpp:40
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
F
#define F(x, y, z)
Definition: MD5.cpp:56
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPUResourceUsageAnalysis.cpp:37
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:636
hasAnyNonFlatUseOfReg
static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI, const SIInstrInfo &TII, unsigned Reg)
Definition: AMDGPUResourceUsageAnalysis.cpp:68
llvm::CallGraphSCC
CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on.
Definition: CallGraphSCCPass.h:87
TargetMachine.h
GCNSubtarget.h
AssumedStackSizeForDynamicSizeObjects
static cl::opt< uint32_t > AssumedStackSizeForDynamicSizeObjects("amdgpu-assume-dynamic-stack-object-size", cl::desc("Assumed extra stack use if there are any " "variable sized objects (in bytes)"), cl::Hidden, cl::init(4096))
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:724
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::AMDGPU
Definition: AMDGPUMetadataVerifier.h:22
llvm::M0
unsigned M0(unsigned Val)
Definition: VE.h:371
llvm::MachineModuleInfo
This class contains meta information specific to a module.
Definition: MachineModuleInfo.h:78
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::CallGraphNode
A node in the call graph for a module.
Definition: CallGraph.h:167
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:28
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1381
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:626
llvm::cl::opt
Definition: CommandLine.h:1434
AMDGPUResourceUsageAnalysis.h
Analyzes how many registers and other resources are used by functions.
llvm::SIMachineFunctionInfo::isStackRealigned
bool isStackRealigned() const
Definition: SIMachineFunctionInfo.h:800
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
uint64_t
const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:20
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
TargetPassConfig.h
llvm::MachineRegisterInfo::reg_operands
iterator_range< reg_iterator > reg_operands(Register Reg) const
Definition: MachineRegisterInfo.h:286
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:79
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:642
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::AMDGPU::CPol::SCC
@ SCC
Definition: SIDefines.h:292
llvm::AMDGPUResourceUsageAnalysis::runOnSCC
bool runOnSCC(CallGraphSCC &SCC) override
runOnSCC - This method should be implemented by the subclass to perform whatever action is necessary ...
Definition: AMDGPUResourceUsageAnalysis.cpp:92
AMDGPU.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::MachineModuleInfo::getOrCreateMachineFunction
MachineFunction & getOrCreateMachineFunction(Function &F)
Returns the MachineFunction constructed for the IR function F.
Definition: MachineModuleInfo.cpp:291
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:206
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::MachineRegisterInfo::isLiveIn
bool isLiveIn(Register Reg) const
Definition: MachineRegisterInfo.cpp:436
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:592
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:321
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:410
llvm::AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumSGPRs
int32_t getTotalNumSGPRs(const GCNSubtarget &ST) const
Definition: AMDGPUResourceUsageAnalysis.cpp:78
llvm::SIInstrInfo
Definition: SIInstrInfo.h:38
CallGraph.h
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:107
llvm::AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs
int32_t getTotalNumVGPRs(const GCNSubtarget &ST) const
Definition: AMDGPUResourceUsageAnalysis.cpp:85
AssumedStackSizeForExternalCall
static cl::opt< uint32_t > AssumedStackSizeForExternalCall("amdgpu-assume-external-call-stack-size", cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden, cl::init(16384))
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:335
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::AMDGPUResourceUsageAnalysis
Definition: AMDGPUResourceUsageAnalysis.h:27
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
callee
Here we don t need to write any variables to the top of the stack since they don t overwrite each other int callee(int32 arg1, int32 arg2)
llvm::cl::desc
Definition: CommandLine.h:414
llvm::AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
Definition: AMDGPUResourceUsageAnalysis.h:32
INITIALIZE_PASS
INITIALIZE_PASS(AMDGPUResourceUsageAnalysis, DEBUG_TYPE, "Function register usage analysis", true, true) static const Function *getCalleeFunction(const MachineOperand &Op)
Definition: AMDGPUResourceUsageAnalysis.cpp:56
llvm::SIMachineFunctionInfo::hasFlatScratchInit
bool hasFlatScratchInit() const
Definition: SIMachineFunctionInfo.h:647
llvm::AMDGPUResourceUsageAnalysis::ID
static char ID
Definition: AMDGPUResourceUsageAnalysis.h:28