AMDGPUResourceUsageAnalysis.cpp
//===- AMDGPUResourceUsageAnalysis.cpp -- analysis of resources -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Analyzes how many registers and other resources are used by
/// functions.
///
/// The results of this analysis are used to fill the register usage, flat
/// usage, etc. into hardware registers.
///
/// The analysis takes callees into account. E.g. if a function A that needs 10
/// VGPRs calls a function B that needs 20 VGPRs, querying the VGPR usage of A
/// will return 20.
/// It is assumed that an indirect call can go into any function except
/// hardware-entrypoints. Therefore the register usage of functions with
/// indirect calls is estimated as the maximum of all non-entrypoint functions
/// in the module.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUResourceUsageAnalysis.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "amdgpu-resource-usage"

char llvm::AMDGPUResourceUsageAnalysis::ID = 0;
char &llvm::AMDGPUResourceUsageAnalysisID = AMDGPUResourceUsageAnalysis::ID;

// We need to tell the runtime some amount ahead of time if we don't know the
// true stack size. Assume a smaller number if this is only due to dynamic /
// non-entry block allocas.
static cl::opt<uint32_t> AssumedStackSizeForExternalCall(
    "amdgpu-assume-external-call-stack-size",
    cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden,
    cl::init(16384));

static cl::opt<uint32_t> AssumedStackSizeForDynamicSizeObjects(
    "amdgpu-assume-dynamic-stack-object-size",
    cl::desc("Assumed extra stack use if there are any "
             "variable sized objects (in bytes)"),
    cl::Hidden, cl::init(4096));

INITIALIZE_PASS(AMDGPUResourceUsageAnalysis, DEBUG_TYPE,
                "Function register usage analysis", true, true)

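// Resolve the callee operand of a call pseudo to the called Function, looking
// through a GlobalAlias if necessary. A zero immediate encodes an unknown
// (indirect) callee and yields nullptr.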
static const Function *getCalleeFunction(const MachineOperand &Op) {
  if (Op.isImm()) {
    assert(Op.getImm() == 0);
    return nullptr;
  }
  if (auto *GA = dyn_cast<GlobalAlias>(Op.getGlobal()))
    return cast<Function>(GA->getOperand(0));
  return cast<Function>(Op.getGlobal());
}

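// Return true if \p Reg has any use that is not an implicit operand of a FLAT
// instruction. Used below to decide whether flat_scr is genuinely needed.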
static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
                                  const SIInstrInfo &TII, unsigned Reg) {
  for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
    if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
      return true;
  }

  return false;
}

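// Total SGPR count: the explicitly used SGPRs plus the extra SGPRs the target
// reserves for VCC, the flat scratch register and XNACK_MASK (see
// IsaInfo::getNumExtraSGPRs).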
int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumSGPRs(
    const GCNSubtarget &ST) const {
  return NumExplicitSGPR +
         IsaInfo::getNumExtraSGPRs(&ST, UsesVCC, UsesFlatScratch,
                                   ST.getTargetID().isXnackOnOrAny());
}

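// Combined VGPR/AGPR count. On targets with GFX90A instructions the two
// register files are allocated as one block, with the AGPRs starting at a
// 4-register aligned offset after the VGPRs (e.g. 5 VGPRs + 2 AGPRs occupy
// alignTo(5, 4) + 2 = 10 registers); otherwise the files are separate and the
// larger of the two counts is reported.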
int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
    const GCNSubtarget &ST, int32_t ArgNumAGPR, int32_t ArgNumVGPR) const {
  if (ST.hasGFX90AInsts() && ArgNumAGPR)
    return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
  return std::max(ArgNumVGPR, ArgNumAGPR);
}

int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
    const GCNSubtarget &ST) const {
  return getTotalNumVGPRs(ST, NumAGPR, NumVGPR);
}

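// Visit every function in the SCC, compute its SIFunctionResourceInfo, and
// remember whether any of them contains an indirect call so that worst-case
// register usage can be propagated afterwards.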
bool AMDGPUResourceUsageAnalysis::runOnSCC(CallGraphSCC &SCC) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  const TargetMachine &TM = TPC->getTM<TargetMachine>();
  bool HasIndirectCall = false;

  for (CallGraphNode *I : SCC) {
    Function *F = I->getFunction();
    if (!F || F->isDeclaration())
      continue;

    MachineModuleInfo &MMI =
        getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
    MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);

    auto CI = CallGraphResourceInfo.insert(
        std::make_pair(&MF.getFunction(), SIFunctionResourceInfo()));
    SIFunctionResourceInfo &Info = CI.first->second;
    assert(CI.second && "should only be called once per function");
    Info = analyzeResourceUsage(MF, TM);
    HasIndirectCall |= Info.HasIndirectCall;
  }

  if (HasIndirectCall)
    propagateIndirectCallRegisterUsage();

  return false;
}

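// Compute the resource usage of a single machine function. If the function
// makes no calls, MachineRegisterInfo already knows the highest physical
// register used; otherwise every operand is inspected and callee usage is
// folded in.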
AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
    const MachineFunction &MF, const TargetMachine &TM) const {
  SIFunctionResourceInfo Info;

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
                         MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI) ||
                         MRI.isLiveIn(MFI->getPreloadedReg(
                             AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT));

  // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
  // instructions aren't used to access the scratch buffer. Inline assembly may
  // need it though.
  //
  // If we only have implicit uses of flat_scr on flat instructions, it is not
  // really needed.
  if (Info.UsesFlatScratch && !MFI->hasFlatScratchInit() &&
      (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
    Info.UsesFlatScratch = false;
  }

  Info.PrivateSegmentSize = FrameInfo.getStackSize();

  // Assume a big number if there are any unknown sized objects.
  Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
  if (Info.HasDynamicallySizedStack)
    Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;

  if (MFI->isStackRealigned())
    Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();

  Info.UsesVCC =
      MRI.isPhysRegUsed(AMDGPU::VCC_LO) || MRI.isPhysRegUsed(AMDGPU::VCC_HI);

  // If there are no calls, MachineRegisterInfo can tell us the used register
  // count easily.
  // A tail call isn't considered a call for MachineFrameInfo's purposes.
  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
    MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
    for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
      if (MRI.isPhysRegUsed(Reg)) {
        HighestVGPRReg = Reg;
        break;
      }
    }

    if (ST.hasMAIInsts()) {
      MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
      for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) {
        if (MRI.isPhysRegUsed(Reg)) {
          HighestAGPRReg = Reg;
          break;
        }
      }
      Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister
                         ? 0
                         : TRI.getHWRegIndex(HighestAGPRReg) + 1;
    }

    MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
    for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
      if (MRI.isPhysRegUsed(Reg)) {
        HighestSGPRReg = Reg;
        break;
      }
    }

    // We found the maximum register index. They start at 0, so add one to get
    // the number of registers.
    Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister
                       ? 0
                       : TRI.getHWRegIndex(HighestVGPRReg) + 1;
    Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister
                               ? 0
                               : TRI.getHWRegIndex(HighestSGPRReg) + 1;

    return Info;
  }

  int32_t MaxVGPR = -1;
  int32_t MaxAGPR = -1;
  int32_t MaxSGPR = -1;
  uint64_t CalleeFrameSize = 0;

  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      // TODO: Check regmasks? Do they occur anywhere except calls?
      for (const MachineOperand &MO : MI.operands()) {
        unsigned Width = 0;
        bool IsSGPR = false;
        bool IsAGPR = false;

        if (!MO.isReg())
          continue;

        Register Reg = MO.getReg();
        switch (Reg) {
        case AMDGPU::EXEC:
        case AMDGPU::EXEC_LO:
        case AMDGPU::EXEC_HI:
        case AMDGPU::SCC:
        case AMDGPU::M0:
        case AMDGPU::M0_LO16:
        case AMDGPU::M0_HI16:
        case AMDGPU::SRC_SHARED_BASE:
        case AMDGPU::SRC_SHARED_LIMIT:
        case AMDGPU::SRC_PRIVATE_BASE:
        case AMDGPU::SRC_PRIVATE_LIMIT:
        case AMDGPU::SGPR_NULL:
        case AMDGPU::MODE:
          continue;

        case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
          llvm_unreachable("src_pops_exiting_wave_id should not be used");

        case AMDGPU::NoRegister:
          assert(MI.isDebugInstr() &&
                 "Instruction uses invalid noreg register");
          continue;

        case AMDGPU::VCC:
        case AMDGPU::VCC_LO:
        case AMDGPU::VCC_HI:
        case AMDGPU::VCC_LO_LO16:
        case AMDGPU::VCC_LO_HI16:
        case AMDGPU::VCC_HI_LO16:
        case AMDGPU::VCC_HI_HI16:
          Info.UsesVCC = true;
          continue;

        case AMDGPU::FLAT_SCR:
        case AMDGPU::FLAT_SCR_LO:
        case AMDGPU::FLAT_SCR_HI:
          continue;

        case AMDGPU::XNACK_MASK:
        case AMDGPU::XNACK_MASK_LO:
        case AMDGPU::XNACK_MASK_HI:
          llvm_unreachable("xnack_mask registers should not be used");

        case AMDGPU::LDS_DIRECT:
          llvm_unreachable("lds_direct register should not be used");

        case AMDGPU::TBA:
        case AMDGPU::TBA_LO:
        case AMDGPU::TBA_HI:
        case AMDGPU::TMA:
        case AMDGPU::TMA_LO:
        case AMDGPU::TMA_HI:
          llvm_unreachable("trap handler registers should not be used");

        case AMDGPU::SRC_VCCZ:
          llvm_unreachable("src_vccz register should not be used");

        case AMDGPU::SRC_EXECZ:
          llvm_unreachable("src_execz register should not be used");

        case AMDGPU::SRC_SCC:
          llvm_unreachable("src_scc register should not be used");

        default:
          break;
        }

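        // Classify the operand by register file (SGPR / VGPR / AGPR) and by
        // width in units of 32-bit registers so the highest register index
        // touched in each file can be tracked.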
        if (AMDGPU::SReg_32RegClass.contains(Reg) ||
            AMDGPU::SReg_LO16RegClass.contains(Reg) ||
            AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_32RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 1;
        } else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
                   AMDGPU::VGPR_LO16RegClass.contains(Reg) ||
                   AMDGPU::VGPR_HI16RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 1;
        } else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
                   AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 1;
        } else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 2;
        } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 2;
        } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 2;
        } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 3;
        } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 3;
        } else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 3;
        } else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 4;
        } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 4;
        } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 4;
        } else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 5;
        } else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 5;
        } else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 5;
        } else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 6;
        } else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 6;
        } else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 6;
        } else if (AMDGPU::VReg_224RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 7;
        } else if (AMDGPU::SReg_224RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 7;
        } else if (AMDGPU::AReg_224RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 7;
        } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 8;
        } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 8;
        } else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 8;
        } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 16;
        } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 16;
        } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 16;
        } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 32;
        } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 32;
        } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 32;
        } else {
          llvm_unreachable("Unknown register class");
        }
        unsigned HWReg = TRI.getHWRegIndex(Reg);
        int MaxUsed = HWReg + Width - 1;
        if (IsSGPR) {
          MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
        } else if (IsAGPR) {
          MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
        } else {
          MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
        }
      }

      if (MI.isCall()) {
        // Pseudo used just to encode the underlying global. Is there a better
        // way to track this?

        const MachineOperand *CalleeOp =
            TII->getNamedOperand(MI, AMDGPU::OpName::callee);

        const Function *Callee = getCalleeFunction(*CalleeOp);
        DenseMap<const Function *, SIFunctionResourceInfo>::const_iterator I =
            CallGraphResourceInfo.end();

        // Avoid crashing on undefined behavior with an illegal call to a
        // kernel. If a callsite's calling convention doesn't match the
        // function's, it's undefined behavior. If the callsite calling
        // convention does match, that would have errored earlier.
        if (Callee && AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
          report_fatal_error("invalid call to entry function");

        bool IsIndirect = !Callee || Callee->isDeclaration();
        if (!IsIndirect)
          I = CallGraphResourceInfo.find(Callee);

        // FIXME: Call site could have norecurse on it
        if (!Callee || !Callee->doesNotRecurse()) {
          Info.HasRecursion = true;

          // TODO: If we happen to know there is no stack usage in the
          // callgraph, we don't need to assume an infinitely growing stack.
          if (!MI.isReturn()) {
            // We don't need to assume an unknown stack size for tail calls.

            // FIXME: This only benefits in the case where the kernel does not
            // directly call the tail called function. If a kernel directly
            // calls a tail recursive function, we'll assume maximum stack size
            // based on the regular call instruction.
            CalleeFrameSize = std::max(
                CalleeFrameSize,
                static_cast<uint64_t>(AssumedStackSizeForExternalCall));
          }
        }

        if (IsIndirect || I == CallGraphResourceInfo.end()) {
          CalleeFrameSize = std::max(
              CalleeFrameSize,
              static_cast<uint64_t>(AssumedStackSizeForExternalCall));

          // Register usage of indirect calls gets handled later
          Info.UsesVCC = true;
          Info.UsesFlatScratch = ST.hasFlatAddressSpace();
          Info.HasDynamicallySizedStack = true;
          Info.HasIndirectCall = true;
        } else {
          // We force CodeGen to run in SCC order, so the callee's register
          // usage etc. should be the cumulative usage of all callees.
          MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
          MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
          MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
          CalleeFrameSize =
              std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
          Info.UsesVCC |= I->second.UsesVCC;
          Info.UsesFlatScratch |= I->second.UsesFlatScratch;
          Info.HasDynamicallySizedStack |= I->second.HasDynamicallySizedStack;
          Info.HasRecursion |= I->second.HasRecursion;
          Info.HasIndirectCall |= I->second.HasIndirectCall;
        }
      }
    }
  }

  Info.NumExplicitSGPR = MaxSGPR + 1;
  Info.NumVGPR = MaxVGPR + 1;
  Info.NumAGPR = MaxAGPR + 1;
  Info.PrivateSegmentSize += CalleeFrameSize;

  return Info;
}

void AMDGPUResourceUsageAnalysis::propagateIndirectCallRegisterUsage() {
  // Collect the maximum number of registers from non-hardware-entrypoints.
  // All these functions are potential targets for indirect calls.
  int32_t NonKernelMaxSGPRs = 0;
  int32_t NonKernelMaxVGPRs = 0;
  int32_t NonKernelMaxAGPRs = 0;

  for (const auto &I : CallGraphResourceInfo) {
    if (!AMDGPU::isEntryFunctionCC(I.getFirst()->getCallingConv())) {
      auto &Info = I.getSecond();
      NonKernelMaxSGPRs = std::max(NonKernelMaxSGPRs, Info.NumExplicitSGPR);
      NonKernelMaxVGPRs = std::max(NonKernelMaxVGPRs, Info.NumVGPR);
      NonKernelMaxAGPRs = std::max(NonKernelMaxAGPRs, Info.NumAGPR);
    }
  }

  // Add register usage for functions with indirect calls.
  // For calls to unknown functions, we assume the maximum register usage of
  // all non-hardware-entrypoints in the current module.
  for (auto &I : CallGraphResourceInfo) {
    auto &Info = I.getSecond();
    if (Info.HasIndirectCall) {
      Info.NumExplicitSGPR = std::max(Info.NumExplicitSGPR, NonKernelMaxSGPRs);
      Info.NumVGPR = std::max(Info.NumVGPR, NonKernelMaxVGPRs);
      Info.NumAGPR = std::max(Info.NumAGPR, NonKernelMaxAGPRs);
    }
  }
}
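
// ---------------------------------------------------------------------------
// Illustrative usage sketch (not part of this file). A consumer such as the
// AMDGPU asm printer is expected to require this pass and query per-function
// results. The getResourceInfo() accessor is assumed to be declared in
// AMDGPUResourceUsageAnalysis.h; the function and variable names below are
// hypothetical and only indicate how the results are meant to be consumed:
//
//   void ExampleEmitter::emitResourceUsage(const MachineFunction &MF) {
//     const auto &RUA = getAnalysis<AMDGPUResourceUsageAnalysis>();
//     const auto &Info = RUA.getResourceInfo(&MF.getFunction());
//     const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
//     unsigned SGPRs = Info.getTotalNumSGPRs(ST); // explicit + VCC/flat/XNACK
//     unsigned VGPRs = Info.getTotalNumVGPRs(ST); // VGPR/AGPR combined
//     uint64_t Stack = Info.PrivateSegmentSize;   // includes callee frames
//   }
// ---------------------------------------------------------------------------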