LLVM 15.0.0git
AMDGPUResourceUsageAnalysis.cpp
//===- AMDGPUResourceUsageAnalysis.cpp --- analysis of resources ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Analyzes how many registers and other resources are used by
/// functions.
///
/// The results of this analysis are used to fill the register usage, flat
/// usage, etc. into hardware registers.
///
/// The analysis takes callees into account. E.g. if a function A that needs 10
/// VGPRs calls a function B that needs 20 VGPRs, querying the VGPR usage of A
/// will return 20.
/// It is assumed that an indirect call can go into any function except
/// hardware-entrypoints. Therefore the register usage of functions with
/// indirect calls is estimated as the maximum of all non-entrypoint functions
/// in the module.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUResourceUsageAnalysis.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "amdgpu-resource-usage"

char llvm::AMDGPUResourceUsageAnalysis::ID = 0;
char &llvm::AMDGPUResourceUsageAnalysisID = AMDGPUResourceUsageAnalysis::ID;

// We need to tell the runtime some amount ahead of time if we don't know the
// true stack size. Assume a smaller number if this is only due to dynamic /
// non-entry block allocas.
static cl::opt<uint32_t> AssumedStackSizeForExternalCall(
    "amdgpu-assume-external-call-stack-size",
    cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden,
    cl::init(16384));

static cl::opt<uint32_t> AssumedStackSizeForDynamicSizeObjects(
    "amdgpu-assume-dynamic-stack-object-size",
    cl::desc("Assumed extra stack use if there are any "
             "variable sized objects (in bytes)"),
    cl::Hidden, cl::init(4096));
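
// Usage sketch (annotation, not part of the original file): both knobs above
// are ordinary cl::opt flags, so any tool that ends up running this pass
// (e.g. llc) can override the defaults on the command line. The flag names
// come from the definitions above; the file names are hypothetical:
//
//   llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 \
//       -amdgpu-assume-external-call-stack-size=8192 \
//       -amdgpu-assume-dynamic-stack-object-size=1024 \
//       kernel.ll -o kernel.s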

INITIALIZE_PASS(AMDGPUResourceUsageAnalysis, DEBUG_TYPE,
                "Function register usage analysis", true, true)
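
// Annotation (not in the original file): in INITIALIZE_PASS(pass, arg, name,
// cfg, analysis), the two trailing 'true' arguments register this pass as
// CFG-only and as an analysis, i.e. it computes information without modifying
// the machine code it inspects.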

static const Function *getCalleeFunction(const MachineOperand &Op) {
  if (Op.isImm()) {
    assert(Op.getImm() == 0);
    return nullptr;
  }
  if (auto *GA = dyn_cast<GlobalAlias>(Op.getGlobal()))
    return cast<Function>(GA->getOperand(0));
  return cast<Function>(Op.getGlobal());
}
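
// Illustrative example (annotation, not in the original file): the GlobalAlias
// case above looks through aliases, so for IR like
//
//   @callee_alias = alias void (), ptr @real_callee
//   call void @callee_alias()
//
// getCalleeFunction() returns @real_callee, the function that resource info
// is actually recorded for.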

static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
                                  const SIInstrInfo &TII, unsigned Reg) {
  for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
    if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
      return true;
  }

  return false;
}
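
// Illustrative example (annotation, not in the original file): an implicit
// flat_scr operand on a FLAT instruction, e.g.
//
//   FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr
//
// does not count as a "real" use here, whereas an explicit read of
// $flat_scr_lo from inline assembly makes this helper return true.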

int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumSGPRs(
    const GCNSubtarget &ST) const {
  return NumExplicitSGPR +
         IsaInfo::getNumExtraSGPRs(&ST, UsesVCC, UsesFlatScratch,
                                   ST.getTargetID().isXnackOnOrAny());
}
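
// Worked example (annotation, not in the original file; the exact count is
// subtarget-dependent): on a pre-GFX10 target where VCC, flat_scratch and
// XNACK are all in use, IsaInfo::getNumExtraSGPRs() reports 6 extra SGPRs
// (three aligned pairs at the top of the SGPR file), so NumExplicitSGPR == 30
// gives a total of 36.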

int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
    const GCNSubtarget &ST, int32_t ArgNumAGPR, int32_t ArgNumVGPR) const {
  return AMDGPU::getTotalNumVGPRs(ST.hasGFX90AInsts(), ArgNumAGPR, ArgNumVGPR);
}

int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
    const GCNSubtarget &ST) const {
  return getTotalNumVGPRs(ST, NumAGPR, NumVGPR);
}
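
// Worked example (annotation, not in the original file): on gfx90a the VGPRs
// and AGPRs share one register file, and AGPRs are allocated after rounding
// the VGPR count up to an allocation granule, so NumVGPR == 6 and NumAGPR == 3
// cost roughly alignTo(6, 4) + 3 == 11 registers; on targets with separate
// files the cost is max(6, 3) == 6.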

bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
  const TargetMachine &TM = TPC->getTM<TargetMachine>();
  bool HasIndirectCall = false;

  for (Function &F : M) {
    if (F.isDeclaration())
      continue;

    MachineFunction *MF = MMI.getMachineFunction(F);
    assert(MF && "function must have been generated already");

    auto CI = CallGraphResourceInfo.insert(
        std::make_pair(&F, SIFunctionResourceInfo()));
    SIFunctionResourceInfo &Info = CI.first->second;
    assert(CI.second && "should only be called once per function");
    Info = analyzeResourceUsage(*MF, TM);
    HasIndirectCall |= Info.HasIndirectCall;
  }

  if (HasIndirectCall)
    propagateIndirectCallRegisterUsage();

  return false;
}
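
// Consumer sketch (annotation, not part of the original file): a pass that
// declares a dependency on this analysis can query per-function results,
// assuming the getResourceInfo() accessor from AMDGPUResourceUsageAnalysis.h:
//
//   void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<AMDGPUResourceUsageAnalysis>();
//     AU.setPreservesAll();
//   }
//   ...
//   const auto &Info =
//       getAnalysis<AMDGPUResourceUsageAnalysis>().getResourceInfo(&F);
//   int32_t TotalVGPRs = Info.getTotalNumVGPRs(ST);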

AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
    const MachineFunction &MF, const TargetMachine &TM) const {
  SIFunctionResourceInfo Info;

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
                         MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI) ||
                         MRI.isLiveIn(MFI->getPreloadedReg(
                             AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT));

  // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
  // instructions aren't used to access the scratch buffer. Inline assembly may
  // need it though.
  //
  // If we only have implicit uses of flat_scr on flat instructions, it is not
  // really needed.
  if (Info.UsesFlatScratch && !MFI->hasFlatScratchInit() &&
      (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
    Info.UsesFlatScratch = false;
  }

  Info.PrivateSegmentSize = FrameInfo.getStackSize();

  // Assume a big number if there are any unknown sized objects.
  Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
  if (Info.HasDynamicallySizedStack)
    Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;

  if (MFI->isStackRealigned())
    Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();

  Info.UsesVCC =
      MRI.isPhysRegUsed(AMDGPU::VCC_LO) || MRI.isPhysRegUsed(AMDGPU::VCC_HI);

  // If there are no calls, MachineRegisterInfo can tell us the used register
  // count easily.
  // A tail call isn't considered a call for MachineFrameInfo's purposes.
  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
    MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
    for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
      if (MRI.isPhysRegUsed(Reg)) {
        HighestVGPRReg = Reg;
        break;
      }
    }

    if (ST.hasMAIInsts()) {
      MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
      for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) {
        if (MRI.isPhysRegUsed(Reg)) {
          HighestAGPRReg = Reg;
          break;
        }
      }
      Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister
                         ? 0
                         : TRI.getHWRegIndex(HighestAGPRReg) + 1;
    }

    MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
    for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
      if (MRI.isPhysRegUsed(Reg)) {
        HighestSGPRReg = Reg;
        break;
      }
    }

    // We found the maximum register index. They start at 0, so add one to get
    // the number of registers.
    Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister
                       ? 0
                       : TRI.getHWRegIndex(HighestVGPRReg) + 1;
    Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister
                               ? 0
                               : TRI.getHWRegIndex(HighestSGPRReg) + 1;

    return Info;
  }
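
  // Example (annotation, not in the original file): in the fast path above, if
  // the highest VGPR the function touches is v7, getHWRegIndex() yields 7 and
  // NumVGPR becomes 8. The general path below instead tracks per-operand
  // maxima so that callee usage can be merged in at call sites.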

  int32_t MaxVGPR = -1;
  int32_t MaxAGPR = -1;
  int32_t MaxSGPR = -1;
  uint64_t CalleeFrameSize = 0;

  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      // TODO: Check regmasks? Do they occur anywhere except calls?
      for (const MachineOperand &MO : MI.operands()) {
        unsigned Width = 0;
        bool IsSGPR = false;
        bool IsAGPR = false;

        if (!MO.isReg())
          continue;

        Register Reg = MO.getReg();
        switch (Reg) {
        case AMDGPU::EXEC:
        case AMDGPU::EXEC_LO:
        case AMDGPU::EXEC_HI:
        case AMDGPU::SCC:
        case AMDGPU::M0:
        case AMDGPU::M0_LO16:
        case AMDGPU::M0_HI16:
        case AMDGPU::SRC_SHARED_BASE:
        case AMDGPU::SRC_SHARED_LIMIT:
        case AMDGPU::SRC_PRIVATE_BASE:
        case AMDGPU::SRC_PRIVATE_LIMIT:
        case AMDGPU::SGPR_NULL:
        case AMDGPU::MODE:
          continue;

        case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
          llvm_unreachable("src_pops_exiting_wave_id should not be used");

        case AMDGPU::NoRegister:
          assert(MI.isDebugInstr() &&
                 "Instruction uses invalid noreg register");
          continue;

        case AMDGPU::VCC:
        case AMDGPU::VCC_LO:
        case AMDGPU::VCC_HI:
        case AMDGPU::VCC_LO_LO16:
        case AMDGPU::VCC_LO_HI16:
        case AMDGPU::VCC_HI_LO16:
        case AMDGPU::VCC_HI_HI16:
          Info.UsesVCC = true;
          continue;

        case AMDGPU::FLAT_SCR:
        case AMDGPU::FLAT_SCR_LO:
        case AMDGPU::FLAT_SCR_HI:
          continue;

        case AMDGPU::XNACK_MASK:
        case AMDGPU::XNACK_MASK_LO:
        case AMDGPU::XNACK_MASK_HI:
          llvm_unreachable("xnack_mask registers should not be used");

        case AMDGPU::LDS_DIRECT:
          llvm_unreachable("lds_direct register should not be used");

        case AMDGPU::TBA:
        case AMDGPU::TBA_LO:
        case AMDGPU::TBA_HI:
        case AMDGPU::TMA:
        case AMDGPU::TMA_LO:
        case AMDGPU::TMA_HI:
          llvm_unreachable("trap handler registers should not be used");

        case AMDGPU::SRC_VCCZ:
          llvm_unreachable("src_vccz register should not be used");

        case AMDGPU::SRC_EXECZ:
          llvm_unreachable("src_execz register should not be used");

        case AMDGPU::SRC_SCC:
          llvm_unreachable("src_scc register should not be used");

        default:
          break;
        }

        if (AMDGPU::SReg_32RegClass.contains(Reg) ||
            AMDGPU::SReg_LO16RegClass.contains(Reg) ||
            AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_32RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 1;
        } else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
                   AMDGPU::VGPR_LO16RegClass.contains(Reg) ||
                   AMDGPU::VGPR_HI16RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 1;
        } else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
                   AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 1;
        } else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 2;
        } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 2;
        } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 2;
        } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 3;
        } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 3;
        } else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 3;
        } else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 4;
        } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 4;
        } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 4;
        } else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 5;
        } else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 5;
        } else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 5;
        } else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 6;
        } else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 6;
        } else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 6;
        } else if (AMDGPU::VReg_224RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 7;
        } else if (AMDGPU::SReg_224RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 7;
        } else if (AMDGPU::AReg_224RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 7;
        } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 8;
        } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 8;
        } else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 8;
        } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
          assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
                 "trap handler registers should not be used");
          IsSGPR = true;
          Width = 16;
        } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 16;
        } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 16;
        } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 32;
        } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 32;
        } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 32;
        } else {
          llvm_unreachable("Unknown register class");
        }
        unsigned HWReg = TRI.getHWRegIndex(Reg);
        int MaxUsed = HWReg + Width - 1;
        if (IsSGPR) {
          MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
        } else if (IsAGPR) {
          MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
        } else {
          MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
        }
      }

      if (MI.isCall()) {
        // Pseudo used just to encode the underlying global. Is there a better
        // way to track this?

        const MachineOperand *CalleeOp =
            TII->getNamedOperand(MI, AMDGPU::OpName::callee);

        const Function *Callee = getCalleeFunction(*CalleeOp);
        DenseMap<const Function *, SIFunctionResourceInfo>::const_iterator I =
            CallGraphResourceInfo.end();

        // Avoid crashing on undefined behavior with an illegal call to a
        // kernel. If a callsite's calling convention doesn't match the
        // function's, it's undefined behavior. If the callsite calling
        // convention does match, that would have errored earlier.
        if (Callee && AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
          report_fatal_error("invalid call to entry function");

        bool IsIndirect = !Callee || Callee->isDeclaration();
        if (!IsIndirect)
          I = CallGraphResourceInfo.find(Callee);

        // FIXME: Call site could have norecurse on it
        if (!Callee || !Callee->doesNotRecurse()) {
          Info.HasRecursion = true;

          // TODO: If we happen to know there is no stack usage in the
          // callgraph, we don't need to assume an infinitely growing stack.
          if (!MI.isReturn()) {
            // We don't need to assume an unknown stack size for tail calls.

            // FIXME: This only benefits in the case where the kernel does not
            // directly call the tail called function. If a kernel directly
            // calls a tail recursive function, we'll assume maximum stack size
            // based on the regular call instruction.
            CalleeFrameSize = std::max(
                CalleeFrameSize,
                static_cast<uint64_t>(AssumedStackSizeForExternalCall));
          }
        }

        if (IsIndirect || I == CallGraphResourceInfo.end()) {
          CalleeFrameSize =
              std::max(CalleeFrameSize,
                       static_cast<uint64_t>(AssumedStackSizeForExternalCall));

          // Register usage of indirect calls gets handled later
          Info.UsesVCC = true;
          Info.UsesFlatScratch = ST.hasFlatAddressSpace();
          Info.HasDynamicallySizedStack = true;
          Info.HasIndirectCall = true;
        } else {
          // We force CodeGen to run in SCC order, so the callee's register
          // usage etc. should be the cumulative usage of all callees.
          MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
          MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
          MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
          CalleeFrameSize =
              std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
          Info.UsesVCC |= I->second.UsesVCC;
          Info.UsesFlatScratch |= I->second.UsesFlatScratch;
          Info.HasDynamicallySizedStack |= I->second.HasDynamicallySizedStack;
          Info.HasRecursion |= I->second.HasRecursion;
          Info.HasIndirectCall |= I->second.HasIndirectCall;
        }
      }
    }
  }

  Info.NumExplicitSGPR = MaxSGPR + 1;
  Info.NumVGPR = MaxVGPR + 1;
  Info.NumAGPR = MaxAGPR + 1;
  Info.PrivateSegmentSize += CalleeFrameSize;

  return Info;
}

void AMDGPUResourceUsageAnalysis::propagateIndirectCallRegisterUsage() {
  // Collect the maximum number of registers from non-hardware-entrypoints.
  // All these functions are potential targets for indirect calls.
  int32_t NonKernelMaxSGPRs = 0;
  int32_t NonKernelMaxVGPRs = 0;
  int32_t NonKernelMaxAGPRs = 0;

  for (const auto &I : CallGraphResourceInfo) {
    if (!AMDGPU::isEntryFunctionCC(I.getFirst()->getCallingConv())) {
      auto &Info = I.getSecond();
      NonKernelMaxSGPRs = std::max(NonKernelMaxSGPRs, Info.NumExplicitSGPR);
      NonKernelMaxVGPRs = std::max(NonKernelMaxVGPRs, Info.NumVGPR);
      NonKernelMaxAGPRs = std::max(NonKernelMaxAGPRs, Info.NumAGPR);
    }
  }

  // Add register usage for functions with indirect calls.
  // For calls to unknown functions, we assume the maximum register usage of
  // all non-hardware-entrypoints in the current module.
  for (auto &I : CallGraphResourceInfo) {
    auto &Info = I.getSecond();
    if (Info.HasIndirectCall) {
      Info.NumExplicitSGPR = std::max(Info.NumExplicitSGPR, NonKernelMaxSGPRs);
      Info.NumVGPR = std::max(Info.NumVGPR, NonKernelMaxVGPRs);
      Info.NumAGPR = std::max(Info.NumAGPR, NonKernelMaxAGPRs);
    }
  }
}
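
// Worked example (annotation, not in the original file): in a module with
// non-entry functions f (NumVGPR == 32) and g (NumVGPR == 64), any function
// whose body contains an indirect call has its VGPR count raised to
// max(own usage, 64), since the call could reach either f or g but, by
// assumption, never another hardware entrypoint.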