LLVM 19.0.0git
AMDGPUResourceUsageAnalysis.cpp
//===- AMDGPUResourceUsageAnalysis.cpp --- analysis of resources ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Analyzes how many registers and other resources are used by
/// functions.
///
/// The results of this analysis are used to fill the register usage, flat
/// usage, etc. into hardware registers.
///
/// The analysis takes callees into account. E.g. if a function A that needs 10
/// VGPRs calls a function B that needs 20 VGPRs, querying the VGPR usage of A
/// will return 20.
/// It is assumed that an indirect call can go into any function except
/// hardware-entrypoints. Therefore the register usage of functions with
/// indirect calls is estimated as the maximum of all non-entrypoint functions
/// in the module.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUResourceUsageAnalysis.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "amdgpu-resource-usage"

char llvm::AMDGPUResourceUsageAnalysis::ID = 0;
char &llvm::AMDGPUResourceUsageAnalysisID = AMDGPUResourceUsageAnalysis::ID;

// In code object v4 and older, we need to tell the runtime some amount ahead of
// time if we don't know the true stack size. Assume a smaller number if this is
// only due to dynamic / non-entry block allocas.
static cl::opt<uint32_t> clAssumedStackSizeForExternalCall(
    "amdgpu-assume-external-call-stack-size",
    cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden,
    cl::init(16384));

static cl::opt<uint32_t> clAssumedStackSizeForDynamicSizeObjects(
    "amdgpu-assume-dynamic-stack-object-size",
    cl::desc("Assumed extra stack use if there are any "
             "variable sized objects (in bytes)"),
    cl::Hidden, cl::init(4096));

INITIALIZE_PASS(AMDGPUResourceUsageAnalysis, DEBUG_TYPE,
                "Function register usage analysis", true, true)

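// Return the callee of a call pseudo from its callee operand. An immediate
// operand (always zero here) means the callee is unknown; a GlobalAlias is
// looked through to the aliased function.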
static const Function *getCalleeFunction(const MachineOperand &Op) {
  if (Op.isImm()) {
    assert(Op.getImm() == 0);
    return nullptr;
  }
  if (auto *GA = dyn_cast<GlobalAlias>(Op.getGlobal()))
    return cast<Function>(GA->getOperand(0));
  return cast<Function>(Op.getGlobal());
}

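// Return true if Reg has at least one use that is not an implicit operand of
// a FLAT instruction, i.e. a use that actually requires the register to hold
// a meaningful value.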
static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
                                  const SIInstrInfo &TII, unsigned Reg) {
  for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
    if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
      return true;
  }

  return false;
}

int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumSGPRs(
    const GCNSubtarget &ST) const {
  return NumExplicitSGPR +
         IsaInfo::getNumExtraSGPRs(&ST, UsesVCC, UsesFlatScratch,
                                   ST.getTargetID().isXnackOnOrAny());
}

int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
    const GCNSubtarget &ST, int32_t ArgNumAGPR, int32_t ArgNumVGPR) const {
  return AMDGPU::getTotalNumVGPRs(ST.hasGFX90AInsts(), ArgNumAGPR, ArgNumVGPR);
}

int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
    const GCNSubtarget &ST) const {
  return getTotalNumVGPRs(ST, NumAGPR, NumVGPR);
}

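// Analyze every machine function in the module. The call graph is walked in
// post-order so that a callee's resource usage has already been computed when
// its callers are visited.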
bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
  const TargetMachine &TM = TPC->getTM<TargetMachine>();
  const MCSubtargetInfo &STI = *TM.getMCSubtargetInfo();
  bool HasIndirectCall = false;

  CallGraph CG = CallGraph(M);
  auto End = po_end(&CG);

  // By default, for code object v5 and later, track only the minimum scratch
  // size.
  uint32_t AssumedStackSizeForDynamicSizeObjects =
      clAssumedStackSizeForDynamicSizeObjects;
  uint32_t AssumedStackSizeForExternalCall = clAssumedStackSizeForExternalCall;
  if (AMDGPU::getAMDHSACodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5 ||
      STI.getTargetTriple().getOS() == Triple::AMDPAL) {
    if (clAssumedStackSizeForDynamicSizeObjects.getNumOccurrences() == 0)
      AssumedStackSizeForDynamicSizeObjects = 0;
    if (clAssumedStackSizeForExternalCall.getNumOccurrences() == 0)
      AssumedStackSizeForExternalCall = 0;
  }

  for (auto IT = po_begin(&CG); IT != End; ++IT) {
    Function *F = IT->getFunction();
    if (!F || F->isDeclaration())
      continue;

    MachineFunction *MF = MMI.getMachineFunction(*F);
    assert(MF && "function must have been generated already");

    auto CI =
        CallGraphResourceInfo.insert(std::pair(F, SIFunctionResourceInfo()));
    SIFunctionResourceInfo &Info = CI.first->second;
    assert(CI.second && "should only be called once per function");
    Info = analyzeResourceUsage(*MF, TM, AssumedStackSizeForDynamicSizeObjects,
                                AssumedStackSizeForExternalCall);
    HasIndirectCall |= Info.HasIndirectCall;
  }

  // It's possible we have unreachable functions in the module which weren't
  // visited by the PO traversal. Make sure we have some resource counts to
  // report.
  for (const auto &IT : CG) {
    const Function *F = IT.first;
    if (!F || F->isDeclaration())
      continue;

    auto CI =
        CallGraphResourceInfo.insert(std::pair(F, SIFunctionResourceInfo()));
    if (!CI.second) // Skip already visited functions
      continue;

    SIFunctionResourceInfo &Info = CI.first->second;
    MachineFunction *MF = MMI.getMachineFunction(*F);
    assert(MF && "function must have been generated already");
    Info = analyzeResourceUsage(*MF, TM, AssumedStackSizeForDynamicSizeObjects,
                                AssumedStackSizeForExternalCall);
    HasIndirectCall |= Info.HasIndirectCall;
  }

  if (HasIndirectCall)
    propagateIndirectCallRegisterUsage();

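  // This is a pure analysis; the module itself is never modified.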
  return false;
}

AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
    const MachineFunction &MF, const TargetMachine &TM,
    uint32_t AssumedStackSizeForDynamicSizeObjects,
    uint32_t AssumedStackSizeForExternalCall) const {
  SIFunctionResourceInfo Info;

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
                         MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI) ||
                         MRI.isLiveIn(MFI->getPreloadedReg(
                             AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT));

  // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
  // instructions aren't used to access the scratch buffer. Inline assembly may
  // need it though.
  //
  // If we only have implicit uses of flat_scr on flat instructions, it is not
  // really needed.
  if (Info.UsesFlatScratch && !MFI->getUserSGPRInfo().hasFlatScratchInit() &&
      (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
    Info.UsesFlatScratch = false;
  }

  Info.PrivateSegmentSize = FrameInfo.getStackSize();

  // Assume a big number if there are any unknown sized objects.
  Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
  if (Info.HasDynamicallySizedStack)
    Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;

  if (MFI->isStackRealigned())
    Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();

  Info.UsesVCC =
      MRI.isPhysRegUsed(AMDGPU::VCC_LO) || MRI.isPhysRegUsed(AMDGPU::VCC_HI);

  // If there are no calls, MachineRegisterInfo can tell us the used register
  // count easily.
  // A tail call isn't considered a call for MachineFrameInfo's purposes.
  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
    MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
    for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
      if (MRI.isPhysRegUsed(Reg)) {
        HighestVGPRReg = Reg;
        break;
      }
    }

    if (ST.hasMAIInsts()) {
      MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
      for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) {
        if (MRI.isPhysRegUsed(Reg)) {
          HighestAGPRReg = Reg;
          break;
        }
      }
      Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister
                         ? 0
                         : TRI.getHWRegIndex(HighestAGPRReg) + 1;
    }

    MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
    for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
      if (MRI.isPhysRegUsed(Reg)) {
        HighestSGPRReg = Reg;
        break;
      }
    }

    // We found the maximum register index. They start at 0, so add one to get
    // the number of registers.
    Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister
                       ? 0
                       : TRI.getHWRegIndex(HighestVGPRReg) + 1;
    Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister
                               ? 0
                               : TRI.getHWRegIndex(HighestSGPRReg) + 1;

    return Info;
  }

  int32_t MaxVGPR = -1;
  int32_t MaxAGPR = -1;
  int32_t MaxSGPR = -1;
  uint64_t CalleeFrameSize = 0;

  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      // TODO: Check regmasks? Do they occur anywhere except calls?
      for (const MachineOperand &MO : MI.operands()) {
        unsigned Width = 0;
        bool IsSGPR = false;
        bool IsAGPR = false;

        if (!MO.isReg())
          continue;

        Register Reg = MO.getReg();
        switch (Reg) {
        case AMDGPU::EXEC:
        case AMDGPU::EXEC_LO:
        case AMDGPU::EXEC_HI:
        case AMDGPU::SCC:
        case AMDGPU::M0:
        case AMDGPU::M0_LO16:
        case AMDGPU::M0_HI16:
        case AMDGPU::SRC_SHARED_BASE_LO:
        case AMDGPU::SRC_SHARED_BASE:
        case AMDGPU::SRC_SHARED_LIMIT_LO:
        case AMDGPU::SRC_SHARED_LIMIT:
        case AMDGPU::SRC_PRIVATE_BASE_LO:
        case AMDGPU::SRC_PRIVATE_BASE:
        case AMDGPU::SRC_PRIVATE_LIMIT_LO:
        case AMDGPU::SRC_PRIVATE_LIMIT:
        case AMDGPU::SGPR_NULL:
        case AMDGPU::SGPR_NULL64:
        case AMDGPU::MODE:
          continue;

        case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
          llvm_unreachable("src_pops_exiting_wave_id should not be used");

        case AMDGPU::NoRegister:
          assert(MI.isDebugInstr() &&
                 "Instruction uses invalid noreg register");
          continue;

        case AMDGPU::VCC:
        case AMDGPU::VCC_LO:
        case AMDGPU::VCC_HI:
        case AMDGPU::VCC_LO_LO16:
        case AMDGPU::VCC_LO_HI16:
        case AMDGPU::VCC_HI_LO16:
        case AMDGPU::VCC_HI_HI16:
          Info.UsesVCC = true;
          continue;

        case AMDGPU::FLAT_SCR:
        case AMDGPU::FLAT_SCR_LO:
        case AMDGPU::FLAT_SCR_HI:
          continue;

        case AMDGPU::XNACK_MASK:
        case AMDGPU::XNACK_MASK_LO:
        case AMDGPU::XNACK_MASK_HI:
          llvm_unreachable("xnack_mask registers should not be used");

        case AMDGPU::LDS_DIRECT:
          llvm_unreachable("lds_direct register should not be used");

        case AMDGPU::TBA:
        case AMDGPU::TBA_LO:
        case AMDGPU::TBA_HI:
        case AMDGPU::TMA:
        case AMDGPU::TMA_LO:
        case AMDGPU::TMA_HI:
          llvm_unreachable("trap handler registers should not be used");

        case AMDGPU::SRC_VCCZ:
          llvm_unreachable("src_vccz register should not be used");

        case AMDGPU::SRC_EXECZ:
          llvm_unreachable("src_execz register should not be used");

        case AMDGPU::SRC_SCC:
          llvm_unreachable("src_scc register should not be used");

        default:
          break;
        }

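        // Classify the operand's register by bank (SGPR, VGPR, or AGPR) and
        // record how many 32-bit registers the tuple covers, so the highest
        // hardware index it touches can be computed below.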
        if (AMDGPU::SGPR_32RegClass.contains(Reg) ||
            AMDGPU::SGPR_LO16RegClass.contains(Reg) ||
            AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 1;
        } else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
                   AMDGPU::VGPR_16RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 1;
        } else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
                   AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 1;
        } else if (AMDGPU::SGPR_64RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 2;
        } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 2;
        } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 2;
        } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 3;
        } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 3;
        } else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 3;
        } else if (AMDGPU::SGPR_128RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 4;
        } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 4;
        } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 4;
        } else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 5;
        } else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 5;
        } else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 5;
        } else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 6;
        } else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 6;
        } else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 6;
        } else if (AMDGPU::VReg_224RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 7;
        } else if (AMDGPU::SReg_224RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 7;
        } else if (AMDGPU::AReg_224RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 7;
        } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 8;
        } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 8;
        } else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 8;
        } else if (AMDGPU::VReg_288RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 9;
        } else if (AMDGPU::SReg_288RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 9;
        } else if (AMDGPU::AReg_288RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 9;
        } else if (AMDGPU::VReg_320RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 10;
        } else if (AMDGPU::SReg_320RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 10;
        } else if (AMDGPU::AReg_320RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 10;
        } else if (AMDGPU::VReg_352RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 11;
        } else if (AMDGPU::SReg_352RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 11;
        } else if (AMDGPU::AReg_352RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 11;
        } else if (AMDGPU::VReg_384RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 12;
        } else if (AMDGPU::SReg_384RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 12;
        } else if (AMDGPU::AReg_384RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 12;
        } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 16;
        } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 16;
        } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 16;
        } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 32;
        } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 32;
        } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 32;
        } else {
          // We only expect TTMP registers or registers that do not belong to
          // any RC.
          assert((AMDGPU::TTMP_32RegClass.contains(Reg) ||
                  AMDGPU::TTMP_64RegClass.contains(Reg) ||
                  AMDGPU::TTMP_128RegClass.contains(Reg) ||
                  AMDGPU::TTMP_256RegClass.contains(Reg) ||
                  AMDGPU::TTMP_512RegClass.contains(Reg) ||
                  !TRI.getPhysRegBaseClass(Reg)) &&
                 "Unknown register class");
        }
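        // The highest hardware register index touched by this operand is its
        // base index plus the tuple width, minus one.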
        unsigned HWReg = TRI.getHWRegIndex(Reg);
        int MaxUsed = HWReg + Width - 1;
        if (IsSGPR) {
          MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
        } else if (IsAGPR) {
          MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
        } else {
          MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
        }
      }

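      // Calls pull in the resources of the callee. Because functions are
      // visited in post-order over the call graph, a direct callee's info has
      // already been computed; unknown or indirect callees fall back to
      // conservative assumptions.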
      if (MI.isCall()) {
        // Pseudo used just to encode the underlying global. Is there a better
        // way to track this?

        const MachineOperand *CalleeOp =
            TII->getNamedOperand(MI, AMDGPU::OpName::callee);

        const Function *Callee = getCalleeFunction(*CalleeOp);
        DenseMap<const Function *, SIFunctionResourceInfo>::const_iterator I =
            CallGraphResourceInfo.end();

        // Avoid crashing on undefined behavior with an illegal call to a
        // kernel. If a callsite's calling convention doesn't match the
        // function's, it's undefined behavior. If the callsite calling
        // convention does match, that would have errored earlier.
        if (Callee && AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
          report_fatal_error("invalid call to entry function");

        bool IsIndirect = !Callee || Callee->isDeclaration();
        if (!IsIndirect)
          I = CallGraphResourceInfo.find(Callee);

        // FIXME: Call site could have norecurse on it
        if (!Callee || !Callee->doesNotRecurse()) {
          Info.HasRecursion = true;

          // TODO: If we happen to know there is no stack usage in the
          // callgraph, we don't need to assume an infinitely growing stack.
          if (!MI.isReturn()) {
            // We don't need to assume an unknown stack size for tail calls.

            // FIXME: This only benefits in the case where the kernel does not
            // directly call the tail called function. If a kernel directly
            // calls a tail recursive function, we'll assume maximum stack size
            // based on the regular call instruction.
            CalleeFrameSize = std::max(
                CalleeFrameSize,
                static_cast<uint64_t>(AssumedStackSizeForExternalCall));
          }
        }

        if (IsIndirect || I == CallGraphResourceInfo.end()) {
          CalleeFrameSize =
              std::max(CalleeFrameSize,
                       static_cast<uint64_t>(AssumedStackSizeForExternalCall));

          // Register usage of indirect calls gets handled later
          Info.UsesVCC = true;
          Info.UsesFlatScratch = ST.hasFlatAddressSpace();
          Info.HasDynamicallySizedStack = true;
          Info.HasIndirectCall = true;
        } else {
          // We force CodeGen to run in SCC order, so the callee's register
          // usage etc. should be the cumulative usage of all callees.
          MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
          MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
          MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
          CalleeFrameSize =
              std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
          Info.UsesVCC |= I->second.UsesVCC;
          Info.UsesFlatScratch |= I->second.UsesFlatScratch;
          Info.HasDynamicallySizedStack |= I->second.HasDynamicallySizedStack;
          Info.HasRecursion |= I->second.HasRecursion;
          Info.HasIndirectCall |= I->second.HasIndirectCall;
        }
      }
    }
  }

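  // Register indices are zero-based, so the count is the highest index seen
  // plus one. The largest callee frame sits on top of this function's own
  // frame.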
  Info.NumExplicitSGPR = MaxSGPR + 1;
  Info.NumVGPR = MaxVGPR + 1;
  Info.NumAGPR = MaxAGPR + 1;
  Info.PrivateSegmentSize += CalleeFrameSize;

  return Info;
}

void AMDGPUResourceUsageAnalysis::propagateIndirectCallRegisterUsage() {
  // Collect the maximum number of registers from non-hardware-entrypoints.
  // All these functions are potential targets for indirect calls.
  int32_t NonKernelMaxSGPRs = 0;
  int32_t NonKernelMaxVGPRs = 0;
  int32_t NonKernelMaxAGPRs = 0;

  for (const auto &I : CallGraphResourceInfo) {
    if (!AMDGPU::isEntryFunctionCC(I.getFirst()->getCallingConv())) {
      auto &Info = I.getSecond();
      NonKernelMaxSGPRs = std::max(NonKernelMaxSGPRs, Info.NumExplicitSGPR);
      NonKernelMaxVGPRs = std::max(NonKernelMaxVGPRs, Info.NumVGPR);
      NonKernelMaxAGPRs = std::max(NonKernelMaxAGPRs, Info.NumAGPR);
    }
  }

  // Add register usage for functions with indirect calls.
  // For calls to unknown functions, we assume the maximum register usage of
  // all non-hardware-entrypoints in the current module.
  for (auto &I : CallGraphResourceInfo) {
    auto &Info = I.getSecond();
    if (Info.HasIndirectCall) {
      Info.NumExplicitSGPR = std::max(Info.NumExplicitSGPR, NonKernelMaxSGPRs);
      Info.NumVGPR = std::max(Info.NumVGPR, NonKernelMaxVGPRs);
      Info.NumAGPR = std::max(Info.NumAGPR, NonKernelMaxAGPRs);
    }
  }
}