LLVM 23.0.0git
AMDGPUAsmPrinter.cpp
Go to the documentation of this file.
1//===-- AMDGPUAsmPrinter.cpp - AMDGPU assembly printer --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10///
11/// The AMDGPUAsmPrinter is used to print both assembly string and also binary
12/// code. When passed an MCAsmStreamer it prints assembly and when passed
13/// an MCObjectStreamer it outputs binary code.
14//
15//===----------------------------------------------------------------------===//
16//
17
18#include "AMDGPUAsmPrinter.h"
19#include "AMDGPU.h"
23#include "AMDGPUTargetMachine.h"
24#include "GCNSubtarget.h"
29#include "R600AsmPrinter.h"
35#include "llvm/ADT/StringSet.h"
43#include "llvm/MC/MCAssembler.h"
44#include "llvm/MC/MCContext.h"
46#include "llvm/MC/MCStreamer.h"
47#include "llvm/MC/MCValue.h"
54
55using namespace llvm;
56using namespace llvm::AMDGPU;
57
58// This should get the default rounding mode from the kernel. We just set the
59// default here, but this could change if the OpenCL rounding mode pragmas are
60// used.
61//
62// The denormal mode here should match what is reported by the OpenCL runtime
63// for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
64// can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
65//
66// AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
67// precision, and leaves single precision to flush all and does not report
68// CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
69// CL_FP_DENORM for both.
70//
71// FIXME: It seems some instructions do not support single precision denormals
72// regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
73// and sin_f32, cos_f32 on most parts).
74
75// We want to use these instructions, and using fp32 denormals also causes
76// instructions to run at the double precision rate for the device so it's
77// probably best to just report no single precision denormals.
84
85static AsmPrinter *
87 std::unique_ptr<MCStreamer> &&Streamer) {
88 return new AMDGPUAsmPrinter(tm, std::move(Streamer));
89}
90
98
99namespace {
100class AMDGPUAsmPrinterHandler : public AsmPrinterHandler {
101protected:
102 AMDGPUAsmPrinter *Asm;
103
104public:
105 AMDGPUAsmPrinterHandler(AMDGPUAsmPrinter *A) : Asm(A) {}
106
107 void beginFunction(const MachineFunction *MF) override {}
108
109 void endFunction(const MachineFunction *MF) override { Asm->endFunction(MF); }
110
111 void endModule() override {}
112};
113} // End anonymous namespace
114
116 std::unique_ptr<MCStreamer> Streamer)
117 : AsmPrinter(TM, std::move(Streamer)) {
118 assert(OutStreamer && "AsmPrinter constructed without streamer");
119}
120
122 return "AMDGPU Assembly Printer";
123}
124
126 return &TM.getMCSubtargetInfo();
127}
128
130 if (!OutStreamer)
131 return nullptr;
132 return static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
133}
134
138
139void AMDGPUAsmPrinter::initTargetStreamer(Module &M) {
141
142 // TODO: Which one is called first, emitStartOfAsmFile or
143 // emitFunctionBodyStart?
144 if (getTargetStreamer() && !getTargetStreamer()->getTargetID())
145 initializeTargetID(M);
146
149 return;
150
152
155 CodeObjectVersion);
156 HSAMetadataStream->begin(M, *getTargetStreamer()->getTargetID());
157 }
158
161}
162
164 // Init target streamer if it has not yet happened
166 initTargetStreamer(M);
167
168 if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
170
171 // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
172 // Emit HSA Metadata (NT_AMD_HSA_METADATA).
173 if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
174 HSAMetadataStream->end();
175 bool Success = HSAMetadataStream->emitTo(*getTargetStreamer());
176 (void)Success;
177 assert(Success && "Malformed HSA Metadata");
178 }
179}
180
182 const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
183 const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
184 const Function &F = MF->getFunction();
185
186 // TODO: We're checking this late, would be nice to check it earlier.
187 if (STM.requiresCodeObjectV6() && CodeObjectVersion < AMDGPU::AMDHSA_COV6) {
189 STM.getCPU() + " is only available on code object version 6 or better");
190 }
191
192 // TODO: Which one is called first, emitStartOfAsmFile or
193 // emitFunctionBodyStart?
194 if (!getTargetStreamer()->getTargetID())
195 initializeTargetID(*F.getParent());
196
197 const auto &FunctionTargetID = STM.getTargetID();
198 // Make sure function's xnack settings are compatible with module's
199 // xnack settings.
200 if (FunctionTargetID.isXnackSupported() &&
201 FunctionTargetID.getXnackSetting() != IsaInfo::TargetIDSetting::Any &&
202 FunctionTargetID.getXnackSetting() !=
203 getTargetStreamer()->getTargetID()->getXnackSetting()) {
204 OutContext.reportError(
205 {}, "xnack setting of '" + Twine(MF->getName()) +
206 "' function does not match module xnack setting");
207 return;
208 }
209 // Make sure function's sramecc settings are compatible with module's
210 // sramecc settings.
211 if (FunctionTargetID.isSramEccSupported() &&
212 FunctionTargetID.getSramEccSetting() != IsaInfo::TargetIDSetting::Any &&
213 FunctionTargetID.getSramEccSetting() !=
214 getTargetStreamer()->getTargetID()->getSramEccSetting()) {
215 OutContext.reportError(
216 {}, "sramecc setting of '" + Twine(MF->getName()) +
217 "' function does not match module sramecc setting");
218 return;
219 }
220
221 if (!MFI.isEntryFunction())
222 return;
223
224 if (STM.isMesaKernel(F) &&
225 (F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
226 F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
227 AMDGPUMCKernelCodeT KernelCode;
228 getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
229 KernelCode.validate(&STM, MF->getContext());
231 }
232
233 if (STM.isAmdHsaOS())
234 HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
235}
236
237/// Set bits in a kernel descriptor MCExpr field:
238/// return ((Dst & ~Mask) | (Value << Shift))
239static const MCExpr *setBits(const MCExpr *Dst, const MCExpr *Value,
240 uint32_t Mask, uint32_t Shift, MCContext &Ctx) {
241 const auto *Shft = MCConstantExpr::create(Shift, Ctx);
242 const auto *Msk = MCConstantExpr::create(Mask, Ctx);
243 Dst = MCBinaryExpr::createAnd(Dst, MCUnaryExpr::createNot(Msk, Ctx), Ctx);
245 Ctx);
246 return Dst;
247}
248
250 const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
251 if (!MFI.isEntryFunction())
252 return;
253
254 assert(TM.getTargetTriple().getOS() == Triple::AMDHSA);
255
256 const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
257 MCContext &Ctx = MF->getContext();
258
260 getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo);
261
262 // Compute inst_pref_size using MCExpr label subtraction for exact code
263 // size. At this point .Lfunc_end has been emitted (by the base AsmPrinter)
264 // right after the function code, so (Lfunc_end - func_sym) gives the
265 // exact function code size in bytes.
266 if (STM.hasInstPrefSize()) {
267 const MCExpr *CodeSizeExpr = MCBinaryExpr::createSub(
270
271 uint32_t Mask, Shift, Width, CacheLineSize;
272 STM.getInstPrefSizeArgs(Mask, Shift, Width, CacheLineSize);
273 const MCExpr *InstPrefSize =
274 AMDGPUMCExpr::createInstPrefSize(CodeSizeExpr, Ctx);
276 setBits(KD.compute_pgm_rsrc3, InstPrefSize, Mask, Shift, Ctx);
277 }
278
279 auto &Streamer = getTargetStreamer()->getStreamer();
280 auto &Context = Streamer.getContext();
281 auto &ObjectFileInfo = *Context.getObjectFileInfo();
282 auto &ReadOnlySection = *ObjectFileInfo.getReadOnlySection();
283
284 Streamer.pushSection();
285 Streamer.switchSection(&ReadOnlySection);
286
287 // CP microcode requires the kernel descriptor to be allocated on 64 byte
288 // alignment.
289 Streamer.emitValueToAlignment(Align(64), 0, 1, 0);
290 ReadOnlySection.ensureMinAlignment(Align(64));
291
292 SmallString<128> KernelName;
293 getNameWithPrefix(KernelName, &MF->getFunction());
295 STM, KernelName, KD, CurrentProgramInfo.NumVGPRsForWavesPerEU,
297 CurrentProgramInfo.NumSGPRsForWavesPerEU,
299 CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
300 getTargetStreamer()->getTargetID()->isXnackOnOrAny(), Context),
301 Context),
302 CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed);
303
304 Streamer.popSection();
305}
306
308 Register RegNo = MI->getOperand(0).getReg();
309
311 raw_svector_ostream OS(Str);
312 OS << "implicit-def: "
313 << printReg(RegNo, MF->getSubtarget().getRegisterInfo());
314
315 if (MI->getAsmPrinterFlags() & AMDGPU::SGPR_SPILL)
316 OS << " : SGPR spill to VGPR lane";
317
318 OutStreamer->AddComment(OS.str());
319 OutStreamer->addBlankLine();
320}
321
323 if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
325 return;
326 }
327
328 const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
329 const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
330 if (MFI->isEntryFunction() && STM.isAmdHsaOrMesa(MF->getFunction())) {
331 SmallString<128> SymbolName;
332 getNameWithPrefix(SymbolName, &MF->getFunction()),
335 }
336 if (DumpCodeInstEmitter) {
337 // Disassemble function name label to text.
338 DisasmLines.push_back(MF->getName().str() + ":");
339 DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLines.back().size());
340 HexLines.emplace_back("");
341 }
342
344}
345
347 if (DumpCodeInstEmitter && !isBlockOnlyReachableByFallthrough(&MBB)) {
348 // Write a line for the basic block label if it is not only fallthrough.
349 DisasmLines.push_back((Twine("BB") + Twine(getFunctionNumber()) + "_" +
350 Twine(MBB.getNumber()) + ":")
351 .str());
352 DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLines.back().size());
353 HexLines.emplace_back("");
354 }
356}
357
360 if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
361 OutContext.reportError({},
362 Twine(GV->getName()) +
363 ": unsupported initializer for address space");
364 return;
365 }
366
367 const Triple::OSType OS = TM.getTargetTriple().getOS();
368 if (OS == Triple::AMDHSA || OS == Triple::AMDPAL) {
370 return;
371 // With object linking, LDS definitions should have been externalized
372 // by earlier passes (e.g. LDS lowering, named barrier lowering).
373 // Only declarations reach here, emitted as SHN_AMDGPU_LDS symbols
374 // so the linker can assign their offsets.
375 assert(GV->isDeclaration() &&
376 "LDS definitions should have been externalized when object "
377 "linking is enabled");
378 }
379
380 MCSymbol *GVSym = getSymbol(GV);
381
382 GVSym->redefineIfPossible();
383 if (GVSym->isDefined() || GVSym->isVariable())
384 report_fatal_error("symbol '" + Twine(GVSym->getName()) +
385 "' is already defined");
386
387 const DataLayout &DL = GV->getDataLayout();
389 Align Alignment = GV->getAlign().value_or(Align(4));
390
391 emitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
392 emitLinkage(GV, GVSym);
393 auto *TS = getTargetStreamer();
394 TS->emitAMDGPULDS(GVSym, Size, Alignment);
395 return;
396 }
397
399}
400
402 CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(M);
403
404 if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
405 switch (CodeObjectVersion) {
407 HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV4>();
408 break;
410 HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV5>();
411 break;
413 HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV6>();
414 break;
415 default:
416 reportFatalUsageError("unsupported code object version");
417 }
418
419 addAsmPrinterHandler(std::make_unique<AMDGPUAsmPrinterHandler>(this));
420 }
421
423}
424
425/// Mimics GCNSubtarget::computeOccupancy for MCExpr.
426///
427/// Remove dependency on GCNSubtarget and depend only on the necessary values
428/// for said occupancy computation. Should match computeOccupancy implementation
429/// without passing \p STM on.
430const AMDGPUMCExpr *createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs,
431 const MCExpr *NumVGPRs,
432 unsigned DynamicVGPRBlockSize,
433 const GCNSubtarget &STM, MCContext &Ctx) {
434 unsigned MaxWaves = IsaInfo::getMaxWavesPerEU(STM);
435 unsigned Granule = IsaInfo::getVGPRAllocGranule(STM, DynamicVGPRBlockSize);
436 unsigned TargetTotalNumVGPRs = IsaInfo::getTotalNumVGPRs(STM);
437 unsigned Generation = STM.getGeneration();
438
439 auto CreateExpr = [&Ctx](unsigned Value) {
440 return MCConstantExpr::create(Value, Ctx);
441 };
442
444 {CreateExpr(MaxWaves), CreateExpr(Granule),
445 CreateExpr(TargetTotalNumVGPRs),
446 CreateExpr(Generation), CreateExpr(InitOcc),
447 NumSGPRs, NumVGPRs},
448 Ctx);
449}
450
451void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
452 if (F.isDeclaration() || !AMDGPU::isModuleEntryFunctionCC(F.getCallingConv()))
453 return;
454
456 const GCNSubtarget &STM = TM.getSubtarget<GCNSubtarget>(F);
457 MCSymbol *FnSym = TM.getSymbol(&F);
458
459 auto TryGetMCExprValue = [](const MCExpr *Value, uint64_t &Res) -> bool {
460 int64_t Val;
461 if (Value->evaluateAsAbsolute(Val)) {
462 Res = Val;
463 return true;
464 }
465 return false;
466 };
467
468 const uint64_t MaxScratchPerWorkitem =
470 MCSymbol *ScratchSizeSymbol =
471 RI.getSymbol(FnSym->getName(), RIK::RIK_PrivateSegSize, OutContext);
472 uint64_t ScratchSize;
473 if (ScratchSizeSymbol->isVariable() &&
474 TryGetMCExprValue(ScratchSizeSymbol->getVariableValue(), ScratchSize) &&
475 ScratchSize > MaxScratchPerWorkitem) {
476 DiagnosticInfoStackSize DiagStackSize(F, ScratchSize, MaxScratchPerWorkitem,
477 DS_Error);
478 F.getContext().diagnose(DiagStackSize);
479 }
480
481 // Validate addressable scalar registers (i.e., prior to added implicit
482 // SGPRs).
483 MCSymbol *NumSGPRSymbol =
484 RI.getSymbol(FnSym->getName(), RIK::RIK_NumSGPR, OutContext);
486 !STM.hasSGPRInitBug()) {
487 unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
488 uint64_t NumSgpr;
489 if (NumSGPRSymbol->isVariable() &&
490 TryGetMCExprValue(NumSGPRSymbol->getVariableValue(), NumSgpr) &&
491 NumSgpr > MaxAddressableNumSGPRs) {
492 F.getContext().diagnose(DiagnosticInfoResourceLimit(
493 F, "addressable scalar registers", NumSgpr, MaxAddressableNumSGPRs,
495 return;
496 }
497 }
498
499 MCSymbol *VCCUsedSymbol =
500 RI.getSymbol(FnSym->getName(), RIK::RIK_UsesVCC, OutContext);
501 MCSymbol *FlatUsedSymbol =
502 RI.getSymbol(FnSym->getName(), RIK::RIK_UsesFlatScratch, OutContext);
503 uint64_t VCCUsed, FlatUsed, NumSgpr;
504
505 if (NumSGPRSymbol->isVariable() && VCCUsedSymbol->isVariable() &&
506 FlatUsedSymbol->isVariable() &&
507 TryGetMCExprValue(NumSGPRSymbol->getVariableValue(), NumSgpr) &&
508 TryGetMCExprValue(VCCUsedSymbol->getVariableValue(), VCCUsed) &&
509 TryGetMCExprValue(FlatUsedSymbol->getVariableValue(), FlatUsed)) {
510
511 // Recomputes NumSgprs + implicit SGPRs but all symbols should now be
512 // resolvable.
513 NumSgpr += IsaInfo::getNumExtraSGPRs(
514 STM, VCCUsed, FlatUsed,
515 getTargetStreamer()->getTargetID()->isXnackOnOrAny());
517 STM.hasSGPRInitBug()) {
518 unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
519 if (NumSgpr > MaxAddressableNumSGPRs) {
520 F.getContext().diagnose(DiagnosticInfoResourceLimit(
521 F, "scalar registers", NumSgpr, MaxAddressableNumSGPRs, DS_Error,
523 return;
524 }
525 }
526
527 MCSymbol *NumVgprSymbol =
528 RI.getSymbol(FnSym->getName(), RIK::RIK_NumVGPR, OutContext);
529 MCSymbol *NumAgprSymbol =
530 RI.getSymbol(FnSym->getName(), RIK::RIK_NumAGPR, OutContext);
531 uint64_t NumVgpr, NumAgpr;
532
533 MachineModuleInfo &MMI =
535 MachineFunction *MF = MMI.getMachineFunction(F);
536 if (MF && NumVgprSymbol->isVariable() && NumAgprSymbol->isVariable() &&
537 TryGetMCExprValue(NumVgprSymbol->getVariableValue(), NumVgpr) &&
538 TryGetMCExprValue(NumAgprSymbol->getVariableValue(), NumAgpr)) {
539 const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
540 unsigned MaxWaves = MFI.getMaxWavesPerEU();
541 uint64_t TotalNumVgpr =
542 getTotalNumVGPRs(STM.hasGFX90AInsts(), NumAgpr, NumVgpr);
543 uint64_t NumVGPRsForWavesPerEU =
544 std::max({TotalNumVgpr, (uint64_t)1,
545 (uint64_t)STM.getMinNumVGPRs(
546 MaxWaves, MFI.getDynamicVGPRBlockSize())});
547 uint64_t NumSGPRsForWavesPerEU = std::max(
548 {NumSgpr, (uint64_t)1, (uint64_t)STM.getMinNumSGPRs(MaxWaves)});
549 const MCExpr *OccupancyExpr = createOccupancy(
550 STM.getOccupancyWithWorkGroupSizes(*MF).second,
551 MCConstantExpr::create(NumSGPRsForWavesPerEU, OutContext),
552 MCConstantExpr::create(NumVGPRsForWavesPerEU, OutContext),
554 uint64_t Occupancy;
555
556 const auto [MinWEU, MaxWEU] = AMDGPU::getIntegerPairAttribute(
557 F, "amdgpu-waves-per-eu", {0, 0}, true);
558
559 if (TryGetMCExprValue(OccupancyExpr, Occupancy) && Occupancy < MinWEU) {
560 DiagnosticInfoOptimizationFailure Diag(
561 F, F.getSubprogram(),
562 "failed to meet occupancy target given by 'amdgpu-waves-per-eu' in "
563 "'" +
564 F.getName() + "': desired occupancy was " + Twine(MinWEU) +
565 ", final occupancy is " + Twine(Occupancy));
566 F.getContext().diagnose(Diag);
567 return;
568 }
569 }
570 }
571}
572
573static void appendTypeEncoding(std::string &Enc, Type *Ty, const DataLayout &DL,
574 bool IsReturnType) {
575 if (Ty->isVoidTy()) {
576 Enc += 'v';
577 return;
578 }
579 unsigned Bits = DL.getTypeSizeInBits(Ty);
580 // Zero-sized non-void types (e.g. `{}` or `[0 x i8]`) consume no ABI
581 // registers. For returns, emit the same no-result marker as void so the
582 // parameter encoding still has an explicit return-type prefix.
583 if (Bits == 0) {
584 if (IsReturnType)
585 Enc += 'v';
586 return;
587 }
588 if (Bits <= 32)
589 Enc += 'i';
590 else if (Bits <= 64)
591 Enc += 'l';
592 else
593 Enc.append(divideCeil(Bits, 32), 'i');
594}
595
596static std::string computeTypeId(const FunctionType *FTy,
597 const DataLayout &DL) {
598 std::string Enc;
599 appendTypeEncoding(Enc, FTy->getReturnType(), DL, /*IsReturnType=*/true);
600 for (Type *ParamTy : FTy->params())
601 appendTypeEncoding(Enc, ParamTy, DL, /*IsReturnType=*/false);
602 return Enc;
603}
604
605void AMDGPUAsmPrinter::collectCallEdge(const MachineInstr &MI) {
607 return;
608 const SIInstrInfo *TII = MF->getSubtarget<GCNSubtarget>().getInstrInfo();
609 const MachineOperand *Callee =
610 TII->getNamedOperand(MI, AMDGPU::OpName::callee);
611 if (!Callee || !Callee->isGlobal())
612 return;
613 DirectCallEdges.insert(
614 {getSymbol(&MF->getFunction()), getSymbol(Callee->getGlobal())});
615}
616
617void AMDGPUAsmPrinter::emitAMDGPUInfo(Module &M) {
619 return;
620
621 const NamedMDNode *LDSMD = M.getNamedMetadata("amdgpu.lds.uses");
622 bool HasLDSUses = LDSMD && LDSMD->getNumOperands() > 0;
623
624 const NamedMDNode *BarMD = M.getNamedMetadata("amdgpu.named_barrier.uses");
625 bool HasNamedBarriers = BarMD && BarMD->getNumOperands() > 0;
626
627 // Collect address-taken functions (with type IDs) and indirect call sites.
628 DenseMap<const Function *, std::string> AddrTakenTypeIds;
629 using IndirectCallInfo = std::pair<const Function *, std::string>;
631
632 for (const Function &F : M) {
633 bool IsKernel = AMDGPU::isKernel(F.getCallingConv());
634
635 if (!IsKernel && F.hasAddressTaken(/*PutOffender=*/nullptr,
636 /*IgnoreCallbackUses=*/false,
637 /*IgnoreAssumeLikeCalls=*/true,
638 /*IgnoreLLVMUsed=*/true)) {
639 AddrTakenTypeIds[&F] =
640 computeTypeId(F.getFunctionType(), M.getDataLayout());
641 }
642
643 if (F.isDeclaration())
644 continue;
645
646 StringSet<> SeenTypeIds;
647 for (const BasicBlock &BB : F) {
648 for (const Instruction &I : BB) {
649 const auto *CB = dyn_cast<CallBase>(&I);
650 if (!CB || !CB->isIndirectCall())
651 continue;
652 std::string TId =
653 computeTypeId(CB->getFunctionType(), M.getDataLayout());
654 if (SeenTypeIds.insert(TId).second)
655 IndirectCalls.push_back({&F, std::move(TId)});
656 }
657 }
658 }
659
660 if (FunctionInfos.empty() && DirectCallEdges.empty() && !HasLDSUses &&
661 !HasNamedBarriers && AddrTakenTypeIds.empty() && IndirectCalls.empty())
662 return;
663
664 AMDGPU::InfoSectionData Data;
665 Data.Funcs = std::move(FunctionInfos);
666
667 for (auto &[F, TypeId] : AddrTakenTypeIds) {
668 MCSymbol *Sym = getSymbol(F);
669 Data.TypeIds.push_back({Sym, TypeId});
670 }
671
672 for (auto &[CallerSym, CalleeSym] : DirectCallEdges)
673 Data.Calls.push_back({CallerSym, CalleeSym});
674 DirectCallEdges.clear();
675
676 if (HasLDSUses) {
677 for (const MDNode *N : LDSMD->operands()) {
678 auto *Func = mdconst::extract<Function>(N->getOperand(0));
679 auto *LdsVar = mdconst::extract<GlobalVariable>(N->getOperand(1));
680 Data.Uses.push_back({getSymbol(Func), getSymbol(LdsVar)});
681 }
682 }
683
684 if (HasNamedBarriers) {
685 for (const MDNode *N : BarMD->operands()) {
686 auto *BarVar = mdconst::extract<GlobalVariable>(N->getOperand(0));
687 MCSymbol *BarSym = getSymbol(BarVar);
688 for (unsigned I = 1, E = N->getNumOperands(); I < E; ++I) {
689 auto *Func = mdconst::extract<Function>(N->getOperand(I));
690 Data.Uses.push_back({getSymbol(Func), BarSym});
691 }
692 }
693 }
694
695 for (auto &[Caller, Enc] : IndirectCalls) {
696 MCSymbol *CallerSym = getSymbol(Caller);
697 Data.IndirectCalls.push_back({CallerSym, Enc});
698 }
699
701}
702
704 // Pad with s_code_end to help tools and guard against instruction prefetch
705 // causing stale data in caches. Arguably this should be done by the linker,
706 // which is why this isn't done for Mesa.
707 // Don't do it if there is no code.
708 const MCSubtargetInfo &STI = *getGlobalSTI();
709 if ((AMDGPU::isGFX10Plus(STI) || AMDGPU::isGFX90A(STI)) &&
713 if (TextSect->hasInstructions()) {
714 OutStreamer->switchSection(TextSect);
716 }
717 }
718
719 // Emit the unified .amdgpu.info section (per-function resources, call graph,
720 // LDS/named-barrier use edges, indirect calls, and address-taken type IDs).
721 emitAMDGPUInfo(M);
722
723 // Assign expressions which can only be resolved when all other functions are
724 // known.
725 RI.finalize(OutContext);
726
727 // Switch section and emit all GPR maximums within the processed module.
728 OutStreamer->pushSection();
729 MCSectionELF *MaxGPRSection =
730 OutContext.getELFSection(".AMDGPU.gpr_maximums", ELF::SHT_PROGBITS, 0);
731 OutStreamer->switchSection(MaxGPRSection);
733 RI.getMaxVGPRSymbol(OutContext), RI.getMaxAGPRSymbol(OutContext),
734 RI.getMaxSGPRSymbol(OutContext), RI.getMaxNamedBarrierSymbol(OutContext));
735 OutStreamer->popSection();
736
737 // In the object-linking pipeline per-function resource MCExprs reference
738 // external callee symbols that cannot be evaluated here, so cross-TU limit
739 // checks would silently no-op for every non-leaf function. Defer resource
740 // sanity checking to the linker, which re-validates against the aggregated
741 // call graph in the combined .amdgpu.info metadata.
743 for (Function &F : M.functions())
744 validateMCResourceInfo(F);
745 }
746
747 RI.reset();
748
750}
751
752SmallString<128> AMDGPUAsmPrinter::getMCExprStr(const MCExpr *Value) {
754 raw_svector_ostream OSS(Str);
755 auto &Streamer = getTargetStreamer()->getStreamer();
756 auto &Context = Streamer.getContext();
757 const MCExpr *New = foldAMDGPUMCExpr(Value, Context);
758 printAMDGPUMCExpr(New, OSS, &MAI);
759 return Str;
760}
761
762// Print comments that apply to both callable functions and entry points.
763void AMDGPUAsmPrinter::emitCommonFunctionComments(
764 const MCExpr *NumVGPR, const MCExpr *NumAGPR, const MCExpr *TotalNumVGPR,
765 const MCExpr *NumSGPR, const MCExpr *ScratchSize, uint64_t CodeSize,
766 const AMDGPUMachineFunctionInfo *MFI) {
767 OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);
768 OutStreamer->emitRawComment(" TotalNumSgprs: " + getMCExprStr(NumSGPR),
769 false);
770 OutStreamer->emitRawComment(" NumVgprs: " + getMCExprStr(NumVGPR), false);
771 if (NumAGPR && TotalNumVGPR) {
772 OutStreamer->emitRawComment(" NumAgprs: " + getMCExprStr(NumAGPR), false);
773 OutStreamer->emitRawComment(" TotalNumVgprs: " + getMCExprStr(TotalNumVGPR),
774 false);
775 }
776 OutStreamer->emitRawComment(" ScratchSize: " + getMCExprStr(ScratchSize),
777 false);
778 OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()),
779 false);
780}
781
782const MCExpr *AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
783 const MachineFunction &MF) const {
784 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
785 MCContext &Ctx = MF.getContext();
786 uint16_t KernelCodeProperties = 0;
787 const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI.getUserSGPRInfo();
788
789 if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
790 KernelCodeProperties |=
791 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
792 }
793 if (UserSGPRInfo.hasDispatchPtr()) {
794 KernelCodeProperties |=
795 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
796 }
797 if (UserSGPRInfo.hasQueuePtr()) {
798 KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
799 }
800 if (UserSGPRInfo.hasKernargSegmentPtr()) {
801 KernelCodeProperties |=
802 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
803 }
804 if (UserSGPRInfo.hasDispatchID()) {
805 KernelCodeProperties |=
806 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
807 }
808 if (UserSGPRInfo.hasFlatScratchInit()) {
809 KernelCodeProperties |=
810 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
811 }
812 if (UserSGPRInfo.hasPrivateSegmentSize()) {
813 KernelCodeProperties |=
814 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
815 }
816 if (MF.getSubtarget<GCNSubtarget>().isWave32()) {
817 KernelCodeProperties |=
818 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
819 }
820
821 // CurrentProgramInfo.DynamicCallStack is a MCExpr and could be
822 // un-evaluatable at this point so it cannot be conditionally checked here.
823 // Instead, we'll directly shift the possibly unknown MCExpr into its place
824 // and bitwise-or it into KernelCodeProperties.
825 const MCExpr *KernelCodePropExpr =
826 MCConstantExpr::create(KernelCodeProperties, Ctx);
827 const MCExpr *OrValue = MCConstantExpr::create(
828 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT, Ctx);
829 OrValue = MCBinaryExpr::createShl(CurrentProgramInfo.DynamicCallStack,
830 OrValue, Ctx);
831 KernelCodePropExpr = MCBinaryExpr::createOr(KernelCodePropExpr, OrValue, Ctx);
832
833 return KernelCodePropExpr;
834}
835
836MCKernelDescriptor
837AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(const MachineFunction &MF,
838 const SIProgramInfo &PI) const {
839 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
840 const Function &F = MF.getFunction();
841 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
842 MCContext &Ctx = MF.getContext();
843
844 MCKernelDescriptor KernelDescriptor;
845
846 KernelDescriptor.group_segment_fixed_size =
848 KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
849
850 Align MaxKernArgAlign;
851 KernelDescriptor.kernarg_size = MCConstantExpr::create(
852 STM.getKernArgSegmentSize(F, MaxKernArgAlign), Ctx);
853
854 KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1(STM, Ctx);
855 KernelDescriptor.compute_pgm_rsrc2 = PI.getComputePGMRSrc2(STM, Ctx);
856 KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
857
858 int64_t PGM_Rsrc3 = 1;
859 bool EvaluatableRsrc3 =
860 CurrentProgramInfo.ComputePGMRSrc3->evaluateAsAbsolute(PGM_Rsrc3);
861 (void)PGM_Rsrc3;
862 (void)EvaluatableRsrc3;
864 STM.hasGFX90AInsts() || STM.hasGFX1250Insts() || !EvaluatableRsrc3 ||
865 static_cast<uint64_t>(PGM_Rsrc3) == 0);
866 KernelDescriptor.compute_pgm_rsrc3 = CurrentProgramInfo.ComputePGMRSrc3;
867
868 KernelDescriptor.kernarg_preload = MCConstantExpr::create(
869 AMDGPU::hasKernargPreload(STM) ? Info->getNumKernargPreloadedSGPRs() : 0,
870 Ctx);
871
872 return KernelDescriptor;
873}
874
876 // Init target streamer lazily on the first function so that previous passes
877 // can set metadata.
879 initTargetStreamer(*MF.getFunction().getParent());
880
881 ResourceUsage =
883 CurrentProgramInfo.reset(MF);
884
885 const AMDGPUMachineFunctionInfo *MFI =
886 MF.getInfo<AMDGPUMachineFunctionInfo>();
887 MCContext &Ctx = MF.getContext();
888
889 // The starting address of all shader programs must be 256 bytes aligned.
890 // Regular functions just need the basic required instruction alignment.
891 MF.ensureAlignment(MFI->isEntryFunction() ? Align(256) : Align(4));
892
894
895 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
897 // FIXME: This should be an explicit check for Mesa.
898 if (!STM.isAmdHsaOS() && !STM.isAmdPalOS()) {
899 MCSectionELF *ConfigSection =
900 Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
901 OutStreamer->switchSection(ConfigSection);
902 }
903
904 RI.gatherResourceInfo(MF, *ResourceUsage, OutContext);
905
908 *ResourceUsage;
909 FunctionInfos.push_back(
910 {/*NumSGPR=*/static_cast<uint32_t>(RU.NumExplicitSGPR),
911 /*NumArchVGPR=*/static_cast<uint32_t>(RU.NumVGPR),
912 /*NumAccVGPR=*/static_cast<uint32_t>(RU.NumAGPR),
913 /*PrivateSegmentSize=*/static_cast<uint32_t>(RU.PrivateSegmentSize),
914 /*UsesVCC=*/RU.UsesVCC,
915 /*UsesFlatScratch=*/RU.UsesFlatScratch,
916 /*HasDynStack=*/RU.HasDynamicallySizedStack,
917 /*Sym=*/getSymbol(&MF.getFunction())});
918 }
919
920 if (MFI->isModuleEntryFunction()) {
921 getSIProgramInfo(CurrentProgramInfo, MF);
922 }
923
924 if (STM.isAmdPalOS()) {
925 if (MFI->isEntryFunction())
926 EmitPALMetadata(MF, CurrentProgramInfo);
927 else if (MFI->isModuleEntryFunction())
928 emitPALFunctionMetadata(MF);
929 } else if (!STM.isAmdHsaOS()) {
930 EmitProgramInfoSI(MF, CurrentProgramInfo);
931 }
932
933 DumpCodeInstEmitter = nullptr;
934 if (STM.dumpCode()) {
935 // For -dumpcode, get the assembler out of the streamer. This only works
936 // with -filetype=obj.
937 MCAssembler *Assembler = OutStreamer->getAssemblerPtr();
938 if (Assembler)
939 DumpCodeInstEmitter = Assembler->getEmitterPtr();
940 }
941
942 DisasmLines.clear();
943 HexLines.clear();
945
947
948 emitResourceUsageRemarks(MF, CurrentProgramInfo, MFI->isModuleEntryFunction(),
949 STM.hasMAIInsts());
950
951 {
954 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext),
955 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumAGPR, OutContext),
956 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumSGPR, OutContext),
957 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumNamedBarrier,
958 OutContext),
959 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
960 OutContext),
961 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_UsesVCC, OutContext),
962 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_UsesFlatScratch,
963 OutContext),
964 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasDynSizedStack,
965 OutContext),
966 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasRecursion,
967 OutContext),
968 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasIndirectCall,
969 OutContext));
970 }
971
972 // Emit _dvgpr$ symbol when appropriate.
973 emitDVgprSymbol(MF);
974
975 if (isVerbose()) {
976 MCSectionELF *CommentSection =
977 Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
978 OutStreamer->switchSection(CommentSection);
979
980 if (!MFI->isEntryFunction()) {
982 OutStreamer->emitRawComment(" Function info:", false);
983
984 emitCommonFunctionComments(
985 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext)
986 ->getVariableValue(),
987 STM.hasMAIInsts() ? RI.getSymbol(CurrentFnSym->getName(),
988 RIK::RIK_NumAGPR, OutContext)
989 ->getVariableValue()
990 : nullptr,
991 RI.createTotalNumVGPRs(MF, Ctx),
992 RI.createTotalNumSGPRs(
993 MF,
994 MF.getSubtarget<GCNSubtarget>().getTargetID().isXnackOnOrAny(),
995 Ctx),
996 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
998 ->getVariableValue(),
999 CurrentProgramInfo.getFunctionCodeSize(MF), MFI);
1000 return false;
1001 }
1002
1003 OutStreamer->emitRawComment(" Kernel info:", false);
1004 emitCommonFunctionComments(
1005 CurrentProgramInfo.NumArchVGPR,
1006 STM.hasMAIInsts() ? CurrentProgramInfo.NumAccVGPR : nullptr,
1007 CurrentProgramInfo.NumVGPR, CurrentProgramInfo.NumSGPR,
1008 CurrentProgramInfo.ScratchSize,
1009 CurrentProgramInfo.getFunctionCodeSize(MF), MFI);
1010
1011 OutStreamer->emitRawComment(
1012 " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false);
1013 OutStreamer->emitRawComment(
1014 " IeeeMode: " + Twine(CurrentProgramInfo.IEEEMode), false);
1015 OutStreamer->emitRawComment(
1016 " LDSByteSize: " + Twine(CurrentProgramInfo.LDSSize) +
1017 " bytes/workgroup (compile time only)",
1018 false);
1019
1020 OutStreamer->emitRawComment(
1021 " SGPRBlocks: " + getMCExprStr(CurrentProgramInfo.SGPRBlocks), false);
1022
1023 OutStreamer->emitRawComment(
1024 " VGPRBlocks: " + getMCExprStr(CurrentProgramInfo.VGPRBlocks), false);
1025
1026 OutStreamer->emitRawComment(
1027 " NumSGPRsForWavesPerEU: " +
1028 getMCExprStr(CurrentProgramInfo.NumSGPRsForWavesPerEU),
1029 false);
1030 OutStreamer->emitRawComment(
1031 " NumVGPRsForWavesPerEU: " +
1032 getMCExprStr(CurrentProgramInfo.NumVGPRsForWavesPerEU),
1033 false);
1034
1035 if (STM.hasGFX90AInsts()) {
1036 const MCExpr *AdjustedAccum = MCBinaryExpr::createAdd(
1037 CurrentProgramInfo.AccumOffset, MCConstantExpr::create(1, Ctx), Ctx);
1038 AdjustedAccum = MCBinaryExpr::createMul(
1039 AdjustedAccum, MCConstantExpr::create(4, Ctx), Ctx);
1040 OutStreamer->emitRawComment(
1041 " AccumOffset: " + getMCExprStr(AdjustedAccum), false);
1042 }
1043
1044 if (STM.hasGFX1250Insts())
1045 OutStreamer->emitRawComment(
1046 " NamedBarCnt: " + getMCExprStr(CurrentProgramInfo.NamedBarCnt),
1047 false);
1048
1049 OutStreamer->emitRawComment(
1050 " Occupancy: " + getMCExprStr(CurrentProgramInfo.Occupancy), false);
1051
1052 OutStreamer->emitRawComment(
1053 " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
1054
1055 OutStreamer->emitRawComment(
1056 " COMPUTE_PGM_RSRC2:SCRATCH_EN: " +
1057 getMCExprStr(CurrentProgramInfo.ScratchEnable),
1058 false);
1059 OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
1060 Twine(CurrentProgramInfo.UserSGPR),
1061 false);
1062 OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
1063 Twine(CurrentProgramInfo.TrapHandlerEnable),
1064 false);
1065 OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " +
1066 Twine(CurrentProgramInfo.TGIdXEnable),
1067 false);
1068 OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
1069 Twine(CurrentProgramInfo.TGIdYEnable),
1070 false);
1071 OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
1072 Twine(CurrentProgramInfo.TGIdZEnable),
1073 false);
1074 OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
1075 Twine(CurrentProgramInfo.TIdIGCompCount),
1076 false);
1077
1078 [[maybe_unused]] int64_t PGMRSrc3;
1080 STM.hasGFX90AInsts() || STM.hasGFX1250Insts() ||
1081 (CurrentProgramInfo.ComputePGMRSrc3->evaluateAsAbsolute(PGMRSrc3) &&
1082 static_cast<uint64_t>(PGMRSrc3) == 0));
1083 if (STM.hasGFX90AInsts()) {
1084 OutStreamer->emitRawComment(
1085 " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
1086 getMCExprStr(MCKernelDescriptor::bits_get(
1087 CurrentProgramInfo.ComputePGMRSrc3,
1088 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
1089 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, Ctx)),
1090 false);
1091 OutStreamer->emitRawComment(
1092 " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
1093 getMCExprStr(MCKernelDescriptor::bits_get(
1094 CurrentProgramInfo.ComputePGMRSrc3,
1095 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
1096 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx)),
1097 false);
1098 }
1099 }
1100
1101 if (DumpCodeInstEmitter) {
1102
1103 OutStreamer->switchSection(
1104 Context.getELFSection(".AMDGPU.disasm", ELF::SHT_PROGBITS, 0));
1105
1106 for (size_t i = 0; i < DisasmLines.size(); ++i) {
1107 std::string Comment = "\n";
1108 if (!HexLines[i].empty()) {
1109 Comment = std::string(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
1110 Comment += " ; " + HexLines[i] + "\n";
1111 }
1112
1113 OutStreamer->emitBytes(StringRef(DisasmLines[i]));
1114 OutStreamer->emitBytes(StringRef(Comment));
1115 }
1116 }
1117
1118 return false;
1119}
1120
1121// When appropriate, add a _dvgpr$ symbol, with the value of the function
1122// symbol, plus an offset encoding one less than the number of VGPR blocks used
1123// by the function in bits 5..3 of the symbol value. A "VGPR block" can be
1124// either 16 VGPRs (for a max of 128), or 32 VGPRs (for a max of 256). This is
1125// used by a front-end to have functions that are chained rather than called,
1126// and a dispatcher that dynamically resizes the VGPR count before dispatching
1127// to a function.
//
// The new symbol is named "_dvgpr$" followed by the function name and is given
// the same visibility and linkage as the function. If more than 8 blocks would
// be required, the "too many DVGPR blocks" message is produced and no symbol
// is emitted.
//
// NOTE(review): this listing skips source lines 1129, 1131, 1146 and 1154, so
// the declaration of MFI, the second half of the enabling condition, the call
// that reports the error and the first createAdd operand are not visible here.
1128void AMDGPUAsmPrinter::emitDVgprSymbol(MachineFunction &MF) {
1130 if (MFI.isDynamicVGPREnabled() &&
1132 MCContext &Ctx = MF.getContext();
1133 unsigned BlockSize = MFI.getDynamicVGPRBlockSize();
1134 MCValue NumVGPRs;
     // The VGPR count must already fold to an absolute constant at this point;
     // any other outcome is treated as unreachable.
1135 if (!CurrentProgramInfo.NumVGPRsForWavesPerEU->evaluateAsRelocatable(
1136 NumVGPRs, nullptr) ||
1137 !NumVGPRs.isAbsolute()) {
1138 llvm_unreachable("unable to resolve NumVGPRs for _dvgpr$ symbol");
1139 }
1140 // Calculate number of VGPR blocks.
1141 // Treat 0 VGPRs as 1 VGPR to avoid underflowing.
1142 unsigned NumBlocks =
1143 divideCeil(std::max(unsigned(NumVGPRs.getConstant()), 1U), BlockSize);
1144
     // (NumBlocks - 1) is stored in the three bits 5..3, so at most 8 blocks
     // can be represented.
1145 if (NumBlocks > 8) {
1147 "too many DVGPR blocks for _dvgpr$ symbol for '" +
1148 Twine(CurrentFnSym->getName()) + "'");
1149 return;
1150 }
     // Shift the zero-based block count into bits 5..3.
1151 unsigned EncodedNumBlocks = (NumBlocks - 1) << 3;
1152 // Add to function symbol to create _dvgpr$ symbol.
1153 const MCExpr *DVgprFuncVal = MCBinaryExpr::createAdd(
1155 MCConstantExpr::create(EncodedNumBlocks, Ctx), Ctx);
1156 MCSymbol *DVgprFuncSym =
1157 Ctx.getOrCreateSymbol(Twine("_dvgpr$") + CurrentFnSym->getName());
1158 OutStreamer->emitAssignment(DVgprFuncSym, DVgprFuncVal);
     // Mirror the function's visibility and linkage on the derived symbol.
1159 emitVisibility(DVgprFuncSym, MF.getFunction().getVisibility());
1160 emitLinkage(&MF.getFunction(), DVgprFuncSym);
1161 }
1162}
1163
1164// TODO: Fold this into emitFunctionBodyStart.
//
// Establishes the target streamer's target ID settings (xnack and sramecc) for
// module M. The settings start out from the global subtarget's feature string
// and are then refined from the subtargets of the functions in M.
//
// NOTE(review): source line 1168 (the start of the call whose last argument is
// shown on line 1169) is missing from this listing.
1165void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {
1166 // In the beginning all features are either 'Any' or 'NotSupported',
1167 // depending on global target features. This will cover empty modules.
1169 getGlobalSTI()->getFeatureString());
1170
1171 // If module is empty, we are done.
1172 if (M.empty())
1173 return;
1174
1175 // If module is not empty, need to find first 'Off' or 'On' feature
1176 // setting per feature from functions in module.
1177 for (auto &F : M) {
1178 auto &TSTargetID = getTargetStreamer()->getTargetID();
     // Stop scanning once each feature is either unsupported or already has a
     // definite On/Off setting.
1179 if ((!TSTargetID->isXnackSupported() || TSTargetID->isXnackOnOrOff()) &&
1180 (!TSTargetID->isSramEccSupported() || TSTargetID->isSramEccOnOrOff()))
1181 break;
1182
1183 const GCNSubtarget &STM = TM.getSubtarget<GCNSubtarget>(F);
1184 const IsaInfo::AMDGPUTargetID &STMTargetID = STM.getTargetID();
     // Only a setting that is still 'Any' is overwritten, so an earlier
     // function's definite setting is never replaced by a later one.
1185 if (TSTargetID->isXnackSupported())
1186 if (TSTargetID->getXnackSetting() == IsaInfo::TargetIDSetting::Any)
1187 TSTargetID->setXnackSetting(STMTargetID.getXnackSetting());
1188 if (TSTargetID->isSramEccSupported())
1189 if (TSTargetID->getSramEccSetting() == IsaInfo::TargetIDSetting::Any)
1190 TSTargetID->setSramEccSetting(STMTargetID.getSramEccSetting());
1191 }
1192}
1193
1194// AccumOffset computed for the MCExpr equivalent of:
1195// alignTo(std::max(1, NumVGPR), 4) / 4 - 1;
1196static const MCExpr *computeAccumOffset(const MCExpr *NumVGPR, MCContext &Ctx) {
1197 const MCExpr *ConstFour = MCConstantExpr::create(4, Ctx);
1198 const MCExpr *ConstOne = MCConstantExpr::create(1, Ctx);
1199
1200 // Can't be lower than 1 for subsequent alignTo.
1201 const MCExpr *MaximumTaken =
1202 AMDGPUMCExpr::createMax({ConstOne, NumVGPR}, Ctx);
1203
1204 // Practically, it's computing divideCeil(MaximumTaken, 4).
1205 const MCExpr *DivCeil = MCBinaryExpr::createDiv(
1206 AMDGPUMCExpr::createAlignTo(MaximumTaken, ConstFour, Ctx), ConstFour,
1207 Ctx);
1208
1209 return MCBinaryExpr::createSub(DivCeil, ConstOne, Ctx);
1210}
1211
// Fills ProgInfo with the register counts, scratch/LDS sizes, float mode and
// RSRC-related settings for MF. Register counts and related values are kept as
// MCExprs that refer to this function's resource-info symbols, so they are not
// necessarily resolvable to constants when this runs; the limit checks below
// only fire when an expression already evaluates to an absolute value.
//
// NOTE(review): this listing skips a number of source lines (gaps in the
// embedded numbering), so several statements appear without their first or
// last line.
1212void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
1213 const MachineFunction &MF) {
1214 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
1215 MCContext &Ctx = MF.getContext();
1216
     // Wraps an integer in an MCConstantExpr.
1217 auto CreateExpr = [&Ctx](int64_t Value) {
1218 return MCConstantExpr::create(Value, Ctx);
1219 };
1220
     // Stores the value in Res and returns true only when the expression
     // evaluates to an absolute constant; otherwise leaves Res untouched.
1221 auto TryGetMCExprValue = [](const MCExpr *Value, uint64_t &Res) -> bool {
1222 int64_t Val;
1223 if (Value->evaluateAsAbsolute(Val)) {
1224 Res = Val;
1225 return true;
1226 }
1227 return false;
1228 };
1229
     // Returns a reference to this function's resource-info symbol of kind RIK.
1230 auto GetSymRefExpr =
1231 [&](MCResourceInfo::ResourceInfoKind RIK) -> const MCExpr * {
1232 MCSymbol *Sym = RI.getSymbol(CurrentFnSym->getName(), RIK, OutContext);
1233 return MCSymbolRefExpr::create(Sym, Ctx);
1234 };
1235
1237 ProgInfo.NumArchVGPR = GetSymRefExpr(RIK::RIK_NumVGPR);
1238 ProgInfo.NumAccVGPR = GetSymRefExpr(RIK::RIK_NumAGPR);
1240 ProgInfo.NumAccVGPR, ProgInfo.NumArchVGPR, Ctx);
1241
     // The accumulation offset is derived from the arch VGPR count only.
1242 ProgInfo.AccumOffset = computeAccumOffset(ProgInfo.NumArchVGPR, Ctx);
1243 ProgInfo.TgSplit = STM.isTgSplitEnabled();
1244 ProgInfo.NumSGPR = GetSymRefExpr(RIK::RIK_NumSGPR);
1245 ProgInfo.ScratchSize = GetSymRefExpr(RIK::RIK_PrivateSegSize);
1246 ProgInfo.VCCUsed = GetSymRefExpr(RIK::RIK_UsesVCC);
1247 ProgInfo.FlatUsed = GetSymRefExpr(RIK::RIK_UsesFlatScratch);
     // The call stack is dynamic if the function has a dynamically sized stack
     // or recursion.
1248 ProgInfo.DynamicCallStack =
1249 MCBinaryExpr::createOr(GetSymRefExpr(RIK::RIK_HasDynSizedStack),
1250 GetSymRefExpr(RIK::RIK_HasRecursion), Ctx);
1251
     // NamedBarCnt is the named-barrier count rounded up to a multiple of 4 and
     // divided by 4.
1252 const MCExpr *BarBlkConst = MCConstantExpr::create(4, Ctx);
1253 const MCExpr *AlignToBlk = AMDGPUMCExpr::createAlignTo(
1254 GetSymRefExpr(RIK::RIK_NumNamedBarrier), BarBlkConst, Ctx);
1255 ProgInfo.NamedBarCnt = MCBinaryExpr::createDiv(AlignToBlk, BarBlkConst, Ctx);
1256
1257 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1258
1259 // The calculations related to SGPR/VGPR blocks are
1260 // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
1261 // unified.
1262 const MCExpr *ExtraSGPRs = AMDGPUMCExpr::createExtraSGPRs(
1263 ProgInfo.VCCUsed, ProgInfo.FlatUsed,
1264 getTargetStreamer()->getTargetID()->isXnackOnOrAny(), Ctx);
1265
1266 // Check the addressable register limit before we add ExtraSGPRs.
1268 !STM.hasSGPRInitBug()) {
1269 unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
1270 uint64_t NumSgpr;
1271 if (TryGetMCExprValue(ProgInfo.NumSGPR, NumSgpr) &&
1272 NumSgpr > MaxAddressableNumSGPRs) {
1273 // This can happen due to a compiler bug or when using inline asm.
     // Note: this LLVMContext 'Ctx' shadows the MCContext 'Ctx' declared at the
     // top of the function for the remainder of this scope.
1274 LLVMContext &Ctx = MF.getFunction().getContext();
1275 Ctx.diagnose(DiagnosticInfoResourceLimit(
1276 MF.getFunction(), "addressable scalar registers", NumSgpr,
1277 MaxAddressableNumSGPRs, DS_Error, DK_ResourceLimit));
1278 ProgInfo.NumSGPR = CreateExpr(MaxAddressableNumSGPRs - 1);
1279 }
1280 }
1281
1282 // Account for extra SGPRs and VGPRs reserved for debugger use.
1283 ProgInfo.NumSGPR = MCBinaryExpr::createAdd(ProgInfo.NumSGPR, ExtraSGPRs, Ctx);
1284
1285 const Function &F = MF.getFunction();
1286
1287 // Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave
1288 // dispatch registers are passed as function args.
1289 unsigned WaveDispatchNumSGPR = MFI->getNumWaveDispatchSGPRs(),
1290 WaveDispatchNumVGPR = MFI->getNumWaveDispatchVGPRs();
1291
1292 if (WaveDispatchNumSGPR) {
1294 {ProgInfo.NumSGPR,
1295 MCBinaryExpr::createAdd(CreateExpr(WaveDispatchNumSGPR), ExtraSGPRs,
1296 Ctx)},
1297 Ctx);
1298 }
1299
1300 if (WaveDispatchNumVGPR) {
1302 {ProgInfo.NumVGPR, CreateExpr(WaveDispatchNumVGPR)}, Ctx);
1303
1305 ProgInfo.NumAccVGPR, ProgInfo.NumArchVGPR, Ctx);
1306 }
1307
1308 // Adjust number of registers used to meet default/requested minimum/maximum
1309 // number of waves per execution unit request.
1310 unsigned MaxWaves = MFI->getMaxWavesPerEU();
     // Both counts are clamped below by 1 and by the subtarget's minimum for
     // MaxWaves.
1311 ProgInfo.NumSGPRsForWavesPerEU =
1312 AMDGPUMCExpr::createMax({ProgInfo.NumSGPR, CreateExpr(1ul),
1313 CreateExpr(STM.getMinNumSGPRs(MaxWaves))},
1314 Ctx);
1315 ProgInfo.NumVGPRsForWavesPerEU =
1316 AMDGPUMCExpr::createMax({ProgInfo.NumVGPR, CreateExpr(1ul),
1317 CreateExpr(STM.getMinNumVGPRs(
1318 MaxWaves, MFI->getDynamicVGPRBlockSize()))},
1319 Ctx);
1320
1322 STM.hasSGPRInitBug()) {
1323 unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
1324 uint64_t NumSgpr;
1325 if (TryGetMCExprValue(ProgInfo.NumSGPR, NumSgpr) &&
1326 NumSgpr > MaxAddressableNumSGPRs) {
1327 // This can happen due to a compiler bug or when using inline asm to use
1328 // the registers which are usually reserved for vcc etc.
1329 LLVMContext &Ctx = MF.getFunction().getContext();
1330 Ctx.diagnose(DiagnosticInfoResourceLimit(
1331 MF.getFunction(), "scalar registers", NumSgpr, MaxAddressableNumSGPRs,
1333 ProgInfo.NumSGPR = CreateExpr(MaxAddressableNumSGPRs);
1334 ProgInfo.NumSGPRsForWavesPerEU = CreateExpr(MaxAddressableNumSGPRs);
1335 }
1336 }
1337
1338 if (STM.hasSGPRInitBug()) {
1339 ProgInfo.NumSGPR =
1341 ProgInfo.NumSGPRsForWavesPerEU =
1343 }
1344
1345 if (MFI->getNumUserSGPRs() > STM.getMaxNumUserSGPRs()) {
1346 LLVMContext &Ctx = MF.getFunction().getContext();
1347 Ctx.diagnose(DiagnosticInfoResourceLimit(
1348 MF.getFunction(), "user SGPRs", MFI->getNumUserSGPRs(),
1350 }
1351
1352 if (MFI->getLDSSize() > STM.getAddressableLocalMemorySize()) {
1353 LLVMContext &Ctx = MF.getFunction().getContext();
1354 Ctx.diagnose(DiagnosticInfoResourceLimit(
1355 MF.getFunction(), "local memory", MFI->getLDSSize(),
1357 }
1358 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
1359 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
1360 auto GetNumGPRBlocks = [&CreateExpr, &Ctx](const MCExpr *NumGPR,
1361 unsigned Granule) {
1362 const MCExpr *OneConst = CreateExpr(1ul);
1363 const MCExpr *GranuleConst = CreateExpr(Granule);
1364 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
1365 const MCExpr *AlignToGPR =
1366 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
1367 const MCExpr *DivGPR =
1368 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
1369 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
1370 return SubGPR;
1371 };
1372 // GFX10+ will always allocate 128 SGPRs and this field must be 0
1374 ProgInfo.SGPRBlocks = CreateExpr(0ul);
1375 } else {
1376 ProgInfo.SGPRBlocks = GetNumGPRBlocks(ProgInfo.NumSGPRsForWavesPerEU,
1378 }
1379 ProgInfo.VGPRBlocks = GetNumGPRBlocks(ProgInfo.NumVGPRsForWavesPerEU,
1381
1382 const SIModeRegisterDefaults Mode = MFI->getMode();
1383
1384 // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
1385 // register.
1386 ProgInfo.FloatMode = getFPMode(Mode);
1387
1388 ProgInfo.IEEEMode = Mode.IEEE;
1389
1390 // Make the clamp modifier return 0 on NaN input.
1391 ProgInfo.DX10Clamp = Mode.DX10Clamp;
1392
     // Pick the shift that converts LDSSize into LDSBlocks below, based on the
     // subtarget's LDS dword granularity.
1393 unsigned LDSAlignShift = 8;
1394 switch (getLdsDwGranularity(STM)) {
1395 case 512:
1396 case 320:
1397 LDSAlignShift = 11;
1398 break;
1399 case 128:
1400 LDSAlignShift = 9;
1401 break;
1402 case 64:
1403 LDSAlignShift = 8;
1404 break;
1405 default:
1406 llvm_unreachable("invalid LDS block size");
1407 }
1408
1409 ProgInfo.SGPRSpill = MFI->getNumSpilledSGPRs();
1410 ProgInfo.VGPRSpill = MFI->getNumSpilledVGPRs();
1411
1412 ProgInfo.LDSSize = MFI->getLDSSize();
     // Round LDSSize up to a whole number of (1 << LDSAlignShift)-sized blocks.
1413 ProgInfo.LDSBlocks =
1414 alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
1415
1416 // The MCExpr equivalent of divideCeil.
1417 auto DivideCeil = [&Ctx](const MCExpr *Numerator, const MCExpr *Denominator) {
1418 const MCExpr *Ceil =
1419 AMDGPUMCExpr::createAlignTo(Numerator, Denominator, Ctx);
1420 return MCBinaryExpr::createDiv(Ceil, Denominator, Ctx);
1421 };
1422
1423 // Scratch is allocated in 64-dword or 256-dword blocks.
     // NOTE(review): presumably the shift is in bytes, i.e. 1 << 8 bytes is 64
     // dwords (GFX11+) and 1 << 10 bytes is 256 dwords (older) - confirm.
1424 unsigned ScratchAlignShift =
1425 STM.getGeneration() >= AMDGPUSubtarget::GFX11 ? 8 : 10;
1426 // We need to program the hardware with the amount of scratch memory that
1427 // is used by the entire wave. ProgInfo.ScratchSize is the amount of
1428 // scratch memory used per thread.
1429 ProgInfo.ScratchBlocks = DivideCeil(
1431 CreateExpr(STM.getWavefrontSize()), Ctx),
1432 CreateExpr(1ULL << ScratchAlignShift));
1433
1434 if (STM.supportsWGP()) {
1435 ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
1436 }
1437
1438 if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
1439 ProgInfo.MemOrdered = 1;
1440 ProgInfo.FwdProgress = !F.hasFnAttribute("amdgpu-no-fwd-progress");
1441 }
1442
1443 // 0 = X, 1 = XY, 2 = XYZ
1444 unsigned TIDIGCompCnt = 0;
1445 if (MFI->hasWorkItemIDZ())
1446 TIDIGCompCnt = 2;
1447 else if (MFI->hasWorkItemIDY())
1448 TIDIGCompCnt = 1;
1449
1450 // The private segment wave byte offset is the last of the system SGPRs. We
1451 // initially assumed it was allocated, and may have used it. It shouldn't harm
1452 // anything to disable it if we know the stack isn't used here. We may still
1453 // have emitted code reading it to initialize scratch, but if that's unused
1454 // reading garbage should be OK.
1457 MCConstantExpr::create(0, Ctx), Ctx),
1458 ProgInfo.DynamicCallStack, Ctx);
1459
1460 ProgInfo.UserSGPR = MFI->getNumUserSGPRs();
1461 // For AMDHSA, TRAP_HANDLER must be zero, as it is populated by the CP.
1462 ProgInfo.TrapHandlerEnable = STM.isAmdHsaOS() ? 0 : STM.hasTrapHandler();
1463 ProgInfo.TGIdXEnable = MFI->hasWorkGroupIDX();
1464 ProgInfo.TGIdYEnable = MFI->hasWorkGroupIDY();
1465 ProgInfo.TGIdZEnable = MFI->hasWorkGroupIDZ();
1466 ProgInfo.TGSizeEnable = MFI->hasWorkGroupInfo();
1467 ProgInfo.TIdIGCompCount = TIDIGCompCnt;
1468 ProgInfo.EXCPEnMSB = 0;
1469 // For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP.
1470 ProgInfo.LdsSize = STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks;
1471 ProgInfo.EXCPEnable = 0;
1472
     // On subtargets with GFX90A instructions, fold AccumOffset and TgSplit
     // into COMPUTE_PGM_RSRC3.
1473 if (STM.hasGFX90AInsts()) {
1474 ProgInfo.ComputePGMRSrc3 =
1475 setBits(ProgInfo.ComputePGMRSrc3, ProgInfo.AccumOffset,
1476 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
1477 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT, Ctx);
1478 ProgInfo.ComputePGMRSrc3 =
1479 setBits(ProgInfo.ComputePGMRSrc3, CreateExpr(ProgInfo.TgSplit),
1480 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
1481 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT, Ctx);
1482 }
1483
     // On subtargets with GFX1250 instructions, fold the named-barrier count
     // into COMPUTE_PGM_RSRC3.
1484 if (STM.hasGFX1250Insts())
1485 ProgInfo.ComputePGMRSrc3 =
1486 setBits(ProgInfo.ComputePGMRSrc3, ProgInfo.NamedBarCnt,
1487 amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
1488 amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT, Ctx);
1489
1490 ProgInfo.Occupancy = createOccupancy(
1491 STM.computeOccupancy(F, ProgInfo.LDSSize).second,
1493 MFI->getDynamicVGPRBlockSize(), STM, Ctx);
1494
     // Diagnose a missed 'amdgpu-waves-per-eu' minimum, but only when the
     // occupancy expression already evaluates to a constant.
1495 const auto [MinWEU, MaxWEU] =
1496 AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", {0, 0}, true);
1497 uint64_t Occupancy;
1498 if (TryGetMCExprValue(ProgInfo.Occupancy, Occupancy) && Occupancy < MinWEU) {
1499 DiagnosticInfoOptimizationFailure Diag(
1500 F, F.getSubprogram(),
1501 "failed to meet occupancy target given by 'amdgpu-waves-per-eu' in "
1502 "'" +
1503 F.getName() + "': desired occupancy was " + Twine(MinWEU) +
1504 ", final occupancy is " + Twine(Occupancy));
1505 F.getContext().diagnose(Diag);
1506 }
1507}
1508
// Returns the register id for the given calling convention; the only visible
// use of the result is EmitProgramInfoSI, which emits it as a 32-bit word
// ahead of the packed VGPR/SGPR block counts for non-compute shaders.
//
// NOTE(review): source lines 1513-1526, which hold the individual cases and
// their return values, are missing from this listing. Only the `default`
// label, which falls through into the first of those cases, is visible.
1509static unsigned getRsrcReg(CallingConv::ID CallConv) {
1510 switch (CallConv) {
1511 default:
1512 [[fallthrough]];
1527 }
1528}
1529
// Writes the program resource settings of MF to the output streamer as a
// sequence of 32-bit words. Values that are still unresolved MCExprs are
// emitted as expressions instead of constants.
//
// NOTE(review): the visible emitInt32 calls look like register-id/value pairs
// (e.g. R_SPILLED_SGPRS followed by the spill count); several of the id words
// are on source lines missing from this listing (1554, 1559, 1563, 1591, 1611,
// 1616, 1618) - confirm against the full source.
1530void AMDGPUAsmPrinter::EmitProgramInfoSI(
1531 const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo) {
1532 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1533 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
1534 unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
1535 MCContext &Ctx = MF.getContext();
1536
1537 // (((Value) & Mask) << Shift)
1538 auto SetBits = [&Ctx](const MCExpr *Value, uint32_t Mask, uint32_t Shift) {
1539 const MCExpr *msk = MCConstantExpr::create(Mask, Ctx);
1540 const MCExpr *shft = MCConstantExpr::create(Shift, Ctx);
1542 shft, Ctx);
1543 };
1544
     // Emits Value as a plain integer of Size bytes when it already evaluates
     // to an absolute constant, and as an expression otherwise.
1545 auto EmitResolvedOrExpr = [this](const MCExpr *Value, unsigned Size) {
1546 int64_t Val;
1547 if (Value->evaluateAsAbsolute(Val))
1548 OutStreamer->emitIntValue(static_cast<uint64_t>(Val), Size);
1549 else
1550 OutStreamer->emitValue(Value, Size);
1551 };
1552
1553 if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
1555
1556 EmitResolvedOrExpr(CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx),
1557 /*Size=*/4);
1558
1560 EmitResolvedOrExpr(CurrentProgramInfo.getComputePGMRSrc2(STM, Ctx),
1561 /*Size=*/4);
1562
1564
1565 // Sets bits according to S_0286E8_WAVESIZE_* mask and shift values for the
1566 // appropriate generation.
     // The ScratchBlocks field always starts at bit 12; its width is 18 bits on
     // GFX12+, 15 bits on GFX11 and 13 bits otherwise.
1567 if (STM.getGeneration() >= AMDGPUSubtarget::GFX12) {
1568 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1569 /*Mask=*/0x3FFFF, /*Shift=*/12),
1570 /*Size=*/4);
1571 } else if (STM.getGeneration() == AMDGPUSubtarget::GFX11) {
1572 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1573 /*Mask=*/0x7FFF, /*Shift=*/12),
1574 /*Size=*/4);
1575 } else {
1576 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1577 /*Mask=*/0x1FFF, /*Shift=*/12),
1578 /*Size=*/4);
1579 }
1580
1581 // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
1582 // 0" comment but I don't see a corresponding field in the register spec.
1583 } else {
1584 OutStreamer->emitInt32(RsrcReg);
1585
     // Pack VGPRBlocks into bits 5..0 and SGPRBlocks into bits 9..6.
1586 const MCExpr *GPRBlocks = MCBinaryExpr::createOr(
1587 SetBits(CurrentProgramInfo.VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0),
1588 SetBits(CurrentProgramInfo.SGPRBlocks, /*Mask=*/0x0F, /*Shift=*/6),
1589 MF.getContext());
1590 EmitResolvedOrExpr(GPRBlocks, /*Size=*/4);
1592
1593 // Sets bits according to S_0286E8_WAVESIZE_* mask and shift values for the
1594 // appropriate generation.
     // Same generation-dependent ScratchBlocks encoding as in the compute
     // branch.
1595 if (STM.getGeneration() >= AMDGPUSubtarget::GFX12) {
1596 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1597 /*Mask=*/0x3FFFF, /*Shift=*/12),
1598 /*Size=*/4);
1599 } else if (STM.getGeneration() == AMDGPUSubtarget::GFX11) {
1600 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1601 /*Mask=*/0x7FFF, /*Shift=*/12),
1602 /*Size=*/4);
1603 } else {
1604 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1605 /*Mask=*/0x1FFF, /*Shift=*/12),
1606 /*Size=*/4);
1607 }
1608 }
1609
     // Pixel shaders additionally emit the extra LDS size and the PS input
     // enable/address words.
1610 if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
     // On GFX11+ the value is half the LDS block count, rounded up.
1612 unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11
1613 ? divideCeil(CurrentProgramInfo.LDSBlocks, 2)
1614 : CurrentProgramInfo.LDSBlocks;
1615 OutStreamer->emitInt32(S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize));
1617 OutStreamer->emitInt32(MFI->getPSInputEnable());
1619 OutStreamer->emitInt32(MFI->getPSInputAddr());
1620 }
1621
     // Spill counts are emitted for every calling convention.
1622 OutStreamer->emitInt32(R_SPILLED_SGPRS);
1623 OutStreamer->emitInt32(MFI->getNumSpilledSGPRs());
1624 OutStreamer->emitInt32(R_SPILLED_VGPRS);
1625 OutStreamer->emitInt32(MFI->getNumSpilledVGPRs());
1626}
1627
1628// Helper function to add common PAL Metadata 3.0+
//
// Records, for calling convention CC, the hardware-stage fields taken from
// CurrentProgramInfo on MD: the mode flags, the compute-only trap/exception
// fields and the LDS size in bytes.
//
// NOTE(review): source lines 1629 (first line of the signature) and 1649
// (start of the final call) are missing from this listing.
1630 const SIProgramInfo &CurrentProgramInfo,
1631 CallingConv::ID CC, const GCNSubtarget &ST,
1632 unsigned DynamicVGPRBlockSize) {
     // .ieee_mode is only recorded when the subtarget has the
     // DX10Clamp/IEEE-mode feature.
1633 if (ST.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
1634 MD->setHwStage(CC, ".ieee_mode", (bool)CurrentProgramInfo.IEEEMode);
1635
1636 MD->setHwStage(CC, ".wgp_mode", (bool)CurrentProgramInfo.WgpMode);
1637 MD->setHwStage(CC, ".mem_ordered", (bool)CurrentProgramInfo.MemOrdered);
1638 MD->setHwStage(CC, ".forward_progress", (bool)CurrentProgramInfo.FwdProgress);
1639
1640 if (AMDGPU::isCompute(CC)) {
1641 MD->setHwStage(CC, ".trap_present",
1642 (bool)CurrentProgramInfo.TrapHandlerEnable);
1643 MD->setHwStage(CC, ".excp_en", CurrentProgramInfo.EXCPEnable);
1644
     // A non-zero block size means dynamic VGPRs are in use.
1645 if (DynamicVGPRBlockSize != 0)
1646 MD->setComputeRegisters(".dynamic_vgpr_en", true);
1647 }
1648
     // Convert LdsSize from allocation granules to bytes:
     // granules * dwords-per-granule * sizeof(dword).
1650 CC, ".lds_size",
1651 (unsigned)(CurrentProgramInfo.LdsSize * getLdsDwGranularity(ST) *
1652 sizeof(uint32_t)));
1653}
1654
1655// This is the equivalent of EmitProgramInfoSI above, but for when the OS type
1656// is AMDPAL. It stores each compute/SPI register setting and other PAL
1657// metadata items into the PALMD::Metadata, combining with any provided by the
1658// frontend as LLVM metadata. Once all functions are written, the PAL metadata
1659// is then written as a single block in the .note section.
//
// For PAL metadata major versions below 3 the settings are recorded as RSRC
// register values; from version 3 on they are recorded as named fields.
//
// NOTE(review): source lines 1674, 1676, 1700 and 1717 are missing from this
// listing, so a few calls appear without all of their arguments.
1660void AMDGPUAsmPrinter::EmitPALMetadata(
1661 const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo) {
1662 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1663 auto CC = MF.getFunction().getCallingConv();
1664 auto *MD = getTargetStreamer()->getPALMetadata();
1665 auto &Ctx = MF.getContext();
1666
1667 MD->setEntryPoint(CC, MF.getFunction().getName());
1668 MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx);
1669
1670 // For targets that support dynamic VGPRs, set the number of saved dynamic
1671 // VGPRs (if any) in the PAL metadata.
1672 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
1673 if (MFI->isDynamicVGPREnabled() &&
1675 MD->setHwStage(CC, ".dynamic_vgpr_saved_count",
1677
1678 // Only set AGPRs for supported devices
1679 if (STM.hasMAIInsts()) {
1680 MD->setNumUsedAgprs(CC, CurrentProgramInfo.NumAccVGPR);
1681 }
1682
1683 MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx);
1684 if (MD->getPALMajorVersion() < 3) {
1685 MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC, STM, Ctx), Ctx);
1686 if (AMDGPU::isCompute(CC)) {
1687 MD->setRsrc2(CC, CurrentProgramInfo.getComputePGMRSrc2(STM, Ctx), Ctx);
1688 } else {
     // Non-compute shaders only set the SCRATCH_EN bit of RSRC2, enabled when
     // ScratchBlocks is greater than zero.
1689 const MCExpr *HasScratchBlocks =
1690 MCBinaryExpr::createGT(CurrentProgramInfo.ScratchBlocks,
1691 MCConstantExpr::create(0, Ctx), Ctx);
1692 auto [Shift, Mask] = getShiftMask(C_00B84C_SCRATCH_EN);
1693 MD->setRsrc2(CC, maskShiftSet(HasScratchBlocks, Mask, Shift, Ctx), Ctx);
1694 }
1695 } else {
1696 MD->setHwStage(CC, ".debug_mode", (bool)CurrentProgramInfo.DebugMode);
1697 MD->setHwStage(CC, ".scratch_en", msgpack::Type::Boolean,
1698 CurrentProgramInfo.ScratchEnable);
1699 EmitPALMetadataCommon(MD, CurrentProgramInfo, CC, STM,
1701 }
1702
1703 // ScratchSize is in bytes, 16 aligned.
1704 MD->setScratchSize(
1705 CC,
1706 AMDGPUMCExpr::createAlignTo(CurrentProgramInfo.ScratchSize,
1707 MCConstantExpr::create(16, Ctx), Ctx),
1708 Ctx);
1709
1710 if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
     // On GFX11+ the extra LDS size is half the LDS block count, rounded up.
1711 unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11
1712 ? divideCeil(CurrentProgramInfo.LDSBlocks, 2)
1713 : CurrentProgramInfo.LDSBlocks;
1714 if (MD->getPALMajorVersion() < 3) {
1715 MD->setRsrc2(
1716 CC,
1718 Ctx);
1719 MD->setSpiPsInputEna(MFI->getPSInputEnable());
1720 MD->setSpiPsInputAddr(MFI->getPSInputAddr());
1721 } else {
1722 // Graphics registers
1723 const unsigned ExtraLdsDwGranularity =
1724 STM.getGeneration() >= AMDGPUSubtarget::GFX11 ? 256 : 128;
     // Convert the extra LDS size from granules to bytes.
1725 MD->setGraphicsRegisters(
1726 ".ps_extra_lds_size",
1727 (unsigned)(ExtraLDSSize * ExtraLdsDwGranularity * sizeof(uint32_t)));
1728
1729 // Set PsInputEna and PsInputAddr .spi_ps_input_ena and .spi_ps_input_addr
     // Bit Idx of each mask corresponds to the field name PsInputFields[Idx].
1730 static StringLiteral const PsInputFields[] = {
1731 ".persp_sample_ena", ".persp_center_ena",
1732 ".persp_centroid_ena", ".persp_pull_model_ena",
1733 ".linear_sample_ena", ".linear_center_ena",
1734 ".linear_centroid_ena", ".line_stipple_tex_ena",
1735 ".pos_x_float_ena", ".pos_y_float_ena",
1736 ".pos_z_float_ena", ".pos_w_float_ena",
1737 ".front_face_ena", ".ancillary_ena",
1738 ".sample_coverage_ena", ".pos_fixed_pt_ena"};
1739 unsigned PSInputEna = MFI->getPSInputEnable();
1740 unsigned PSInputAddr = MFI->getPSInputAddr();
1741 for (auto [Idx, Field] : enumerate(PsInputFields)) {
1742 MD->setGraphicsRegisters(".spi_ps_input_ena", Field,
1743 (bool)((PSInputEna >> Idx) & 1));
1744 MD->setGraphicsRegisters(".spi_ps_input_addr", Field,
1745 (bool)((PSInputAddr >> Idx) & 1));
1746 }
1747 }
1748 }
1749
1750 // For version 3 and above the wave front size is already set in the metadata
1751 if (MD->getPALMajorVersion() < 3 && STM.isWave32())
1752 MD->setWave32(MF.getFunction().getCallingConv());
1753}
1754
// Records per-function PAL metadata for MF, keyed by the function name: its
// stack size as scratch size, the compute RSRC/hardware-stage settings (always
// under CallingConv::AMDGPU_CS), and its LDS size and VGPR/SGPR counts.
//
// NOTE(review): source lines 1766 and 1771 are missing from this listing.
1755void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
1756 auto *MD = getTargetStreamer()->getPALMetadata();
1757 const MachineFrameInfo &MFI = MF.getFrameInfo();
1758 StringRef FnName = MF.getFunction().getName();
1759 MD->setFunctionScratchSize(FnName, MFI.getStackSize());
1760 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1761 MCContext &Ctx = MF.getContext();
1762
     // Versions below 3 take raw RSRC1/RSRC2 values; newer versions use the
     // named fields (the arguments on lines 1772-1773 match
     // EmitPALMetadataCommon's parameters).
1763 if (MD->getPALMajorVersion() < 3) {
1764 // Set compute registers
1765 MD->setRsrc1(
1767 CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS, ST, Ctx), Ctx);
1768 MD->setRsrc2(CallingConv::AMDGPU_CS,
1769 CurrentProgramInfo.getComputePGMRSrc2(ST, Ctx), Ctx);
1770 } else {
1772 MD, CurrentProgramInfo, CallingConv::AMDGPU_CS, ST,
1773 MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize());
1774 }
1775
1776 // Set optional info
1777 MD->setFunctionLdsSize(FnName, CurrentProgramInfo.LDSSize);
1778 MD->setFunctionNumUsedVgprs(FnName, CurrentProgramInfo.NumVGPRsForWavesPerEU);
1779 MD->setFunctionNumUsedSgprs(FnName, CurrentProgramInfo.NumSGPRsForWavesPerEU);
1780}
1781
1782// This is supposed to be log2(Size)
//
// Maps a private element size of 4, 8 or 16 bytes to the matching
// AMD_ELEMENT_*_BYTES enumerator; any other size is treated as unreachable.
//
// NOTE(review): source line 1783 (the function signature) is missing from this
// listing.
1784 switch (Size) {
1785 case 4:
1786 return AMD_ELEMENT_4_BYTES;
1787 case 8:
1788 return AMD_ELEMENT_8_BYTES;
1789 case 16:
1790 return AMD_ELEMENT_16_BYTES;
1791 default:
1792 llvm_unreachable("invalid private_element_size");
1793 }
1794}
1795
// Populates Out with the kernel code properties of MF, taken from
// CurrentProgramInfo, the subtarget and the function's user-SGPR info. The
// function must use the AMDGPU_KERNEL or SPIR_KERNEL calling convention
// (asserted).
//
// NOTE(review): many source lines are missing from this listing (for example
// 1809, 1811, 1817 and the bodies of the UserSGPRInfo checks), so the
// statements guarded by the `if`s below are not visible here.
1796void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out,
1797 const SIProgramInfo &CurrentProgramInfo,
1798 const MachineFunction &MF) const {
1799 const Function &F = MF.getFunction();
1800 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
1801 F.getCallingConv() == CallingConv::SPIR_KERNEL);
1802
1803 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1804 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
1805 MCContext &Ctx = MF.getContext();
1806
     // Start from the subtarget defaults before overriding individual fields.
1807 Out.initDefault(STM, Ctx, /*InitMCExpr=*/false);
1808
1810 CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx);
1812 CurrentProgramInfo.getComputePGMRSrc2(STM, Ctx);
1814
1815 Out.is_dynamic_callstack = CurrentProgramInfo.DynamicCallStack;
1816
1818 getElementByteSizeValue(STM.getMaxPrivateElementSize(true)));
1819
     // Each check below guards a statement that is not visible in this
     // listing.
1820 const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI->getUserSGPRInfo();
1821 if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
1823 }
1824
1825 if (UserSGPRInfo.hasDispatchPtr())
1827
1828 if (UserSGPRInfo.hasQueuePtr())
1830
1831 if (UserSGPRInfo.hasKernargSegmentPtr())
1833
1834 if (UserSGPRInfo.hasDispatchID())
1836
1837 if (UserSGPRInfo.hasFlatScratchInit())
1839
1840 if (UserSGPRInfo.hasPrivateSegmentSize())
1842
1843 if (STM.isXNACKEnabled())
1845
     // getKernArgSegmentSize also reports the maximum kernel-argument alignment
     // through MaxKernArgAlign, which is used below.
1846 Align MaxKernArgAlign;
1847 Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
1848 Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
1849 Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
1850 Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
1851 Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
1852
1853 // kernarg_segment_alignment is specified as log of the alignment.
1854 // The minimum alignment is 16.
1855 // FIXME: The metadata treats the minimum as 4?
1856 Out.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
1857}
1858
1860 const char *ExtraCode, raw_ostream &O) {
     // Prints inline-asm operand OpNo of MI to O. Returns false when the
     // operand was printed and true when it could not be handled (unknown
     // modifier or unsupported operand kind).
     //
     // NOTE(review): source lines 1859 (first line of the signature), 1880 and
     // 1886 are missing from this listing.
1861 // First try the generic code, which knows about modifiers like 'c' and 'n'.
1862 if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O))
1863 return false;
1864
     // Beyond what the generic code handles, only the single-character
     // modifier 'r' is accepted.
1865 if (ExtraCode && ExtraCode[0]) {
1866 if (ExtraCode[1] != 0)
1867 return true; // Unknown modifier.
1868
1869 switch (ExtraCode[0]) {
1870 case 'r':
1871 break;
1872 default:
1873 return true;
1874 }
1875 }
1876
1877 // TODO: Should be able to support other operand types like globals.
1878 const MachineOperand &MO = MI->getOperand(OpNo);
1879 if (MO.isReg()) {
1881 *MF->getSubtarget().getRegisterInfo());
1882 return false;
1883 }
1884 if (MO.isImm()) {
1885 int64_t Val = MO.getImm();
     // In the visible branches, an immediate is printed in hex at the narrowest
     // of 16, 32 or 64 bits that holds it as an unsigned value; the first
     // branch prints it in decimal.
1887 O << Val;
1888 } else if (isUInt<16>(Val)) {
1889 O << format("0x%" PRIx16, static_cast<uint16_t>(Val));
1890 } else if (isUInt<32>(Val)) {
1891 O << format("0x%" PRIx32, static_cast<uint32_t>(Val));
1892 } else {
1893 O << format("0x%" PRIx64, static_cast<uint64_t>(Val));
1894 }
1895 return false;
1896 }
     // Neither a register nor an immediate: not supported.
1897 return true;
1898}
1899
1907
// Emits the "kernel-resource-usage" analysis remarks for MF: one remark per
// resource (SGPRs, VGPRs, optionally AGPRs, scratch size, dynamic stack,
// occupancy, spills and, for module entry functions, LDS size), preceded by a
// remark carrying the function name. Does nothing when there is no remark
// emitter or the remark is not enabled.
//
// NOTE(review): source lines 1918, 1923 and 1937 are missing from this
// listing (the declaration of Ctx, the condition guarding the second early
// return, and part of the remark constructor call).
1908void AMDGPUAsmPrinter::emitResourceUsageRemarks(
1909 const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo,
1910 bool isModuleEntryFunction, bool hasMAIInsts) {
1911 if (!ORE)
1912 return;
1913
1914 const char *Name = "kernel-resource-usage";
1915 const char *Indent = " ";
1916
1917 // If the remark is not specifically enabled, do not output to yaml
1919 if (!Ctx.getDiagHandlerPtr()->isAnalysisRemarkEnabled(Name))
1920 return;
1921
1922 // Currently non-kernel functions have no resources to emit.
1924 return;
1925
     // Emits one remark named RemarkName whose text is "<RemarkLabel>: "
     // followed by Argument.
1926 auto EmitResourceUsageRemark = [&](StringRef RemarkName,
1927 StringRef RemarkLabel, auto Argument) {
1928 // Add an indent for every line besides the line with the kernel name. This
1929 // makes it easier to tell which resource usage go with which kernel since
1930 // the kernel name will always be displayed first.
1931 std::string LabelStr = RemarkLabel.str() + ": ";
1932 if (RemarkName != "FunctionName")
1933 LabelStr = Indent + LabelStr;
1934
1935 ORE->emit([&]() {
1936 return MachineOptimizationRemarkAnalysis(Name, RemarkName,
1938 &MF.front())
1939 << LabelStr << ore::NV(RemarkName, Argument);
1940 });
1941 };
1942
1943 // FIXME: Formatting here is pretty nasty because clang does not accept
1944 // newlines from diagnostics. This forces us to emit multiple diagnostic
1945 // remarks to simulate newlines. If and when clang does accept newlines, this
1946 // formatting should be aggregated into one remark with newlines to avoid
1947 // printing multiple diagnostic location and diag opts.
1948 EmitResourceUsageRemark("FunctionName", "Function Name",
1949 MF.getFunction().getName());
1950 EmitResourceUsageRemark("NumSGPR", "TotalSGPRs",
1951 getMCExprStr(CurrentProgramInfo.NumSGPR));
1952 EmitResourceUsageRemark("NumVGPR", "VGPRs",
1953 getMCExprStr(CurrentProgramInfo.NumArchVGPR));
1954 if (hasMAIInsts) {
1955 EmitResourceUsageRemark("NumAGPR", "AGPRs",
1956 getMCExprStr(CurrentProgramInfo.NumAccVGPR));
1957 }
1958 EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]",
1959 getMCExprStr(CurrentProgramInfo.ScratchSize));
     // "True" is reported only when the expression evaluates to a non-zero
     // constant; an expression that cannot be evaluated is reported as "False".
1960 int64_t DynStack;
1961 bool DynStackEvaluatable =
1962 CurrentProgramInfo.DynamicCallStack->evaluateAsAbsolute(DynStack);
1963 StringRef DynamicStackStr =
1964 DynStackEvaluatable && DynStack ? "True" : "False";
1965 EmitResourceUsageRemark("DynamicStack", "Dynamic Stack", DynamicStackStr);
1966 EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]",
1967 getMCExprStr(CurrentProgramInfo.Occupancy));
1968 EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill",
1969 CurrentProgramInfo.SGPRSpill);
1970 EmitResourceUsageRemark("VGPRSpill", "VGPRs Spill",
1971 CurrentProgramInfo.VGPRSpill);
     // The LDS size is only reported for module entry functions.
1972 if (isModuleEntryFunction)
1973 EmitResourceUsageRemark("BytesLDS", "LDS Size [bytes/block]",
1974 CurrentProgramInfo.LDSSize);
1975}
1976
// Definition of the pass's static ID member, followed by registration of the
// pass under the command-line name "amdgpu-asm-printer".
1977char AMDGPUAsmPrinter::ID = 0;
1978
1979INITIALIZE_PASS(AMDGPUAsmPrinter, "amdgpu-asm-printer",
1980 "AMDGPU Assembly Printer", false, false)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD, const SIProgramInfo &CurrentProgramInfo, CallingConv::ID CC, const GCNSubtarget &ST, unsigned DynamicVGPRBlockSize)
const AMDGPUMCExpr * createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs, const MCExpr *NumVGPRs, unsigned DynamicVGPRBlockSize, const GCNSubtarget &STM, MCContext &Ctx)
Mimics GCNSubtarget::computeOccupancy for MCExpr.
static unsigned getRsrcReg(CallingConv::ID CallConv)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmPrinter()
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)
static const MCExpr * setBits(const MCExpr *Dst, const MCExpr *Value, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
Set bits in a kernel descriptor MCExpr field: return ((Dst & ~Mask) | (Value << Shift))
static uint32_t getFPMode(SIModeRegisterDefaults Mode)
static std::string computeTypeId(const FunctionType *FTy, const DataLayout &DL)
static const MCExpr * computeAccumOffset(const MCExpr *NumVGPR, MCContext &Ctx)
static void appendTypeEncoding(std::string &Enc, Type *Ty, const DataLayout &DL, bool IsReturnType)
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
AMDGPU Assembly printer class.
AMDGPU HSA Metadata Streamer.
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
MC infrastructure to propagate the function level resource usage info.
Analyzes how many registers and other resources are used by functions.
The AMDGPU TargetMachine interface definition for hw codegen targets.
AMDHSA kernel descriptor definitions.
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
amd_element_byte_size_t
The values used to define the number of bytes to use for the swizzle element size.
@ AMD_ELEMENT_8_BYTES
@ AMD_ELEMENT_16_BYTES
@ AMD_ELEMENT_4_BYTES
#define AMD_HSA_BITS_SET(dst, mask, val)
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
@ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE
@ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE
@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
@ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED
@ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
@ AMD_CODE_PROPERTY_IS_PTR64
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
OptimizedStructLayoutField Field
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
R600 Assembly printer class.
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
Definition SIDefines.h:1144
#define R_0286E8_SPI_TMPRING_SIZE
Definition SIDefines.h:1286
#define FP_ROUND_MODE_DP(x)
Definition SIDefines.h:1268
#define C_00B84C_SCRATCH_EN
Definition SIDefines.h:1180
#define FP_ROUND_ROUND_TO_NEAREST
Definition SIDefines.h:1260
#define R_0286D0_SPI_PS_INPUT_ADDR
Definition SIDefines.h:1219
#define R_00B860_COMPUTE_TMPRING_SIZE
Definition SIDefines.h:1281
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS
Definition SIDefines.h:1167
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES
Definition SIDefines.h:1166
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS
Definition SIDefines.h:1175
#define R_0286CC_SPI_PS_INPUT_ENA
Definition SIDefines.h:1218
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
Definition SIDefines.h:1153
#define FP_DENORM_MODE_DP(x)
Definition SIDefines.h:1279
#define R_00B848_COMPUTE_PGM_RSRC1
Definition SIDefines.h:1221
#define R_SPILLED_SGPRS
Definition SIDefines.h:1300
#define FP_ROUND_MODE_SP(x)
Definition SIDefines.h:1267
#define FP_DENORM_MODE_SP(x)
Definition SIDefines.h:1278
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
Definition SIDefines.h:1158
#define R_SPILLED_VGPRS
Definition SIDefines.h:1301
#define S_00B02C_EXTRA_LDS_SIZE(x)
Definition SIDefines.h:1152
#define R_00B84C_COMPUTE_PGM_RSRC2
Definition SIDefines.h:1177
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS
Definition SIDefines.h:1151
StringSet - A set-like wrapper for the StringMap.
static const int BlockSize
Definition TarWriter.cpp:33
static cl::opt< unsigned > CacheLineSize("cache-line-size", cl::init(0), cl::Hidden, cl::desc("Use this to override the target cache line size when " "specified by the user."))
void emitFunctionEntryLabel() override
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
const MCSubtargetInfo * getGlobalSTI() const
void emitImplicitDef(const MachineInstr *MI) const override
Targets can override this to customize the output of IMPLICIT_DEF instructions in verbose mode.
std::vector< std::string > DisasmLines
void emitStartOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the start of their fi...
void endFunction(const MachineFunction *MF)
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
std::vector< std::string > HexLines
void emitGlobalVariable(const GlobalVariable *GV) override
Emit the specified global variable to the .s file.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool doFinalization(Module &M) override
doFinalization - Virtual method overridden by subclasses to do any necessary clean up after all passes...
void emitEndOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the end of their file...
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
bool doInitialization(Module &M) override
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
void emitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
void emitBasicBlockStart(const MachineBasicBlock &MBB) override
Targets can override this to emit stuff at the start of a basic block.
AMDGPUTargetStreamer * getTargetStreamer() const
static void printRegOperand(MCRegister Reg, raw_ostream &O, const MCRegisterInfo &MRI)
AMDGPU target specific MCExpr operations.
static const AMDGPUMCExpr * createInstPrefSize(const MCExpr *CodeSizeBytes, MCContext &Ctx)
Create an expression for instruction prefetch size computation: min(divideCeil(CodeSizeBytes,...
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createTotalNumVGPR(const MCExpr *NumAGPR, const MCExpr *NumVGPR, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
void setHwStage(unsigned CC, StringRef field, unsigned Val)
void updateHwStageMaximum(unsigned CC, StringRef field, unsigned Val)
void setComputeRegisters(StringRef field, unsigned Val)
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes(uint32_t LDSBytes, const Function &F) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
unsigned getAddressableLocalMemorySize() const
Return the maximum number of bytes of LDS that can be allocated to a single workgroup.
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
unsigned getWavefrontSize() const
virtual void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr)
virtual void emitAMDGPUInfo(const AMDGPU::InfoSectionData &Data)
AMDGPUPALMetadata * getPALMetadata()
virtual void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV)
void initializeTargetID(const MCSubtargetInfo &STI)
virtual void EmitMCResourceInfo(const MCSymbol *NumVGPR, const MCSymbol *NumAGPR, const MCSymbol *NumExplicitSGPR, const MCSymbol *NumNamedBarrier, const MCSymbol *PrivateSegmentSize, const MCSymbol *UsesVCC, const MCSymbol *UsesFlatScratch, const MCSymbol *HasDynamicallySizedStack, const MCSymbol *HasRecursion, const MCSymbol *HasIndirectCall)
virtual bool EmitCodeEnd(const MCSubtargetInfo &STI)
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type)
virtual void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header)
const std::optional< AMDGPU::IsaInfo::AMDGPUTargetID > & getTargetID() const
virtual void EmitMCResourceMaximums(const MCSymbol *MaxVGPR, const MCSymbol *MaxAGPR, const MCSymbol *MaxSGPR, const MCSymbol *MaxNamedBarrier)
void setXnackSetting(TargetIDSetting NewXnackSetting)
Sets xnack setting to NewXnackSetting.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
Collects and handles AsmPrinter objects required to build debug or EH information.
This class is intended to be used as a driving class for all asm writers.
Definition AsmPrinter.h:91
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
MCSymbol * getSymbol(const GlobalValue *GV) const
virtual void emitGlobalVariable(const GlobalVariable *GV)
Emit the specified global variable to the .s file.
TargetMachine & TM
Target machine description.
Definition AsmPrinter.h:94
MachineFunction * MF
The current machine function.
Definition AsmPrinter.h:109
virtual void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
void emitFunctionBody()
This method emits the body and trailer for a function.
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
virtual void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const
This emits linkage information about GVSym based on GV, if this is supported by the target.
void getAnalysisUsage(AnalysisUsage &AU) const override
Record analysis usage.
unsigned getFunctionNumber() const
Return a unique ID for the current function.
MachineOptimizationRemarkEmitter * ORE
Optimization remark emitter.
Definition AsmPrinter.h:121
AsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer, char &ID=AsmPrinter::ID)
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition AsmPrinter.h:128
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
Definition AsmPrinter.h:112
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition AsmPrinter.h:101
bool doFinalization(Module &M) override
Shut down the asmprinter.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
void emitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition=true) const
This emits visibility information about symbol, if this is supported by the target.
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition AsmPrinter.h:106
const MCAsmInfo & MAI
Target Asm Printer information.
Definition AsmPrinter.h:97
bool isVerbose() const
Return true if assembly output should contain comments.
Definition AsmPrinter.h:310
MCSymbol * getFunctionEnd() const
Definition AsmPrinter.h:320
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
virtual void emitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
void addAsmPrinterHandler(std::unique_ptr< AsmPrinterHandler > Handler)
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
bool empty() const
Definition DenseMap.h:109
DISubprogram * getSubprogram() const
Get the attached subprogram.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:272
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getMinNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
bool isTgSplitEnabled() const
bool hasInstPrefSize() const
bool isCuModeEnabled() const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
std::pair< unsigned, unsigned > computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
bool isWave32() const
bool supportsWGP() const
void getInstPrefSizeArgs(uint32_t &Mask, uint32_t &Shift, uint32_t &Width, uint32_t &CacheLineSize) const
unsigned getMaxNumUserSGPRs() const
Generation getGeneration() const
unsigned getAddressableNumSGPRs() const
unsigned getMaxWaveScratchSize() const
bool hasPrivateSegmentBuffer() const
VisibilityTypes getVisibility() const
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:337
unsigned getAddressSpace() const
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:141
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
MaybeAlign getAlign() const
Returns the alignment of the given variable.
LLVM_ABI uint64_t getGlobalSize(const DataLayout &DL) const
Get the size of this global variable in bytes.
Definition Globals.cpp:569
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
MCCodeEmitter * getEmitterPtr() const
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:348
static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:408
static const MCBinaryExpr * createLOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:378
static const MCBinaryExpr * createMul(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:398
static const MCBinaryExpr * createGT(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:363
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:353
static const MCBinaryExpr * createShl(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:413
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
const MCObjectFileInfo * getObjectFileInfo() const
Definition MCContext.h:413
LLVM_ABI void reportError(SMLoc L, const Twine &Msg)
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
LLVM_ABI bool evaluateAsRelocatable(MCValue &Res, const MCAssembler *Asm) const
Try to evaluate the expression to a relocatable value, i.e.
Definition MCExpr.cpp:450
MCSection * getReadOnlySection() const
MCSection * getTextSection() const
MCContext & getContext() const
This represents a section on linux, lots of unix variants and some bare metal systems.
Instances of this class represent a uniqued identifier for a section in the current translation unit.
Definition MCSection.h:573
void ensureMinAlignment(Align MinAlignment)
Makes sure that Alignment is at least MinAlignment.
Definition MCSection.h:661
bool hasInstructions() const
Definition MCSection.h:669
MCContext & getContext() const
Definition MCStreamer.h:323
Generic base class for all target subtargets.
const Triple & getTargetTriple() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isDefined() const
isDefined - Check if this symbol is defined (i.e., it has an address).
Definition MCSymbol.h:233
StringRef getName() const
getName - Get the symbol name.
Definition MCSymbol.h:188
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
void redefineIfPossible()
Prepare this symbol to be redefined.
Definition MCSymbol.h:212
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCStreamer & getStreamer()
Definition MCStreamer.h:103
static const MCUnaryExpr * createNot(const MCExpr *Expr, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:273
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
MCContext & getContext() const
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
Diagnostic information for optimization analysis remarks.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Emit an optimization remark.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
LLVM_ABI unsigned getNumOperands() const
iterator_range< op_iterator > operands()
Definition Metadata.h:1856
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
GCNUserSGPRUsageInfo & getUserSGPRInfo()
SIModeRegisterDefaults getMode() const
unsigned getScratchReservedForDynamicVGPRs() const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void push_back(const T &Elt)
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
OSType getOS() const
Get the parsed operating system type of this triple.
Definition Triple.h:445
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI)
unsigned getTotalNumVGPRs(const MCSubtargetInfo &STI)
unsigned getMaxWavesPerEU(const MCSubtargetInfo &STI)
unsigned getNumExtraSGPRs(const MCSubtargetInfo &STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getVGPRAllocGranule(const MCSubtargetInfo &STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
void printAMDGPUMCExpr(const MCExpr *Expr, raw_ostream &OS, const MCAsmInfo *MAI)
LLVM_READNONE constexpr bool isModuleEntryFunctionCC(CallingConv::ID CC)
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
const MCExpr * maskShiftSet(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
Provided with the MCExpr * Val, uint32 Mask and Shift, will return the masked and left shifted,...
unsigned getAMDHSACodeObjectVersion(const Module &M)
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READNONE constexpr bool isKernel(CallingConv::ID CC)
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
LLVM_READNONE constexpr bool isCompute(CallingConv::ID CC)
bool hasMAIInsts(const MCSubtargetInfo &STI)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
const MCExpr * foldAMDGPUMCExpr(const MCExpr *Expr, MCContext &Ctx)
bool isGFX10Plus(const MCSubtargetInfo &STI)
constexpr std::pair< unsigned, unsigned > getShiftMask(unsigned Value)
Deduce the least significant bit aligned shift and mask values for a binary Complement Value (as they...
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ SHT_PROGBITS
Definition ELF.h:1150
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1433
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
Target & getTheR600Target()
The target for R600 GPUs.
@ DK_ResourceLimit
AsmPrinter * createR600AsmPrinterPass(TargetMachine &TM, std::unique_ptr< MCStreamer > &&Streamer)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129
@ Success
The lock was released successfully.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
Target & getTheGCNTarget()
The target for GCN GPUs.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1916
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:874
#define N
AMDGPUResourceUsageAnalysisImpl::SIFunctionResourceInfo FunctionResourceInfo
void initDefault(const MCSubtargetInfo &STI, MCContext &Ctx, bool InitMCExpr=true)
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
static const MCExpr * bits_get(const MCExpr *Src, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Track resource usage for kernels / entry functions.
const MCExpr * NumSGPR
const MCExpr * NumArchVGPR
const MCExpr * VGPRBlocks
const MCExpr * ScratchBlocks
const MCExpr * ComputePGMRSrc3
const MCExpr * getComputePGMRSrc1(const GCNSubtarget &ST, MCContext &Ctx) const
Compute the value of the ComputePGMRsrc1 register.
const MCExpr * VCCUsed
const MCExpr * FlatUsed
const MCExpr * NamedBarCnt
const MCExpr * ScratchEnable
const MCExpr * AccumOffset
const MCExpr * NumAccVGPR
const MCExpr * DynamicCallStack
const MCExpr * SGPRBlocks
const MCExpr * NumVGPRsForWavesPerEU
const MCExpr * NumVGPR
const MCExpr * Occupancy
const MCExpr * ScratchSize
const MCExpr * NumSGPRsForWavesPerEU
const MCExpr * getComputePGMRSrc2(const GCNSubtarget &ST, MCContext &Ctx) const
Compute the value of the ComputePGMRsrc2 register.
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.