87 std::unique_ptr<MCStreamer> &&Streamer) {
102 AMDGPUAsmPrinter *Asm;
105 AMDGPUAsmPrinterHandler(AMDGPUAsmPrinter *
A) : Asm(
A) {}
107 void beginFunction(
const MachineFunction *MF)
override {}
109 void endFunction(
const MachineFunction *MF)
override { Asm->endFunction(MF); }
111 void endModule()
override {}
116 std::unique_ptr<MCStreamer> Streamer)
122 return "AMDGPU Assembly Printer";
126 return &
TM.getMCSubtargetInfo();
139void AMDGPUAsmPrinter::initTargetStreamer(
Module &M) {
145 initializeTargetID(M);
166 initTargetStreamer(M);
174 HSAMetadataStream->end();
189 STM.getCPU() +
" is only available on code object version 6 or better");
195 initializeTargetID(*
F.getParent());
197 const auto &FunctionTargetID = STM.getTargetID();
200 if (FunctionTargetID.isXnackSupported() &&
202 FunctionTargetID.getXnackSetting() !=
205 {},
"xnack setting of '" +
Twine(
MF->getName()) +
206 "' function does not match module xnack setting");
211 if (FunctionTargetID.isSramEccSupported() &&
213 FunctionTargetID.getSramEccSetting() !=
216 {},
"sramecc setting of '" +
Twine(
MF->getName()) +
217 "' function does not match module sramecc setting");
224 if (STM.isMesaKernel(
F) &&
228 getAmdKernelCode(KernelCode, CurrentProgramInfo, *
MF);
233 if (STM.isAmdHsaOS())
234 HSAMetadataStream->emitKernel(*
MF, CurrentProgramInfo);
260 getAmdhsaKernelDescriptor(*
MF, CurrentProgramInfo);
273 const MCExpr *InstPrefSize =
284 Streamer.pushSection();
285 Streamer.switchSection(&ReadOnlySection);
289 Streamer.emitValueToAlignment(
Align(64), 0, 1, 0);
295 STM, KernelName, KD, CurrentProgramInfo.NumVGPRsForWavesPerEU,
297 CurrentProgramInfo.NumSGPRsForWavesPerEU,
299 CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
302 CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed);
304 Streamer.popSection();
312 OS <<
"implicit-def: "
313 <<
printReg(RegNo,
MF->getSubtarget().getRegisterInfo());
316 OS <<
" : SGPR spill to VGPR lane";
336 if (DumpCodeInstEmitter) {
363 ": unsupported initializer for address space");
376 "LDS definitions should have been externalized when object "
377 "linking is enabled");
385 "' is already defined");
394 TS->emitAMDGPULDS(GVSym,
Size, Alignment);
405 switch (CodeObjectVersion) {
407 HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV4>();
410 HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV5>();
413 HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV6>();
432 unsigned DynamicVGPRBlockSize,
445 auto CreateExpr = [&Ctx](
unsigned Value) {
455 {CreateExpr(MaxWaves), CreateExpr(Granule),
456 CreateExpr(TargetTotalNumVGPRs),
457 CreateExpr(InitOcc), CreateExpr(SGPRTotal),
458 CreateExpr(SGPRGranule),
459 CreateExpr(SGPRTrapReserve), SGPRArg, NumVGPRs},
463void AMDGPUAsmPrinter::validateMCResourceInfo(
Function &
F) {
468 const GCNSubtarget &STM =
TM.getSubtarget<GCNSubtarget>(
F);
471 auto TryGetMCExprValue = [](
const MCExpr *
Value, uint64_t &Res) ->
bool {
473 if (
Value->evaluateAsAbsolute(Val)) {
480 const uint64_t MaxScratchPerWorkitem =
483 RI.getSymbol(FnSym->getName(), RIK::RIK_PrivateSegSize,
OutContext);
484 uint64_t ScratchSize;
487 ScratchSize > MaxScratchPerWorkitem) {
488 DiagnosticInfoStackSize DiagStackSize(
F, ScratchSize, MaxScratchPerWorkitem,
490 F.getContext().diagnose(DiagStackSize);
496 RI.getSymbol(FnSym->getName(), RIK::RIK_NumSGPR,
OutContext);
498 !STM.hasSGPRInitBug()) {
503 NumSgpr > MaxAddressableNumSGPRs) {
504 F.getContext().diagnose(DiagnosticInfoResourceLimit(
505 F,
"addressable scalar registers", NumSgpr, MaxAddressableNumSGPRs,
512 RI.getSymbol(FnSym->getName(), RIK::RIK_UsesVCC,
OutContext);
514 RI.getSymbol(FnSym->getName(), RIK::RIK_UsesFlatScratch,
OutContext);
515 uint64_t VCCUsed, FlatUsed, NumSgpr;
526 STM, VCCUsed, FlatUsed,
529 STM.hasSGPRInitBug()) {
531 if (NumSgpr > MaxAddressableNumSGPRs) {
532 F.getContext().diagnose(DiagnosticInfoResourceLimit(
533 F,
"scalar registers", NumSgpr, MaxAddressableNumSGPRs,
DS_Error,
540 RI.getSymbol(FnSym->getName(), RIK::RIK_NumVGPR,
OutContext);
542 RI.getSymbol(FnSym->getName(), RIK::RIK_NumAGPR,
OutContext);
543 uint64_t NumVgpr, NumAgpr;
545 MachineModuleInfo &
MMI =
547 MachineFunction *
MF =
MMI.getMachineFunction(
F);
551 const SIMachineFunctionInfo &MFI = *
MF->getInfo<SIMachineFunctionInfo>();
553 uint64_t TotalNumVgpr =
555 uint64_t NumVGPRsForWavesPerEU =
556 std::max({TotalNumVgpr, (uint64_t)1,
559 uint64_t NumSGPRsForWavesPerEU = std::max(
569 F,
"amdgpu-waves-per-eu", {0, 0},
true);
571 if (TryGetMCExprValue(OccupancyExpr, Occupancy) && Occupancy < MinWEU) {
572 DiagnosticInfoOptimizationFailure Diag(
573 F,
F.getSubprogram(),
574 "failed to meet occupancy target given by 'amdgpu-waves-per-eu' in "
576 F.getName() +
"': desired occupancy was " + Twine(MinWEU) +
577 ", final occupancy is " + Twine(Occupancy));
578 F.getContext().diagnose(Diag);
587 if (Ty->isVoidTy()) {
591 unsigned Bits =
DL.getTypeSizeInBits(Ty);
612 for (
Type *ParamTy : FTy->params())
620 const SIInstrInfo *
TII =
MF->getSubtarget<GCNSubtarget>().getInstrInfo();
621 const MachineOperand *
Callee =
622 TII->getNamedOperand(
MI, AMDGPU::OpName::callee);
623 if (!Callee || !
Callee->isGlobal())
625 DirectCallEdges.insert(
629void AMDGPUAsmPrinter::emitAMDGPUInfo(
Module &M) {
633 const NamedMDNode *LDSMD =
M.getNamedMetadata(
"amdgpu.lds.uses");
636 const NamedMDNode *BarMD =
M.getNamedMetadata(
"amdgpu.named_barrier.uses");
640 DenseMap<const Function *, std::string> AddrTakenTypeIds;
641 using IndirectCallInfo = std::pair<const Function *, std::string>;
644 for (
const Function &
F : M) {
647 if (!IsKernel &&
F.hasAddressTaken(
nullptr,
651 AddrTakenTypeIds[&
F] =
655 if (
F.isDeclaration())
658 StringSet<> SeenTypeIds;
659 for (
const BasicBlock &BB :
F) {
660 for (
const Instruction &
I : BB) {
662 if (!CB || !CB->isIndirectCall())
666 if (SeenTypeIds.
insert(TId).second)
667 IndirectCalls.
push_back({&
F, std::move(TId)});
672 if (FunctionInfos.empty() && DirectCallEdges.empty() && !HasLDSUses &&
673 !HasNamedBarriers && AddrTakenTypeIds.
empty() && IndirectCalls.
empty())
676 AMDGPU::InfoSectionData
Data;
677 Data.Funcs = std::move(FunctionInfos);
679 for (
auto &[
F, TypeId] : AddrTakenTypeIds) {
681 Data.TypeIds.push_back({Sym, TypeId});
684 for (
auto &[CallerSym, CalleeSym] : DirectCallEdges)
685 Data.Calls.push_back({CallerSym, CalleeSym});
686 DirectCallEdges.clear();
689 for (
const MDNode *
N : LDSMD->
operands()) {
696 if (HasNamedBarriers) {
697 for (
const MDNode *
N : BarMD->
operands()) {
700 for (
unsigned I = 1,
E =
N->getNumOperands();
I <
E; ++
I) {
707 for (
auto &[Caller, Enc] : IndirectCalls) {
709 Data.IndirectCalls.push_back({CallerSym, Enc});
756 validateMCResourceInfo(
F);
775void AMDGPUAsmPrinter::emitCommonFunctionComments(
780 OutStreamer->emitRawComment(
" TotalNumSgprs: " + getMCExprStr(NumSGPR),
782 OutStreamer->emitRawComment(
" NumVgprs: " + getMCExprStr(NumVGPR),
false);
783 if (NumAGPR && TotalNumVGPR) {
784 OutStreamer->emitRawComment(
" NumAgprs: " + getMCExprStr(NumAGPR),
false);
785 OutStreamer->emitRawComment(
" TotalNumVgprs: " + getMCExprStr(TotalNumVGPR),
788 OutStreamer->emitRawComment(
" ScratchSize: " + getMCExprStr(ScratchSize),
794const MCExpr *AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
796 const SIMachineFunctionInfo &MFI = *
MF.getInfo<SIMachineFunctionInfo>();
797 MCContext &Ctx =
MF.getContext();
798 uint16_t KernelCodeProperties = 0;
802 KernelCodeProperties |=
803 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
806 KernelCodeProperties |=
807 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
810 KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
813 KernelCodeProperties |=
814 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
817 KernelCodeProperties |=
818 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
821 KernelCodeProperties |=
822 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
825 KernelCodeProperties |=
826 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
828 if (
MF.getSubtarget<GCNSubtarget>().isWave32()) {
829 KernelCodeProperties |=
830 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
837 const MCExpr *KernelCodePropExpr =
840 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT, Ctx);
845 return KernelCodePropExpr;
851 const GCNSubtarget &STM =
MF.getSubtarget<GCNSubtarget>();
853 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
854 MCContext &Ctx =
MF.getContext();
856 MCKernelDescriptor KernelDescriptor;
862 Align MaxKernArgAlign;
870 int64_t PGM_Rsrc3 = 1;
871 bool EvaluatableRsrc3 =
872 CurrentProgramInfo.ComputePGMRSrc3->evaluateAsAbsolute(PGM_Rsrc3);
874 (void)EvaluatableRsrc3;
876 STM.hasGFX90AInsts() || STM.hasGFX1250Insts() || !EvaluatableRsrc3 ||
877 static_cast<uint64_t
>(PGM_Rsrc3) == 0);
884 return KernelDescriptor;
891 initTargetStreamer(*
MF.getFunction().getParent());
895 CurrentProgramInfo.reset(
MF);
921 FunctionInfos.push_back(
933 getSIProgramInfo(CurrentProgramInfo,
MF);
938 EmitPALMetadata(
MF, CurrentProgramInfo);
940 emitPALFunctionMetadata(
MF);
942 EmitProgramInfoSI(
MF, CurrentProgramInfo);
945 DumpCodeInstEmitter =
nullptr;
946 if (STM.dumpCode()) {
969 RI.getSymbol(
CurrentFnSym->getName(), RIK::RIK_NumNamedBarrier,
971 RI.getSymbol(
CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
974 RI.getSymbol(
CurrentFnSym->getName(), RIK::RIK_UsesFlatScratch,
976 RI.getSymbol(
CurrentFnSym->getName(), RIK::RIK_HasDynSizedStack,
978 RI.getSymbol(
CurrentFnSym->getName(), RIK::RIK_HasRecursion,
980 RI.getSymbol(
CurrentFnSym->getName(), RIK::RIK_HasIndirectCall,
994 OutStreamer->emitRawComment(
" Function info:",
false);
996 emitCommonFunctionComments(
998 ->getVariableValue(),
999 STM.hasMAIInsts() ? RI.getSymbol(
CurrentFnSym->getName(),
1001 ->getVariableValue()
1003 RI.createTotalNumVGPRs(
MF, Ctx),
1004 RI.createTotalNumSGPRs(
1008 RI.getSymbol(
CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
1010 ->getVariableValue(),
1011 CurrentProgramInfo.getFunctionCodeSize(
MF), MFI);
1015 OutStreamer->emitRawComment(
" Kernel info:",
false);
1016 emitCommonFunctionComments(
1017 CurrentProgramInfo.NumArchVGPR,
1018 STM.hasMAIInsts() ? CurrentProgramInfo.NumAccVGPR :
nullptr,
1019 CurrentProgramInfo.NumVGPR, CurrentProgramInfo.NumSGPR,
1020 CurrentProgramInfo.ScratchSize,
1021 CurrentProgramInfo.getFunctionCodeSize(
MF), MFI);
1024 " FloatMode: " +
Twine(CurrentProgramInfo.FloatMode),
false);
1026 " IeeeMode: " +
Twine(CurrentProgramInfo.IEEEMode),
false);
1028 " LDSByteSize: " +
Twine(CurrentProgramInfo.LDSSize) +
1029 " bytes/workgroup (compile time only)",
1033 " SGPRBlocks: " + getMCExprStr(CurrentProgramInfo.SGPRBlocks),
false);
1036 " VGPRBlocks: " + getMCExprStr(CurrentProgramInfo.VGPRBlocks),
false);
1039 " NumSGPRsForWavesPerEU: " +
1040 getMCExprStr(CurrentProgramInfo.NumSGPRsForWavesPerEU),
1043 " NumVGPRsForWavesPerEU: " +
1044 getMCExprStr(CurrentProgramInfo.NumVGPRsForWavesPerEU),
1047 if (STM.hasGFX90AInsts()) {
1053 " AccumOffset: " + getMCExprStr(AdjustedAccum),
false);
1056 if (STM.hasGFX1250Insts())
1058 " NamedBarCnt: " + getMCExprStr(CurrentProgramInfo.NamedBarCnt),
1062 " Occupancy: " + getMCExprStr(CurrentProgramInfo.Occupancy),
false);
1068 " COMPUTE_PGM_RSRC2:SCRATCH_EN: " +
1069 getMCExprStr(CurrentProgramInfo.ScratchEnable),
1071 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:USER_SGPR: " +
1072 Twine(CurrentProgramInfo.UserSGPR),
1074 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
1075 Twine(CurrentProgramInfo.TrapHandlerEnable),
1077 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TGID_X_EN: " +
1078 Twine(CurrentProgramInfo.TGIdXEnable),
1080 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
1081 Twine(CurrentProgramInfo.TGIdYEnable),
1083 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
1084 Twine(CurrentProgramInfo.TGIdZEnable),
1086 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
1087 Twine(CurrentProgramInfo.TIdIGCompCount),
1090 [[maybe_unused]] int64_t PGMRSrc3;
1092 STM.hasGFX90AInsts() || STM.hasGFX1250Insts() ||
1093 (CurrentProgramInfo.ComputePGMRSrc3->evaluateAsAbsolute(PGMRSrc3) &&
1094 static_cast<uint64_t>(PGMRSrc3) == 0));
1095 if (STM.hasGFX90AInsts()) {
1097 " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
1099 CurrentProgramInfo.ComputePGMRSrc3,
1100 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
1101 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, Ctx)),
1104 " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
1106 CurrentProgramInfo.ComputePGMRSrc3,
1107 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
1108 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx)),
1113 if (DumpCodeInstEmitter) {
1119 std::string Comment =
"\n";
1122 Comment +=
" ; " +
HexLines[i] +
"\n";
1147 const MCExpr *EncodedBlocks;
1150 NumVGPRs,
nullptr) &&
1151 NumVGPRs.isAbsolute()) {
1155 unsigned NumBlocks =
1160 {},
"DVGPR block count " +
Twine(NumBlocks) +
1161 " exceeds maximum of " +
1163 " for __dvgpr$ symbol for '" +
1167 unsigned EncodedNumBlocks = (NumBlocks - 1) << 3;
1175 {CurrentProgramInfo.NumVGPRsForWavesPerEU, One}, Ctx);
1178 BlockSizeConst, Ctx);
1189 OutStreamer->emitAssignment(DVgprFuncSym, DVgprFuncVal);
1196void AMDGPUAsmPrinter::initializeTargetID(
const Module &M) {
1210 if ((!TSTargetID->isXnackSupported() || TSTargetID->isXnackOnOrOff()) &&
1211 (!TSTargetID->isSramEccSupported() || TSTargetID->isSramEccOnOrOff()))
1214 const GCNSubtarget &STM =
TM.getSubtarget<GCNSubtarget>(
F);
1215 const IsaInfo::AMDGPUTargetID &STMTargetID = STM.
getTargetID();
1216 if (TSTargetID->isXnackSupported())
1217 if (TSTargetID->getXnackSetting() == IsaInfo::TargetIDSetting::Any)
1219 if (TSTargetID->isSramEccSupported())
1220 if (TSTargetID->getSramEccSetting() == IsaInfo::TargetIDSetting::Any)
1221 TSTargetID->setSramEccSetting(STMTargetID.getSramEccSetting());
1232 const MCExpr *MaximumTaken =
1243void AMDGPUAsmPrinter::getSIProgramInfo(
SIProgramInfo &ProgInfo,
1245 const GCNSubtarget &STM =
MF.getSubtarget<GCNSubtarget>();
1246 MCContext &Ctx =
MF.getContext();
1248 auto CreateExpr = [&Ctx](int64_t
Value) {
1252 auto TryGetMCExprValue = [](
const MCExpr *
Value, uint64_t &Res) ->
bool {
1254 if (
Value->evaluateAsAbsolute(Val)) {
1261 auto GetSymRefExpr =
1268 ProgInfo.
NumArchVGPR = GetSymRefExpr(RIK::RIK_NumVGPR);
1269 ProgInfo.
NumAccVGPR = GetSymRefExpr(RIK::RIK_NumAGPR);
1275 ProgInfo.
NumSGPR = GetSymRefExpr(RIK::RIK_NumSGPR);
1276 ProgInfo.
ScratchSize = GetSymRefExpr(RIK::RIK_PrivateSegSize);
1277 ProgInfo.
VCCUsed = GetSymRefExpr(RIK::RIK_UsesVCC);
1278 ProgInfo.
FlatUsed = GetSymRefExpr(RIK::RIK_UsesFlatScratch);
1281 GetSymRefExpr(RIK::RIK_HasRecursion), Ctx);
1285 GetSymRefExpr(RIK::RIK_NumNamedBarrier), BarBlkConst, Ctx);
1288 const SIMachineFunctionInfo *MFI =
MF.getInfo<SIMachineFunctionInfo>();
1299 !STM.hasSGPRInitBug()) {
1302 if (TryGetMCExprValue(ProgInfo.
NumSGPR, NumSgpr) &&
1303 NumSgpr > MaxAddressableNumSGPRs) {
1305 LLVMContext &Ctx =
MF.getFunction().getContext();
1306 Ctx.
diagnose(DiagnosticInfoResourceLimit(
1307 MF.getFunction(),
"addressable scalar registers", NumSgpr,
1309 ProgInfo.
NumSGPR = CreateExpr(MaxAddressableNumSGPRs - 1);
1323 if (WaveDispatchNumSGPR) {
1331 if (WaveDispatchNumVGPR) {
1333 {ProgInfo.
NumVGPR, CreateExpr(WaveDispatchNumVGPR)}, Ctx);
1353 STM.hasSGPRInitBug()) {
1356 if (TryGetMCExprValue(ProgInfo.
NumSGPR, NumSgpr) &&
1357 NumSgpr > MaxAddressableNumSGPRs) {
1360 LLVMContext &Ctx =
MF.getFunction().getContext();
1361 Ctx.
diagnose(DiagnosticInfoResourceLimit(
1362 MF.getFunction(),
"scalar registers", NumSgpr, MaxAddressableNumSGPRs,
1364 ProgInfo.
NumSGPR = CreateExpr(MaxAddressableNumSGPRs);
1369 if (STM.hasSGPRInitBug()) {
1377 LLVMContext &Ctx =
MF.getFunction().getContext();
1378 Ctx.
diagnose(DiagnosticInfoResourceLimit(
1384 LLVMContext &Ctx =
MF.getFunction().getContext();
1385 Ctx.
diagnose(DiagnosticInfoResourceLimit(
1391 auto GetNumGPRBlocks = [&CreateExpr, &Ctx](
const MCExpr *NumGPR,
1393 const MCExpr *OneConst = CreateExpr(1ul);
1394 const MCExpr *GranuleConst = CreateExpr(Granule);
1396 const MCExpr *AlignToGPR =
1398 const MCExpr *DivGPR =
1413 const SIModeRegisterDefaults
Mode = MFI->
getMode();
1424 unsigned LDSAlignShift = 8;
1445 alignTo(ProgInfo.
LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
1448 auto DivideCeil = [&Ctx](
const MCExpr *Numerator,
const MCExpr *Denominator) {
1449 const MCExpr *Ceil =
1455 unsigned ScratchAlignShift =
1463 CreateExpr(1ULL << ScratchAlignShift));
1471 ProgInfo.
FwdProgress = !
F.hasFnAttribute(
"amdgpu-no-fwd-progress");
1475 unsigned TIDIGCompCnt = 0;
1504 if (STM.hasGFX90AInsts()) {
1507 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
1508 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT, Ctx);
1511 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
1512 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT, Ctx);
1515 if (STM.hasGFX1250Insts())
1518 amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
1519 amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT, Ctx);
1526 const auto [MinWEU, MaxWEU] =
1529 if (TryGetMCExprValue(ProgInfo.
Occupancy, Occupancy) && Occupancy < MinWEU) {
1530 DiagnosticInfoOptimizationFailure Diag(
1531 F,
F.getSubprogram(),
1532 "failed to meet occupancy target given by 'amdgpu-waves-per-eu' in "
1534 F.getName() +
"': desired occupancy was " + Twine(MinWEU) +
1535 ", final occupancy is " + Twine(Occupancy));
1536 F.getContext().diagnose(Diag);
1561void AMDGPUAsmPrinter::EmitProgramInfoSI(
1563 const SIMachineFunctionInfo *MFI =
MF.getInfo<SIMachineFunctionInfo>();
1564 const GCNSubtarget &STM =
MF.getSubtarget<GCNSubtarget>();
1565 unsigned RsrcReg =
getRsrcReg(
MF.getFunction().getCallingConv());
1566 MCContext &Ctx =
MF.getContext();
1569 auto SetBits = [&Ctx](
const MCExpr *
Value, uint32_t
Mask, uint32_t Shift) {
1576 auto EmitResolvedOrExpr = [
this](
const MCExpr *
Value,
unsigned Size) {
1578 if (
Value->evaluateAsAbsolute(Val))
1587 EmitResolvedOrExpr(CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx),
1591 EmitResolvedOrExpr(CurrentProgramInfo.getComputePGMRSrc2(STM, Ctx),
1599 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1603 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1607 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1618 SetBits(CurrentProgramInfo.VGPRBlocks, 0x3F, 0),
1619 SetBits(CurrentProgramInfo.SGPRBlocks, 0x0F, 6),
1621 EmitResolvedOrExpr(GPRBlocks, 4);
1627 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1631 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1635 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1644 ?
divideCeil(CurrentProgramInfo.LDSBlocks, 2)
1645 : CurrentProgramInfo.LDSBlocks;
1663 unsigned DynamicVGPRBlockSize) {
1664 if (ST.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
1676 if (DynamicVGPRBlockSize != 0)
1691void AMDGPUAsmPrinter::EmitPALMetadata(
1693 const SIMachineFunctionInfo *MFI =
MF.getInfo<SIMachineFunctionInfo>();
1694 auto CC =
MF.getFunction().getCallingConv();
1696 auto &Ctx =
MF.getContext();
1698 MD->setEntryPoint(CC,
MF.getFunction().getName());
1699 MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx);
1703 const GCNSubtarget &STM =
MF.getSubtarget<GCNSubtarget>();
1706 MD->setHwStage(CC,
".dynamic_vgpr_saved_count",
1710 if (STM.hasMAIInsts()) {
1711 MD->setNumUsedAgprs(CC, CurrentProgramInfo.NumAccVGPR);
1714 MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx);
1715 if (MD->getPALMajorVersion() < 3) {
1716 MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC, STM, Ctx), Ctx);
1718 MD->setRsrc2(CC, CurrentProgramInfo.getComputePGMRSrc2(STM, Ctx), Ctx);
1720 const MCExpr *HasScratchBlocks =
1724 MD->setRsrc2(CC,
maskShiftSet(HasScratchBlocks, Mask, Shift, Ctx), Ctx);
1727 MD->setHwStage(CC,
".debug_mode", (
bool)CurrentProgramInfo.DebugMode);
1729 CurrentProgramInfo.ScratchEnable);
1743 ?
divideCeil(CurrentProgramInfo.LDSBlocks, 2)
1744 : CurrentProgramInfo.LDSBlocks;
1745 if (MD->getPALMajorVersion() < 3) {
1754 const unsigned ExtraLdsDwGranularity =
1756 MD->setGraphicsRegisters(
1757 ".ps_extra_lds_size",
1758 (
unsigned)(ExtraLDSSize * ExtraLdsDwGranularity *
sizeof(uint32_t)));
1761 static StringLiteral
const PsInputFields[] = {
1762 ".persp_sample_ena",
".persp_center_ena",
1763 ".persp_centroid_ena",
".persp_pull_model_ena",
1764 ".linear_sample_ena",
".linear_center_ena",
1765 ".linear_centroid_ena",
".line_stipple_tex_ena",
1766 ".pos_x_float_ena",
".pos_y_float_ena",
1767 ".pos_z_float_ena",
".pos_w_float_ena",
1768 ".front_face_ena",
".ancillary_ena",
1769 ".sample_coverage_ena",
".pos_fixed_pt_ena"};
1773 MD->setGraphicsRegisters(
".spi_ps_input_ena",
Field,
1774 (
bool)((PSInputEna >> Idx) & 1));
1775 MD->setGraphicsRegisters(
".spi_ps_input_addr",
Field,
1776 (
bool)((PSInputAddr >> Idx) & 1));
1782 if (MD->getPALMajorVersion() < 3 && STM.
isWave32())
1783 MD->setWave32(
MF.getFunction().getCallingConv());
1786void AMDGPUAsmPrinter::emitPALFunctionMetadata(
const MachineFunction &MF) {
1788 const MachineFrameInfo &MFI =
MF.getFrameInfo();
1789 StringRef FnName =
MF.getFunction().getName();
1790 MD->setFunctionScratchSize(FnName, MFI.
getStackSize());
1791 const GCNSubtarget &
ST =
MF.getSubtarget<GCNSubtarget>();
1792 MCContext &Ctx =
MF.getContext();
1794 if (MD->getPALMajorVersion() < 3) {
1800 CurrentProgramInfo.getComputePGMRSrc2(ST, Ctx), Ctx);
1804 MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize());
1808 MD->setFunctionLdsSize(FnName, CurrentProgramInfo.LDSSize);
1809 MD->setFunctionNumUsedVgprs(FnName, CurrentProgramInfo.NumVGPRsForWavesPerEU);
1810 MD->setFunctionNumUsedSgprs(FnName, CurrentProgramInfo.NumSGPRsForWavesPerEU);
1827void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out,
1834 const SIMachineFunctionInfo *MFI =
MF.getInfo<SIMachineFunctionInfo>();
1835 const GCNSubtarget &STM =
MF.getSubtarget<GCNSubtarget>();
1836 MCContext &Ctx =
MF.getContext();
1841 CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx);
1843 CurrentProgramInfo.getComputePGMRSrc2(STM, Ctx);
1874 if (STM.isXNACKEnabled())
1877 Align MaxKernArgAlign;
1896 if (ExtraCode && ExtraCode[0]) {
1897 if (ExtraCode[1] != 0)
1900 switch (ExtraCode[0]) {
1912 *
MF->getSubtarget().getRegisterInfo());
1916 int64_t Val = MO.
getImm();
1939void AMDGPUAsmPrinter::emitResourceUsageRemarks(
1945 const char *Name =
"kernel-resource-usage";
1946 const char *Indent =
" ";
1950 if (!Ctx.getDiagHandlerPtr()->isAnalysisRemarkEnabled(Name))
1957 auto EmitResourceUsageRemark = [&](
StringRef RemarkName,
1962 std::string LabelStr = RemarkLabel.str() +
": ";
1963 if (RemarkName !=
"FunctionName")
1964 LabelStr = Indent + LabelStr;
1979 EmitResourceUsageRemark(
"FunctionName",
"Function Name",
1980 MF.getFunction().getName());
1981 EmitResourceUsageRemark(
"NumSGPR",
"TotalSGPRs",
1982 getMCExprStr(CurrentProgramInfo.NumSGPR));
1983 EmitResourceUsageRemark(
"NumVGPR",
"VGPRs",
1984 getMCExprStr(CurrentProgramInfo.NumArchVGPR));
1986 EmitResourceUsageRemark(
"NumAGPR",
"AGPRs",
1987 getMCExprStr(CurrentProgramInfo.NumAccVGPR));
1989 EmitResourceUsageRemark(
"ScratchSize",
"ScratchSize [bytes/lane]",
1990 getMCExprStr(CurrentProgramInfo.ScratchSize));
1992 bool DynStackEvaluatable =
1993 CurrentProgramInfo.DynamicCallStack->evaluateAsAbsolute(DynStack);
1994 StringRef DynamicStackStr =
1995 DynStackEvaluatable && DynStack ?
"True" :
"False";
1996 EmitResourceUsageRemark(
"DynamicStack",
"Dynamic Stack", DynamicStackStr);
1997 EmitResourceUsageRemark(
"Occupancy",
"Occupancy [waves/SIMD]",
1998 getMCExprStr(CurrentProgramInfo.Occupancy));
1999 EmitResourceUsageRemark(
"SGPRSpill",
"SGPRs Spill",
2000 CurrentProgramInfo.SGPRSpill);
2001 EmitResourceUsageRemark(
"VGPRSpill",
"VGPRs Spill",
2002 CurrentProgramInfo.VGPRSpill);
2003 if (isModuleEntryFunction)
2004 EmitResourceUsageRemark(
"BytesLDS",
"LDS Size [bytes/block]",
2005 CurrentProgramInfo.LDSSize);
2011 "AMDGPU Assembly Printer",
false,
false)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD, const SIProgramInfo &CurrentProgramInfo, CallingConv::ID CC, const GCNSubtarget &ST, unsigned DynamicVGPRBlockSize)
const AMDGPUMCExpr * createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs, const MCExpr *NumVGPRs, unsigned DynamicVGPRBlockSize, const GCNSubtarget &STM, MCContext &Ctx)
Mimics GCNSubtarget::computeOccupancy for MCExpr.
static unsigned getRsrcReg(CallingConv::ID CallConv)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmPrinter()
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)
static const MCExpr * setBits(const MCExpr *Dst, const MCExpr *Value, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
Set bits in a kernel descriptor MCExpr field: return ((Dst & ~Mask) | (Value << Shift))
static uint32_t getFPMode(SIModeRegisterDefaults Mode)
static std::string computeTypeId(const FunctionType *FTy, const DataLayout &DL)
static const MCExpr * computeAccumOffset(const MCExpr *NumVGPR, MCContext &Ctx)
static void appendTypeEncoding(std::string &Enc, Type *Ty, const DataLayout &DL, bool IsReturnType)
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
AMDGPU Assembly printer class.
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
MC infrastructure to propagate the function level resource usage info.
Analyzes how many registers and other resources are used by functions.
The AMDGPU TargetMachine interface definition for hw codegen targets.
AMDHSA kernel descriptor definitions.
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
amd_element_byte_size_t
The values used to define the number of bytes to use for the swizzle element size.
#define AMD_HSA_BITS_SET(dst, mask, val)
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
@ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE
@ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE
@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
@ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED
@ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
@ AMD_CODE_PROPERTY_IS_PTR64
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_EXTERNAL_VISIBILITY
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
OptimizedStructLayoutField Field
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
R600 Assembly printer class.
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
#define R_0286E8_SPI_TMPRING_SIZE
#define FP_ROUND_MODE_DP(x)
#define C_00B84C_SCRATCH_EN
#define FP_ROUND_ROUND_TO_NEAREST
#define R_0286D0_SPI_PS_INPUT_ADDR
#define R_00B860_COMPUTE_TMPRING_SIZE
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS
#define R_0286CC_SPI_PS_INPUT_ENA
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
#define FP_DENORM_MODE_DP(x)
#define R_00B848_COMPUTE_PGM_RSRC1
#define FP_ROUND_MODE_SP(x)
#define FP_DENORM_MODE_SP(x)
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
#define S_00B02C_EXTRA_LDS_SIZE(x)
#define R_00B84C_COMPUTE_PGM_RSRC2
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS
StringSet - A set-like wrapper for the StringMap.
static const int BlockSize
void emitFunctionEntryLabel() override
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
const MCSubtargetInfo * getGlobalSTI() const
void emitImplicitDef(const MachineInstr *MI) const override
Targets can override this to customize the output of IMPLICIT_DEF instructions in verbose mode.
std::vector< std::string > DisasmLines
void emitStartOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the start of their fi...
void endFunction(const MachineFunction *MF)
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
std::vector< std::string > HexLines
bool IsTargetStreamerInitialized
void emitGlobalVariable(const GlobalVariable *GV) override
Emit the specified global variable to the .s file.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool doFinalization(Module &M) override
doFinalization - Virtual method overriden by subclasses to do any necessary clean up after all passes...
void emitEndOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the end of their file...
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
bool doInitialization(Module &M) override
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
void emitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
void emitBasicBlockStart(const MachineBasicBlock &MBB) override
Targets can override this to emit stuff at the start of a basic block.
AMDGPUTargetStreamer * getTargetStreamer() const
static void printRegOperand(MCRegister Reg, raw_ostream &O, const MCRegisterInfo &MRI)
AMDGPU target specific MCExpr operations.
static const AMDGPUMCExpr * createInstPrefSize(const MCExpr *CodeSizeBytes, MCContext &Ctx)
Create an expression for instruction prefetch size computation: min(divideCeil(CodeSizeBytes,...
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createTotalNumVGPR(const MCExpr *NumAGPR, const MCExpr *NumVGPR, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
bool isMemoryBound() const
bool isModuleEntryFunction() const
bool needsWaveLimiter() const
uint32_t getLDSSize() const
bool isEntryFunction() const
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes(uint32_t LDSBytes, const Function &F) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
unsigned getAddressableLocalMemorySize() const
Return the maximum number of bytes of LDS that can be allocated to a single workgroup.
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
unsigned getWavefrontSize() const
static bool EnableObjectLinking
virtual void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr)
virtual void emitAMDGPUInfo(const AMDGPU::InfoSectionData &Data)
AMDGPUPALMetadata * getPALMetadata()
virtual void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV)
virtual bool EmitISAVersion()
virtual void EmitMCResourceInfo(const MCSymbol *NumVGPR, const MCSymbol *NumAGPR, const MCSymbol *NumExplicitSGPR, const MCSymbol *NumNamedBarrier, const MCSymbol *PrivateSegmentSize, const MCSymbol *UsesVCC, const MCSymbol *UsesFlatScratch, const MCSymbol *HasDynamicallySizedStack, const MCSymbol *HasRecursion, const MCSymbol *HasIndirectCall)
virtual bool EmitCodeEnd(const MCSubtargetInfo &STI)
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type)
void initializeTargetID(const MCSubtargetInfo &STI, StringRef FeatureString)
virtual void EmitDirectiveAMDGCNTarget()
virtual void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header)
const std::optional< AMDGPU::IsaInfo::AMDGPUTargetID > & getTargetID() const
virtual void EmitMCResourceMaximums(const MCSymbol *MaxVGPR, const MCSymbol *MaxAGPR, const MCSymbol *MaxSGPR, const MCSymbol *MaxNamedBarrier)
void setXnackSetting(TargetIDSetting NewXnackSetting)
Sets xnack setting to NewXnackSetting.
bool isXnackOnOrAny() const
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This class represents an incoming formal argument to a Function.
Collects and handles AsmPrinter objects required to build debug or EH information.
This class is intended to be used as a driving class for all asm writers.
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
MCSymbol * getSymbol(const GlobalValue *GV) const
virtual void emitGlobalVariable(const GlobalVariable *GV)
Emit the specified global variable to the .s file.
TargetMachine & TM
Target machine description.
MachineFunction * MF
The current machine function.
virtual void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
void emitFunctionBody()
This method emits the body and trailer for a function.
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
virtual void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const
This emits linkage information about GVSym based on GV, if this is supported by the target.
void getAnalysisUsage(AnalysisUsage &AU) const override
Record analysis usage.
unsigned getFunctionNumber() const
Return a unique ID for the current function.
MachineOptimizationRemarkEmitter * ORE
Optimization remark emitter.
AsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer, char &ID=AsmPrinter::ID)
MCSymbol * CurrentFnSym
The symbol for the current function.
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
MCContext & OutContext
This is the context for the output file that we are streaming.
bool doFinalization(Module &M) override
Shut down the asmprinter.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
void emitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition=true) const
This emits visibility information about symbol, if this is supported by the target.
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
const MCAsmInfo & MAI
Target Asm Printer information.
bool isVerbose() const
Return true if assembly output should contain comments.
MCSymbol * getFunctionEnd() const
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
virtual void emitFunctionEntryLabel()
emitFunctionEntryLabel - Emit the label that is the entry point for the function.
void addAsmPrinterHandler(std::unique_ptr< AsmPrinterHandler > Handler)
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
A parsed version of the target data layout string, and methods for querying it.
DISubprogram * getSubprogram() const
Get the attached subprogram.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getMinNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
bool isTgSplitEnabled() const
bool hasInstPrefSize() const
bool isCuModeEnabled() const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
std::pair< unsigned, unsigned > computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
void getInstPrefSizeArgs(uint32_t &Mask, uint32_t &Shift, uint32_t &Width, uint32_t &CacheLineSize) const
unsigned getMaxNumUserSGPRs() const
Generation getGeneration() const
unsigned getAddressableNumSGPRs() const
unsigned getMaxWaveScratchSize() const
bool hasKernargSegmentPtr() const
bool hasDispatchID() const
bool hasPrivateSegmentBuffer() const
bool hasPrivateSegmentSize() const
bool hasDispatchPtr() const
bool hasFlatScratchInit() const
VisibilityTypes getVisibility() const
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
unsigned getAddressSpace() const
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
MaybeAlign getAlign() const
Returns the alignment of the given variable.
LLVM_ABI uint64_t getGlobalSize(const DataLayout &DL) const
Get the size of this global variable in bytes.
This is an important class for using LLVM in a threaded context.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
MCCodeEmitter * getEmitterPtr() const
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createLOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createMul(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createGT(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createShl(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Context object for machine code objects.
const MCObjectFileInfo * getObjectFileInfo() const
LLVM_ABI void reportError(SMLoc L, const Twine &Msg)
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Look up the symbol with the specified Name.
Base class for the full range of assembler expressions which are needed for parsing.
LLVM_ABI bool evaluateAsRelocatable(MCValue &Res, const MCAssembler *Asm) const
Try to evaluate the expression to a relocatable value, i.e. an expression of the fixed form (a - b + constant).
MCSection * getReadOnlySection() const
MCSection * getTextSection() const
MCContext & getContext() const
This represents a section on linux, lots of unix variants and some bare metal systems.
Instances of this class represent a uniqued identifier for a section in the current translation unit.
void ensureMinAlignment(Align MinAlignment)
Makes sure that Alignment is at least MinAlignment.
bool hasInstructions() const
MCContext & getContext() const
Generic base class for all target subtargets.
const Triple & getTargetTriple() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
bool isDefined() const
isDefined - Check if this symbol is defined (i.e., it has an address).
StringRef getName() const
getName - Get the symbol name.
bool isVariable() const
isVariable - Check if this is a variable symbol.
void redefineIfPossible()
Prepare this symbol to be redefined.
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
MCStreamer & getStreamer()
static const MCUnaryExpr * createNot(const MCExpr *Expr, MCContext &Ctx, SMLoc Loc=SMLoc())
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
MCContext & getContext() const
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
A Module instance is used to store all the information related to an LLVM module.
LLVM_ABI unsigned getNumOperands() const
iterator_range< op_iterator > operands()
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
Wrapper class representing virtual and physical registers.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
unsigned getNumWaveDispatchVGPRs() const
unsigned getNumSpilledVGPRs() const
unsigned getNumWaveDispatchSGPRs() const
unsigned getNumSpilledSGPRs() const
GCNUserSGPRUsageInfo & getUserSGPRInfo()
unsigned getDynamicVGPRBlockSize() const
unsigned getMaxWavesPerEU() const
bool hasWorkGroupIDZ() const
bool hasWorkGroupIDY() const
SIModeRegisterDefaults getMode() const
bool hasWorkGroupInfo() const
bool hasWorkItemIDY() const
bool hasWorkGroupIDX() const
unsigned getNumUserSGPRs() const
unsigned getScratchReservedForDynamicVGPRs() const
bool isDynamicVGPREnabled() const
unsigned getPSInputAddr() const
bool hasWorkItemIDZ() const
unsigned getPSInputEnable() const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void push_back(const T &Elt)
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
std::pair< typename Base::iterator, bool > insert(StringRef key)
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
OSType getOS() const
Get the parsed operating system type of this triple.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to a SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned getSGPRAllocGranule(const MCSubtargetInfo &STI)
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
bool isSGPROccupancyLimited(const MCSubtargetInfo &STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI, std::optional< bool > EnableWavefrontSize32)
static constexpr unsigned MaxDynamicVGPRBlocks
Maximum number of VGPR blocks that can be allocated in dynamic VGPR mode.
unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI)
unsigned getTotalNumVGPRs(const MCSubtargetInfo &STI)
unsigned getMaxWavesPerEU(const MCSubtargetInfo &STI)
unsigned getNumExtraSGPRs(const MCSubtargetInfo &STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getVGPRAllocGranule(const MCSubtargetInfo &STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
void printAMDGPUMCExpr(const MCExpr *Expr, raw_ostream &OS, const MCAsmInfo *MAI)
LLVM_READNONE constexpr bool isModuleEntryFunctionCC(CallingConv::ID CC)
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
const MCExpr * maskShiftSet(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
Provided with the MCExpr * Val, uint32 Mask and Shift, will return the masked and left shifted,...
unsigned getAMDHSACodeObjectVersion(const Module &M)
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READNONE constexpr bool isKernel(CallingConv::ID CC)
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
LLVM_READNONE constexpr bool isCompute(CallingConv::ID CC)
bool hasMAIInsts(const MCSubtargetInfo &STI)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
const MCExpr * foldAMDGPUMCExpr(const MCExpr *Expr, MCContext &Ctx)
bool isGFX10Plus(const MCSubtargetInfo &STI)
constexpr std::pair< unsigned, unsigned > getShiftMask(unsigned Value)
Deduce the least significant bit aligned shift and mask values for a binary Complement Value (as they...
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
Target & getTheR600Target()
The target for R600 GPUs.
AsmPrinter * createR600AsmPrinterPass(TargetMachine &TM, std::unique_ptr< MCStreamer > &&Streamer)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
@ Success
The lock was released successfully.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Target & getTheGCNTarget()
The target for GCN GPUs.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
unsigned Log2(Align A)
Returns the log2 of the alignment.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Implement std::hash so that hash_code can be used in STL containers.
bool HasDynamicallySizedStack
uint64_t PrivateSegmentSize
AMDGPUResourceUsageAnalysisImpl::SIFunctionResourceInfo FunctionResourceInfo
uint64_t kernarg_segment_byte_size
void initDefault(const MCSubtargetInfo &STI, MCContext &Ctx, bool InitMCExpr=true)
const MCExpr * workitem_private_segment_byte_size
const MCExpr * compute_pgm_resource2_registers
uint8_t kernarg_segment_alignment
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
const MCExpr * wavefront_sgpr_count
const MCExpr * workitem_vgpr_count
const MCExpr * is_dynamic_callstack
uint32_t workgroup_group_segment_byte_size
const MCExpr * compute_pgm_resource1_registers
const MCExpr * compute_pgm_rsrc2
const MCExpr * kernarg_size
const MCExpr * kernarg_preload
const MCExpr * compute_pgm_rsrc3
const MCExpr * private_segment_fixed_size
static const MCExpr * bits_get(const MCExpr *Src, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
const MCExpr * compute_pgm_rsrc1
const MCExpr * group_segment_fixed_size
const MCExpr * kernel_code_properties
This struct is a compact representation of a valid (non-zero power of two) alignment.
Track resource usage for kernels / entry functions.
const MCExpr * NumArchVGPR
const MCExpr * VGPRBlocks
const MCExpr * ScratchBlocks
const MCExpr * ComputePGMRSrc3
const MCExpr * getComputePGMRSrc1(const GCNSubtarget &ST, MCContext &Ctx) const
Compute the value of the ComputePGMRsrc1 register.
uint32_t TrapHandlerEnable
const MCExpr * NamedBarCnt
const MCExpr * ScratchEnable
const MCExpr * AccumOffset
const MCExpr * NumAccVGPR
const MCExpr * DynamicCallStack
const MCExpr * SGPRBlocks
const MCExpr * NumVGPRsForWavesPerEU
const MCExpr * ScratchSize
const MCExpr * NumSGPRsForWavesPerEU
const MCExpr * getComputePGMRSrc2(const GCNSubtarget &ST, MCContext &Ctx) const
Compute the value of the ComputePGMRsrc2 register.
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.