Go to the documentation of this file.
46 "amdgpu-assume-external-call-stack-size",
47 cl::desc(
"Assumed stack use of any external call (in bytes)"),
52 "amdgpu-assume-dynamic-stack-object-size",
53 cl::desc(
"Assumed extra stack use if there are any "
54 "variable sized objects (in bytes)"),
87 std::unique_ptr<MCStreamer> &&Streamer) {
99 std::unique_ptr<MCStreamer> Streamer)
113 return "AMDGPU Assembly Printer";
130 initializeTargetID(
M);
170 HSAMetadataStream->end();
199 initializeTargetID(*
F.getParent());
201 const auto &FunctionTargetID = STM.getTargetID();
204 if (FunctionTargetID.isXnackSupported() &&
208 "' function does not match module xnack setting");
213 if (FunctionTargetID.isSramEccSupported() &&
217 "' function does not match module sramecc setting");
228 getAmdKernelCode(KernelCode, CurrentProgramInfo, *
MF);
232 if (STM.isAmdHsaOS())
233 HSAMetadataStream->emitKernel(*
MF, CurrentProgramInfo);
246 auto &
Context = Streamer.getContext();
247 auto &ObjectFileInfo = *
Context.getObjectFileInfo();
248 auto &ReadOnlySection = *ObjectFileInfo.getReadOnlySection();
250 Streamer.PushSection();
251 Streamer.SwitchSection(&ReadOnlySection);
255 Streamer.emitValueToAlignment(64, 0, 1, 0);
256 if (ReadOnlySection.getAlignment() < 64)
257 ReadOnlySection.setAlignment(
Align(64));
264 STM, KernelName, getAmdhsaKernelDescriptor(*
MF, CurrentProgramInfo),
272 Streamer.PopSection();
290 if (DumpCodeInstEmitter) {
317 ": unsupported initializer for address space");
331 "' is already defined");
340 TS->emitAMDGPULDS(GVSym,
Size, Alignment);
348 CallGraphResourceInfo.clear();
365 void AMDGPUAsmPrinter::emitCommonFunctionComments(
370 uint64_t ScratchSize,
386 uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
392 KernelCodeProperties |=
393 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
396 KernelCodeProperties |=
397 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
400 KernelCodeProperties |=
401 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
404 KernelCodeProperties |=
405 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
408 KernelCodeProperties |=
409 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
412 KernelCodeProperties |=
413 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
416 KernelCodeProperties |=
417 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
420 return KernelCodeProperties;
430 memset(&KernelDescriptor, 0x0,
sizeof(KernelDescriptor));
439 Align MaxKernArgAlign;
451 return KernelDescriptor;
475 getSIProgramInfo(CurrentProgramInfo,
MF);
477 auto I = CallGraphResourceInfo.insert(
478 std::make_pair(&
MF.
getFunction(), SIFunctionResourceInfo()));
479 SIFunctionResourceInfo &
Info =
I.first->second;
480 assert(
I.second &&
"should only be called once per function");
481 Info = analyzeResourceUsage(
MF);
486 EmitPALMetadata(
MF, CurrentProgramInfo);
488 emitPALFunctionMetadata(
MF);
490 EmitProgramInfoSI(
MF, CurrentProgramInfo);
493 DumpCodeInstEmitter =
nullptr;
497 bool SaveFlag =
OutStreamer->getUseAssemblerInfoForParsing();
500 OutStreamer->setUseAssemblerInfoForParsing(SaveFlag);
517 OutStreamer->emitRawComment(
" Function info:",
false);
519 emitCommonFunctionComments(
522 Info.getTotalNumVGPRs(STM),
524 Info.PrivateSegmentSize,
525 getFunctionCodeSize(
MF), MFI);
529 OutStreamer->emitRawComment(
" Kernel info:",
false);
530 emitCommonFunctionComments(CurrentProgramInfo.
NumArchVGPR,
537 getFunctionCodeSize(
MF), MFI);
545 " bytes/workgroup (compile time only)",
false);
553 " NumSGPRsForWavesPerEU: " +
556 " NumVGPRsForWavesPerEU: " +
572 " COMPUTE_PGM_RSRC2:SCRATCH_EN: " +
575 " COMPUTE_PGM_RSRC2:USER_SGPR: " +
578 " COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
581 " COMPUTE_PGM_RSRC2:TGID_X_EN: " +
584 " COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
587 " COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
590 " COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
598 " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
600 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET))),
603 " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
605 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT))),
610 if (DumpCodeInstEmitter) {
616 std::string Comment =
"\n";
631 void AMDGPUAsmPrinter::initializeTargetID(
const Module &M) {
645 if ((!TSTargetID->isXnackSupported() || TSTargetID->isXnackOnOrOff()) &&
646 (!TSTargetID->isSramEccSupported() || TSTargetID->isSramEccOnOrOff()))
651 if (TSTargetID->isXnackSupported())
654 if (TSTargetID->isSramEccSupported())
656 TSTargetID->setSramEccSetting(STMTargetID.getSramEccSetting());
660 uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(
const MachineFunction &MF)
const {
664 uint64_t CodeSize = 0;
671 if (
MI.isDebugInstr())
674 CodeSize +=
TII->getInstSizeInBytes(
MI);
685 if (!UseOp.isImplicit() || !
TII.isFLAT(*UseOp.getParent()))
695 &
ST, UsesVCC, UsesFlatScratch,
ST.getTargetID().isXnackOnOrAny());
700 if (
ST.hasGFX90AInsts() && NumAGPR)
701 return alignTo(NumVGPR, 4) + NumAGPR;
711 return cast<Function>(
Op.getGlobal());
714 AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
716 SIFunctionResourceInfo
Info;
738 Info.UsesFlatScratch =
false;
741 Info.PrivateSegmentSize = FrameInfo.getStackSize();
744 Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
745 if (
Info.HasDynamicallySizedStack)
749 Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();
757 if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
758 MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
761 HighestVGPRReg =
Reg;
766 if (
ST.hasMAIInsts()) {
767 MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
770 HighestAGPRReg =
Reg;
774 Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister ? 0 :
775 TRI.getHWRegIndex(HighestAGPRReg) + 1;
778 MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
781 HighestSGPRReg =
Reg;
788 Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister ? 0 :
789 TRI.getHWRegIndex(HighestVGPRReg) + 1;
790 Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister ? 0 :
791 TRI.getHWRegIndex(HighestSGPRReg) + 1;
796 int32_t MaxVGPR = -1;
797 int32_t MaxAGPR = -1;
798 int32_t MaxSGPR = -1;
799 uint64_t CalleeFrameSize = 0;
815 case AMDGPU::EXEC_LO:
816 case AMDGPU::EXEC_HI:
819 case AMDGPU::SRC_SHARED_BASE:
820 case AMDGPU::SRC_SHARED_LIMIT:
821 case AMDGPU::SRC_PRIVATE_BASE:
822 case AMDGPU::SRC_PRIVATE_LIMIT:
823 case AMDGPU::SGPR_NULL:
827 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
830 case AMDGPU::NoRegister:
831 assert(
MI.isDebugInstr() &&
"Instruction uses invalid noreg register");
837 case AMDGPU::VCC_LO_LO16:
838 case AMDGPU::VCC_LO_HI16:
839 case AMDGPU::VCC_HI_LO16:
840 case AMDGPU::VCC_HI_HI16:
844 case AMDGPU::FLAT_SCR:
845 case AMDGPU::FLAT_SCR_LO:
846 case AMDGPU::FLAT_SCR_HI:
849 case AMDGPU::XNACK_MASK:
850 case AMDGPU::XNACK_MASK_LO:
851 case AMDGPU::XNACK_MASK_HI:
854 case AMDGPU::LDS_DIRECT:
865 case AMDGPU::SRC_VCCZ:
868 case AMDGPU::SRC_EXECZ:
871 case AMDGPU::SRC_SCC:
882 "trap handler registers should not be used");
885 }
else if (AMDGPU::VGPR_32RegClass.
contains(
Reg) ||
890 }
else if (AMDGPU::AGPR_32RegClass.
contains(
Reg) ||
895 }
else if (AMDGPU::SReg_64RegClass.
contains(
Reg)) {
897 "trap handler registers should not be used");
900 }
else if (AMDGPU::VReg_64RegClass.
contains(
Reg)) {
903 }
else if (AMDGPU::AReg_64RegClass.
contains(
Reg)) {
907 }
else if (AMDGPU::VReg_96RegClass.
contains(
Reg)) {
910 }
else if (AMDGPU::SReg_96RegClass.
contains(
Reg)) {
913 }
else if (AMDGPU::AReg_96RegClass.
contains(
Reg)) {
917 }
else if (AMDGPU::SReg_128RegClass.
contains(
Reg)) {
919 "trap handler registers should not be used");
922 }
else if (AMDGPU::VReg_128RegClass.
contains(
Reg)) {
925 }
else if (AMDGPU::AReg_128RegClass.
contains(
Reg)) {
929 }
else if (AMDGPU::VReg_160RegClass.
contains(
Reg)) {
932 }
else if (AMDGPU::SReg_160RegClass.
contains(
Reg)) {
935 }
else if (AMDGPU::AReg_160RegClass.
contains(
Reg)) {
939 }
else if (AMDGPU::VReg_192RegClass.
contains(
Reg)) {
942 }
else if (AMDGPU::SReg_192RegClass.
contains(
Reg)) {
945 }
else if (AMDGPU::AReg_192RegClass.
contains(
Reg)) {
949 }
else if (AMDGPU::SReg_256RegClass.
contains(
Reg)) {
951 "trap handler registers should not be used");
954 }
else if (AMDGPU::VReg_256RegClass.
contains(
Reg)) {
957 }
else if (AMDGPU::AReg_256RegClass.
contains(
Reg)) {
961 }
else if (AMDGPU::SReg_512RegClass.
contains(
Reg)) {
963 "trap handler registers should not be used");
966 }
else if (AMDGPU::VReg_512RegClass.
contains(
Reg)) {
969 }
else if (AMDGPU::AReg_512RegClass.
contains(
Reg)) {
973 }
else if (AMDGPU::SReg_1024RegClass.
contains(
Reg)) {
976 }
else if (AMDGPU::VReg_1024RegClass.
contains(
Reg)) {
979 }
else if (AMDGPU::AReg_1024RegClass.
contains(
Reg)) {
986 unsigned HWReg =
TRI.getHWRegIndex(
Reg);
987 int MaxUsed = HWReg +
Width - 1;
989 MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
991 MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
993 MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
1006 CallGraphResourceInfo.end();
1009 I = CallGraphResourceInfo.find(Callee);
1011 if (IsExternal ||
I == CallGraphResourceInfo.end()) {
1027 MaxSGPR =
std::max(MaxSGPR, MaxSGPRGuess);
1031 CalleeFrameSize =
std::max(CalleeFrameSize,
1034 Info.UsesVCC =
true;
1035 Info.UsesFlatScratch =
ST.hasFlatAddressSpace();
1036 Info.HasDynamicallySizedStack =
true;
1041 MaxSGPR =
std::max(
I->second.NumExplicitSGPR - 1, MaxSGPR);
1042 MaxVGPR =
std::max(
I->second.NumVGPR - 1, MaxVGPR);
1043 MaxAGPR =
std::max(
I->second.NumAGPR - 1, MaxAGPR);
1045 =
std::max(
I->second.PrivateSegmentSize, CalleeFrameSize);
1046 Info.UsesVCC |=
I->second.UsesVCC;
1047 Info.UsesFlatScratch |=
I->second.UsesFlatScratch;
1048 Info.HasDynamicallySizedStack |=
I->second.HasDynamicallySizedStack;
1049 Info.HasRecursion |=
I->second.HasRecursion;
1053 if (!Callee || !
Callee->doesNotRecurse())
1054 Info.HasRecursion =
true;
1059 Info.NumExplicitSGPR = MaxSGPR + 1;
1060 Info.NumVGPR = MaxVGPR + 1;
1061 Info.NumAGPR = MaxAGPR + 1;
1062 Info.PrivateSegmentSize += CalleeFrameSize;
1067 void AMDGPUAsmPrinter::getSIProgramInfo(
SIProgramInfo &ProgInfo,
1069 SIFunctionResourceInfo
Info = analyzeResourceUsage(
MF);
1083 const uint64_t MaxScratchPerWorkitem =
1085 if (ProgInfo.
ScratchSize > MaxScratchPerWorkitem) {
1103 if (ProgInfo.
NumSGPR > MaxAddressableNumSGPRs) {
1107 "addressable scalar registers",
1110 MaxAddressableNumSGPRs);
1112 ProgInfo.
NumSGPR = MaxAddressableNumSGPRs - 1;
1117 ProgInfo.
NumSGPR += ExtraSGPRs;
1123 unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
1129 for (
auto &
Arg :
F.args()) {
1130 unsigned NumRegs = (
DL.getTypeSizeInBits(
Arg.getType()) + 31) / 32;
1131 if (
Arg.hasAttribute(Attribute::InReg))
1132 WaveDispatchNumSGPR += NumRegs;
1134 WaveDispatchNumVGPR += NumRegs;
1150 if (ProgInfo.
NumSGPR > MaxAddressableNumSGPRs) {
1158 MaxAddressableNumSGPRs);
1160 ProgInfo.
NumSGPR = MaxAddressableNumSGPRs;
1202 unsigned LDSAlignShift;
1211 unsigned LDSSpillSize =
1216 alignTo(ProgInfo.
LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
1219 unsigned ScratchAlignShift = 10;
1225 1ULL << ScratchAlignShift) >>
1234 unsigned TIDIGCompCnt = 0;
1257 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
1260 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
1339 MD->setRsrc1(CC, CurrentProgramInfo.
getPGMRSrc1(CC));
1359 void AMDGPUAsmPrinter::emitPALFunctionMetadata(
const MachineFunction &MF) {
1430 if (STM.isXNACKEnabled())
1433 Align MaxKernArgAlign;
1451 if (ExtraCode && ExtraCode[0]) {
1452 if (ExtraCode[1] != 0)
1455 switch (ExtraCode[0]) {
1469 }
else if (MO.
isImm()) {
1470 int64_t Val = MO.
getImm();
1478 O <<
format(
"0x%" PRIx64,
static_cast<uint64_t
>(Val));
void emitBasicBlockStart(const MachineBasicBlock &MBB) override
Targets can override this to emit stuff at the start of a basic block.
MCStreamer & getStreamer()
#define S_00B84C_TGID_Y_EN(x)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
bool isHsaAbiVersion3(const MCSubtargetInfo *STI)
static uint32_t getFPMode(AMDGPU::SIModeRegisterDefaults Mode)
#define FP_ROUND_ROUND_TO_NEAREST
const Optional< AMDGPU::IsaInfo::AMDGPUTargetID > & getTargetID() const
static unsigned getRsrcReg(CallingConv::ID CallConv)
@ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
@ AMDGPU_HS
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
A parsed version of the target data layout string in and methods for querying it.
IsaVersion getIsaVersion(StringRef GPU)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
unsigned getLDSWaveSpillSize() const
Context object for machine code objects.
void setXnackSetting(TargetIDSetting NewXnackSetting)
Sets xnack setting to NewXnackSetting.
unsigned getNumSpilledSGPRs() const
@ AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK
return AArch64::GPR64RegClass contains(Reg)
virtual void EmitDirectiveAMDGCNTarget()=0
#define FP_DENORM_MODE_DP(x)
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
uint64_t compute_pgm_resource_registers
Shader program settings for CS.
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
void emitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
bool doFinalization(Module &M) override
Shut down the asmprinter.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS
#define R_0286CC_SPI_PS_INPUT_ENA
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
amd_element_byte_size_t
The values used to define the number of bytes to use for the swizzle element size.
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
bool isDefined() const
isDefined - Check if this symbol is defined (i.e., it has an address).
uint32_t workgroup_group_segment_byte_size
The amount of group segment memory required by a work-group in bytes.
bool isTrapHandlerEnabled() const
unsigned getLDSSize() const
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const override
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
#define AMD_HSA_BITS_SET(dst, mask, val)
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor)=0
unsigned getPSInputEnable() const
#define S_00B84C_USER_SGPR(x)
@ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header)=0
This represents a section on linux, lots of unix variants and some bare metal systems.
uint32_t NumSGPRsForWavesPerEU
MCContext & getContext() const
uint32_t compute_pgm_rsrc2
unsigned const TargetRegisterInfo * TRI
#define R_0286D0_SPI_PS_INPUT_ADDR
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
void initializeTargetID(const MCSubtargetInfo &STI)
virtual void emitGlobalVariable(const GlobalVariable *GV)
Emit the specified global variable to the .s file.
bool hasInitializer() const
Definitions have initializers, declarations don't.
const SIInstrInfo * getInstrInfo() const override
#define S_00B84C_TIDIG_COMP_CNT(x)
Instruction set architecture version.
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
#define S_00B84C_SCRATCH_EN(x)
bool isGFX90A(const MCSubtargetInfo &STI)
static const Function * getCalleeFunction(const MachineOperand &Op)
#define S_00B84C_EXCP_EN_MSB(x)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
bool needsWaveLimiter() const
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
bool isShader(CallingConv::ID cc)
virtual void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const
This emits linkage information about GVSym based on GV, if this is supported by the target.
#define S_00B84C_TGID_Z_EN(x)
void setAlignment(Align A)
setAlignment - Set the alignment of the function.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
#define S_00B02C_EXTRA_LDS_SIZE(x)
@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
#define G_00B84C_TRAP_HANDLER(x)
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
unsigned Log2(Align A)
Returns the log2 of the alignment.
const Triple & getTargetTriple() const
uint32_t NumVGPRsForWavesPerEU
bool hasWorkGroupIDZ() const
virtual void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
virtual void emitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
static cl::opt< uint32_t > AssumedStackSizeForExternalCall("amdgpu-assume-external-call-stack-size", cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden, cl::init(16384))
uint32_t group_segment_fixed_size
Diagnostic information for stack size etc.
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
const HexagonInstrInfo * TII
MachineOperand class - Representation of each machine instruction operand.
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
unsigned M0(unsigned Val)
bool hasSGPRInitBug() const
#define G_00B84C_TGID_Y_EN(x)
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
uint32_t workitem_private_segment_byte_size
The amount of memory required for the combined private, spill and arg segments for a work-item in byt...
This class implements an extremely fast bulk output stream that can only output to a stream.
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)
uint64_t ComputePGMRSrc3GFX90A
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
#define AMDHSA_BITS_GET(SRC, MSK)
void emitStartOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the start of their fi...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
uint16_t kernel_code_properties
MCCodeEmitter * getEmitterPtr() const
Analysis containing CSE Info
StringRef getName() const
getName - Get the symbol name.
bool isTgSplitEnabled() const
This struct is a compact representation of a valid (non-zero power of two) alignment.
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
unsigned getLocalMemorySize() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
bool isGFX10Plus(const MCSubtargetInfo &STI)
uint32_t compute_pgm_rsrc3
AMDGPU::SIModeRegisterDefaults getMode() const
bool isEntryFunctionCC(CallingConv::ID CC)
bool isHsaAbiVersion2(const MCSubtargetInfo *STI)
virtual void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr)=0
unsigned getWavefrontSize() const
#define S_00B84C_TGID_X_EN(x)
#define R_00B84C_COMPUTE_PGM_RSRC2
static const unsigned MaxWaveScratchSize
@ AMD_CODE_PROPERTY_IS_PTR64
#define R_0286E8_SPI_TMPRING_SIZE
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
constexpr T getValueOr(U &&value) const LLVM_LVALUE_FUNCTION
uint32_t private_segment_fixed_size
@ AMDGPU_GS
Calling convention used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_KERNEL
Calling convention for AMDGPU code object kernels.
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
constexpr bool isUInt< 16 >(uint64_t x)
bool isStackRealigned() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Representation of each machine instruction.
VisibilityTypes getVisibility() const
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
bool hasWorkGroupInfo() const
Module * getParent()
Get the module that this global value is contained inside of...
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
#define LLVM_EXTERNAL_VISIBILITY
const MCSubtargetInfo * getMCSubtargetInfo() const
unsigned getMaxWavesPerEU() const
void emitEndOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the end of their file...
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
bool hasDispatchID() const
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
This is an important class for using LLVM in a threaded context.
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
#define R_00B848_COMPUTE_PGM_RSRC1
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
unsigned getNumSpilledVGPRs() const
bool isEntryFunction() const
initializer< Ty > init(const Ty &Val)
virtual bool EmitISAVersion()=0
AMDGPUTargetStreamer * getTargetStreamer() const
bool isVariable() const
isVariable - Check if this is a variable symbol.
iterator_range< reg_iterator > reg_operands(Register Reg) const
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS
constexpr bool isUInt< 32 >(uint64_t x)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
MCSymbol * getSymbol(const GlobalValue *GV) const
Primary interface to the complete machine description for the target machine.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
std::vector< std::string > DisasmLines
const MCSubtargetInfo * getGlobalSTI() const
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
Target & getTheGCNTarget()
The target for GCN GPUs.
void reportError(SMLoc L, const Twine &Msg)
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
unsigned getAddressableNumSGPRs() const
MachineFunction * MF
The current machine function.
MCContext & OutContext
This is the context for the output file that we are streaming.
uint64_t getPGMRSrc1(CallingConv::ID CC) const
bool hasGFX90AInsts() const
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
Register getReg() const
getReg - Returns the register number.
A Module instance is used to store all the information related to an LLVM module.
bool isMemoryBound() const
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
uint8_t kernarg_segment_alignment
The maximum byte alignment of variables used by the kernel in the specified memory segment.
uint64_t kernarg_segment_byte_size
The size in bytes of the kernarg segment that holds the values of the arguments to the kernel.
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
#define FP_DENORM_MODE_SP(x)
void redefineIfPossible()
Prepare this symbol to be redefined.
#define S_0286E8_WAVESIZE(x)
bool hasWorkGroupIDX() const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
static cl::opt< uint32_t > AssumedStackSizeForDynamicSizeObjects("amdgpu-assume-dynamic-stack-object-size", cl::desc("Assumed extra stack use if there are any " "variable sized objects (in bytes)"), cl::Hidden, cl::init(4096))
virtual bool EmitCodeEnd(const MCSubtargetInfo &STI)=0
StringRef - Represent a constant reference to a string, i.e.
void emitFunctionBodyEnd() override
Targets can override this to emit stuff after the last basic block in the function.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isCompute(CallingConv::ID cc)
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, Optional< bool > EnableWavefrontSize32)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define G_00B84C_SCRATCH_EN(x)
AMD Kernel Code Object (amd_kernel_code_t).
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
bool hasPrivateSegmentBuffer() const
#define AMDHSA_BITS_SET(DST, MSK, VAL)
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
StringRef getName() const
Return a constant reference to the value's name.
#define G_00B84C_TGID_Z_EN(x)
#define FP_ROUND_MODE_SP(x)
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
#define FP_ROUND_MODE_DP(x)
unsigned const MachineRegisterInfo * MRI
Wrapper class representing virtual and physical registers.
@ AMDGPU_LS
Calling convention used for AMDPAL vertex shader if tessellation is in use.
unsigned getNumUserSGPRs() const
bool hasWorkItemIDZ() const
amdgpu Simplify well known AMD library false FunctionCallee Callee
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
@ SPIR_KERNEL
SPIR_KERNEL - Calling convention for SPIR kernel functions.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type)=0
#define S_00B84C_TG_SIZE_EN(x)
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
unsigned getMaxFlatWorkGroupSize() const
Function & getFunction()
Return the LLVM function that this machine code represents.
bool hasDispatchPtr() const
#define S_00B84C_EXCP_EN(x)
bool isModuleEntryFunction() const
uint32_t code_properties
Code properties.
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
@ LOCAL_ADDRESS
Address space for local memory.
AsmPrinter * createR600AsmPrinterPass(TargetMachine &TM, std::unique_ptr< MCStreamer > &&Streamer)
#define R_00B860_COMPUTE_TMPRING_SIZE
bool hasWorkItemIDY() const
This class is intended to be used as a driving class for all asm writers.
#define S_00B028_SGPRS(x)
bool isPhysRegUsed(MCRegister PhysReg) const
Return true if the specified register is modified or read in this function.
void emitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition=true) const
This emits visibility information about symbol, if this is supported by the target.
unsigned getMaxNumUserSGPRs() const
virtual void EmitDirectiveHSACodeObjectISAV2(uint32_t Major, uint32_t Minor, uint32_t Stepping, StringRef VendorName, StringRef ArchName)=0
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
uint64_t getComputePGMRSrc1() const
Compute the value of the ComputePGMRsrc1 register.
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
uint32_t compute_pgm_rsrc1
bool doFinalization(Module &M) override
Shut down the asmprinter.
@ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE
unsigned getAddressSpace() const
Generation getGeneration() const
uint16_t workitem_vgpr_count
Number of vector registers used by each work-item.
Track resource usage for kernels / entry functions.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
#define G_00B84C_USER_SGPR(x)
bool isCuModeEnabled() const
#define G_00B84C_TIDIG_COMP_CNT(x)
TargetMachine & TM
Target machine description.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.
#define S_00B028_VGPRS(x)
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
void emitFunctionBody()
This method emits the body and trailer for a function.
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
const Triple & getTargetTriple() const
Align max(MaybeAlign Lhs, Align Rhs)
#define S_00B84C_LDS_SIZE(x)
#define G_00B84C_TGID_X_EN(x)
bool hasWorkGroupIDY() const
void emitFunctionEntryLabel() override
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
bool hasKernargSegmentPtr() const
unsigned getPSInputAddr() const
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Type * getValueType() const
@ AMDGPU_ES
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
@ AMDGPU_PS
Calling convention used for Mesa/AMDPAL pixel shaders.
Here we don t need to write any variables to the top of the stack since they don t overwrite each other int callee(int32 arg1, int32 arg2)
AMDGPUPALMetadata * getPALMetadata()
@ AMDGPU_VS
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
bool isVerbose() const
Return true if assembly output should contain comments.
void LLVM_EXTERNAL_VISIBILITY LLVMInitializeAMDGPUAsmPrinter()
bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI)
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
std::vector< std::string > HexLines
void emitGlobalVariable(const GlobalVariable *GV) override
Emit the specified global variable to the .s file.
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
Generic base class for all target subtargets.
@ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED
unsigned getFunctionNumber() const
Return a unique ID for the current function.
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
uint16_t wavefront_sgpr_count
Number of scalar registers used by a wavefront.
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
@ AMDGPU_CS
Calling convention used for Mesa/AMDPAL compute shaders.
bool hasFlatScratchInit() const
#define S_00B84C_TRAP_HANDLER(x)
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
static void printRegOperand(unsigned RegNo, raw_ostream &O, const MCRegisterInfo &MRI)
static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI, const SIInstrInfo &TII, unsigned Reg)
#define S_00B860_WAVESIZE(x)