Go to the documentation of this file.
75 std::unique_ptr<MCStreamer> &&Streamer) {
87 std::unique_ptr<MCStreamer> Streamer)
103 return "AMDGPU Assembly Printer";
120 void AMDGPUAsmPrinter::initTargetStreamer(
Module &M) {
126 initializeTargetID(M);
157 initTargetStreamer(
M);
170 HSAMetadataStream->end();
199 initializeTargetID(*
F.getParent());
201 const auto &FunctionTargetID = STM.getTargetID();
204 if (FunctionTargetID.isXnackSupported() &&
208 "' function does not match module xnack setting");
213 if (FunctionTargetID.isSramEccSupported() &&
217 "' function does not match module sramecc setting");
228 getAmdKernelCode(KernelCode, CurrentProgramInfo, *
MF);
232 if (STM.isAmdHsaOS())
233 HSAMetadataStream->emitKernel(*
MF, CurrentProgramInfo);
246 auto &
Context = Streamer.getContext();
247 auto &ObjectFileInfo = *
Context.getObjectFileInfo();
248 auto &ReadOnlySection = *ObjectFileInfo.getReadOnlySection();
250 Streamer.PushSection();
251 Streamer.SwitchSection(&ReadOnlySection);
255 Streamer.emitValueToAlignment(64, 0, 1, 0);
256 if (ReadOnlySection.getAlignment() < 64)
257 ReadOnlySection.setAlignment(
Align(64));
264 STM, KernelName, getAmdhsaKernelDescriptor(*
MF, CurrentProgramInfo),
272 Streamer.PopSection();
290 if (DumpCodeInstEmitter) {
317 ": unsupported initializer for address space");
331 "' is already defined");
340 TS->emitAMDGPULDS(GVSym, Size, Alignment);
363 void AMDGPUAsmPrinter::emitCommonFunctionComments(
384 uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
390 KernelCodeProperties |=
391 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
394 KernelCodeProperties |=
395 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
398 KernelCodeProperties |=
399 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
402 KernelCodeProperties |=
403 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
406 KernelCodeProperties |=
407 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
410 KernelCodeProperties |=
411 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
414 KernelCodeProperties |=
415 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
418 return KernelCodeProperties;
428 memset(&KernelDescriptor, 0x0,
sizeof(KernelDescriptor));
437 Align MaxKernArgAlign;
449 return KernelDescriptor;
458 ResourceUsage = &getAnalysis<AMDGPUResourceUsageAnalysis>();
479 getSIProgramInfo(CurrentProgramInfo,
MF);
484 EmitPALMetadata(
MF, CurrentProgramInfo);
486 emitPALFunctionMetadata(
MF);
488 EmitProgramInfoSI(
MF, CurrentProgramInfo);
491 DumpCodeInstEmitter =
nullptr;
495 bool SaveFlag =
OutStreamer->getUseAssemblerInfoForParsing();
498 OutStreamer->setUseAssemblerInfoForParsing(SaveFlag);
515 OutStreamer->emitRawComment(
" Function info:",
false);
518 emitCommonFunctionComments(
521 Info.getTotalNumVGPRs(STM),
523 Info.PrivateSegmentSize,
524 getFunctionCodeSize(
MF), MFI);
528 OutStreamer->emitRawComment(
" Kernel info:",
false);
529 emitCommonFunctionComments(CurrentProgramInfo.
NumArchVGPR,
536 getFunctionCodeSize(
MF), MFI);
544 " bytes/workgroup (compile time only)",
false);
552 " NumSGPRsForWavesPerEU: " +
555 " NumVGPRsForWavesPerEU: " +
571 " COMPUTE_PGM_RSRC2:SCRATCH_EN: " +
574 " COMPUTE_PGM_RSRC2:USER_SGPR: " +
577 " COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
580 " COMPUTE_PGM_RSRC2:TGID_X_EN: " +
583 " COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
586 " COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
589 " COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
597 " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
599 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET))),
602 " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
604 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT))),
609 if (DumpCodeInstEmitter) {
615 std::string Comment =
"\n";
630 void AMDGPUAsmPrinter::initializeTargetID(
const Module &M) {
644 if ((!TSTargetID->isXnackSupported() || TSTargetID->isXnackOnOrOff()) &&
645 (!TSTargetID->isSramEccSupported() || TSTargetID->isSramEccOnOrOff()))
650 if (TSTargetID->isXnackSupported())
653 if (TSTargetID->isSramEccSupported())
655 TSTargetID->setSramEccSetting(STMTargetID.getSramEccSetting());
670 if (
MI.isDebugInstr())
673 CodeSize +=
TII->getInstSizeInBytes(
MI);
680 void AMDGPUAsmPrinter::getSIProgramInfo(
SIProgramInfo &ProgInfo,
697 const uint64_t MaxScratchPerWorkitem =
699 if (ProgInfo.
ScratchSize > MaxScratchPerWorkitem) {
718 if (ProgInfo.
NumSGPR > MaxAddressableNumSGPRs) {
725 ProgInfo.
NumSGPR = MaxAddressableNumSGPRs - 1;
730 ProgInfo.
NumSGPR += ExtraSGPRs;
736 unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
745 unsigned LastEna = 0;
758 assert((InputEna || InputAddr) &&
759 "PSInputAddr and PSInputEnable should "
760 "never both be 0 for AMDGPU_PS shaders");
764 LastEna = InputEna ?
findLastSet(InputEna) + 1 : 1;
770 unsigned PSArgCount = 0;
771 unsigned IntermediateVGPR = 0;
772 for (
auto &
Arg :
F.args()) {
773 unsigned NumRegs = (
DL.getTypeSizeInBits(
Arg.getType()) + 31) / 32;
774 if (
Arg.hasAttribute(Attribute::InReg)) {
775 WaveDispatchNumSGPR += NumRegs;
782 if (IsPixelShader && PSArgCount < 16) {
783 if ((1 << PSArgCount) & InputAddr) {
784 if (PSArgCount < LastEna)
785 WaveDispatchNumVGPR += NumRegs;
787 IntermediateVGPR += NumRegs;
793 if (IntermediateVGPR) {
794 WaveDispatchNumVGPR += IntermediateVGPR;
795 IntermediateVGPR = 0;
797 WaveDispatchNumVGPR += NumRegs;
817 if (ProgInfo.
NumSGPR > MaxAddressableNumSGPRs) {
822 ProgInfo.
NumSGPR, MaxAddressableNumSGPRs,
825 ProgInfo.
NumSGPR = MaxAddressableNumSGPRs;
869 unsigned LDSAlignShift;
880 alignTo(ProgInfo.
LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
883 unsigned ScratchAlignShift = 10;
889 1ULL << ScratchAlignShift) >>
898 unsigned TIDIGCompCnt = 0;
921 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
924 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
1006 MD->setNumUsedAgprs(CC, CurrentProgramInfo.
NumAccVGPR);
1010 MD->setRsrc1(CC, CurrentProgramInfo.
getPGMRSrc1(CC));
1029 void AMDGPUAsmPrinter::emitPALFunctionMetadata(
const MachineFunction &MF) {
1040 MD->setFunctionLdsSize(
MF, CurrentProgramInfo.
LDSSize);
1106 if (STM.isXNACKEnabled())
1109 Align MaxKernArgAlign;
1128 if (ExtraCode && ExtraCode[0]) {
1129 if (ExtraCode[1] != 0)
1132 switch (ExtraCode[0]) {
1146 }
else if (MO.
isImm()) {
1147 int64_t Val = MO.
getImm();
void emitBasicBlockStart(const MachineBasicBlock &MBB) override
Targets can override this to emit stuff at the start of a basic block.
MCStreamer & getStreamer()
#define S_00B84C_TGID_Y_EN(x)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
bool isHsaAbiVersion3(const MCSubtargetInfo *STI)
static uint32_t getFPMode(AMDGPU::SIModeRegisterDefaults Mode)
#define FP_ROUND_ROUND_TO_NEAREST
const Optional< AMDGPU::IsaInfo::AMDGPUTargetID > & getTargetID() const
static unsigned getRsrcReg(CallingConv::ID CallConv)
@ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
This is an optimization pass for GlobalISel generic memory operations.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
We currently emit: ... eax. Perhaps this is what we really should generate? Is imull three or four cycles? ... eax, eax. The current instruction priority is based on pattern complexity. The former is more complex because it folds a load, so the latter will not be emitted. Perhaps we should use AddedComplexity to give LEA32r a higher priority? We should always try to match LEA first, since the LEA matching code does some estimate to determine whether the match is profitable. If we care more about code size, then imull is better. It's two bytes shorter than movl + leal. On a Pentium M...
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
A parsed version of the target data layout string, and methods for querying it.
IsaVersion getIsaVersion(StringRef GPU)
Context object for machine code objects.
void setXnackSetting(TargetIDSetting NewXnackSetting)
Sets xnack setting to NewXnackSetting.
unsigned getNumSpilledSGPRs() const
@ AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK
virtual void EmitDirectiveAMDGCNTarget()=0
#define FP_DENORM_MODE_DP(x)
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
uint64_t compute_pgm_resource_registers
Shader program settings for CS.
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
void emitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
void getAnalysisUsage(AnalysisUsage &AU) const override
Record analysis usage.
bool doFinalization(Module &M) override
Shut down the asmprinter.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS
unsigned getAmdhsaCodeObjectVersion()
#define R_0286CC_SPI_PS_INPUT_ENA
uint32_t getLDSSize() const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
amd_element_byte_size_t
The values used to define the number of bytes to use for the swizzle element size.
bool isDefined() const
isDefined - Check if this symbol is defined (i.e., it has an address).
uint32_t workgroup_group_segment_byte_size
The amount of group segment memory required by a work-group in bytes.
bool isTrapHandlerEnabled() const
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const override
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
#define AMD_HSA_BITS_SET(dst, mask, val)
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor)=0
unsigned getPSInputEnable() const
#define S_00B84C_USER_SGPR(x)
@ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header)=0
This represents a section on Linux, lots of Unix variants and some bare-metal systems.
uint32_t NumSGPRsForWavesPerEU
MCContext & getContext() const
uint32_t compute_pgm_rsrc2
#define R_0286D0_SPI_PS_INPUT_ADDR
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
void initializeTargetID(const MCSubtargetInfo &STI)
virtual void emitGlobalVariable(const GlobalVariable *GV)
Emit the specified global variable to the .s file.
bool hasInitializer() const
Definitions have initializers, declarations don't.
@ AMDGPU_HS
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
const SIInstrInfo * getInstrInfo() const override
#define S_00B84C_TIDIG_COMP_CNT(x)
Instruction set architecture version.
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
#define S_00B84C_SCRATCH_EN(x)
@ AMDGPU_LS
Calling convention used for AMDPAL vertex shader if tessellation is in use.
bool isGFX90A(const MCSubtargetInfo &STI)
#define S_00B84C_EXCP_EN_MSB(x)
bool needsWaveLimiter() const
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
bool isShader(CallingConv::ID cc)
virtual void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const
This emits linkage information about GVSym based on GV, if this is supported by the target.
#define S_00B84C_TGID_Z_EN(x)
void setAlignment(Align A)
setAlignment - Set the alignment of the function.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
#define S_00B02C_EXTRA_LDS_SIZE(x)
@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
#define G_00B84C_TRAP_HANDLER(x)
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
unsigned Log2(Align A)
Returns the log2 of the alignment.
const Triple & getTargetTriple() const
uint32_t NumVGPRsForWavesPerEU
bool hasWorkGroupIDZ() const
Represent the analysis usage information of a pass.
virtual void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
virtual void emitFunctionEntryLabel()
emitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
constexpr T getValueOr(U &&value) const &
uint32_t group_segment_fixed_size
Diagnostic information for stack size etc.
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
const HexagonInstrInfo * TII
const SIFunctionResourceInfo & getResourceInfo(const Function *F) const
MachineOperand class - Representation of each machine instruction operand.
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
bool IsTargetStreamerInitialized
bool hasSGPRInitBug() const
#define G_00B84C_TGID_Y_EN(x)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
uint32_t workitem_private_segment_byte_size
The amount of memory required for the combined private, spill and arg segments for a work-item in byt...
This class implements an extremely fast bulk output stream that can only output to a stream.
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)
uint64_t ComputePGMRSrc3GFX90A
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
#define AMDHSA_BITS_GET(SRC, MSK)
void emitStartOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the start of their fi...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
uint16_t kernel_code_properties
MCCodeEmitter * getEmitterPtr() const
Analysis containing CSE Info
StringRef getName() const
getName - Get the symbol name.
bool isTgSplitEnabled() const
This struct is a compact representation of a valid (non-zero power of two) alignment.
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
unsigned getLocalMemorySize() const
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
bool isGFX10Plus(const MCSubtargetInfo &STI)
uint32_t compute_pgm_rsrc3
AMDGPU::SIModeRegisterDefaults getMode() const
bool isHsaAbiVersion2(const MCSubtargetInfo *STI)
virtual void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr)=0
unsigned getWavefrontSize() const
#define S_00B84C_TGID_X_EN(x)
#define R_00B84C_COMPUTE_PGM_RSRC2
static const unsigned MaxWaveScratchSize
@ AMD_CODE_PROPERTY_IS_PTR64
#define R_0286E8_SPI_TMPRING_SIZE
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
uint32_t private_segment_fixed_size
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Analyzes how many registers and other resources are used by functions.
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
@ AMDGPU_KERNEL
Calling convention for AMDGPU code object kernels.
constexpr bool isUInt< 16 >(uint64_t x)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Representation of each machine instruction.
VisibilityTypes getVisibility() const
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
bool hasWorkGroupInfo() const
Module * getParent()
Get the module that this global value is contained inside of...
OSType getOS() const
Get the parsed operating system type of this triple.
#define LLVM_EXTERNAL_VISIBILITY
const MCSubtargetInfo * getMCSubtargetInfo() const
unsigned getMaxWavesPerEU() const
void emitEndOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the end of their file...
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
bool hasDispatchID() const
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
This is an important class for using LLVM in a threaded context.
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
#define R_00B848_COMPUTE_PGM_RSRC1
unsigned getNumSpilledVGPRs() const
bool isEntryFunction() const
virtual bool EmitISAVersion()=0
AMDGPUTargetStreamer * getTargetStreamer() const
bool isVariable() const
isVariable - Check if this is a variable symbol.
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS
constexpr bool isUInt< 32 >(uint64_t x)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
MCSymbol * getSymbol(const GlobalValue *GV) const
Primary interface to the complete machine description for the target machine.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
std::vector< std::string > DisasmLines
const MCSubtargetInfo * getGlobalSTI() const
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
Target & getTheGCNTarget()
The target for GCN GPUs.
void reportError(SMLoc L, const Twine &Msg)
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
unsigned getAddressableNumSGPRs() const
MachineFunction * MF
The current machine function.
MCContext & OutContext
This is the context for the output file that we are streaming.
uint64_t getPGMRSrc1(CallingConv::ID CC) const
T findLastSet(T Val, ZeroBehavior ZB=ZB_Max)
Get the index of the last set bit starting from the least significant bit.
bool hasGFX90AInsts() const
Register getReg() const
getReg - Returns the register number.
A Module instance is used to store all the information related to an LLVM module.
bool isMemoryBound() const
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
uint8_t kernarg_segment_alignment
The maximum byte alignment of variables used by the kernel in the specified memory segment.
uint64_t kernarg_segment_byte_size
The size in bytes of the kernarg segment that holds the values of the arguments to the kernel.
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
#define FP_DENORM_MODE_SP(x)
void redefineIfPossible()
Prepare this symbol to be redefined.
#define S_0286E8_WAVESIZE(x)
bool isHsaAbiVersion5(const MCSubtargetInfo *STI)
bool hasWorkGroupIDX() const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
virtual bool EmitCodeEnd(const MCSubtargetInfo &STI)=0
StringRef - Represent a constant reference to a string, i.e.
void emitFunctionBodyEnd() override
Targets can override this to emit stuff after the last basic block in the function.
void getAnalysisUsage(AnalysisUsage &AU) const override
Record analysis usage.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isCompute(CallingConv::ID cc)
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, Optional< bool > EnableWavefrontSize32)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define G_00B84C_SCRATCH_EN(x)
@ SPIR_KERNEL
SPIR_KERNEL - Calling convention for SPIR kernel functions.
AMD Kernel Code Object (amd_kernel_code_t).
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
bool hasPrivateSegmentBuffer() const
#define AMDHSA_BITS_SET(DST, MSK, VAL)
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
StringRef getName() const
Return a constant reference to the value's name.
#define G_00B84C_TGID_Z_EN(x)
#define FP_ROUND_MODE_SP(x)
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
#define FP_ROUND_MODE_DP(x)
@ AMDGPU_GS
Calling convention used for Mesa/AMDPAL geometry shaders.
unsigned getNumUserSGPRs() const
bool hasWorkItemIDZ() const
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type)=0
#define S_00B84C_TG_SIZE_EN(x)
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Function & getFunction()
Return the LLVM function that this machine code represents.
bool hasDispatchPtr() const
#define S_00B84C_EXCP_EN(x)
bool isModuleEntryFunction() const
uint32_t code_properties
Code properties.
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
AsmPrinter * createR600AsmPrinterPass(TargetMachine &TM, std::unique_ptr< MCStreamer > &&Streamer)
#define R_00B860_COMPUTE_TMPRING_SIZE
bool hasWorkItemIDY() const
This class is intended to be used as a driving class for all asm writers.
#define S_00B028_SGPRS(x)
void emitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition=true) const
This emits visibility information about symbol, if this is supported by the target.
unsigned getMaxNumUserSGPRs() const
virtual void EmitDirectiveHSACodeObjectISAV2(uint32_t Major, uint32_t Minor, uint32_t Stepping, StringRef VendorName, StringRef ArchName)=0
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
uint64_t getComputePGMRSrc1() const
Compute the value of the ComputePGMRsrc1 register.
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
uint32_t compute_pgm_rsrc1
bool doFinalization(Module &M) override
Shut down the asmprinter.
@ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE
@ LOCAL_ADDRESS
Address space for local memory.
unsigned getAddressSpace() const
Generation getGeneration() const
uint16_t workitem_vgpr_count
Number of vector registers used by each work-item.
Track resource usage for kernels / entry functions.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
#define G_00B84C_USER_SGPR(x)
@ AMDGPU_VS
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
bool isCuModeEnabled() const
#define G_00B84C_TIDIG_COMP_CNT(x)
TargetMachine & TM
Target machine description.
@ AMDGPU_ES
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.
#define S_00B028_VGPRS(x)
@ AMDGPU_CS
Calling convention used for Mesa/AMDPAL compute shaders.
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
This class keeps track of the SPI_PS_INPUT_ADDR config register, which tells the hardware which inter...
void emitFunctionBody()
This method emits the body and trailer for a function.
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
const Triple & getTargetTriple() const
bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI)
Align max(MaybeAlign Lhs, Align Rhs)
#define S_00B84C_LDS_SIZE(x)
#define G_00B84C_TGID_X_EN(x)
bool hasWorkGroupIDY() const
void emitFunctionEntryLabel() override
emitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
bool hasKernargSegmentPtr() const
unsigned getPSInputAddr() const
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Type * getValueType() const
const char LLVMTargetMachineRef TM
@ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
AnalysisUsage & addRequired()
AMDGPUPALMetadata * getPALMetadata()
bool isVerbose() const
Return true if assembly output should contain comments.
void LLVM_EXTERNAL_VISIBILITY LLVMInitializeAMDGPUAsmPrinter()
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
std::vector< std::string > HexLines
void emitGlobalVariable(const GlobalVariable *GV) override
Emit the specified global variable to the .s file.
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS
Generic base class for all target subtargets.
@ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED
unsigned getFunctionNumber() const
Return a unique ID for the current function.
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
uint16_t wavefront_sgpr_count
Number of scalar registers used by a wavefront.
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
bool hasFlatScratchInit() const
@ AMDGPU_PS
Calling convention used for Mesa/AMDPAL pixel shaders.
#define S_00B84C_TRAP_HANDLER(x)
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
static void printRegOperand(unsigned RegNo, raw_ostream &O, const MCRegisterInfo &MRI)
#define S_00B860_WAVESIZE(x)