42 cl::desc(
"Force a specific generic_v<N> flag to be "
43 "added. For testing purposes only."),
48 if (!HSAMetadataDoc.
fromYAML(HSAMetadataString))
245 OS <<
"\t.amdgcn_target \"" <<
getTargetID()->toString() <<
"\"\n";
251 OS <<
"\t.amdhsa_code_object_version " << COV <<
'\n';
260 OS <<
"\t.amd_kernel_code_t\n";
261 Header.EmitKernelCodeT(OS,
getContext(), FoldAndPrint);
262 OS <<
"\t.end_amd_kernel_code_t\n";
270 OS <<
"\t.amdgpu_hsa_kernel " << SymbolName <<
'\n' ;
277 OS <<
"\t.amdgpu_lds " << Symbol->getName() <<
", " <<
Size <<
", "
278 << Alignment.
value() <<
'\n';
287#define PRINT_RES_INFO(ARG) \
289 ARG->print(OS, getContext().getAsmInfo()); \
291 getContext().getAsmInfo()->printExpr(OS, *ARG->getVariableValue()); \
292 Streamer.addBlankLine();
310#define PRINT_RES_INFO(ARG) \
312 ARG->print(OS, getContext().getAsmInfo()); \
314 getContext().getAsmInfo()->printExpr(OS, *ARG->getVariableValue()); \
315 Streamer.addBlankLine();
325 OS <<
"\t.amd_amdgpu_isa \"" <<
getTargetID()->toString() <<
"\"\n";
332 if (!Verifier.verify(HSAMetadataDoc.
getRoot()))
335 std::string HSAMetadataString;
337 HSAMetadataDoc.
toYAML(StrOS);
340 OS << StrOS.
str() <<
'\n';
346 const uint32_t Encoded_s_code_end = 0xbf9f0000;
347 const uint32_t Encoded_s_nop = 0xbf800000;
348 uint32_t Encoded_pad = Encoded_s_code_end;
358 Encoded_pad = Encoded_s_nop;
362 OS <<
"\t.p2alignl " << Log2CacheLineSize <<
", " << Encoded_pad <<
'\n';
363 OS <<
"\t.fill " << (FillSize / 4) <<
", 4, " << Encoded_pad <<
'\n';
371 const MCExpr *ReserveFlatScr) {
375 OS <<
"\t.amdhsa_kernel " << KernelName <<
'\n';
380 const MCExpr *ShiftedAndMaskedExpr =
392 OS <<
"\t\t.amdhsa_group_segment_fixed_size ";
396 OS <<
"\t\t.amdhsa_private_segment_fixed_size ";
400 OS <<
"\t\t.amdhsa_kernarg_size ";
406 amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
407 amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT,
408 ".amdhsa_user_sgpr_count");
411 amdhsa::COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
412 amdhsa::COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT,
413 ".amdhsa_user_sgpr_count");
419 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
420 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
421 ".amdhsa_user_sgpr_private_segment_buffer");
423 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
424 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,
425 ".amdhsa_user_sgpr_dispatch_ptr");
427 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
428 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,
429 ".amdhsa_user_sgpr_queue_ptr");
431 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
432 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
433 ".amdhsa_user_sgpr_kernarg_segment_ptr");
435 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
436 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,
437 ".amdhsa_user_sgpr_dispatch_id");
440 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
441 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
442 ".amdhsa_user_sgpr_flat_scratch_init");
445 amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,
446 ".amdhsa_user_sgpr_kernarg_preload_length");
448 amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,
449 ".amdhsa_user_sgpr_kernarg_preload_offset");
453 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
454 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
455 ".amdhsa_user_sgpr_private_segment_size");
456 if (IVersion.
Major >= 10)
458 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
459 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
460 ".amdhsa_wavefront_size32");
463 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,
464 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,
465 ".amdhsa_uses_dynamic_stack");
467 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,
468 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,
470 ?
".amdhsa_enable_private_segment"
471 :
".amdhsa_system_sgpr_private_segment_wavefront_offset"));
473 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
474 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,
475 ".amdhsa_system_sgpr_workgroup_id_x");
477 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,
478 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
479 ".amdhsa_system_sgpr_workgroup_id_y");
481 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,
482 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,
483 ".amdhsa_system_sgpr_workgroup_id_z");
485 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,
486 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,
487 ".amdhsa_system_sgpr_workgroup_info");
489 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,
490 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,
491 ".amdhsa_system_vgpr_workitem_id");
494 OS <<
"\t\t.amdhsa_next_free_vgpr ";
495 EmitMCExpr(NextVGPR);
498 OS <<
"\t\t.amdhsa_next_free_sgpr ";
499 EmitMCExpr(NextSGPR);
506 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
507 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
getContext());
512 OS <<
"\t\t.amdhsa_accum_offset ";
520 amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
521 amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
522 ".amdhsa_named_barrier_count");
524 OS <<
"\t\t.amdhsa_reserve_vcc ";
525 EmitMCExpr(ReserveVCC);
529 OS <<
"\t\t.amdhsa_reserve_flat_scratch ";
530 EmitMCExpr(ReserveFlatScr);
540 OS <<
"\t\t.amdhsa_reserve_xnack_mask " <<
getTargetID()->isXnackOnOrAny() <<
'\n';
545 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT,
546 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,
547 ".amdhsa_float_round_mode_32");
549 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT,
550 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64,
551 ".amdhsa_float_round_mode_16_64");
553 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT,
554 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32,
555 ".amdhsa_float_denorm_mode_32");
557 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
558 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
559 ".amdhsa_float_denorm_mode_16_64");
560 if (IVersion.
Major < 12) {
562 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
563 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
564 ".amdhsa_dx10_clamp");
566 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
567 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,
568 ".amdhsa_ieee_mode");
570 if (IVersion.
Major >= 9) {
572 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT,
573 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL,
574 ".amdhsa_fp16_overflow");
578 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
579 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
".amdhsa_tg_split");
582 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
583 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
584 ".amdhsa_workgroup_processor_mode");
585 if (IVersion.
Major >= 10) {
587 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
588 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED,
589 ".amdhsa_memory_ordered");
591 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,
592 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS,
593 ".amdhsa_forward_progress");
595 if (IVersion.
Major >= 10 && IVersion.
Major < 12) {
597 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT,
598 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
599 ".amdhsa_shared_vgpr_count");
601 if (IVersion.
Major == 11) {
603 amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT,
604 amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE,
605 ".amdhsa_inst_pref_size");
607 if (IVersion.
Major >= 12) {
609 amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT,
610 amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE,
611 ".amdhsa_inst_pref_size");
613 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
614 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
615 ".amdhsa_round_robin_scheduling");
620 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT,
621 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
622 ".amdhsa_exception_fp_ieee_invalid_op");
625 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT,
626 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
627 ".amdhsa_exception_fp_denorm_src");
631 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT,
632 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
633 ".amdhsa_exception_fp_ieee_div_zero");
636 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT,
637 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
638 ".amdhsa_exception_fp_ieee_overflow");
641 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT,
642 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
643 ".amdhsa_exception_fp_ieee_underflow");
646 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT,
647 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
648 ".amdhsa_exception_fp_ieee_inexact");
651 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT,
652 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
653 ".amdhsa_exception_int_div_zero");
655 OS <<
"\t.end_amdhsa_kernel\n";
675 W.setELFHeaderEFlags(getEFlags());
676 W.setOverrideABIVersion(
693void AMDGPUTargetELFStreamer::EmitNote(
697 auto &Context = S.getContext();
699 auto NameSZ = Name.size() + 1;
701 unsigned NoteFlags = 0;
711 S.emitValue(DescSZ, 4);
712 S.emitInt32(NoteType);
714 S.emitValueToAlignment(
Align(4), 0, 1, 0);
716 S.emitValueToAlignment(
Align(4), 0, 1, 0);
720unsigned AMDGPUTargetELFStreamer::getEFlags() {
725 return getEFlagsR600();
727 return getEFlagsAMDGCN();
731unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {
737unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
738 assert(STI.getTargetTriple().isAMDGCN());
740 switch (STI.getTargetTriple().getOS()) {
745 return getEFlagsUnknownOS();
747 return getEFlagsAMDHSA();
749 return getEFlagsAMDPAL();
751 return getEFlagsMesa3D();
755unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
759 return getEFlagsV3();
762unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
766 return getEFlagsV6();
767 return getEFlagsV4();
770unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
773 return getEFlagsV3();
776unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
779 return getEFlagsV3();
782unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
783 unsigned EFlagsV3 = 0;
798unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
799 unsigned EFlagsV4 = 0;
838unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
839 unsigned Flags = getEFlagsV4();
872 " - no ELF flag can represent this version!");
897 auto *SymbolELF =
static_cast<MCSymbolELF *
>(Symbol);
900 if (!SymbolELF->isBindingSet())
903 if (SymbolELF->declareCommon(
Size, Alignment)) {
905 " redeclared as different type");
916 auto *DescBegin = Context.createTempSymbol();
917 auto *DescEnd = Context.createTempSymbol();
934 if (!Verifier.verify(HSAMetadataDoc.
getRoot()))
937 std::string HSAMetadataString;
943 auto *DescBegin = Context.createTempSymbol();
944 auto *DescEnd = Context.createTempSymbol();
959 const uint32_t Encoded_s_code_end = 0xbf9f0000;
960 const uint32_t Encoded_s_nop = 0xbf800000;
961 uint32_t Encoded_pad = Encoded_s_code_end;
971 Encoded_pad = Encoded_s_nop;
978 for (
unsigned I = 0;
I < FillSize;
I += 4)
988 const MCExpr *ReserveFlatScr) {
990 auto &Context = Streamer.getContext();
992 auto *KernelCodeSymbol =
994 auto *KernelDescriptorSymbol =
static_cast<MCSymbolELF *
>(
995 Context.getOrCreateSymbol(
Twine(KernelName) +
Twine(
".kd")));
999 KernelDescriptorSymbol->
setBinding(KernelCodeSymbol->getBinding());
1000 KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
1001 KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
1004 KernelDescriptorSymbol->setSize(
1012 Streamer.emitLabel(KernelDescriptorSymbol);
1023 Streamer.emitInt8(0u);
1036 Streamer.emitInt8(0u);
1049 Streamer.emitInt8(0u);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Enums and constants for AMDGPU PT_NOTE sections.
static cl::opt< unsigned > ForceGenericVersion("amdgpu-force-generic-version", cl::desc("Force a specific generic_v<N> flag to be " "added. For testing purposes only."), cl::ReallyHidden, cl::init(0))
#define PRINT_RES_INFO(ARG)
AMDHSA kernel descriptor definitions.
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
verify safepoint Safepoint IR Verifier
AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS)
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override
bool EmitISAVersion() override
void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV) override
void EmitDirectiveAMDGCNTarget() override
void EmitMCResourceMaximums(const MCSymbol *MaxVGPR, const MCSymbol *MaxAGPR, const MCSymbol *MaxSGPR, const MCSymbol *MaxNamedBarrier) override
void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override
void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr) override
void EmitMCResourceInfo(const MCSymbol *NumVGPR, const MCSymbol *NumAGPR, const MCSymbol *NumExplicitSGPR, const MCSymbol *NumNamedBarrier, const MCSymbol *PrivateSegmentSize, const MCSymbol *UsesVCC, const MCSymbol *UsesFlatScratch, const MCSymbol *HasDynamicallySizedStack, const MCSymbol *HasRecursion, const MCSymbol *HasIndirectCall) override
bool EmitCodeEnd(const MCSubtargetInfo &STI) override
void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override
void EmitDirectiveAMDGCNTarget() override
bool EmitCodeEnd(const MCSubtargetInfo &STI) override
void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override
AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI)
void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override
void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr) override
MCELFStreamer & getStreamer()
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override
bool EmitISAVersion() override
virtual bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict)
Emit HSA Metadata.
AMDGPUPALMetadata * getPALMetadata()
AMDGPUTargetStreamer(MCStreamer &S)
virtual void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV)
virtual bool EmitHSAMetadataV3(StringRef HSAMetadataString)
static unsigned getElfMach(StringRef GPU)
MCContext & getContext() const
static StringRef getArchNameFromElfMach(unsigned ElfMach)
const std::optional< AMDGPU::IsaInfo::AMDGPUTargetID > & getTargetID() const
unsigned CodeObjectVersion
This class is intended to be used as a base class for asm properties and features specific to the tar...
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
static const MCBinaryExpr * createMul(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
const MCAsmInfo * getAsmInfo() const
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
ELFObjectWriter & getWriter()
void emitLabel(MCSymbol *Symbol, SMLoc Loc=SMLoc()) override
Emit a label for Symbol into the current section.
Base class for the full range of assembler expressions which are needed for parsing.
void emitBytes(StringRef Data) override
Emit the bytes in Data into the output.
Streaming machine code generation interface.
virtual bool popSection()
Restore the current and previous section from the section stack.
MCContext & getContext() const
virtual void emitValueToAlignment(Align Alignment, int64_t Fill=0, uint8_t FillLen=1, unsigned MaxBytesToEmit=0)
Emit some number of copies of Value until the byte alignment ByteAlignment is reached.
void pushSection()
Save the current and previous section on the section stack.
void emitInt32(uint64_t Value)
Generic base class for all target subtargets.
const Triple & getTargetTriple() const
void setBinding(unsigned Binding) const
void setType(unsigned Type) const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
StringRef - Represent a constant reference to a string, i.e.
ArchType getArch() const
Get the parsed architecture type of this triple.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
An efficient, type-erasing, non-owning reference to a callable.
Simple in-memory representation of a document of msgpack objects with ability to find and create arra...
DocNode & getRoot()
Get ref to the document's root element.
LLVM_ABI void toYAML(raw_ostream &OS)
Convert MsgPack Document to YAML text.
LLVM_ABI void writeToBlob(std::string &Blob)
Write a MsgPack document to a binary MsgPack blob.
LLVM_ABI bool fromYAML(StringRef S)
Read YAML text into the MsgPack document. Returns false on failure.
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static constexpr unsigned GFX9_4
static constexpr unsigned GFX10_1
static constexpr unsigned GFX10_3
static constexpr unsigned GFX11
static constexpr unsigned GFX9
static constexpr unsigned GFX12
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
LLVM_ABI StringRef getArchNameR600(GPUKind AK)
GPUKind
GPU kinds supported by the AMDGPU target.
void printAMDGPUMCExpr(const MCExpr *Expr, raw_ostream &OS, const MCAsmInfo *MAI)
bool isHsaAbi(const MCSubtargetInfo &STI)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_ABI GPUKind parseArchAMDGCN(StringRef CPU)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
const MCExpr * foldAMDGPUMCExpr(const MCExpr *Expr, MCContext &Ctx)
LLVM_ABI StringRef getArchNameAMDGCN(GPUKind AK)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)
LLVM_ABI GPUKind parseArchR600(StringRef CPU)
@ EF_AMDGPU_GENERIC_VERSION_MAX
@ EF_AMDGPU_FEATURE_XNACK_ANY_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX703
@ EF_AMDGPU_MACH_AMDGCN_GFX1035
@ EF_AMDGPU_FEATURE_SRAMECC_V3
@ EF_AMDGPU_MACH_AMDGCN_GFX1031
@ EF_AMDGPU_GENERIC_VERSION_OFFSET
@ EF_AMDGPU_MACH_R600_CAYMAN
@ EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX704
@ EF_AMDGPU_MACH_AMDGCN_GFX902
@ EF_AMDGPU_MACH_AMDGCN_GFX810
@ EF_AMDGPU_MACH_AMDGCN_GFX950
@ EF_AMDGPU_MACH_AMDGCN_GFX1036
@ EF_AMDGPU_MACH_AMDGCN_GFX1102
@ EF_AMDGPU_MACH_R600_RV730
@ EF_AMDGPU_MACH_R600_RV710
@ EF_AMDGPU_MACH_AMDGCN_GFX908
@ EF_AMDGPU_MACH_AMDGCN_GFX1011
@ EF_AMDGPU_MACH_R600_CYPRESS
@ EF_AMDGPU_MACH_AMDGCN_GFX1032
@ EF_AMDGPU_MACH_R600_R600
@ EF_AMDGPU_MACH_AMDGCN_GFX1250
@ EF_AMDGPU_MACH_R600_TURKS
@ EF_AMDGPU_MACH_R600_JUNIPER
@ EF_AMDGPU_FEATURE_SRAMECC_OFF_V4
@ EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX601
@ EF_AMDGPU_MACH_AMDGCN_GFX942
@ EF_AMDGPU_MACH_AMDGCN_GFX1152
@ EF_AMDGPU_MACH_R600_R630
@ EF_AMDGPU_MACH_R600_REDWOOD
@ EF_AMDGPU_MACH_R600_RV770
@ EF_AMDGPU_FEATURE_XNACK_OFF_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX600
@ EF_AMDGPU_FEATURE_XNACK_V3
@ EF_AMDGPU_MACH_AMDGCN_GFX602
@ EF_AMDGPU_MACH_AMDGCN_GFX1101
@ EF_AMDGPU_MACH_AMDGCN_GFX1100
@ EF_AMDGPU_MACH_AMDGCN_GFX1310
@ EF_AMDGPU_MACH_AMDGCN_GFX1033
@ EF_AMDGPU_MACH_AMDGCN_GFX801
@ EF_AMDGPU_MACH_AMDGCN_GFX705
@ EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX1153
@ EF_AMDGPU_MACH_AMDGCN_GFX1010
@ EF_AMDGPU_MACH_R600_RV670
@ EF_AMDGPU_MACH_AMDGCN_GFX701
@ EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX1012
@ EF_AMDGPU_MACH_AMDGCN_GFX1151
@ EF_AMDGPU_MACH_AMDGCN_GFX1030
@ EF_AMDGPU_MACH_R600_CEDAR
@ EF_AMDGPU_MACH_AMDGCN_GFX1200
@ EF_AMDGPU_MACH_AMDGCN_GFX700
@ EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX803
@ EF_AMDGPU_MACH_AMDGCN_GFX802
@ EF_AMDGPU_MACH_AMDGCN_GFX90C
@ EF_AMDGPU_FEATURE_XNACK_ON_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX900
@ EF_AMDGPU_MACH_AMDGCN_GFX909
@ EF_AMDGPU_MACH_AMDGCN_GFX906
@ EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX1103
@ EF_AMDGPU_MACH_R600_CAICOS
@ EF_AMDGPU_MACH_AMDGCN_GFX90A
@ EF_AMDGPU_MACH_AMDGCN_GFX1034
@ EF_AMDGPU_MACH_AMDGCN_GFX1013
@ EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX904
@ EF_AMDGPU_MACH_AMDGCN_GFX1251
@ EF_AMDGPU_MACH_R600_RS880
@ EF_AMDGPU_MACH_AMDGCN_GFX805
@ EF_AMDGPU_MACH_AMDGCN_GFX1201
@ EF_AMDGPU_MACH_AMDGCN_GFX1150
@ EF_AMDGPU_MACH_R600_SUMO
@ EF_AMDGPU_MACH_R600_BARTS
@ EF_AMDGPU_FEATURE_SRAMECC_ANY_V4
@ EF_AMDGPU_FEATURE_SRAMECC_ON_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX702
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr uintptr_t uintptr_t Version
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
Instruction set architecture version.
const MCExpr * compute_pgm_rsrc2
const MCExpr * kernarg_size
const MCExpr * kernarg_preload
const MCExpr * compute_pgm_rsrc3
const MCExpr * private_segment_fixed_size
static const MCExpr * bits_get(const MCExpr *Src, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
const MCExpr * compute_pgm_rsrc1
const MCExpr * group_segment_fixed_size
const MCExpr * kernel_code_properties
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
uint32_t group_segment_fixed_size
uint32_t compute_pgm_rsrc1
uint32_t private_segment_fixed_size
uint32_t compute_pgm_rsrc2
uint16_t kernel_code_properties
uint32_t compute_pgm_rsrc3
int64_t kernel_code_entry_byte_offset