44#define DEBUG_TYPE "amdgpu-disassembler"
47 (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
48 : AMDGPU::EncValues::SGPR_MAX_SI)
60 MAI(*Ctx.getAsmInfo()),
62 TargetMaxInstBytes(MAI.getMaxInstLength(&
STI)),
63 CodeObjectVersion(
AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
65 if (!
STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !
isGFX10Plus())
69 createConstantSymbolExpr(Symbol, Code);
71 UCVersionW64Expr = createConstantSymbolExpr(
"UC_VERSION_W64_BIT", 0x2000);
72 UCVersionW32Expr = createConstantSymbolExpr(
"UC_VERSION_W32_BIT", 0x4000);
73 UCVersionMDPExpr = createConstantSymbolExpr(
"UC_VERSION_MDP_BIT", 0x8000);
89 AMDGPU::OpName Name) {
90 int OpIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), Name);
107 if (DAsm->tryAddingSymbolicOperand(Inst,
Offset, Addr,
true, 2, 2, 0))
116 if (DAsm->isGFX12Plus()) {
118 }
else if (DAsm->isVI()) {
129 return addOperand(Inst, DAsm->decodeBoolReg(Inst, Val));
136 return addOperand(Inst, DAsm->decodeSplitBarrier(Inst, Val));
142 return addOperand(Inst, DAsm->decodeDpp8FI(Val));
145#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
146 static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \
148 const MCDisassembler *Decoder) { \
149 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
150 return addOperand(Inst, DAsm->DecoderName(Imm)); \
155#define DECODE_OPERAND_REG_8(RegClass) \
156 static DecodeStatus Decode##RegClass##RegisterClass( \
157 MCInst &Inst, unsigned Imm, uint64_t , \
158 const MCDisassembler *Decoder) { \
159 assert(Imm < (1 << 8) && "8-bit encoding"); \
160 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
162 Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm)); \
165#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm) \
166 static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t , \
167 const MCDisassembler *Decoder) { \
168 assert(Imm < (1 << EncSize) && #EncSize "-bit encoding"); \
169 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
170 return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm)); \
174 unsigned OpWidth,
unsigned Imm,
unsigned EncImm,
176 assert(Imm < (1U << EncSize) &&
"Operand doesn't fit encoding!");
178 return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));
// Defines the static decoder Decode<RegClass>RegisterClass by expanding
// DECODE_SrcOp with an encoding size of 7 bits. The trailing `Imm` names the
// generated decoder's own immediate parameter, so the raw encoded value is
// handed to decodeSrcOp unchanged as the EncImm argument.
183#define DECODE_OPERAND_SREG_7(RegClass, OpWidth) \
184 DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)
// Same as the 7-bit variant, but expands DECODE_SrcOp with an encoding size of
// 8 bits, so the generated Decode<RegClass>RegisterClass asserts that Imm fits
// in 8 bits before forwarding it unchanged as EncImm.
186#define DECODE_OPERAND_SREG_8(RegClass, OpWidth) \
187 DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)
193template <
unsigned OpW
idth>
201template <
unsigned OpW
idth>
205 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
211template <
unsigned OpW
idth>
214 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
219template <
unsigned OpW
idth>
223 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
231template <
unsigned OpW
idth>
235 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
240template <
unsigned OpW
idth>
244 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
292 assert((Imm & (1 << 8)) == 0 &&
"Imm{8} should not be used");
294 bool IsHi = Imm & (1 << 9);
295 unsigned RegIdx = Imm & 0xff;
297 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
305 bool IsHi = Imm & (1 << 7);
306 unsigned RegIdx = Imm & 0x7f;
308 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
311template <
unsigned OpW
idth>
319 bool IsHi = Imm & (1 << 7);
320 unsigned RegIdx = Imm & 0x7f;
321 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
323 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
326template <
unsigned OpW
idth>
334 bool IsHi = Imm & (1 << 9);
335 unsigned RegIdx = Imm & 0xff;
336 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
338 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
349 bool IsHi = Imm & (1 << 9);
350 unsigned RegIdx = Imm & 0xff;
351 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
358 return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
365 return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
369 uint64_t Addr,
const void *Decoder) {
371 return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
377 return addOperand(Inst, DAsm->decodeSrcOp(Inst, Opw, Imm | 256));
380template <
unsigned Opw>
390 assert(Imm < (1 << 9) &&
"9-bit encoding");
392 return addOperand(Inst, DAsm->decodeSrcOp(Inst, 64, Imm));
// Shorthand for DECODE_OPERAND where the static decoder and the
// AMDGPUDisassembler member it forwards to share one name:
// DECODE_SDWA(X) expands to DECODE_OPERAND(decodeSDWAX, decodeSDWAX).
395#define DECODE_SDWA(DecName) \
396DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
406 return addOperand(Inst, DAsm->decodeVersionImm(Imm));
409#include "AMDGPUGenDisassemblerTables.inc"
// Explicit specializations of InsnBitWidth, one per instruction-word type used
// here: uint32_t -> 32, uint64_t -> 64, std::bitset<96> -> 96 and
// std::bitset<128> -> 128. Each value is a compile-time constant.
413template <>
constexpr uint32_t InsnBitWidth<uint32_t> = 32;
414template <>
constexpr uint32_t InsnBitWidth<uint64_t> = 64;
415template <>
constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
416template <>
constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
423template <
typename InsnType>
431 const auto SavedBytes = Bytes;
438 decodeInstruction(Table, TmpInst, Inst,
Address,
this,
STI);
444 Comments << LocalComments;
451template <
typename InsnType>
456 for (
const uint8_t *
T : {Table1, Table2}) {
467 Bytes = Bytes.
slice(
sizeof(
T));
475 Bytes = Bytes.
slice(8);
477 Bytes = Bytes.
slice(4);
478 return (
Hi << 64) |
Lo;
485 Bytes = Bytes.
slice(8);
487 Bytes = Bytes.
slice(8);
488 return (
Hi << 64) |
Lo;
491void AMDGPUDisassembler::decodeImmOperands(
MCInst &
MI,
493 const MCInstrDesc &
Desc = MCII.get(
MI.getOpcode());
495 if (OpNo >=
MI.getNumOperands())
505 MCOperand &
Op =
MI.getOperand(OpNo);
508 int64_t
Imm =
Op.getImm();
522 switch (OpDesc.OperandType) {
545 Imm = (F16Val << 16) | (F16Val & 0xFFFF);
570 unsigned MaxInstBytesNum = std::min((
size_t)TargetMaxInstBytes, Bytes_.
size());
571 Bytes = Bytes_.
slice(0, MaxInstBytesNum);
575 Size = std::min((
size_t)4, Bytes_.
size());
587 Bytes = Bytes_.
slice(0, MaxInstBytesNum);
622 if (
STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
624 Bytes = Bytes_.
slice(4, MaxInstBytesNum - 4);
632 Bytes = Bytes_.
slice(0, MaxInstBytesNum);
634 }
else if (Bytes.size() >= 16 &&
635 STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
641 Bytes = Bytes_.
slice(0, MaxInstBytesNum);
644 if (Bytes.size() >= 8) {
647 if (
STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
651 if (
STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
655 if (
STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
662 if (
STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
666 if (
STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
670 if (
STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
722 Bytes = Bytes_.
slice(0, MaxInstBytesNum);
726 if (Bytes.size() >= 4) {
739 if (
STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
743 if (
STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
747 if (
STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
785 decodeImmOperands(
MI, *MCII);
797 else if (AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::dpp8) !=
809 AMDGPU::OpName::src2_modifiers);
812 if (
MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
813 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
816 AMDGPU::OpName::src2_modifiers);
824 if (MCII->get(
MI.getOpcode()).TSFlags &
826 int CPolPos = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
827 AMDGPU::OpName::cpol);
832 if (
MI.getNumOperands() <= (
unsigned)CPolPos) {
834 AMDGPU::OpName::cpol);
836 MI.getOperand(CPolPos).setImm(
MI.getOperand(CPolPos).getImm() | CPol);
841 if ((MCII->get(
MI.getOpcode()).TSFlags &
843 (
STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
846 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::tfe);
847 if (TFEOpIdx != -1) {
848 auto *TFEIter =
MI.begin();
849 std::advance(TFEIter, TFEOpIdx);
857 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::offset);
858 if (OffsetIdx != -1) {
859 uint32_t Imm =
MI.getOperand(OffsetIdx).getImm();
861 if (SignedOffset < 0)
866 if (MCII->get(
MI.getOpcode()).TSFlags &
869 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::swz);
870 if (SWZOpIdx != -1) {
871 auto *SWZIter =
MI.begin();
872 std::advance(SWZIter, SWZOpIdx);
880 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vaddr0);
882 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::srsrc);
883 unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
884 if (VAddr0Idx >= 0 && NSAArgs > 0) {
885 unsigned NSAWords = (NSAArgs + 3) / 4;
886 if (Bytes.size() < 4 * NSAWords)
888 for (
unsigned i = 0; i < NSAArgs; ++i) {
889 const unsigned VAddrIdx = VAddr0Idx + 1 + i;
891 MCII->getOpRegClassID(
Desc.operands()[VAddrIdx], HwModeRegClass);
894 Bytes = Bytes.slice(4 * NSAWords);
900 if (MCII->get(
MI.getOpcode()).TSFlags &
919 int VDstIn_Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
920 AMDGPU::OpName::vdst_in);
921 if (VDstIn_Idx != -1) {
922 int Tied = MCII->get(
MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
924 if (Tied != -1 && (
MI.getNumOperands() <= (
unsigned)VDstIn_Idx ||
925 !
MI.getOperand(VDstIn_Idx).isReg() ||
926 MI.getOperand(VDstIn_Idx).getReg() !=
MI.getOperand(Tied).getReg())) {
927 if (
MI.getNumOperands() > (
unsigned)VDstIn_Idx)
928 MI.erase(&
MI.getOperand(VDstIn_Idx));
931 AMDGPU::OpName::vdst_in);
943 MCII->get(
MI.getOpcode()).getNumDefs() == 0 &&
944 MCII->get(
MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
945 auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
946 if (Bytes_[0] != ExecEncoding)
950 Size = MaxInstBytesNum - Bytes.size();
955 if (
STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
965 if (
MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
966 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
967 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
968 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
969 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx13 ||
970 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx13 ||
971 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
972 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
973 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
974 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
975 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx13 ||
976 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx13 ||
977 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
978 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
979 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
980 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
981 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx13 ||
982 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx13 ||
983 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
984 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
985 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
986 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12 ||
987 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx13 ||
988 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx13) {
996 if (
STI.hasFeature(AMDGPU::FeatureGFX9) ||
997 STI.hasFeature(AMDGPU::FeatureGFX10)) {
1001 }
else if (
STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
1002 int SDst = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::sdst);
1006 AMDGPU::OpName::sdst);
1026 MO.
getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {
1034 BaseReg, AMDGPU::sub0, &MRI.
getRegClass(AMDGPU::VReg_384RegClassID));
1035 return MO.
setReg(NewReg);
1052 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::blgp);
1057 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::cbsz);
1059 unsigned CBSZ =
MI.getOperand(CbszIdx).getImm();
1060 unsigned BLGP =
MI.getOperand(BlgpIdx).getImm();
1064 if (!AdjustedRegClassOpcode ||
1065 AdjustedRegClassOpcode->
Opcode ==
MI.getOpcode())
1068 MI.setOpcode(AdjustedRegClassOpcode->
Opcode);
1070 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
1072 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src1);
1081 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::matrix_a_fmt);
1086 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::matrix_b_fmt);
1088 unsigned FmtA =
MI.getOperand(FmtAIdx).getImm();
1089 unsigned FmtB =
MI.getOperand(FmtBIdx).getImm();
1093 if (!AdjustedRegClassOpcode ||
1094 AdjustedRegClassOpcode->
Opcode ==
MI.getOpcode())
1097 MI.setOpcode(AdjustedRegClassOpcode->
Opcode);
1099 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
1101 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src1);
1119 bool IsVOP3P =
false) {
1121 unsigned Opc =
MI.getOpcode();
1122 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
1123 AMDGPU::OpName::src1_modifiers,
1124 AMDGPU::OpName::src2_modifiers};
1125 for (
int J = 0; J < 3; ++J) {
1126 int OpIdx = AMDGPU::getNamedOperandIdx(
Opc, ModOps[J]);
1130 unsigned Val =
MI.getOperand(
OpIdx).getImm();
1137 }
else if (J == 0) {
1148 const unsigned Opc =
MI.getOpcode();
1150 MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
1151 constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
1152 OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
1154 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
1156 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
1158 {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
1160 for (
const auto &[
OpName, OpModsName, OpSelMask] : OpAndOpMods) {
1162 int OpModsIdx = AMDGPU::getNamedOperandIdx(
Opc, OpModsName);
1163 if (
OpIdx == -1 || OpModsIdx == -1)
1170 unsigned OpEnc = MRI.getEncodingValue(
Op.getReg());
1171 const MCOperand &OpMods =
MI.getOperand(OpModsIdx);
1172 unsigned ModVal = OpMods.
getImm();
1173 if (ModVal & OpSelMask) {
1183 constexpr int DST_IDX = 0;
1184 auto Opcode =
MI.getOpcode();
1185 const auto &
Desc = MCII->get(Opcode);
1186 auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);
1188 if (OldIdx != -1 &&
Desc.getOperandConstraint(
1192 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
1203 assert(
MI.getNumOperands() + 1 < MCII->get(
MI.getOpcode()).getNumOperands());
1206 AMDGPU::OpName::src2_modifiers);
1210 unsigned Opc =
MI.getOpcode();
1213 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vdst_in);
1214 if (VDstInIdx != -1)
1217 unsigned DescNumOps = MCII->get(
Opc).getNumOperands();
1218 if (
MI.getNumOperands() < DescNumOps &&
1223 AMDGPU::OpName::op_sel);
1226 if (
MI.getNumOperands() < DescNumOps &&
1229 AMDGPU::OpName::src0_modifiers);
1231 if (
MI.getNumOperands() < DescNumOps &&
1234 AMDGPU::OpName::src1_modifiers);
1242 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vdst_in);
1243 if (VDstInIdx != -1)
1246 unsigned Opc =
MI.getOpcode();
1247 unsigned DescNumOps = MCII->get(
Opc).getNumOperands();
1248 if (
MI.getNumOperands() < DescNumOps &&
1252 AMDGPU::OpName::op_sel);
1267 BaseReg = AMDGPU::VGPR0;
1269 BaseReg = AMDGPU::AGPR0;
1271 assert(BaseReg &&
"Only vector registers expected");
1273 return (Sub0 - BaseReg + NumRegs <= 256) ?
Reg :
MCRegister();
1280 auto TSFlags = MCII->get(
MI.getOpcode()).TSFlags;
1282 int VDstIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
1283 AMDGPU::OpName::vdst);
1285 int VDataIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
1286 AMDGPU::OpName::vdata);
1288 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vaddr0);
1290 ? AMDGPU::OpName::srsrc
1291 : AMDGPU::OpName::rsrc;
1292 int RsrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), RsrcOpName);
1293 int DMaskIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
1294 AMDGPU::OpName::dmask);
1296 int TFEIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
1297 AMDGPU::OpName::tfe);
1298 int D16Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
1299 AMDGPU::OpName::d16);
1306 if (BaseOpcode->
BVH) {
1312 bool IsAtomic = (VDstIdx != -1);
1316 bool IsPartialNSA =
false;
1317 unsigned AddrSize = Info->VAddrDwords;
1321 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::dim);
1323 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::a16);
1326 const bool IsA16 = (A16Idx != -1 &&
MI.getOperand(A16Idx).
getImm());
1333 IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
1334 Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
1335 Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
1337 if (!IsVSample && AddrSize > 12)
1340 if (AddrSize > Info->VAddrDwords) {
1341 if (!
STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
1346 IsPartialNSA =
true;
1351 unsigned DMask =
MI.getOperand(DMaskIdx).getImm() & 0xf;
1352 unsigned DstSize = IsGather4 ? 4 : std::max(
llvm::popcount(DMask), 1);
1354 bool D16 = D16Idx >= 0 &&
MI.getOperand(D16Idx).getImm();
1356 DstSize = (DstSize + 1) / 2;
1359 if (TFEIdx != -1 &&
MI.getOperand(TFEIdx).getImm())
1362 if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
1367 if (NewOpcode == -1)
1372 if (DstSize != Info->VDataDwords) {
1373 auto DataRCID = MCII->getOpRegClassID(
1374 MCII->get(NewOpcode).operands()[VDataIdx], HwModeRegClass);
1378 MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
1379 Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1382 NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &NewRC);
1393 int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
1395 if (
STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
1396 AddrSize != Info->VAddrDwords) {
1397 MCRegister VAddrSA =
MI.getOperand(VAddrSAIdx).getReg();
1398 MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
1399 VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1401 auto AddrRCID = MCII->getOpRegClassID(
1402 MCII->get(NewOpcode).operands()[VAddrSAIdx], HwModeRegClass);
1405 NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &NewRC);
1411 MI.setOpcode(NewOpcode);
1413 if (NewVdata != AMDGPU::NoRegister) {
1425 assert(AddrSize <= Info->VAddrDwords);
1426 MI.erase(
MI.begin() + VAddr0Idx + AddrSize,
1427 MI.begin() + VAddr0Idx + Info->VAddrDwords);
1435 unsigned Opc =
MI.getOpcode();
1436 unsigned DescNumOps = MCII->get(
Opc).getNumOperands();
1439 if (
MI.getNumOperands() < DescNumOps &&
1443 if (
MI.getNumOperands() < DescNumOps &&
1446 AMDGPU::OpName::op_sel);
1447 if (
MI.getNumOperands() < DescNumOps &&
1450 AMDGPU::OpName::op_sel_hi);
1451 if (
MI.getNumOperands() < DescNumOps &&
1454 AMDGPU::OpName::neg_lo);
1455 if (
MI.getNumOperands() < DescNumOps &&
1458 AMDGPU::OpName::neg_hi);
1463 unsigned Opc =
MI.getOpcode();
1464 unsigned DescNumOps = MCII->get(
Opc).getNumOperands();
1466 if (
MI.getNumOperands() < DescNumOps &&
1470 if (
MI.getNumOperands() < DescNumOps &&
1473 AMDGPU::OpName::src0_modifiers);
1475 if (
MI.getNumOperands() < DescNumOps &&
1478 AMDGPU::OpName::src1_modifiers);
1482 unsigned Opc =
MI.getOpcode();
1483 unsigned DescNumOps = MCII->get(
Opc).getNumOperands();
1487 if (
MI.getNumOperands() < DescNumOps &&
1491 AMDGPU::OpName::op_sel);
1496 assert(HasLiteral &&
"Should have decoded a literal");
1507 const Twine& ErrMsg)
const {
1521 unsigned Val)
const {
1522 const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
1523 if (Val >= RegCl.getNumRegs())
1525 ": unknown register " +
Twine(Val));
1531 unsigned Val)
const {
1535 switch (SRegClassID) {
1536 case AMDGPU::SGPR_32RegClassID:
1537 case AMDGPU::TTMP_32RegClassID:
1539 case AMDGPU::SGPR_64RegClassID:
1540 case AMDGPU::TTMP_64RegClassID:
1543 case AMDGPU::SGPR_96RegClassID:
1544 case AMDGPU::TTMP_96RegClassID:
1545 case AMDGPU::SGPR_128RegClassID:
1546 case AMDGPU::TTMP_128RegClassID:
1549 case AMDGPU::SGPR_256RegClassID:
1550 case AMDGPU::TTMP_256RegClassID:
1553 case AMDGPU::SGPR_288RegClassID:
1554 case AMDGPU::TTMP_288RegClassID:
1555 case AMDGPU::SGPR_320RegClassID:
1556 case AMDGPU::TTMP_320RegClassID:
1557 case AMDGPU::SGPR_352RegClassID:
1558 case AMDGPU::TTMP_352RegClassID:
1559 case AMDGPU::SGPR_384RegClassID:
1560 case AMDGPU::TTMP_384RegClassID:
1561 case AMDGPU::SGPR_512RegClassID:
1562 case AMDGPU::TTMP_512RegClassID:
1571 if (Val % (1 << shift)) {
1573 <<
": scalar reg isn't aligned " << Val;
1581 unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1591 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1593 return errOperand(Val,
"More than one unique literal is illegal");
1604 return errOperand(Val,
"More than one unique literal is illegal");
1609 bool UseLit64 =
Hi_32(Literal) == 0;
1622 if (Bytes.size() < 4) {
1623 return errOperand(0,
"cannot read literal, inst bytes left " +
1624 Twine(Bytes.size()));
1631 bool HasInv2Pi =
true;
1635 int64_t Val = Literal;
1636 bool UseLit =
false;
1703 assert(
STI.hasFeature(AMDGPU::Feature64BitLiterals));
1706 if (Bytes.size() < 8) {
1707 return errOperand(0,
"cannot read literal64, inst bytes left " +
1708 Twine(Bytes.size()));
1714 bool UseLit64 =
Hi_32(Literal) == 0;
1723 assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1725 (
static_cast<int64_t
>(Imm) - INLINE_INTEGER_C_MIN) :
1726 (INLINE_INTEGER_C_POSITIVE_MAX -
static_cast<int64_t
>(Imm)));
1774 return 0x3fc45f306dc9c882;
1836 return VGPR_32RegClassID;
1838 return VReg_64RegClassID;
1840 return VReg_96RegClassID;
1842 return VReg_128RegClassID;
1844 return VReg_160RegClassID;
1846 return VReg_192RegClassID;
1848 return VReg_256RegClassID;
1850 return VReg_288RegClassID;
1852 return VReg_320RegClassID;
1854 return VReg_352RegClassID;
1856 return VReg_384RegClassID;
1858 return VReg_512RegClassID;
1860 return VReg_1024RegClassID;
1871 return AGPR_32RegClassID;
1873 return AReg_64RegClassID;
1875 return AReg_96RegClassID;
1877 return AReg_128RegClassID;
1879 return AReg_160RegClassID;
1881 return AReg_256RegClassID;
1883 return AReg_288RegClassID;
1885 return AReg_320RegClassID;
1887 return AReg_352RegClassID;
1889 return AReg_384RegClassID;
1891 return AReg_512RegClassID;
1893 return AReg_1024RegClassID;
1904 return SGPR_32RegClassID;
1906 return SGPR_64RegClassID;
1908 return SGPR_96RegClassID;
1910 return SGPR_128RegClassID;
1912 return SGPR_160RegClassID;
1914 return SGPR_256RegClassID;
1916 return SGPR_288RegClassID;
1918 return SGPR_320RegClassID;
1920 return SGPR_352RegClassID;
1922 return SGPR_384RegClassID;
1924 return SGPR_512RegClassID;
1935 return TTMP_32RegClassID;
1937 return TTMP_64RegClassID;
1939 return TTMP_128RegClassID;
1941 return TTMP_256RegClassID;
1943 return TTMP_288RegClassID;
1945 return TTMP_320RegClassID;
1947 return TTMP_352RegClassID;
1949 return TTMP_384RegClassID;
1951 return TTMP_512RegClassID;
1959 unsigned TTmpMin =
isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1960 unsigned TTmpMax =
isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1962 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1966 unsigned Val)
const {
1971 bool IsAGPR = Val & 512;
1974 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1983 unsigned Val)
const {
1986 assert(Val < (1 << 8) &&
"9-bit Src encoding when Val{8} is 0");
1991 static_assert(SGPR_MIN == 0);
2000 if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
2001 (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
2002 Val == LITERAL_CONST)
2005 if (Val == LITERAL64_CONST &&
STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
2028 unsigned Val)
const {
2030 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::vdstX);
2033 unsigned XDstReg = MRI.getEncodingValue(Inst.
getOperand(VDstXInd).
getReg());
2034 Val |= ~XDstReg & 1;
2127 const unsigned Val)
const {
2131 if (
STI.hasFeature(AMDGPU::FeatureGFX9) ||
2132 STI.hasFeature(AMDGPU::FeatureGFX10)) {
2135 if (
int(SDWA9EncValues::SRC_VGPR_MIN) <=
int(Val) &&
2136 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
2138 Val - SDWA9EncValues::SRC_VGPR_MIN);
2140 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
2141 Val <= (
isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
2142 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
2144 Val - SDWA9EncValues::SRC_SGPR_MIN);
2146 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
2147 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
2149 Val - SDWA9EncValues::SRC_TTMP_MIN);
2152 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
2154 if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
2155 (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
2160 if (
STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
2176 assert((
STI.hasFeature(AMDGPU::FeatureGFX9) ||
2177 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
2178 "SDWAVopcDst should be present only on GFX9+");
2180 bool IsWave32 =
STI.hasFeature(AMDGPU::FeatureWavefrontSize32);
2182 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
2183 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
2199 unsigned Val)
const {
2200 return STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
2206 unsigned Val)
const {
2223 auto [
Version, W64, W32, MDP] = Encoding::decode(Imm);
2226 if (Encoding::encode(
Version, W64, W32, MDP) != Imm)
2236 if (
I == Versions.end())
2252 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2258 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2270 return STI.hasFeature(AMDGPU::FeatureGFX11);
2280 return STI.hasFeature(AMDGPU::FeatureGFX12);
2300 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2322 if (PopCount == 1) {
2323 S <<
"bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) <<
')';
2325 S <<
"bits in range ("
2326 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) <<
':'
2327 << (TrailingZeros + BaseBytes * CHAR_BIT) <<
')';
// Extracts the bit-field selected by MASK from the variable named
// FourByteBuffer that is in scope at the expansion site, via AMDHSA_BITS_GET.
2333#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
2334#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2336 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
2338#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
2340 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
2341 << GET_FIELD(MASK) << '\n'; \
2344#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
2346 if (FourByteBuffer & (MASK)) { \
2347 return createStringError(std::errc::invalid_argument, \
2348 "kernel descriptor " DESC \
2349 " reserved %s set" MSG, \
2350 getBitRangeFromMask((MASK), 0).c_str()); \
// Convenience wrappers around CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG), which
// tests FourByteBuffer against MASK and returns a createStringError whose text
// is built from DESC and MSG. The wrappers differ only in how DESC and MSG are
// supplied.
//
// DESC is the stringified mask name (#MASK); no extra message is appended.
2354#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
// DESC is the stringified mask name; ", " followed by MSG is appended.
2355#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
2356 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
// DESC is given explicitly by the caller; no extra message is appended.
2357#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
2358 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
// DESC is given explicitly by the caller; ", " followed by MSG is appended.
2359#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
2360 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2373 uint32_t GranulatedWorkitemVGPRCount =
2374 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
2377 (GranulatedWorkitemVGPRCount + 1) *
2380 KdStream << Indent <<
".amdhsa_next_free_vgpr " << NextFreeVGPR <<
'\n';
2401 uint32_t GranulatedWavefrontSGPRCount =
2402 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
2406 "must be zero on gfx10+");
2408 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
2411 KdStream << Indent <<
".amdhsa_reserve_vcc " << 0 <<
'\n';
2413 KdStream << Indent <<
".amdhsa_reserve_flat_scratch " << 0 <<
'\n';
2414 bool ReservedXnackMask =
STI.hasFeature(AMDGPU::FeatureXNACK);
2415 assert(!ReservedXnackMask ||
STI.hasFeature(AMDGPU::FeatureSupportsXNACK));
2416 KdStream << Indent <<
".amdhsa_reserve_xnack_mask " << ReservedXnackMask
2418 KdStream << Indent <<
".amdhsa_next_free_sgpr " << NextFreeSGPR <<
"\n";
2423 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
2425 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
2427 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
2429 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
2433 if (
STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
2435 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
2439 if (
STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
2441 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
2448 PRINT_DIRECTIVE(
".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
2451 "COMPUTE_PGM_RSRC1",
"must be zero pre-gfx9");
2457 COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
2460 "COMPUTE_PGM_RSRC1");
2471 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
2473 PRINT_DIRECTIVE(
".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
2474 PRINT_DIRECTIVE(
".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
2477 "COMPUTE_PGM_RSRC1");
2482 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2494 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2496 PRINT_DIRECTIVE(
".amdhsa_system_sgpr_private_segment_wavefront_offset",
2497 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2499 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2501 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2503 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2505 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2507 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2514 ".amdhsa_exception_fp_ieee_invalid_op",
2515 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2517 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2519 ".amdhsa_exception_fp_ieee_div_zero",
2520 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2522 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2524 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2526 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2528 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2541 KdStream << Indent <<
".amdhsa_accum_offset "
2542 << (
GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2545 PRINT_DIRECTIVE(
".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2548 "COMPUTE_PGM_RSRC3",
"must be zero on gfx90a");
2550 "COMPUTE_PGM_RSRC3",
"must be zero on gfx90a");
2554 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2556 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2559 "SHARED_VGPR_COUNT",
2560 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2564 "COMPUTE_PGM_RSRC3",
2565 "must be zero on gfx12+");
2571 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2573 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2575 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2578 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2581 "COMPUTE_PGM_RSRC3",
2582 "must be zero on gfx10");
2587 "COMPUTE_PGM_RSRC3",
"must be zero on gfx10+");
2592 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2595 "COMPUTE_PGM_RSRC3",
2596 "must be zero on gfx10 or gfx11");
2602 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
2604 "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
2606 COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
2608 "ENABLE_DIDT_THROTTLE",
2609 COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
2612 "COMPUTE_PGM_RSRC3",
2613 "must be zero on gfx10+");
2618 "COMPUTE_PGM_RSRC3",
"must be zero on gfx10+");
2623 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2626 "COMPUTE_PGM_RSRC3",
2627 "must be zero on gfx10");
2629 }
else if (FourByteBuffer) {
2631 std::errc::invalid_argument,
2632 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2636#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2637#undef PRINT_DIRECTIVE
2639#undef CHECK_RESERVED_BITS_IMPL
2640#undef CHECK_RESERVED_BITS
2641#undef CHECK_RESERVED_BITS_MSG
2642#undef CHECK_RESERVED_BITS_DESC
2643#undef CHECK_RESERVED_BITS_DESC_MSG
2648 const char *Msg =
"") {
2650 std::errc::invalid_argument,
"kernel descriptor reserved %s set%s%s",
2657 unsigned WidthInBytes) {
2661 std::errc::invalid_argument,
2662 "kernel descriptor reserved bits in range (%u:%u) set",
2663 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2669#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2671 KdStream << Indent << DIRECTIVE " " \
2672 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2681 assert(Bytes.size() == 64);
2684 switch (Cursor.tell()) {
2686 FourByteBuffer = DE.
getU32(Cursor);
2687 KdStream << Indent <<
".amdhsa_group_segment_fixed_size " << FourByteBuffer
2692 FourByteBuffer = DE.
getU32(Cursor);
2693 KdStream << Indent <<
".amdhsa_private_segment_fixed_size "
2694 << FourByteBuffer <<
'\n';
2698 FourByteBuffer = DE.
getU32(Cursor);
2699 KdStream << Indent <<
".amdhsa_kernarg_size "
2700 << FourByteBuffer <<
'\n';
2705 ReservedBytes = DE.
getBytes(Cursor, 4);
2706 for (
char B : ReservedBytes) {
2721 ReservedBytes = DE.
getBytes(Cursor, 20);
2722 for (
char B : ReservedBytes) {
2729 FourByteBuffer = DE.
getU32(Cursor);
2733 FourByteBuffer = DE.
getU32(Cursor);
2737 FourByteBuffer = DE.
getU32(Cursor);
2742 TwoByteBuffer = DE.
getU16(Cursor);
2746 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2748 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2750 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2752 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2754 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2757 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2759 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2761 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2767 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2769 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2774 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2779 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2781 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2790 TwoByteBuffer = DE.
getU16(Cursor);
2791 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2793 KERNARG_PRELOAD_SPEC_LENGTH);
2796 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2798 KERNARG_PRELOAD_SPEC_OFFSET);
2804 ReservedBytes = DE.
getBytes(Cursor, 4);
2805 for (
char B : ReservedBytes) {
2815#undef PRINT_DIRECTIVE
2822 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2824 "kernel descriptor must be 64-byte aligned");
2835 EnableWavefrontSize32 =
2837 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2842 KdStream <<
".amdhsa_kernel " << KdName <<
'\n';
2845 while (
C &&
C.tell() < Bytes.size()) {
2853 KdStream <<
".end_amdhsa_kernel\n";
2872 "code object v2 is not supported");
2885const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(
StringRef Id,
2888 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2896 if (!Valid || Res != Val)
2897 Ctx.reportWarning(
SMLoc(),
"unsupported redefinition of " + Id);
2903 const uint64_t TSFlags = MCII->get(
MI.getOpcode()).TSFlags;
2938 if (Result != Symbols->end()) {
2939 auto *Sym =
Ctx.getOrCreateSymbol(Result->Name);
2945 ReferencedAddresses.push_back(
static_cast<uint64_t>(
Value));
2964 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
MCDisassembler::DecodeStatus DecodeStatus
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)
static VOPModifiers collectVOPModifiers(const MCInst &MI, bool IsVOP3P=false)
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op, AMDGPU::OpName Name)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler()
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm, uint64_t Addr, const MCDisassembler *Decoder)
static SmallString< 32 > getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes)
Print a string describing the reserved bit range specified by Mask with offset BaseBytes for use in e...
#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)
static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static std::bitset< 128 > eat16Bytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize, unsigned OpWidth, unsigned Imm, unsigned EncImm, const MCDisassembler *Decoder)
static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static MCRegister CheckVGPROverflow(MCRegister Reg, const MCRegisterClass &RC, const MCRegisterInfo &MRI)
static int64_t getInlineImmValBF16(unsigned Imm)
#define DECODE_SDWA(DecName)
static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
#define DECODE_OPERAND_REG_8(RegClass)
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static int64_t getInlineImmVal32(unsigned Imm)
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
#define CHECK_RESERVED_BITS(MASK)
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static int64_t getInlineImmVal64(unsigned Imm)
static T eatBytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw, const MCDisassembler *Decoder)
static MCDisassembler * createAMDGPUDisassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)
static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val, uint64_t Addr, const void *Decoder)
static MCSymbolizer * createAMDGPUSymbolizer(const Triple &, LLVMOpInfoCallback, LLVMSymbolLookupCallback, void *DisInfo, MCContext *Ctx, std::unique_ptr< MCRelocationInfo > &&RelInfo)
static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static int64_t getInlineImmValF16(unsigned Imm)
static std::bitset< 96 > eat12Bytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static Error createReservedKDBytesError(unsigned BaseInBytes, unsigned WidthInBytes)
Create an error object to return from onSymbolStart for reserved kernel descriptor bytes being set.
static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)
static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes, const char *Msg="")
Create an error object to return from onSymbolStart for reserved kernel descriptor bits being set.
static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI, MCOperand &MO, uint8_t NumRegs)
Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the appropriate subregister fo...
This file contains the declaration of the AMDGPU ISA disassembler.
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
#define AMDHSA_BITS_GET(SRC, MSK)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_EXTERNAL_VISIBILITY
MachineInstr unsigned OpIdx
Interface definition for SIRegisterInfo.
MCOperand decodeNonVGPRSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const
MCOperand decodeLiteral64Constant() const
void convertVOPC64DPPInst(MCInst &MI) const
bool isBufferInstruction(const MCInst &MI) const
Check if the instruction is a buffer operation (MUBUF, MTBUF, or S_BUFFER)
bool hasKernargPreload() const
void convertEXPInst(MCInst &MI) const
MCOperand decodeSpecialReg64(unsigned Val) const
const char * getRegClassName(unsigned RegClassID) const
Expected< bool > decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC1.
MCOperand decodeSplitBarrier(const MCInst &Inst, unsigned Val) const
Expected< bool > decodeKernelDescriptorDirective(DataExtractor::Cursor &Cursor, ArrayRef< uint8_t > Bytes, raw_string_ostream &KdStream) const
void convertVOPCDPPInst(MCInst &MI) const
bool isGFX1250Plus() const
MCOperand decodeSpecialReg96Plus(unsigned Val) const
MCOperand decodeSDWASrc32(unsigned Val) const
void setABIVersion(unsigned Version) override
ELF-specific, set the ABI version from the object header.
Expected< bool > decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC2.
unsigned getAgprClassId(unsigned Width) const
MCOperand decodeDpp8FI(unsigned Val) const
MCOperand decodeSDWASrc(unsigned Width, unsigned Val) const
void convertFMAanyK(MCInst &MI) const
DecodeStatus tryDecodeInst(const uint8_t *Table, MCInst &MI, InsnType Inst, uint64_t Address, raw_ostream &Comments) const
void convertMacDPPInst(MCInst &MI) const
MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const
void convertDPP8Inst(MCInst &MI) const
MCOperand createVGPR16Operand(unsigned RegIdx, bool IsHi) const
MCOperand errOperand(unsigned V, const Twine &ErrMsg) const
MCOperand decodeVersionImm(unsigned Imm) const
Expected< bool > decodeKernelDescriptor(StringRef KdName, ArrayRef< uint8_t > Bytes, uint64_t KdAddress) const
void convertVOP3DPPInst(MCInst &MI) const
void convertTrue16OpSel(MCInst &MI) const
MCOperand decodeSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const
MCOperand decodeLiteralConstant(const MCInstrDesc &Desc, const MCOperandInfo &OpDesc) const
Expected< bool > decodeCOMPUTE_PGM_RSRC3(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC3.
AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, MCInstrInfo const *MCII)
MCOperand decodeSpecialReg32(unsigned Val) const
MCOperand createRegOperand(MCRegister Reg) const
MCOperand decodeSDWAVopcDst(unsigned Val) const
void convertVINTERPInst(MCInst &MI) const
void convertSDWAInst(MCInst &MI) const
unsigned getSgprClassId(unsigned Width) const
static MCOperand decodeIntImmed(unsigned Imm)
void convertWMMAInst(MCInst &MI) const
MCOperand decodeBoolReg(const MCInst &Inst, unsigned Val) const
unsigned getVgprClassId(unsigned Width) const
void convertMAIInst(MCInst &MI) const
f8f6f4 instructions have different pseudos depending on the used formats.
bool hasArchitectedFlatScratch() const
unsigned getTtmpClassId(unsigned Width) const
DecodeStatus getInstruction(MCInst &MI, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CS) const override
Returns the disassembly of a single instruction.
MCOperand decodeMandatoryLiteral64Constant(uint64_t Imm) const
void convertMIMGInst(MCInst &MI) const
bool isMacDPP(MCInst &MI) const
int getTTmpIdx(unsigned Val) const
void convertVOP3PDPPInst(MCInst &MI) const
MCOperand createSRegOperand(unsigned SRegClassID, unsigned Val) const
MCOperand decodeSDWASrc16(unsigned Val) const
Expected< bool > onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address) const override
Used to perform separate target specific disassembly for a particular symbol.
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) override
Try to add a symbolic operand instead of Value to the MCInst.
void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, uint64_t Address) override
Try to add a comment on the PC-relative load.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Lightweight error class with error context and mandatory checking.
Tagged union holding either a T or a Error.
static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Context object for machine code objects.
const MCRegisterInfo * getRegisterInfo() const
Superclass for all disassemblers.
MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
MCContext & getContext() const
const MCSubtargetInfo & STI
raw_ostream * CommentStream
DecodeStatus
Ternary decode status.
Base class for the full range of assembler expressions which are needed for parsing.
Instances of this class represent a single low-level machine instruction.
unsigned getOpcode() const
void addOperand(const MCOperand Op)
const MCOperand & getOperand(unsigned i) const
Describe properties that are true of each instruction in the target description file.
Interface to description of machine instruction set.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
Instances of this class represent operands of the MCInst class.
static MCOperand createExpr(const MCExpr *Val)
static MCOperand createReg(MCRegister Reg)
static MCOperand createImm(int64_t Val)
void setReg(MCRegister Reg)
Set the register number.
MCRegister getReg() const
Returns the register number.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getSizeInBits() const
Return the size of the physical register in bits if we are able to determine it.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
MCRegister getMatchingSuperReg(MCRegister Reg, unsigned SubIdx, const MCRegisterClass *RC) const
Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Idx" for physical register Reg.
Wrapper class representing physical registers. Should be passed by value.
Generic base class for all target subtargets.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
bool isVariable() const
isVariable - Check if this is a variable symbol.
LLVM_ABI void setVariableValue(const MCExpr *Value)
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Symbolize and annotate disassembled instructions.
Represents a location in source code.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
StringRef - Represent a constant reference to a string, i.e.
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM Value Representation.
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
A raw_ostream that writes to an SmallVector or SmallString.
const char *(* LLVMSymbolLookupCallback)(void *DisInfo, uint64_t ReferenceValue, uint64_t *ReferenceType, uint64_t ReferencePC, const char **ReferenceName)
The type for the symbol lookup function.
int(* LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, uint64_t Offset, uint64_t OpSize, uint64_t InstSize, int TagType, void *TagBuf)
The type for the operand information call back function.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
ArrayRef< GFXVersion > getGFXVersions()
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
EncodingField< Bit, Bit, D > EncodingBit
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI; otherwise, return Reg.
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isGFX10(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
bool isVOPC64DPP(unsigned Opc)
unsigned getAMDHSACodeObjectVersion(const Module &M)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const MFMA_F8F6F4_Info * getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA, unsigned FmtB, unsigned F8F8Opcode)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX10Plus(const MCSubtargetInfo &STI)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_IMM_V2FP16_SPLAT
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
bool hasGDS(const MCSubtargetInfo &STI)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool isGFX1170(const MCSubtargetInfo &STI)
bool isGFX1250(const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)
@ C
The default llvm calling convention, compatible with C.
@ KERNEL_CODE_PROPERTIES_OFFSET
@ GROUP_SEGMENT_FIXED_SIZE_OFFSET
@ COMPUTE_PGM_RSRC3_OFFSET
@ KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET
@ COMPUTE_PGM_RSRC1_OFFSET
@ COMPUTE_PGM_RSRC2_OFFSET
@ PRIVATE_SEGMENT_FIXED_SIZE_OFFSET
value_type read(const void *memory, endianness endian)
Read a value of a particular endianness from memory.
uint16_t read16(const void *P, endianness E)
This is an optimization pass for GlobalISel generic memory operations.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
LLVM_ABI raw_fd_ostream & outs()
This returns a reference to a raw_fd_ostream for standard output.
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
FunctionAddr VTableAddr uintptr_t uintptr_t Version
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Target & getTheGCNTarget()
The target for GCN GPUs.
To bit_cast(const From &from) noexcept
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
std::vector< SymbolInfoTy > SectionSymbolsTy
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
static void RegisterMCSymbolizer(Target &T, Target::MCSymbolizerCtorTy Fn)
RegisterMCSymbolizer - Register an MCSymbolizer implementation for the given target.
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.