#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)
      TargetMaxInstBytes(Ctx.getAsmInfo()->getMaxInstLength(&STI)) {
  std::advance(I, OpIdx);
  APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
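  // Worked example (illustrative, not from the original source): the SOPP
  // branch immediate is a signed word offset, so for Imm == 0xFFFF (-1 as a
  // 16-bit value), Imm * 4 sign-extends to -4 in the 18-bit APInt and the
  // computed target is Addr + 4 + (-4) == Addr, the branch's own address.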
  Offset = SignExtend64<21>(Imm);
  return addOperand(Inst, DAsm->decodeBoolReg(Val));
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }
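// Expansion sketch (illustrative): DECODE_OPERAND(decodeSDWAVopcDst,
// decodeSDWAVopcDst) would produce a static trampoline equivalent to:
//
//   static DecodeStatus decodeSDWAVopcDst(MCInst &Inst, unsigned Imm,
//                                         uint64_t /*Addr*/,
//                                         const MCDisassembler *Decoder) {
//     auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
//     return addOperand(Inst, DAsm->decodeSDWAVopcDst(Imm));
//   }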
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral,         \
                     ImmWidth)                                                 \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst,                                                    \
                      DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm,   \
                                        MandatoryLiteral, ImmWidth));          \
  }
#define DECODE_OPERAND_REG_7(RegClass, OpWidth)                                \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)
#define DECODE_OPERAND_REG_AV10(RegClass, OpWidth)                             \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 10, OpWidth,                   \
               Imm | AMDGPU::EncValues::IS_VGPR, false, 0)
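// Sketch-level note (an assumption about the encoding constants, not stated
// in this file): OR-ing in AMDGPU::EncValues::IS_VGPR biases the value into
// the VGPR range before decodeSrcOp sees it, while the A9 variants below OR
// in 512, the bit decodeSrcOp tests (Val & 512) to select the AGPR bank.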
#define DECODE_OPERAND_SRC_REG_9(RegClass, OpWidth)                            \
  DECODE_SrcOp(decodeOperand_##RegClass, 9, OpWidth, Imm, false, 0)
#define DECODE_OPERAND_SRC_REG_A9(RegClass, OpWidth)                           \
  DECODE_SrcOp(decodeOperand_##RegClass, 9, OpWidth, Imm | 512, false, 0)
#define DECODE_SRC_OPERAND_REG_AV10(RegClass, OpWidth)                         \
  DECODE_SrcOp(decodeOperand_##RegClass, 10, OpWidth, Imm, false, 0)
#define DECODE_OPERAND_SRC_REG_OR_IMM_9(RegClass, OpWidth, ImmWidth)           \
  DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth, Imm,      \
               false, ImmWidth)
#define DECODE_OPERAND_SRC_REG_OR_IMM_A9(RegClass, OpWidth, ImmWidth)          \
  DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth,           \
               Imm | 512, false, ImmWidth)
#define DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(RegClass, OpWidth, ImmWidth)  \
  DECODE_SrcOp(decodeOperand_##RegClass##_Deferred##_Imm##ImmWidth, 9,         \
               OpWidth, Imm, true, ImmWidth)
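// Naming sketch (inferred from the token pasting above, not checked against
// the generated tables): DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 32)
// would define decodeOperand_VS_32_Imm32, forwarding the 9-bit field to
// decodeSrcOp(AMDGPUDisassembler::OPW32, Imm, /*MandatoryLiteral=*/false,
// /*ImmWidth=*/32); the DEFERRED variant passes MandatoryLiteral = true so
// the literal slot is decoded via decodeMandatoryLiteralConstant.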
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));

  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));

  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
  if (!DAsm->isGFX90A()) {
                             : AMDGPU::OpName::vdata;

  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
#define DECODE_SDWA(DecName)                                                   \
  DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
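// E.g. DECODE_SDWA(Src32) defines a static decodeSDWASrc32 wrapper that
// forwards to AMDGPUDisassembler::decodeSDWASrc32 through DECODE_OPERAND.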
#include "AMDGPUGenDisassemblerTables.inc"
  const auto Res = support::endian::read<T, support::endianness::little>(
      Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  uint64_t Lo = support::endian::read<uint64_t, support::endianness::little>(
      Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi = support::endian::read<uint32_t, support::endianness::little>(
      Bytes.data());
  Bytes = Bytes.slice(4);
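// Usage sketch (illustrative buffer, not from the original file):
//   ArrayRef<uint8_t> Buf = ...;            // raw instruction stream
//   uint32_t DW = eatBytes<uint32_t>(Buf);  // read 4 LE bytes, advance Buf
//   uint64_t QW = eatBytes<uint64_t>(Buf);  // read the next 8 LE bytes
// eat12Bytes assembles a 96-bit encoding the same way: the low 8 bytes
// first, then the high 4, advancing Bytes past each read.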
  if ((unsigned)FiIdx >= MI.getNumOperands())
    return false;
  unsigned Fi = MI.getOperand(FiIdx).getImm();
  return Fi == DPP8_FI_0 || Fi == DPP8_FI_1;
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);
    if (Res) { IsSDWA = true; break; }

    if (Res) { IsSDWA = true; break; }

    if (Res) { IsSDWA = true; break; }
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    if (Bytes.size() < 4)
      break;
    const uint32_t DW = eatBytes<uint32_t>(Bytes);
    if (Bytes.size() < 4)
      break;
                                             AMDGPU::OpName::src2_modifiers);

  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
                                             AMDGPU::OpName::cpol);
    if (MI.getNumOperands() <= (unsigned)CPolPos) {
                             AMDGPU::OpName::cpol);
      MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
    if (TFEOpIdx != -1) {
      auto TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);

  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
    if (SWZOpIdx != -1) {
      auto SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords) {
      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
            MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
        MI.insert(MI.begin() + VAddrIdx,
      Bytes = Bytes.slice(4 * NSAWords);
                                               AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
                       !MI.getOperand(VDstIn_Idx).isReg() ||
                       MI.getOperand(VDstIn_Idx).getReg() !=
                           MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
                           AMDGPU::OpName::vdst_in);
  if (Res && ImmLitIdx != -1 && !IsSOPK)
  Size = Res ? (MaxInstBytesNum - Bytes.size())
             : std::min((size_t)4, Bytes_.size());
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11) {
                                                  AMDGPU::OpName::sdst);

  unsigned OpSelHi = 0;
                                        bool IsVOP3P = false) {
  unsigned Opc = MI.getOpcode();
  const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
                        AMDGPU::OpName::src1_modifiers,
                        AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    unsigned Val = MI.getOperand(OpIdx).getImm();
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  if (OldIdx != -1 && Desc.getOperandConstraint(
  assert(Desc.getOperandConstraint(
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
                       AMDGPU::OpName::src2_modifiers);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
                         AMDGPU::OpName::src0_modifiers);
  if (MI.getNumOperands() < DescNumOps &&
                         AMDGPU::OpName::src1_modifiers);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
                         AMDGPU::OpName::op_sel);
                                            AMDGPU::OpName::vdst);
                                            AMDGPU::OpName::vdata);
                                            AMDGPU::OpName::dmask);
                                            AMDGPU::OpName::tfe);
                                            AMDGPU::OpName::d16);
  if (BaseOpcode->BVH) {

  bool IsAtomic = (VDstIdx != -1);

  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
  IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
          Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA;
    if (AddrSize > Info->VAddrDwords) {
  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
    DstSize = (DstSize + 1) / 2;

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return MCDisassembler::Success;
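  // Worked example (illustrative): a gather4 result is always 4 dwords; for
  // other opcodes DMask == 0b0111 gives DstSize = popcount(7) = 3 dwords,
  // and packed D16 halves that (rounding up) to 2.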
  unsigned NewVdata = AMDGPU::NoRegister;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;

    unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
    unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0) ? VdataSub0 : Vdata0;

    if (NewVdata == AMDGPU::NoRegister) {
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  unsigned NewVAddrSA = AMDGPU::NoRegister;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    unsigned VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {

  assert(AddrSize <= Info->VAddrDwords);
  MI.erase(MI.begin() + VAddr0Idx + AddrSize,
           MI.begin() + VAddr0Idx + Info->VAddrDwords);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
  if (MI.getNumOperands() < DescNumOps &&
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
                         AMDGPU::OpName::neg_hi);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
  if (MI.getNumOperands() < DescNumOps &&
                         AMDGPU::OpName::src0_modifiers);
  if (MI.getNumOperands() < DescNumOps &&
                         AMDGPU::OpName::src1_modifiers);
                                                int ImmLitIdx) const {
  assert(HasLiteral && "Should have decoded a literal");
                       AMDGPU::OpName::immDeferred);
  assert(DescNumOps == MI.getNumOperands());
  for (unsigned I = 0; I < DescNumOps; ++I) {
    auto &Op = MI.getOperand(I);
    auto OpType = Desc.operands()[I].OperandType;
                                         const Twine &ErrMsg) const {

                                               unsigned Val) const {
  const auto &RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
                      ": unknown register " + Twine(Val));
                                                unsigned Val) const {
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
  if (Val % (1 << shift)) {
         << ": scalar reg isn't aligned " << Val;
1154 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1156 return errOperand(Val,
"More than one unique literal is illegal");
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
    Literal = eatBytes<uint32_t>(Bytes);
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
             (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
             (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
    return llvm::bit_cast<uint32_t>(0.5f);
    return llvm::bit_cast<uint32_t>(-0.5f);
    return llvm::bit_cast<uint32_t>(1.0f);
    return llvm::bit_cast<uint32_t>(-1.0f);
    return llvm::bit_cast<uint32_t>(2.0f);
    return llvm::bit_cast<uint32_t>(-2.0f);
    return llvm::bit_cast<uint32_t>(4.0f);
    return llvm::bit_cast<uint32_t>(-4.0f);
    return llvm::bit_cast<uint64_t>(0.5);
    return llvm::bit_cast<uint64_t>(-0.5);
    return llvm::bit_cast<uint64_t>(1.0);
    return llvm::bit_cast<uint64_t>(-1.0);
    return llvm::bit_cast<uint64_t>(2.0);
    return llvm::bit_cast<uint64_t>(-2.0);
    return llvm::bit_cast<uint64_t>(4.0);
    return llvm::bit_cast<uint64_t>(-4.0);
    return 0x3fc45f306dc9c882;
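// Illustrative spot-checks (not in the original source): the inline FP
// constants map to their IEEE-754 bit patterns, e.g.
// bit_cast<uint32_t>(0.5f) == 0x3F000000 and
// bit_cast<uint64_t>(1.0) == 0x3FF0000000000000. The trailing 64-bit entry,
// 0x3fc45f306dc9c882, is the bit pattern of 1/(2*pi), the extra inline
// constant available on newer subtargets.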
    return VGPR_32RegClassID;
  case OPWV232:
    return VReg_64RegClassID;
  case OPW96:
    return VReg_96RegClassID;
  case OPW128:
    return VReg_128RegClassID;
  case OPW160:
    return VReg_160RegClassID;
  case OPW256:
    return VReg_256RegClassID;
  case OPW288:
    return VReg_288RegClassID;
  case OPW320:
    return VReg_320RegClassID;
  case OPW352:
    return VReg_352RegClassID;
  case OPW384:
    return VReg_384RegClassID;
  case OPW512:
    return VReg_512RegClassID;
  case OPW1024:
    return VReg_1024RegClassID;
    return AGPR_32RegClassID;
  case OPWV232:
    return AReg_64RegClassID;
  case OPW96:
    return AReg_96RegClassID;
  case OPW128:
    return AReg_128RegClassID;
  case OPW160:
    return AReg_160RegClassID;
  case OPW256:
    return AReg_256RegClassID;
  case OPW288:
    return AReg_288RegClassID;
  case OPW320:
    return AReg_320RegClassID;
  case OPW352:
    return AReg_352RegClassID;
  case OPW384:
    return AReg_384RegClassID;
  case OPW512:
    return AReg_512RegClassID;
  case OPW1024:
    return AReg_1024RegClassID;
    return SGPR_32RegClassID;
  case OPWV232:
    return SGPR_64RegClassID;
  case OPW96:
    return SGPR_96RegClassID;
  case OPW128:
    return SGPR_128RegClassID;
  case OPW160:
    return SGPR_160RegClassID;
  case OPW256:
    return SGPR_256RegClassID;
  case OPW288:
    return SGPR_288RegClassID;
  case OPW320:
    return SGPR_320RegClassID;
  case OPW352:
    return SGPR_352RegClassID;
  case OPW384:
    return SGPR_384RegClassID;
  case OPW512:
    return SGPR_512RegClassID;
    return TTMP_32RegClassID;
  case OPWV232:
    return TTMP_64RegClassID;
  case OPW128:
    return TTMP_128RegClassID;
  case OPW256:
    return TTMP_256RegClassID;
  case OPW288:
    return TTMP_288RegClassID;
  case OPW320:
    return TTMP_320RegClassID;
  case OPW352:
    return TTMP_352RegClassID;
  case OPW384:
    return TTMP_384RegClassID;
  case OPW512:
    return TTMP_512RegClassID;
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax) ? Val - TTmpMin : -1;
                                          bool MandatoryLiteral,
                                          unsigned ImmWidth) const {
  using namespace AMDGPU::EncValues;

  bool IsAGPR = Val & 512;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
  static_assert(SGPR_MIN == 0);
  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
  if (Val == LITERAL_CONST) {
    if (MandatoryLiteral)
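// Dispatch summary for the source-operand space handled above (a reading of
// the visible range checks, not a drop-in replacement):
//   [VGPR_MIN, VGPR_MAX]             -> VGPR, or AGPR when bit 9 (512) is set
//   [SGPR_MIN, SGPR_MAX]             -> SGPR
//   [INLINE_INTEGER_C_MIN, ..._MAX]  -> decodeIntImmed(Val)
//   [INLINE_FLOATING_C_MIN, ..._MAX] -> decodeFPImmed(ImmWidth, Val)
//   LITERAL_CONST                    -> literal from the instruction stream,
//                                       mandatory for deferred operands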
                                              unsigned Val) const {
  Val |= ~XDstReg & 1;
                                         unsigned ImmWidth) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
                              Val - SDWA9EncValues::SRC_SGPR_MIN);
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
                              Val - SDWA9EncValues::SRC_TTMP_MIN);

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
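// Range summary for the SDWA9 path above (a reading of the visible checks):
// VGPRs come first, then SGPRs (GFX10 raises the SGPR cap), then TTMPs;
// anything else is re-based against SRC_SGPR_MIN and decoded as an inline
// integer or float constant. The Volcanic Islands branch below treats the
// field as a plain register index, matching the pre-SDWA9 encoding
// (assumption based on the surrounding structure).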
  using namespace AMDGPU::SDWA;

         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " "                                        \
             << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';           \
  } while (0)
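// Expansion sketch: PRINT_DIRECTIVE(".amdhsa_ieee_mode",
// COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE) streams the directive name followed by
// the field value, i.e. (FourByteBuffer & MASK) shifted right by the
// token-pasted COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE_SHIFT constant.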
  using namespace amdhsa;

  uint32_t GranulatedWorkitemVGPRCount =
      (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >>
      COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT;

  uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) *

  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
  uint32_t GranulatedWavefrontSGPRCount =
      (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >>
      COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT;

  if (isGFX10Plus() && GranulatedWavefrontSGPRCount)

  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *

  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
  if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY)

                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV)

  PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE)

  PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY)

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)

  PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0)

                  COMPUTE_PGM_RSRC1_WGP_MODE);
  PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED);
  PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS);
  using namespace amdhsa;

                  COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
    PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
                  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH)

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY)

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE)

  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_invalid_op",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_div_zero",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0)

#undef PRINT_DIRECTIVE
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " "                                        \
             << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';            \
  } while (0)
  switch (Cursor.tell()) {
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer

    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_private_segment_fixed_size "
             << FourByteBuffer << '\n';

    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_kernarg_size "
             << FourByteBuffer << '\n';

    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0) {

    ReservedBytes = DE.getBytes(Cursor, 20);
    for (int I = 0; I < 20; ++I) {
      if (ReservedBytes[I] != 0) {

    FourByteBuffer = DE.getU32(Cursor);

    FourByteBuffer = DE.getU32(Cursor);

    FourByteBuffer = DE.getU32(Cursor);
    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);

                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)

        (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
                      KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);

                    KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1)

    ReservedBytes = DE.getBytes(Cursor, 6);
    for (int I = 0; I < 6; ++I) {
      if (ReservedBytes[I] != 0)

#undef PRINT_DIRECTIVE
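// Cursor-pattern sketch for the reads above (standalone illustration; the
// real function supplies DE, Cursor, and the error plumbing):
//   DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
//   DataExtractor::Cursor C(0);
//   uint32_t GroupSegmentFixedSize = DE.getU32(C); // advances C by 4
//   if (!C)            // a failed read poisons the cursor for later checks
//     return MCDisassembler::Fail;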
  if (Bytes.size() != 64 || KdAddress % 64 != 0)

  KdStream << ".amdhsa_kernel " << KdName << '\n';

  while (C && C.tell() < Bytes.size()) {
  KdStream << ".end_amdhsa_kernel\n";
std::optional<MCDisassembler::DecodeStatus>

    return std::nullopt;
  if (Result != Symbols->end()) {

  ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
    std::unique_ptr<MCRelocationInfo> &&RelInfo) {