//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?

#include "Disassembler/AMDGPUDisassembler.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/DisassemblerTypes.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx, MCInstrInfo const *MCII)
    : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
      MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)) {
  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
    report_fatal_error("Disassembly not yet supported for subtarget");
}

static DecodeStatus addOperand(MCInst &Inst, const MCOperand& Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::Fail;
}

static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                uint16_t NameIdx) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
  if (OpIdx != -1) {
    auto I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}

static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);

  // Our branches take a simm16, but we need two extra bits to account for the
  // factor of 4.
  APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}
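
// Worked example (illustrative only, not part of the decoder): for a branch
// encoded at Addr = 0x100 with raw Imm = 0xFFFF (simm16 value -1), the 18-bit
// APInt holds -4, so Offset = -4 + 4 + 0x100 = 0x100 and the branch targets
// itself; positive simm16 values step forward in units of one dword.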

static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
                                     const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  int64_t Offset;
  if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else { // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  }
  return addOperand(Inst, MCOperand::createImm(Offset));
}
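
// Example of the split above (illustrative): a raw encoding of 0x1FFFFF
// decodes to SignExtend64<21>(0x1FFFFF) == -1 on GFX9+, whereas VI masks the
// same bits down to the unsigned offset 0xFFFFF.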

static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
                                  const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeBoolReg(Val));
}

#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }

// Decoder for registers, decoding directly via RegClassID. Imm (8-bit) is the
// register number. Used by VGPR-only and AGPR-only operands.
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }
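
// For reference, a hand-expanded instance of the macro above (a sketch of
// what the preprocessor emits for DECODE_OPERAND_REG_8(VGPR_32)):
//
//   static DecodeStatus DecodeVGPR_32RegisterClass(
//       MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
//       const MCDisassembler *Decoder) {
//     assert(Imm < (1 << 8) && "8-bit encoding");
//     auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
//     return addOperand(
//         Inst, DAsm->createRegOperand(AMDGPU::VGPR_32RegClassID, Imm));
//   }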

#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral,         \
                     ImmWidth)                                                 \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst,                                                    \
                      DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm,   \
                                        MandatoryLiteral, ImmWidth));          \
  }

// Decoder for registers. Imm (7-bit) is the register number; uses decodeSrcOp
// to get the register class. Used by SGPR-only operands.
#define DECODE_OPERAND_REG_7(RegClass, OpWidth)                                \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)

// Decoder for registers. Imm (10-bit): Imm{7-0} is the register number,
// Imm{9} is acc (AGPR or VGPR), and Imm{8} should be 0 (see VOP3Pe_SMFMAC).
// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
// Used by AV_ register classes (AGPR or VGPR only register operands).
#define DECODE_OPERAND_REG_AV10(RegClass, OpWidth)                             \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 10, OpWidth,                   \
               Imm | AMDGPU::EncValues::IS_VGPR, false, 0)

// Decoder for Src (9-bit encoding) registers only.
#define DECODE_OPERAND_SRC_REG_9(RegClass, OpWidth)                            \
  DECODE_SrcOp(decodeOperand_##RegClass, 9, OpWidth, Imm, false, 0)

// Decoder for Src (9-bit encoding) AGPR: the register number is encoded in 9
// bits; set Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp,
// registers only.
#define DECODE_OPERAND_SRC_REG_A9(RegClass, OpWidth)                           \
  DECODE_SrcOp(decodeOperand_##RegClass, 9, OpWidth, Imm | 512, false, 0)

// Decoder for 'enum10' from decodeSrcOp: Imm{0-8} is the 9-bit Src encoding
// and Imm{9} is acc, registers only.
#define DECODE_SRC_OPERAND_REG_AV10(RegClass, OpWidth)                         \
  DECODE_SrcOp(decodeOperand_##RegClass, 10, OpWidth, Imm, false, 0)

// Decoder for RegisterOperands using the 9-bit Src encoding. The operand can
// be a register from RegClass or an immediate. Registers that don't belong to
// RegClass are still decoded, and the InstPrinter will report a warning. An
// immediate is decoded into a constant of size ImmWidth, which should match
// the width of the immediate used by the OperandType (important for floating
// point types).
#define DECODE_OPERAND_SRC_REG_OR_IMM_9(RegClass, OpWidth, ImmWidth)           \
  DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth, Imm,      \
               false, ImmWidth)
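
// For example, DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 32) defines
// decodeOperand_VS_32_Imm32, which forwards to
// DAsm->decodeSrcOp(AMDGPUDisassembler::OPW32, Imm, false, 32): registers come
// back as register operands, and encoding 255 becomes a 32-bit literal.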

// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
// and decode using 'enum10' from decodeSrcOp.
#define DECODE_OPERAND_SRC_REG_OR_IMM_A9(RegClass, OpWidth, ImmWidth)          \
  DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth,           \
               Imm | 512, false, ImmWidth)

#define DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(RegClass, OpWidth, ImmWidth)  \
  DECODE_SrcOp(decodeOperand_##RegClass##_Deferred##_Imm##ImmWidth, 9,         \
               OpWidth, Imm, true, ImmWidth)

// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
// when RegisterClass is used as an operand. Most often used for destination
// operands.

DECODE_OPERAND_REG_8(VGPR_32)
DECODE_OPERAND_REG_8(VGPR_32_Lo128)
DECODE_OPERAND_REG_8(VReg_64)
DECODE_OPERAND_REG_8(VReg_96)
DECODE_OPERAND_REG_8(VReg_128)
DECODE_OPERAND_REG_8(VReg_256)
DECODE_OPERAND_REG_8(VReg_288)
DECODE_OPERAND_REG_8(VReg_352)
DECODE_OPERAND_REG_8(VReg_384)
DECODE_OPERAND_REG_8(VReg_512)
DECODE_OPERAND_REG_8(VReg_1024)

DECODE_OPERAND_REG_7(SReg_32, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
DECODE_OPERAND_REG_7(SReg_64, OPW64)
DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
DECODE_OPERAND_REG_7(SReg_128, OPW128)
DECODE_OPERAND_REG_7(SReg_256, OPW256)
DECODE_OPERAND_REG_7(SReg_512, OPW512)

DECODE_OPERAND_REG_8(AGPR_32)
DECODE_OPERAND_REG_8(AReg_64)
DECODE_OPERAND_REG_8(AReg_128)
DECODE_OPERAND_REG_8(AReg_256)
DECODE_OPERAND_REG_8(AReg_512)
DECODE_OPERAND_REG_8(AReg_1024)

DECODE_OPERAND_REG_AV10(AVDst_128, OPW128)
DECODE_OPERAND_REG_AV10(AVDst_512, OPW512)

// Decoders for register only source RegisterOperands that use the 9-bit Src
// encoding: 'decodeOperand_<RegClass>'.

DECODE_OPERAND_SRC_REG_9(VGPR_32, OPW32)
DECODE_OPERAND_SRC_REG_9(VReg_64, OPW64)
DECODE_OPERAND_SRC_REG_9(VReg_128, OPW128)
DECODE_OPERAND_SRC_REG_9(VReg_256, OPW256)
DECODE_OPERAND_SRC_REG_9(VRegOrLds_32, OPW32)

DECODE_OPERAND_SRC_REG_A9(AGPR_32, OPW32)

DECODE_SRC_OPERAND_REG_AV10(AV_32, OPW32)
DECODE_SRC_OPERAND_REG_AV10(AV_64, OPW64)
DECODE_SRC_OPERAND_REG_AV10(AV_128, OPW128)

// Decoders for register or immediate RegisterOperands that use the 9-bit Src
// encoding: 'decodeOperand_<RegClass>_Imm<ImmWidth>'.

DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_64, OPW64, 64)
DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 32)
DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 16)
DECODE_OPERAND_SRC_REG_OR_IMM_9(SRegOrLds_32, OPW32, 32)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32_Lo128, OPW16, 16)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 16)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 32)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_64, OPW64, 64)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_64, OPW64, 32)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_64, OPW64, 64)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_128, OPW128, 32)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_256, OPW256, 64)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_512, OPW512, 32)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_1024, OPW1024, 32)

DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_64, OPW64, 64)
DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_128, OPW128, 32)
DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_256, OPW256, 64)
DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_512, OPW512, 32)
DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_1024, OPW1024, 32)

DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32_Lo128, OPW16, 16)
DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW16, 16)
DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW32, 32)

static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t /*Addr*/,
                                               const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus
DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                 const MCDisassembler *Decoder) {
  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                uint64_t /*Addr*/,
                                                const MCDisassembler *Decoder) {
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  bool IsVGPR = Imm & (1 << 8);
  if (IsVGPR) {
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
                                                   Imm & 0xFF, false, 16));
}

static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  bool IsVGPR = Imm & (1 << 8);
  if (IsVGPR) {
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
                                                   Imm & 0xFF, false, 16));
}

static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
                                         uint64_t Addr,
                                         const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
                                          uint64_t Addr, const void *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
}

static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
                          const MCRegisterInfo *MRI) {
  if (OpIdx < 0)
    return false;

  const MCOperand &Op = Inst.getOperand(OpIdx);
  if (!Op.isReg())
    return false;

  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
}

static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, unsigned Imm,
                                             AMDGPUDisassembler::OpWidthTy Opw,
                                             const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  if (!DAsm->isGFX90A()) {
    Imm &= 511;
  } else {
    // If an atomic has both vdata and vdst, their register classes are tied.
    // The acc bit is decoded along with the vdst (the first operand), so we
    // need to change the register class to AGPR if vdst was an AGPR.
    // If a DS instruction has both data0 and data1, their register classes
    // are also tied.
    unsigned Opc = Inst.getOpcode();
    uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
    uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
                                                        : AMDGPU::OpName::vdata;
    const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
    int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataNameIdx);
    if ((int)Inst.getNumOperands() == DataIdx) {
      int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (IsAGPROperand(Inst, DstIdx, MRI))
        Imm |= 512;
    }

    if (TSFlags & SIInstrFlags::DS) {
      int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
      if ((int)Inst.getNumOperands() == Data2Idx &&
          IsAGPROperand(Inst, DataIdx, MRI))
        Imm |= 512;
    }
  }
  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
}

static DecodeStatus
DecodeAVLdSt_32RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
                             const MCDisassembler *Decoder) {
  return decodeOperand_AVLdSt_Any(Inst, Imm,
                                  AMDGPUDisassembler::OPW32, Decoder);
}

static DecodeStatus
DecodeAVLdSt_64RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
                             const MCDisassembler *Decoder) {
  return decodeOperand_AVLdSt_Any(Inst, Imm,
                                  AMDGPUDisassembler::OPW64, Decoder);
}

static DecodeStatus
DecodeAVLdSt_96RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
                             const MCDisassembler *Decoder) {
  return decodeOperand_AVLdSt_Any(Inst, Imm,
                                  AMDGPUDisassembler::OPW96, Decoder);
}

static DecodeStatus
DecodeAVLdSt_128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
                              const MCDisassembler *Decoder) {
  return decodeOperand_AVLdSt_Any(Inst, Imm,
                                  AMDGPUDisassembler::OPW128, Decoder);
}

static DecodeStatus
DecodeAVLdSt_160RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
                              const MCDisassembler *Decoder) {
  return decodeOperand_AVLdSt_Any(Inst, Imm, AMDGPUDisassembler::OPW160,
                                  Decoder);
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

#include "AMDGPUGenDisassemblerTables.inc"

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res =
      support::endian::read<T, support::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}

static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
  assert(Bytes.size() >= 12);
  uint64_t Lo = support::endian::read<uint64_t, support::endianness::little>(
      Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi = support::endian::read<uint32_t, support::endianness::little>(
      Bytes.data());
  Bytes = Bytes.slice(4);
  return DecoderUInt128(Lo, Hi);
}
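
// Byte-order sketch (illustrative): for the little-endian byte sequence
// b0 b1 ... b11, the first read yields Lo = b7..b0 and the second yields
// Hi = b11..b8, so DecoderUInt128(Lo, Hi) reassembles the 96-bit encoding
// with b0 as the least significant byte.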

// The disassembler is greedy, so we need to check the FI operand value to
// avoid parsing a dpp8 instruction when the correct literal is not set. For
// dpp16, the autogenerated decoder checks the dpp literal.
static bool isValidDPP8(const MCInst &MI) {
  using namespace llvm::AMDGPU::DPP;
  int FiIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::fi);
  assert(FiIdx != -1);
  if ((unsigned)FiIdx >= MI.getNumOperands())
    return false;
  unsigned Fi = MI.getOperand(FiIdx).getImm();
  return Fi == DPP8_FI_0 || Fi == DPP8_FI_1;
}

DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &CS) const {
  bool IsSDWA = false;

  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  DecodeStatus Res = MCDisassembler::Fail;
  do {
    // ToDo: better to switch encoding length using some bit predicate
    // but it is unknown yet, so try all we can

    // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
    // encodings
    if (isGFX11Plus() && Bytes.size() >= 12 ) {
      DecoderUInt128 DecW = eat12Bytes(Bytes);
      Res =
          tryDecodeInst(DecoderTableDPP8GFX1196, DecoderTableDPP8GFX11_FAKE1696,
                        MI, DecW, Address, CS);
      if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
        break;
      MI = MCInst(); // clear
      Res = tryDecodeInst(DecoderTableDPPGFX1196, DecoderTableDPPGFX11_FAKE1696,
                          MI, DecW, Address, CS);
      if (Res) {
        if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
          convertVOP3PDPPInst(MI);
        else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
          convertVOPCDPPInst(MI); // Special VOP3 case
        else {
          assert(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3);
          convertVOP3DPPInst(MI); // Regular VOP3 case
        }
        break;
      }
      Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address, CS);
      if (Res)
        break;
    }
    // Reinitialize Bytes
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding)) {
        Res = tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS);
        if (Res) {
          if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8)
                  == -1)
            break;
          if (convertDPP8Inst(MI) == MCDisassembler::Success)
            break;
          MI = MCInst(); // clear
        }
      }

      Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address, CS);
      if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
        break;
      MI = MCInst(); // clear

      Res = tryDecodeInst(DecoderTableDPP8GFX1164,
                          DecoderTableDPP8GFX11_FAKE1664, MI, QW, Address, CS);
      if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
        break;
      MI = MCInst(); // clear

      Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address, CS);
      if (Res) break;

      Res = tryDecodeInst(DecoderTableDPPGFX1164, DecoderTableDPPGFX11_FAKE1664,
                          MI, QW, Address, CS);
      if (Res) {
        if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
          convertVOPCDPPInst(MI);
        break;
      }

      Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address, CS);
      if (Res) { IsSDWA = true; break; }

      Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address, CS);
      if (Res) { IsSDWA = true; break; }

      Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address, CS);
      if (Res) { IsSDWA = true; break; }

      if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem)) {
        Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS);
        if (Res)
          break;
      }

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
      // table first so we print the correct name.
      if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts)) {
        Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS);
        if (Res)
          break;
      }
    }

    // Reinitialize Bytes as DPP64 could have eaten too much
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    // Try to decode a 32-bit instruction
    if (Bytes.size() < 4) break;
    const uint32_t DW = eatBytes<uint32_t>(Bytes);
    Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS);
    if (Res) break;

    if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) {
      Res = tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS);
      if (Res)
        break;
    }

    if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding)) {
      Res = tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS);
      if (Res) break;
    }

    Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
                        Address, CS);
    if (Res) break;

    // Try a 64-bit instruction built from the 32 bits already read plus the
    // next four bytes.
    if (Bytes.size() < 4) break;
    const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;

    if (STI.hasFeature(AMDGPU::FeatureGFX940Insts)) {
      Res = tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS);
      if (Res)
        break;
    }

    if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) {
      Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS);
      if (Res)
        break;
    }

    Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address, CS);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
                        Address, CS);
    if (Res)
      break;

    Res = tryDecodeInst(DecoderTableWMMAGFX1164, MI, QW, Address, CS);
  } while (false);

  if (Res && AMDGPU::isMAC(MI.getOpcode())) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
              (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD))) {
    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);
    if (CPolPos != -1) {
      unsigned CPol =
          (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
              AMDGPU::CPol::GLC : 0;
      if (MI.getNumOperands() <= (unsigned)CPolPos) {
        insertNamedMCOperand(MI, MCOperand::createImm(CPol),
                             AMDGPU::OpName::cpol);
      } else if (CPol) {
        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
      }
    }
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
              (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
      (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
    // GFX90A lost TFE, its place is occupied by ACC.
    int TFEOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
      MI.insert(TFEIter, MCOperand::createImm(0));
    }
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
              (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))) {
    int SWZOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
      MI.insert(SWZIter, MCOperand::createImm(0));
    }
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords) {
        Res = MCDisassembler::Fail;
      } else {
        for (unsigned i = 0; i < NSAArgs; ++i) {
          const unsigned VAddrIdx = VAddr0Idx + 1 + i;
          auto VAddrRCID =
              MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
          MI.insert(MI.begin() + VAddrIdx,
                    createRegOperand(VAddrRCID, Bytes[i]));
        }
        Bytes = Bytes.slice(4 * NSAWords);
      }
    }

    if (Res)
      Res = convertMIMGInst(MI);
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP))
    Res = convertEXPInst(MI);

  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP))
    Res = convertVINTERPInst(MI);

  if (Res && IsSDWA)
    Res = convertSDWAInst(MI);

  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                           MCOI::OperandConstraint::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
         !MI.getOperand(VDstIn_Idx).isReg() ||
         MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
        MCOperand::createReg(MI.getOperand(Tied).getReg()),
        AMDGPU::OpName::vdst_in);
    }
  }

  int ImmLitIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
  bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
  if (Res && ImmLitIdx != -1 && !IsSOPK)
    Res = convertFMAanyK(MI, ImmLitIdx);

  // If the opcode was not recognized, we'll assume a size of 4 bytes
  // (unless there are fewer bytes left).
  Size = Res ? (MaxInstBytesNum - Bytes.size())
             : std::min((size_t)4, Bytes_.size());
  return Res;
}

DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX11)) {
    // The MCInst still has these fields even though they are no longer encoded
    // in the GFX11 instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
  }
  return MCDisassembler::Success;
}

DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11) {
    // The MCInst has this field that is not directly encoded in the
    // instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
  }
  return MCDisassembler::Success;
}

DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
  return MCDisassembler::Success;
}
786
788 unsigned OpSel = 0;
789 unsigned OpSelHi = 0;
790 unsigned NegLo = 0;
791 unsigned NegHi = 0;
792};
793
794// Reconstruct values of VOP3/VOP3P operands such as op_sel.
795// Note that these values do not affect disassembler output,
796// so this is only necessary for consistency with src_modifiers.
798 bool IsVOP3P = false) {
  VOPModifiers Modifiers;
  unsigned Opc = MI.getOpcode();
  const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
                        AMDGPU::OpName::src1_modifiers,
                        AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    if (OpIdx == -1)
      continue;

    unsigned Val = MI.getOperand(OpIdx).getImm();

    Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
    if (IsVOP3P) {
      Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
      Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
      Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
    } else if (J == 0) {
      Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
    }
  }

  return Modifiers;
}
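
// Worked example (illustrative): if src1_modifiers has OP_SEL_0 set and, for
// VOP3P, src0_modifiers has NEG set, the loop above yields
// Modifiers.OpSel == 0b010 (bit 1 for J == 1) and Modifiers.NegLo == 0b001.
// For non-VOP3P opcodes, DST_OP_SEL on src0_modifiers sets OpSel bit 3.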

// MAC opcodes have special old and src2 operands.
// src2 is tied to dst, while old is not tied (but assumed to be).
bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  if (OldIdx != -1 && Desc.getOperandConstraint(
                          OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
    assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
    assert(Desc.getOperandConstraint(
               AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
               MCOI::OperandConstraint::TIED_TO) == DST_IDX);
    (void)DST_IDX;
    return true;
  }

  return false;
}

// Create a dummy old operand and insert dummy unused src2_modifiers.
void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
  insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
  insertNamedMCOperand(MI, MCOperand::createImm(0),
                       AMDGPU::OpName::src2_modifiers);
}

// We must check FI == literal to reject non-genuine dpp8 insts, and we must
// first add optional MI operands to check FI.
DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  if (MCII->get(Opc).TSFlags & SIInstrFlags::VOP3P) {
    convertVOP3PDPPInst(MI);
  } else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) ||
             AMDGPU::isVOPC64DPP(Opc)) {
    convertVOPCDPPInst(MI);
  } else {
    if (isMacDPP(MI))
      convertMacDPPInst(MI);

    unsigned DescNumOps = MCII->get(Opc).getNumOperands();
    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
      auto Mods = collectVOPModifiers(MI);
      insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                           AMDGPU::OpName::op_sel);
    } else {
      // Insert dummy unused src modifiers.
      if (MI.getNumOperands() < DescNumOps &&
          AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
        insertNamedMCOperand(MI, MCOperand::createImm(0),
                             AMDGPU::OpName::src0_modifiers);

      if (MI.getNumOperands() < DescNumOps &&
          AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
        insertNamedMCOperand(MI, MCOperand::createImm(0),
                             AMDGPU::OpName::src1_modifiers);
    }
  }
  return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail;
}

DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
  if (isMacDPP(MI))
    convertMacDPPInst(MI);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
  return MCDisassembler::Success;
}

// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show the address as
// if it had one dword, which may not actually be the case.
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  int RsrcIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  assert(VDataIdx != -1);
  if (BaseOpcode->BVH) {
    // Add the A16 operand for intersect_ray instructions.
    addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
    return MCDisassembler::Success;
  }

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;
  bool IsNSA = false;
  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (isGFX10Plus()) {
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    AddrSize =
        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));

    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA;
    if (!IsNSA) {
      if (AddrSize > 12)
        AddrSize = 16;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
          // The NSA encoding does not contain enough operands for the
          // combination of base opcode / dimension. Should this be an error?
          return MCDisassembler::Success;
        }
        IsPartialNSA = true;
      }
    }
  }

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
    DstSize += 1;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return MCDisassembler::Success;

  int NewOpcode =
      AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
  if (NewOpcode == -1)
    return MCDisassembler::Success;

  // Widen the register to the correct number of enabled channels.
  unsigned NewVdata = AMDGPU::NoRegister;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;

    // Get first subregister of VData
    unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
    unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                       &MRI.getRegClass(DataRCID));
    if (NewVdata == AMDGPU::NoRegister) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
      return MCDisassembler::Success;
    }
  }

  // If not using NSA on GFX10+, widen the vaddr0 address register to the
  // correct size. If using partial NSA on GFX11+, widen the last address
  // register.
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  unsigned NewVAddrSA = AMDGPU::NoRegister;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    unsigned VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
    NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
                                         &MRI.getRegClass(AddrRCID));
    if (!NewVAddrSA)
      return MCDisassembler::Success;
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data).
      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
    }
  }

  if (NewVAddrSA) {
    MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
  } else if (IsNSA) {
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }

  return MCDisassembler::Success;
}

// Opsel and neg bits are used in src_modifiers and standalone operands. The
// autogenerated decoder only adds to src_modifiers, so manually add the bits
// to the other operands.
DecodeStatus AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  auto Mods = collectVOPModifiers(MI, true);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
                         AMDGPU::OpName::neg_hi);

  return MCDisassembler::Success;
}

// Create a dummy old operand and insert optional operands.
DecodeStatus AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
    insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);
  return MCDisassembler::Success;
}

DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI,
                                                int ImmLitIdx) const {
  assert(HasLiteral && "Should have decoded a literal");
  const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
  unsigned DescNumOps = Desc.getNumOperands();
  insertNamedMCOperand(MI, MCOperand::createImm(Literal),
                       AMDGPU::OpName::immDeferred);
  assert(DescNumOps == MI.getNumOperands());
  for (unsigned I = 0; I < DescNumOps; ++I) {
    auto &Op = MI.getOperand(I);
    auto OpType = Desc.operands()[I].OperandType;
    bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
                         OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
    if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
        IsDeferredOp)
      Op.setImm(Literal);
  }
  return MCDisassembler::Success;
}

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accept as much as we can; let the assembler sort it out
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
                                                  bool IsHi) const {
  unsigned RCID =
      IsHi ? AMDGPU::VGPR_HI16RegClassID : AMDGPU::VGPR_LO16RegClassID;
  return createRegOperand(RCID, RegIdx);
}

// Decode literals for instructions which always have a literal in the
// encoding.
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
  if (HasLiteral) {
    assert(
        AMDGPU::hasVOPD(STI) &&
        "Should only decode multiple kimm with VOPD, check VSrc operand types");
    if (Literal != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;
  return MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
  // For now all literal constants are supposed to be unsigned integer.
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                        Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint32_t>(Bytes);
  }
  return MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
  // Cast prevents negative overflow.
}
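
// Worked examples (illustrative), using the enum ranges asserted above:
// encoding 128 (INLINE_INTEGER_C_MIN) decodes to 0, encoding 129 to 1, up to
// 192 (INLINE_INTEGER_C_POSITIVE_MAX) = 64; encodings 193..208 decode to the
// negatives -1..-16 via the second branch (192 - Imm).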

static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint32_t>(0.5f);
  case 241:
    return llvm::bit_cast<uint32_t>(-0.5f);
  case 242:
    return llvm::bit_cast<uint32_t>(1.0f);
  case 243:
    return llvm::bit_cast<uint32_t>(-1.0f);
  case 244:
    return llvm::bit_cast<uint32_t>(2.0f);
  case 245:
    return llvm::bit_cast<uint32_t>(-2.0f);
  case 246:
    return llvm::bit_cast<uint32_t>(4.0f);
  case 247:
    return llvm::bit_cast<uint32_t>(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}
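
// Sanity check on the mapping above (illustrative): bit_cast<uint32_t>(0.5f)
// is 0x3F000000 and bit_cast<uint32_t>(-2.0f) is 0xC0000000, so the returned
// values are the IEEE-754 single-precision bit patterns rather than the float
// values themselves; 0x3E22F983 is the nearest float to 1/(2*pi).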

static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint64_t>(0.5);
  case 241:
    return llvm::bit_cast<uint64_t>(-0.5);
  case 242:
    return llvm::bit_cast<uint64_t>(1.0);
  case 243:
    return llvm::bit_cast<uint64_t>(-1.0);
  case 244:
    return llvm::bit_cast<uint64_t>(2.0);
  case 245:
    return llvm::bit_cast<uint64_t>(-2.0);
  case 246:
    return llvm::bit_cast<uint64_t>(4.0);
  case 247:
    return llvm::bit_cast<uint64_t>(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3800;
  case 241:
    return 0xB800;
  case 242:
    return 0x3C00;
  case 243:
    return 0xBC00;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4400;
  case 247:
    return 0xC400;
  case 248: // 1 / (2 * PI)
    return 0x3118;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}
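
// The constants above are the corresponding IEEE-754 half-precision bit
// patterns (illustrative check: 0x3800 is 0.5, sign 0, exponent 01110,
// mantissa 0; 0xB800 flips only the sign bit for -0.5; 0x3118 approximates
// 1/(2*pi)).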

MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm) {
  assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN &&
         Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);

  // ToDo: case 248: 1/(2*PI) - is allowed only on VI
  // ImmWidth 0 is a default case where the operand should not allow
  // immediates. The Imm value is still decoded into a 32-bit immediate
  // operand; the inst printer will use it to print a verbose error message.
  switch (ImmWidth) {
  case 0:
  case 32:
    return MCOperand::createImm(getInlineImmVal32(Imm));
  case 64:
    return MCOperand::createImm(getInlineImmVal64(Imm));
  case 16:
    return MCOperand::createImm(getInlineImmVal16(Imm));
  default:
    llvm_unreachable("implement me");
  }
}

unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return VGPR_32RegClassID;
  case OPW64:
  case OPWV232: return VReg_64RegClassID;
  case OPW96: return VReg_96RegClassID;
  case OPW128: return VReg_128RegClassID;
  case OPW160: return VReg_160RegClassID;
  case OPW256: return VReg_256RegClassID;
  case OPW288: return VReg_288RegClassID;
  case OPW320: return VReg_320RegClassID;
  case OPW352: return VReg_352RegClassID;
  case OPW384: return VReg_384RegClassID;
  case OPW512: return VReg_512RegClassID;
  case OPW1024: return VReg_1024RegClassID;
  }
}

unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return AGPR_32RegClassID;
  case OPW64:
  case OPWV232: return AReg_64RegClassID;
  case OPW96: return AReg_96RegClassID;
  case OPW128: return AReg_128RegClassID;
  case OPW160: return AReg_160RegClassID;
  case OPW256: return AReg_256RegClassID;
  case OPW288: return AReg_288RegClassID;
  case OPW320: return AReg_320RegClassID;
  case OPW352: return AReg_352RegClassID;
  case OPW384: return AReg_384RegClassID;
  case OPW512: return AReg_512RegClassID;
  case OPW1024: return AReg_1024RegClassID;
  }
}


unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return SGPR_32RegClassID;
  case OPW64:
  case OPWV232: return SGPR_64RegClassID;
  case OPW96: return SGPR_96RegClassID;
  case OPW128: return SGPR_128RegClassID;
  case OPW160: return SGPR_160RegClassID;
  case OPW256: return SGPR_256RegClassID;
  case OPW288: return SGPR_288RegClassID;
  case OPW320: return SGPR_320RegClassID;
  case OPW352: return SGPR_352RegClassID;
  case OPW384: return SGPR_384RegClassID;
  case OPW512: return SGPR_512RegClassID;
  }
}

unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return TTMP_32RegClassID;
  case OPW64:
  case OPWV232: return TTMP_64RegClassID;
  case OPW128: return TTMP_128RegClassID;
  case OPW256: return TTMP_256RegClassID;
  case OPW288: return TTMP_288RegClassID;
  case OPW320: return TTMP_320RegClassID;
  case OPW352: return TTMP_352RegClassID;
  case OPW384: return TTMP_384RegClassID;
  case OPW512: return TTMP_512RegClassID;
  }
}

int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
}
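
// Example (illustrative): on GFX9+, where TTMP_GFX9PLUS_MIN is 108, a source
// encoding of 110 yields index 2, i.e. ttmp2; any value outside the TTMP
// window returns -1 so callers fall through to other interpretations.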

MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
                                          bool MandatoryLiteral,
                                          unsigned ImmWidth) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 1024); // enum10

  bool IsAGPR = Val & 512;
  Val &= 511;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(IsAGPR ? getAgprClassId(Width)
                                   : getVgprClassId(Width), Val - VGPR_MIN);
  }
  return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth);
}
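
// Examples of the enum10 split above (illustrative): with VGPR_MIN == 256,
// Val == 261 decodes as v5 (acc bit clear) and Val == 512 + 261 as a5 (acc
// bit set); encodings below 256 fall through to decodeNonVGPRSrcOp for SGPRs,
// TTMPs, inline constants and literals.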

MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width,
                                                 unsigned Val,
                                                 bool MandatoryLiteral,
                                                 unsigned ImmWidth) const {
  // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
  // decoded earlier.
  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
  using namespace AMDGPU::EncValues;

  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes compilation warning.
    static_assert(SGPR_MIN == 0);
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
    return decodeIntImmed(Val);

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
    return decodeFPImmed(ImmWidth, Val);

  if (Val == LITERAL_CONST) {
    if (MandatoryLiteral)
      // Keep a sentinel value for deferred setting
      return MCOperand::createImm(LITERAL_CONST);
    else
      return decodeLiteralConstant();
  }

  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return decodeSpecialReg32(Val);
  case OPW64:
  case OPWV232:
    return decodeSpecialReg64(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}

// Bit 0 of DstY isn't stored in the instruction, because it's always the
// opposite of bit 0 of DstX.
MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
                                               unsigned Val) const {
  int VDstXInd =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
  assert(VDstXInd != -1);
  assert(Inst.getOperand(VDstXInd).isReg());
  unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
  Val |= ~XDstReg & 1;
  auto Width = llvm::AMDGPUDisassembler::OPW32;
  return createRegOperand(getVgprClassId(Width), Val);
}
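
// Parity sketch (illustrative): in VOPD the X and Y destinations must use
// opposite VGPR banks, so if vdstX decoded to an even register (encoding bit
// 0 clear) the OR above forces bit 0 of the Y encoding to 1, and vice versa.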

MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  // clang-format off
  case 102: return createRegOperand(FLAT_SCR_LO);
  case 103: return createRegOperand(FLAT_SCR_HI);
  case 104: return createRegOperand(XNACK_MASK_LO);
  case 105: return createRegOperand(XNACK_MASK_HI);
  case 106: return createRegOperand(VCC_LO);
  case 107: return createRegOperand(VCC_HI);
  case 108: return createRegOperand(TBA_LO);
  case 109: return createRegOperand(TBA_HI);
  case 110: return createRegOperand(TMA_LO);
  case 111: return createRegOperand(TMA_HI);
  case 124:
    return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
  case 125:
    return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
  case 126: return createRegOperand(EXEC_LO);
  case 127: return createRegOperand(EXEC_HI);
  case 235: return createRegOperand(SRC_SHARED_BASE_LO);
  case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
  case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  case 254: return createRegOperand(LDS_DIRECT);
  default: break;
  // clang-format on
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR);
  case 104: return createRegOperand(XNACK_MASK);
  case 106: return createRegOperand(VCC);
  case 108: return createRegOperand(TBA);
  case 110: return createRegOperand(TMA);
  case 124:
    if (isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 125:
    if (!isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 126: return createRegOperand(EXEC);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
                                            const unsigned Val,
                                            unsigned ImmWidth) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    // XXX: cast to int is needed to avoid stupid warning:
    // compare with unsigned is always true
    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
      return decodeIntImmed(SVal);

    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
      return decodeFPImmed(ImmWidth, SVal);

    return decodeSpecialReg32(SVal);
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    return createRegOperand(getVgprClassId(Width), Val);
  }
  llvm_unreachable("unsupported target");
}

MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(OPW16, Val, 16);
}

MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(OPW32, Val, 32);
}

MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  using namespace AMDGPU::SDWA;

  assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
          STI.hasFeature(AMDGPU::FeatureGFX10)) &&
         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

    int TTmpIdx = getTTmpIdx(Val);
    if (TTmpIdx >= 0) {
      auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
      return createSRegOperand(TTmpClsId, TTmpIdx);
    } else if (Val > SGPR_MAX) {
      return IsWave64 ? decodeSpecialReg64(Val)
                      : decodeSpecialReg32(Val);
    } else {
      return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
    }
  } else {
    return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
  }
}

MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
  return STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
             ? decodeSrcOp(OPW64, Val)
             : decodeSrcOp(OPW32, Val);
}

bool AMDGPUDisassembler::isVI() const {
  return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
}

bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }

bool AMDGPUDisassembler::isGFX90A() const {
  return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
}

bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }

bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }

bool AMDGPUDisassembler::isGFX10Plus() const {
  return AMDGPU::isGFX10Plus(STI);
}

bool AMDGPUDisassembler::isGFX11() const {
  return STI.hasFeature(AMDGPU::FeatureGFX11);
}

bool AMDGPUDisassembler::isGFX11Plus() const {
  return AMDGPU::isGFX11Plus(STI);
}


bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
}

bool AMDGPUDisassembler::hasKernargPreload() const {
  return AMDGPU::hasKernargPreload(STI);
}

//===----------------------------------------------------------------------===//
// AMDGPU specific symbol handling
//===----------------------------------------------------------------------===//
#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';            \
  } while (0)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
  do {                                                                         \
    KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " "       \
             << GET_FIELD(MASK) << '\n';                                       \
  } while (0)

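// For example (illustrative expansion): with FourByteBuffer holding the rsrc1
// word, PRINT_DIRECTIVE(".amdhsa_ieee_mode",
// COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE) extracts the field via AMDHSA_BITS_GET
// and emits a line such as "\t.amdhsa_ieee_mode 1" into KdStream.
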
// NOLINTNEXTLINE(readability-identifier-naming)
MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
  using namespace amdhsa;
  StringRef Indent = "\t";

  // We cannot accurately backward compute #VGPRs used from
  // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
  // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
  // simply calculate the inverse of what the assembler does.

  uint32_t GranulatedWorkitemVGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);

  uint32_t NextFreeVGPR =
      (GranulatedWorkitemVGPRCount + 1) *
      AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);

  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
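
  // Worked example (illustrative): with a wave32 VGPR encoding granule of 8,
  // GRANULATED_WORKITEM_VGPR_COUNT == 3 round-trips to
  // .amdhsa_next_free_vgpr (3 + 1) * 8 == 32, which the assembler granulates
  // back to the same field value.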

  // We cannot backward compute values used to calculate
  // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for the
  // following directives can't be computed:
  // .amdhsa_reserve_vcc
  // .amdhsa_reserve_flat_scratch
  // .amdhsa_reserve_xnack_mask
  // They take their respective default values if not specified in the
  // assembly.
  //
  // GRANULATED_WAVEFRONT_SGPR_COUNT
  //    = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
  //
  // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
  // are set to 0. So while disassembling we consider that:
  //
  // GRANULATED_WAVEFRONT_SGPR_COUNT
  //    = f(NEXT_FREE_SGPR + 0 + 0 + 0)
  //
  // The disassembler cannot recover the original values of those 3 directives.

  uint32_t GranulatedWavefrontSGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);

  if (isGFX10Plus() && GranulatedWavefrontSGPRCount)
    return MCDisassembler::Fail;

  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
                          AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);

  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
  if (!hasArchitectedFlatScratch())
    KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
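
  // Worked example (illustrative): with an SGPR encoding granule of 8,
  // GRANULATED_WAVEFRONT_SGPR_COUNT == 1 becomes .amdhsa_next_free_sgpr 16;
  // any SGPRs the original kernel reserved for VCC, flat scratch, or XNACK
  // are folded into that count rather than recovered separately.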
1778
1779 if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY)
1780 return MCDisassembler::Fail;
1781
1782 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
1783 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
1784 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
1785 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
1786 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
1787 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
1788 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
1789 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
1790
1791 if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV)
1792 return MCDisassembler::Fail;
1793
1794 PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
1795
1796 if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE)
1797 return MCDisassembler::Fail;
1798
1799 PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
1800
1801 if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY)
1802 return MCDisassembler::Fail;
1803
1804 if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)
1805 return MCDisassembler::Fail;
1806
1807 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL);
1808
1809 if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0)
1810 return MCDisassembler::Fail;
1811
1812 if (isGFX10Plus()) {
1813 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
1814 COMPUTE_PGM_RSRC1_WGP_MODE);
1815 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED);
1816 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS);
1817 }
1818 return MCDisassembler::Success;
1819}
1820
1821// NOLINTNEXTLINE(readability-identifier-naming)
1822MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
1823 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1824 using namespace amdhsa;
1825 StringRef Indent = "\t";
1826 if (hasArchitectedFlatScratch())
1827 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
1828 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
1829 else
1830 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
1831 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
1832 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
1833 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
1834 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
1835 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
1836 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
1837 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
1838 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
1839 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
1840 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
1841 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
1842
1843 if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH)
1844 return MCDisassembler::Fail;
1845
1846 if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY)
1847 return MCDisassembler::Fail;
1848
1849 if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE)
1850 return MCDisassembler::Fail;
1851
1853 ".amdhsa_exception_fp_ieee_invalid_op",
1854 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
1855 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
1856 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
1858 ".amdhsa_exception_fp_ieee_div_zero",
1859 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
1860 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
1861 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
1862 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
1863 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
1864 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
1865 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
1866 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
1867 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
1868
1869 if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0)
1870 return MCDisassembler::Fail;
1871
1872 return MCDisassembler::Success;
1873}
1874
1875// NOLINTNEXTLINE(readability-identifier-naming)
1876MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
1877 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1878 using namespace amdhsa;
1879 StringRef Indent = "\t";
1880 if (isGFX90A()) {
1881 KdStream << Indent << ".amdhsa_accum_offset "
1882 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
1883 << '\n';
1884 if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED0)
1885 return MCDisassembler::Fail;
1886 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
1887 if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED1)
1888 return MCDisassembler::Fail;
1889 } else if (isGFX10Plus()) {
1890 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
1891 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
1892 COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
1893 } else {
1895 "SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
1896 }
1897 PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
1898 COMPUTE_PGM_RSRC3_GFX10_PLUS_INST_PREF_SIZE);
1899 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
1900 COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);
1901 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
1902 COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_END);
1903 if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED0)
1904 return MCDisassembler::Fail;
1907 } else if (FourByteBuffer) {
1908 return MCDisassembler::Fail;
1909 }
1910 return MCDisassembler::Success;
1911}
1912#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
1913#undef PRINT_DIRECTIVE
1914#undef GET_FIELD
1915
1916MCDisassembler::DecodeStatus
1917AMDGPUDisassembler::decodeKernelDescriptorDirective(
1918 DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
1919 raw_string_ostream &KdStream) const {
1920#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
1921 do { \
1922 KdStream << Indent << DIRECTIVE " " \
1923 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
1924 } while (0)
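// Illustrative expansion (editorial annotation, not in the upstream source):
// for MASK = KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR the macro prints
//   ((TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR)
//    >> KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT)
// i.e. the isolated bit-field value, via the MASK##_SHIFT token paste.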
1925
1926 uint16_t TwoByteBuffer = 0;
1927 uint32_t FourByteBuffer = 0;
1928
1929 StringRef ReservedBytes;
1930 StringRef Indent = "\t";
1931
1932 assert(Bytes.size() == 64);
1933 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
1934
1935 switch (Cursor.tell()) {
1936 case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
1937 FourByteBuffer = DE.getU32(Cursor);
1938 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
1939 << '\n';
1940 return MCDisassembler::Success;
1941
1942 case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
1943 FourByteBuffer = DE.getU32(Cursor);
1944 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
1945 << FourByteBuffer << '\n';
1946 return MCDisassembler::Success;
1947
1948 case amdhsa::KERNARG_SIZE_OFFSET:
1949 FourByteBuffer = DE.getU32(Cursor);
1950 KdStream << Indent << ".amdhsa_kernarg_size "
1951 << FourByteBuffer << '\n';
1952 return MCDisassembler::Success;
1953
1954 case amdhsa::RESERVED0_OFFSET:
1955 // 4 reserved bytes, must be 0.
1956 ReservedBytes = DE.getBytes(Cursor, 4);
1957 for (int I = 0; I < 4; ++I) {
1958 if (ReservedBytes[I] != 0) {
1959 return MCDisassembler::Fail;
1960 }
1961 }
1962 return MCDisassembler::Success;
1963
1964 case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
1965 // KERNEL_CODE_ENTRY_BYTE_OFFSET
1966 // So far no directive controls this for Code Object V3, so simply skip for
1967 // disassembly.
1968 DE.skip(Cursor, 8);
1969 return MCDisassembler::Success;
1970
1971 case amdhsa::RESERVED1_OFFSET:
1972 // 20 reserved bytes, must be 0.
1973 ReservedBytes = DE.getBytes(Cursor, 20);
1974 for (int I = 0; I < 20; ++I) {
1975 if (ReservedBytes[I] != 0) {
1976 return MCDisassembler::Fail;
1977 }
1978 }
1979 return MCDisassembler::Success;
1980
1981 case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
1982 FourByteBuffer = DE.getU32(Cursor);
1983 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
1984
1985 case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
1986 FourByteBuffer = DE.getU32(Cursor);
1987 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
1988
1989 case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
1990 FourByteBuffer = DE.getU32(Cursor);
1991 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
1992
1993 case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
1994 using namespace amdhsa;
1995 TwoByteBuffer = DE.getU16(Cursor);
1996
1997 if (!hasArchitectedFlatScratch())
1998 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
1999 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2000 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2001 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2002 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2003 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2004 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2005 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2006 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2007 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2008 if (!hasArchitectedFlatScratch())
2009 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2010 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2011 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2012 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2013
2014 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2015 return MCDisassembler::Fail;
2016
2017 // Reserved for GFX9
2018 if (isGFX9() &&
2019 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2020 return MCDisassembler::Fail;
2021 } else if (isGFX10Plus()) {
2022 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2023 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2024 }
2025
2026 if (AMDGPU::getAmdhsaCodeObjectVersion() >= AMDGPU::AMDHSA_COV5)
2027 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2028 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2029
2030 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1)
2031 return MCDisassembler::Fail;
2032
2033 return MCDisassembler::Success;
2034
2035 case amdhsa::KERNARG_PRELOAD_OFFSET:
2036 using namespace amdhsa;
2037 TwoByteBuffer = DE.getU16(Cursor);
2038 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2039 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2040 KERNARG_PRELOAD_SPEC_LENGTH);
2041 }
2042
2043 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2044 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2045 KERNARG_PRELOAD_SPEC_OFFSET);
2046 }
2047 return MCDisassembler::Success;
2048
2049 case amdhsa::RESERVED3_OFFSET:
2050 // 4 bytes from here are reserved, must be 0.
2051 ReservedBytes = DE.getBytes(Cursor, 4);
2052 for (int I = 0; I < 4; ++I) {
2053 if (ReservedBytes[I] != 0)
2054 return MCDisassembler::Fail;
2055 }
2056 return MCDisassembler::Success;
2057
2058 default:
2059 llvm_unreachable("Unhandled index. Case statements cover everything.");
2060 return MCDisassembler::Fail;
2061 }
2062#undef PRINT_DIRECTIVE
2063}
2064
2065MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor(
2066 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2067 // CP microcode requires the kernel descriptor to be 64-byte aligned.
2068 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2069 return MCDisassembler::Fail;
2070
2071 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2072 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2073 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2074 // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
2075 // when required.
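 // Editorial annotation (layout assumed from the amdhsa kernel descriptor
 // definitions, not stated in the upstream source): KERNEL_CODE_PROPERTIES
 // sits at byte offset 56 of the 64-byte descriptor, while COMPUTE_PGM_RSRC1
 // is decoded earlier at offset 48, hence the look-ahead read below.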
2076 if (isGFX10Plus()) {
2077 uint16_t KernelCodeProperties =
2078 support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2079 support::endianness::little);
2080 EnableWavefrontSize32 =
2081 AMDHSA_BITS_GET(KernelCodeProperties,
2082 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2083 }
2084
2085 std::string Kd;
2086 raw_string_ostream KdStream(Kd);
2087 KdStream << ".amdhsa_kernel " << KdName << '\n';
2088
2089 DataExtractor::Cursor C(0);
2090 while (C && C.tell() < Bytes.size()) {
2091 MCDisassembler::DecodeStatus Status =
2092 decodeKernelDescriptorDirective(C, Bytes, KdStream);
2093
2094 cantFail(C.takeError());
2095
2096 if (Status == MCDisassembler::Fail)
2097 return MCDisassembler::Fail;
2098 }
2099 KdStream << ".end_amdhsa_kernel\n";
2100 outs() << KdStream.str();
2101 return MCDisassembler::Success;
2102}
2103
2104std::optional<MCDisassembler::DecodeStatus>
2105AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
2106 ArrayRef<uint8_t> Bytes, uint64_t Address,
2107 raw_ostream &CStream) const {
2108 // Right now only the kernel descriptor needs to be handled.
2109 // We ignore all other symbols for target specific handling.
2110 // TODO:
2111 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2112 // Object V2 and V3 when symbols are marked protected.
2113
2114 // amd_kernel_code_t for Code Object V2.
2115 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2116 Size = 256;
2117 return MCDisassembler::Fail;
2118 }
2119
2120 // Code Object V3 kernel descriptors.
2121 StringRef Name = Symbol.Name;
2122 if (Symbol.Type == ELF::STT_OBJECT && Name.endswith(StringRef(".kd"))) {
2123 Size = 64; // Size = 64 regardless of success or failure.
2124 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2125 }
2126 return std::nullopt;
2127}
2128
2129//===----------------------------------------------------------------------===//
2130// AMDGPUSymbolizer
2131//===----------------------------------------------------------------------===//
2132
2133// Try to find symbol name for specified label
2135 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2136 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2137 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2138
2139 if (!IsBranch) {
2140 return false;
2141 }
2142
2143 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2144 if (!Symbols)
2145 return false;
2146
2147 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2148 return Val.Addr == static_cast<uint64_t>(Value) &&
2149 Val.Type == ELF::STT_NOTYPE;
2150 });
2151 if (Result != Symbols->end()) {
2152 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2153 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2154 Inst.addOperand(MCOperand::createExpr(Add));
2155 return true;
2156 }
2157 // Add to list of referenced addresses, so caller can synthesize a label.
2158 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2159 return false;
2160}
2161
2162void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2163 int64_t Value,
2164 uint64_t Address) {
2165 llvm_unreachable("unimplemented");
2166}
2167
2168//===----------------------------------------------------------------------===//
2169// Initialization
2170//===----------------------------------------------------------------------===//
2171
2172static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
2173 LLVMOpInfoCallback /*GetOpInfo*/,
2174 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2175 void *DisInfo,
2176 MCContext *Ctx,
2177 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2178 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2179}
2180
2181static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2182 const MCSubtargetInfo &STI,
2183 MCContext &Ctx) {
2184 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2185}
2186
2187extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler() {
2188 TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
2189 createAMDGPUDisassembler);
2190 TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
2191 createAMDGPUSymbolizer);
2192}