LLVM 23.0.0git
AMDGPUDisassembler.cpp
Go to the documentation of this file.
1//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//===----------------------------------------------------------------------===//
10//
11/// \file
12///
13/// This file contains definition for AMDGPU ISA disassembler
14//
15//===----------------------------------------------------------------------===//
16
17// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18
22#include "SIDefines.h"
23#include "SIRegisterInfo.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCContext.h"
31#include "llvm/MC/MCDecoder.h"
33#include "llvm/MC/MCExpr.h"
34#include "llvm/MC/MCInstrDesc.h"
40
41using namespace llvm;
42using namespace llvm::MCD;
43
44#define DEBUG_TYPE "amdgpu-disassembler"
45
// Highest encodable SGPR number for the current subtarget: GFX10+ widened the
// SGPR file relative to SI-era targets.
#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)
49
51
52static int64_t getInlineImmValF16(unsigned Imm);
53static int64_t getInlineImmValBF16(unsigned Imm);
54static int64_t getInlineImmVal32(unsigned Imm);
55static int64_t getInlineImmVal64(unsigned Imm);
56
58 MCContext &Ctx, MCInstrInfo const *MCII)
59 : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
60 MAI(*Ctx.getAsmInfo()),
61 HwModeRegClass(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)),
62 TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
63 CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
64 // ToDo: AMDGPUDisassembler supports only VI ISA.
65 if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
66 reportFatalUsageError("disassembly not yet supported for subtarget");
67
68 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
69 createConstantSymbolExpr(Symbol, Code);
70
71 UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
72 UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
73 UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
74}
75
79
81addOperand(MCInst &Inst, const MCOperand& Opnd) {
82 Inst.addOperand(Opnd);
83 return Opnd.isValid() ?
86}
87
89 AMDGPU::OpName Name) {
90 int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);
91 if (OpIdx != -1) {
92 auto *I = MI.begin();
93 std::advance(I, OpIdx);
94 MI.insert(I, Op);
95 }
96 return OpIdx;
97}
98
99static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
100 uint64_t Addr,
101 const MCDisassembler *Decoder) {
102 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
103
104 // Our branches take a simm16.
105 int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;
106
107 if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
109 return addOperand(Inst, MCOperand::createImm(Imm));
110}
111
112static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
113 const MCDisassembler *Decoder) {
114 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
115 int64_t Offset;
116 if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
118 } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
119 Offset = Imm & 0xFFFFF;
120 } else { // GFX9+ supports 21-bit signed offsets.
122 }
124}
125
126static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
127 const MCDisassembler *Decoder) {
128 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
129 return addOperand(Inst, DAsm->decodeBoolReg(Inst, Val));
130}
131
132static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
133 uint64_t Addr,
134 const MCDisassembler *Decoder) {
135 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
136 return addOperand(Inst, DAsm->decodeSplitBarrier(Inst, Val));
137}
138
139static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
140 const MCDisassembler *Decoder) {
141 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
142 return addOperand(Inst, DAsm->decodeDpp8FI(Val));
143}
144
// Emit a static decoder callback that forwards a raw immediate field to the
// named AMDGPUDisassembler method and appends the resulting operand.
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);       \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }
152
153// Decoder for registers, decode directly using RegClassID. Imm(8-bit) is
154// number of register. Used by VGPR only and AGPR only operands.
// Emit a decoder for a register operand addressed directly by an 8-bit
// register number within a fixed register class (VGPR-only / AGPR-only
// operands).
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);       \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }
164
// Emit a decoder that routes an EncSize-bit source field through the
// disassembler's decodeSrcOp, which derives the register class (or an
// immediate) from the encoding itself.
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm)                           \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);       \
    return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));         \
  }
172
173static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
174 unsigned OpWidth, unsigned Imm, unsigned EncImm,
175 const MCDisassembler *Decoder) {
176 assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
177 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
178 return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));
179}
180
181// Decoder for registers. Imm(7-bit) is number of register, uses decodeSrcOp to
182// get register class. Used by SGPR only operands.
// 7-bit SGPR-style register fields: route through decodeSrcOp so the actual
// register (class) is derived from the encoding.
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)

// Same, but for an 8-bit encoded field.
#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)
188
189// Decoder for registers. Imm(10-bit): Imm{7-0} is number of register,
190// Imm{9} is acc(agpr or vgpr) Imm{8} should be 0 (see VOP3Pe_SMFMAC).
191// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
192// Used by AV_ register classes (AGPR or VGPR only register operands).
193template <unsigned OpWidth>
194static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
195 const MCDisassembler *Decoder) {
196 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
197 Decoder);
198}
199
200// Decoder for Src(9-bit encoding) registers only.
201template <unsigned OpWidth>
202static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
203 uint64_t /* Addr */,
204 const MCDisassembler *Decoder) {
205 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
206}
207
208// Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set
209// Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
210// only.
211template <unsigned OpWidth>
212static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
213 const MCDisassembler *Decoder) {
214 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
215}
216
217// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
218// Imm{9} is acc, registers only.
219template <unsigned OpWidth>
220static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
221 uint64_t /* Addr */,
222 const MCDisassembler *Decoder) {
223 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
224}
225
226// Decoder for RegisterOperands using 9-bit Src encoding. Operand can be
227// register from RegClass or immediate. Registers that don't belong to RegClass
228// will be decoded and InstPrinter will report warning. Immediate will be
229// decoded into constant matching the OperandType (important for floating point
230// types).
231template <unsigned OpWidth>
232static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
233 uint64_t /* Addr */,
234 const MCDisassembler *Decoder) {
235 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
236}
237
238// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
239// and decode using 'enum10' from decodeSrcOp.
240template <unsigned OpWidth>
241static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
242 uint64_t /* Addr */,
243 const MCDisassembler *Decoder) {
244 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
245}
246
247// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
248// when RegisterClass is used as an operand. Most often used for destination
249// operands.
250
252DECODE_OPERAND_REG_8(VGPR_32_Lo128)
255DECODE_OPERAND_REG_8(VReg_128)
256DECODE_OPERAND_REG_8(VReg_192)
257DECODE_OPERAND_REG_8(VReg_256)
258DECODE_OPERAND_REG_8(VReg_288)
259DECODE_OPERAND_REG_8(VReg_320)
260DECODE_OPERAND_REG_8(VReg_352)
261DECODE_OPERAND_REG_8(VReg_384)
262DECODE_OPERAND_REG_8(VReg_512)
263DECODE_OPERAND_REG_8(VReg_1024)
264
265DECODE_OPERAND_SREG_7(SReg_32, 32)
266DECODE_OPERAND_SREG_7(SReg_32_XM0, 32)
267DECODE_OPERAND_SREG_7(SReg_32_XEXEC, 32)
268DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, 32)
269DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, 32)
270DECODE_OPERAND_SREG_7(SReg_64_XEXEC, 64)
271DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, 64)
272DECODE_OPERAND_SREG_7(SReg_96, 96)
273DECODE_OPERAND_SREG_7(SReg_128, 128)
274DECODE_OPERAND_SREG_7(SReg_128_XNULL, 128)
275DECODE_OPERAND_SREG_7(SReg_256, 256)
276DECODE_OPERAND_SREG_7(SReg_256_XNULL, 256)
277DECODE_OPERAND_SREG_7(SReg_512, 512)
278
279DECODE_OPERAND_SREG_8(SReg_64, 64)
280
283DECODE_OPERAND_REG_8(AReg_128)
284DECODE_OPERAND_REG_8(AReg_256)
285DECODE_OPERAND_REG_8(AReg_512)
286DECODE_OPERAND_REG_8(AReg_1024)
287
289 uint64_t /*Addr*/,
290 const MCDisassembler *Decoder) {
291 assert(isUInt<10>(Imm) && "10-bit encoding expected");
292 assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
293
294 bool IsHi = Imm & (1 << 9);
295 unsigned RegIdx = Imm & 0xff;
296 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
297 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
298}
299
300static DecodeStatus
302 const MCDisassembler *Decoder) {
303 assert(isUInt<8>(Imm) && "8-bit encoding expected");
304
305 bool IsHi = Imm & (1 << 7);
306 unsigned RegIdx = Imm & 0x7f;
307 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
308 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
309}
310
311template <unsigned OpWidth>
313 uint64_t /*Addr*/,
314 const MCDisassembler *Decoder) {
315 assert(isUInt<9>(Imm) && "9-bit encoding expected");
316
317 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
318 if (Imm & AMDGPU::EncValues::IS_VGPR) {
319 bool IsHi = Imm & (1 << 7);
320 unsigned RegIdx = Imm & 0x7f;
321 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
322 }
323 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
324}
325
326template <unsigned OpWidth>
327static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
328 uint64_t /*Addr*/,
329 const MCDisassembler *Decoder) {
330 assert(isUInt<10>(Imm) && "10-bit encoding expected");
331
332 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
333 if (Imm & AMDGPU::EncValues::IS_VGPR) {
334 bool IsHi = Imm & (1 << 9);
335 unsigned RegIdx = Imm & 0xff;
336 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
337 }
338 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
339}
340
341static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
342 uint64_t /*Addr*/,
343 const MCDisassembler *Decoder) {
344 assert(isUInt<10>(Imm) && "10-bit encoding expected");
345 assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");
346
347 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
348
349 bool IsHi = Imm & (1 << 9);
350 unsigned RegIdx = Imm & 0xff;
351 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
352}
353
354static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
355 uint64_t Addr,
356 const MCDisassembler *Decoder) {
357 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
358 return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
359}
360
362 uint64_t Addr,
363 const MCDisassembler *Decoder) {
364 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
365 return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
366}
367
368static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
369 uint64_t Addr, const void *Decoder) {
370 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
371 return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
372}
373
374static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
375 const MCDisassembler *Decoder) {
376 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
377 return addOperand(Inst, DAsm->decodeSrcOp(Inst, Opw, Imm | 256));
378}
379
380template <unsigned Opw>
381static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
382 uint64_t /* Addr */,
383 const MCDisassembler *Decoder) {
384 return decodeAVLdSt(Inst, Imm, Opw, Decoder);
385}
386
387static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
388 uint64_t Addr,
389 const MCDisassembler *Decoder) {
390 assert(Imm < (1 << 9) && "9-bit encoding");
391 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
392 return addOperand(Inst, DAsm->decodeSrcOp(Inst, 64, Imm));
393}
394
395#define DECODE_SDWA(DecName) \
396DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
397
398DECODE_SDWA(Src32)
399DECODE_SDWA(Src16)
400DECODE_SDWA(VopcDst)
401
402static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
403 uint64_t /* Addr */,
404 const MCDisassembler *Decoder) {
405 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
406 return addOperand(Inst, DAsm->decodeVersionImm(Imm));
407}
408
409#include "AMDGPUGenDisassemblerTables.inc"
410
411namespace {
412// Define bitwidths for various types used to instantiate the decoder.
413template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
414template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
415template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
416template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
417} // namespace
418
419//===----------------------------------------------------------------------===//
420//
421//===----------------------------------------------------------------------===//
422
423template <typename InsnType>
425 InsnType Inst, uint64_t Address,
426 raw_ostream &Comments) const {
427 assert(MI.getOpcode() == 0);
428 assert(MI.getNumOperands() == 0);
429 MCInst TmpInst;
430 HasLiteral = false;
431 const auto SavedBytes = Bytes;
432
433 SmallString<64> LocalComments;
434 raw_svector_ostream LocalCommentStream(LocalComments);
435 CommentStream = &LocalCommentStream;
436
437 DecodeStatus Res =
438 decodeInstruction(Table, TmpInst, Inst, Address, this, STI);
439
440 CommentStream = nullptr;
441
442 if (Res != MCDisassembler::Fail) {
443 MI = TmpInst;
444 Comments << LocalComments;
446 }
447 Bytes = SavedBytes;
449}
450
451template <typename InsnType>
454 MCInst &MI, InsnType Inst, uint64_t Address,
455 raw_ostream &Comments) const {
456 for (const uint8_t *T : {Table1, Table2}) {
457 if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
458 return Res;
459 }
461}
462
463template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
464 assert(Bytes.size() >= sizeof(T));
465 const auto Res =
467 Bytes = Bytes.slice(sizeof(T));
468 return Res;
469}
470
471static inline std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
472 using namespace llvm::support::endian;
473 assert(Bytes.size() >= 12);
474 std::bitset<96> Lo(read<uint64_t, endianness::little>(Bytes.data()));
475 Bytes = Bytes.slice(8);
476 std::bitset<96> Hi(read<uint32_t, endianness::little>(Bytes.data()));
477 Bytes = Bytes.slice(4);
478 return (Hi << 64) | Lo;
479}
480
481static inline std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
482 using namespace llvm::support::endian;
483 assert(Bytes.size() >= 16);
484 std::bitset<128> Lo(read<uint64_t, endianness::little>(Bytes.data()));
485 Bytes = Bytes.slice(8);
486 std::bitset<128> Hi(read<uint64_t, endianness::little>(Bytes.data()));
487 Bytes = Bytes.slice(8);
488 return (Hi << 64) | Lo;
489}
490
491void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
492 const MCInstrInfo &MCII) const {
493 const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
494 for (auto [OpNo, OpDesc] : enumerate(Desc.operands())) {
495 if (OpNo >= MI.getNumOperands())
496 continue;
497
498 // TODO: Fix V_DUAL_FMAMK_F32_X_FMAAK_F32_gfx12 vsrc operands,
499 // defined to take VGPR_32, but in reality allowing inline constants.
500 bool IsSrc = AMDGPU::OPERAND_SRC_FIRST <= OpDesc.OperandType &&
501 OpDesc.OperandType <= AMDGPU::OPERAND_SRC_LAST;
502 if (!IsSrc && OpDesc.OperandType != MCOI::OPERAND_REGISTER)
503 continue;
504
505 MCOperand &Op = MI.getOperand(OpNo);
506 if (!Op.isImm())
507 continue;
508 int64_t Imm = Op.getImm();
511 Op = decodeIntImmed(Imm);
512 continue;
513 }
514
516 Op = decodeLiteralConstant(Desc, OpDesc);
517 continue;
518 }
519
522 switch (OpDesc.OperandType) {
528 break;
533 Imm = getInlineImmValF16(Imm);
534 break;
537 Imm = getInlineImmValF16(Imm);
538 break;
540 // V_PK_FMAC_F16 on GFX11+ duplicates the f16 inline constant to both
541 // halves, so we need to produce the duplicated value for correct
542 // round-trip.
543 if (isGFX11Plus()) {
544 int64_t F16Val = getInlineImmValF16(Imm);
545 Imm = (F16Val << 16) | (F16Val & 0xFFFF);
546 } else {
547 Imm = getInlineImmValF16(Imm);
548 }
549 break;
550 }
556 Imm = getInlineImmVal64(Imm);
557 break;
558 default:
559 Imm = getInlineImmVal32(Imm);
560 }
561 Op.setImm(Imm);
562 }
563 }
564}
565
567 ArrayRef<uint8_t> Bytes_,
569 raw_ostream &CS) const {
570 unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
571 Bytes = Bytes_.slice(0, MaxInstBytesNum);
572
573 // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
574 // there are fewer bytes left). This will be overridden on success.
575 Size = std::min((size_t)4, Bytes_.size());
576
577 do {
578 // ToDo: better to switch encoding length using some bit predicate
579 // but it is unknown yet, so try all we can
580
581 // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
582 // encodings
583 if (isGFX1250Plus() && Bytes.size() >= 16) {
584 std::bitset<128> DecW = eat16Bytes(Bytes);
585 if (tryDecodeInst(DecoderTableGFX1250128, MI, DecW, Address, CS))
586 break;
587 Bytes = Bytes_.slice(0, MaxInstBytesNum);
588 }
589
590 if (isGFX11Plus() && Bytes.size() >= 12) {
591 std::bitset<96> DecW = eat12Bytes(Bytes);
592
593 if (isGFX1170() &&
594 tryDecodeInst(DecoderTableGFX117096, DecoderTableGFX1170_FAKE1696, MI,
595 DecW, Address, CS))
596 break;
597
598 if (isGFX11() &&
599 tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
600 DecW, Address, CS))
601 break;
602
603 if (isGFX1250() &&
604 tryDecodeInst(DecoderTableGFX125096, DecoderTableGFX1250_FAKE1696, MI,
605 DecW, Address, CS))
606 break;
607
608 if (isGFX12() &&
609 tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
610 DecW, Address, CS))
611 break;
612
613 if (isGFX12() &&
614 tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
615 break;
616
617 if (isGFX13() &&
618 tryDecodeInst(DecoderTableGFX1396, DecoderTableGFX13_FAKE1696, MI,
619 DecW, Address, CS))
620 break;
621
622 if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
623 // Return 8 bytes for a potential literal.
624 Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);
625
626 if (isGFX1250() &&
627 tryDecodeInst(DecoderTableGFX125096, MI, DecW, Address, CS))
628 break;
629 }
630
631 // Reinitialize Bytes
632 Bytes = Bytes_.slice(0, MaxInstBytesNum);
633
634 } else if (Bytes.size() >= 16 &&
635 STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
636 std::bitset<128> DecW = eat16Bytes(Bytes);
637 if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
638 break;
639
640 // Reinitialize Bytes
641 Bytes = Bytes_.slice(0, MaxInstBytesNum);
642 }
643
644 if (Bytes.size() >= 8) {
645 const uint64_t QW = eatBytes<uint64_t>(Bytes);
646
647 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
648 tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
649 break;
650
651 if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
652 tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
653 break;
654
655 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
656 tryDecodeInst(DecoderTableGFX95064, MI, QW, Address, CS))
657 break;
658
659 // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
660 // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
661 // table first so we print the correct name.
662 if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
663 tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
664 break;
665
666 if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
667 tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
668 break;
669
670 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
671 tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
672 break;
673
674 if ((isVI() || isGFX9()) &&
675 tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
676 break;
677
678 if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
679 break;
680
681 if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
682 break;
683
684 if (isGFX1250() &&
685 tryDecodeInst(DecoderTableGFX125064, DecoderTableGFX1250_FAKE1664, MI,
686 QW, Address, CS))
687 break;
688
689 if (isGFX12() &&
690 tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
691 Address, CS))
692 break;
693
694 if (isGFX1170() &&
695 tryDecodeInst(DecoderTableGFX117064, DecoderTableGFX1170_FAKE1664, MI,
696 QW, Address, CS))
697 break;
698
699 if (isGFX11() &&
700 tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
701 Address, CS))
702 break;
703
704 if (isGFX1170() &&
705 tryDecodeInst(DecoderTableGFX1170W6464, MI, QW, Address, CS))
706 break;
707
708 if (isGFX11() &&
709 tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
710 break;
711
712 if (isGFX12() &&
713 tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
714 break;
715
716 if (isGFX13() &&
717 tryDecodeInst(DecoderTableGFX1364, DecoderTableGFX13_FAKE1664, MI, QW,
718 Address, CS))
719 break;
720
721 // Reinitialize Bytes
722 Bytes = Bytes_.slice(0, MaxInstBytesNum);
723 }
724
725 // Try decode 32-bit instruction
726 if (Bytes.size() >= 4) {
727 const uint32_t DW = eatBytes<uint32_t>(Bytes);
728
729 if ((isVI() || isGFX9()) &&
730 tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
731 break;
732
733 if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
734 break;
735
736 if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
737 break;
738
739 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
740 tryDecodeInst(DecoderTableGFX95032, MI, DW, Address, CS))
741 break;
742
743 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
744 tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
745 break;
746
747 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
748 tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
749 break;
750
751 if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
752 break;
753
754 if (isGFX1170() &&
755 tryDecodeInst(DecoderTableGFX117032, DecoderTableGFX1170_FAKE1632, MI,
756 DW, Address, CS))
757 break;
758
759 if (isGFX11() &&
760 tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
761 Address, CS))
762 break;
763
764 if (isGFX1250() &&
765 tryDecodeInst(DecoderTableGFX125032, DecoderTableGFX1250_FAKE1632, MI,
766 DW, Address, CS))
767 break;
768
769 if (isGFX12() &&
770 tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
771 Address, CS))
772 break;
773
774 if (isGFX13() &&
775 tryDecodeInst(DecoderTableGFX1332, DecoderTableGFX13_FAKE1632, MI, DW,
776 Address, CS))
777 break;
778 }
779
781 } while (false);
782
784
785 decodeImmOperands(MI, *MCII);
786
787 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
788 if (isMacDPP(MI))
790
791 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
793 else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
794 convertVOPCDPPInst(MI); // Special VOP3 case
795 else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
796 convertVOPC64DPPInst(MI); // Special VOP3 case
797 else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
798 -1)
800 else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
801 convertVOP3DPPInst(MI); // Regular VOP3 case
802 }
803
805
806 if (AMDGPU::isMAC(MI.getOpcode())) {
807 // Insert dummy unused src2_modifiers.
809 AMDGPU::OpName::src2_modifiers);
810 }
811
812 if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
813 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
814 // Insert dummy unused src2_modifiers.
816 AMDGPU::OpName::src2_modifiers);
817 }
818
819 if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
821 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
822 }
823
824 if (MCII->get(MI.getOpcode()).TSFlags &
826 int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
827 AMDGPU::OpName::cpol);
828 if (CPolPos != -1) {
829 unsigned CPol =
830 (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
832 if (MI.getNumOperands() <= (unsigned)CPolPos) {
834 AMDGPU::OpName::cpol);
835 } else if (CPol) {
836 MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
837 }
838 }
839 }
840
841 if ((MCII->get(MI.getOpcode()).TSFlags &
843 (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
844 // GFX90A lost TFE, its place is occupied by ACC.
845 int TFEOpIdx =
846 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
847 if (TFEOpIdx != -1) {
848 auto *TFEIter = MI.begin();
849 std::advance(TFEIter, TFEOpIdx);
850 MI.insert(TFEIter, MCOperand::createImm(0));
851 }
852 }
853
854 // Validate buffer instruction offsets for GFX12+ - must not be a negative.
856 int OffsetIdx =
857 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);
858 if (OffsetIdx != -1) {
859 uint32_t Imm = MI.getOperand(OffsetIdx).getImm();
860 int64_t SignedOffset = SignExtend64<24>(Imm);
861 if (SignedOffset < 0)
863 }
864 }
865
866 if (MCII->get(MI.getOpcode()).TSFlags &
868 int SWZOpIdx =
869 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
870 if (SWZOpIdx != -1) {
871 auto *SWZIter = MI.begin();
872 std::advance(SWZIter, SWZOpIdx);
873 MI.insert(SWZIter, MCOperand::createImm(0));
874 }
875 }
876
877 const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
878 if (Desc.TSFlags & SIInstrFlags::MIMG) {
879 int VAddr0Idx =
880 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
881 int RsrcIdx =
882 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
883 unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
884 if (VAddr0Idx >= 0 && NSAArgs > 0) {
885 unsigned NSAWords = (NSAArgs + 3) / 4;
886 if (Bytes.size() < 4 * NSAWords)
888 for (unsigned i = 0; i < NSAArgs; ++i) {
889 const unsigned VAddrIdx = VAddr0Idx + 1 + i;
890 auto VAddrRCID =
891 MCII->getOpRegClassID(Desc.operands()[VAddrIdx], HwModeRegClass);
892 MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
893 }
894 Bytes = Bytes.slice(4 * NSAWords);
895 }
896
898 }
899
900 if (MCII->get(MI.getOpcode()).TSFlags &
903
904 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
906
907 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
909
910 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
912
913 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsMAI)
915
916 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsWMMA)
918
919 int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
920 AMDGPU::OpName::vdst_in);
921 if (VDstIn_Idx != -1) {
922 int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
924 if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
925 !MI.getOperand(VDstIn_Idx).isReg() ||
926 MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
927 if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
928 MI.erase(&MI.getOperand(VDstIn_Idx));
930 MCOperand::createReg(MI.getOperand(Tied).getReg()),
931 AMDGPU::OpName::vdst_in);
932 }
933 }
934
935 bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
936 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm) && !IsSOPK)
938
939 // Some VOPC instructions, e.g., v_cmpx_f_f64, use VOP3 encoding and
940 // have EXEC as implicit destination. Issue a warning if encoding for
941 // vdst is not EXEC.
942 if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) &&
943 MCII->get(MI.getOpcode()).getNumDefs() == 0 &&
944 MCII->get(MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
945 auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
946 if (Bytes_[0] != ExecEncoding)
948 }
949
950 Size = MaxInstBytesNum - Bytes.size();
951 return Status;
952}
953
955 if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
956 // The MCInst still has these fields even though they are no longer encoded
957 // in the GFX11 instruction.
958 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
959 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
960 }
961}
962
965 if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
966 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
967 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
968 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
969 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx13 ||
970 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx13 ||
971 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
972 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
973 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
974 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
975 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx13 ||
976 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx13 ||
977 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
978 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
979 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
980 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
981 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx13 ||
982 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx13 ||
983 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
984 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
985 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
986 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12 ||
987 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx13 ||
988 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx13) {
989 // The MCInst has this field that is not directly encoded in the
990 // instruction.
991 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
992 }
993}
994
996 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
997 STI.hasFeature(AMDGPU::FeatureGFX10)) {
998 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
999 // VOPC - insert clamp
1000 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
1001 } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
1002 int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
1003 if (SDst != -1) {
1004 // VOPC - insert VCC register as sdst
1006 AMDGPU::OpName::sdst);
1007 } else {
1008 // VOP1/2 - insert omod if present in instruction
1009 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
1010 }
1011 }
1012}
1013
1014/// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the
1015/// appropriate subregister for the used format width.
1017 MCOperand &MO, uint8_t NumRegs) {
1018 switch (NumRegs) {
1019 case 4:
1020 return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
1021 case 6:
1022 return MO.setReg(
1023 MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
1024 case 8:
1025 if (MCRegister NewReg = MRI.getSubReg(
1026 MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {
1027 MO.setReg(NewReg);
1028 }
1029 return;
1030 case 12: {
1031 // There is no 384-bit subreg index defined.
1032 MCRegister BaseReg = MRI.getSubReg(MO.getReg(), AMDGPU::sub0);
1033 MCRegister NewReg = MRI.getMatchingSuperReg(
1034 BaseReg, AMDGPU::sub0, &MRI.getRegClass(AMDGPU::VReg_384RegClassID));
1035 return MO.setReg(NewReg);
1036 }
1037 case 16:
1038 // No-op in cases where one operand is still f8/bf8.
1039 return;
1040 default:
1041 llvm_unreachable("Unexpected size for mfma/wmma f8f6f4 operand");
1042 }
1043}
1044
1045/// f8f6f4 instructions have different pseudos depending on the used formats. In
1046/// the disassembler table, we only have the variants with the largest register
1047/// classes which assume using an fp8/bf8 format for both operands. The actual
1048/// register class depends on the format in blgp and cbsz operands. Adjust the
1049/// register classes depending on the used format.
1051 int BlgpIdx =
1052 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);
1053 if (BlgpIdx == -1)
1054 return;
1055
1056 int CbszIdx =
1057 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);
1058
1059 unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
1060 unsigned BLGP = MI.getOperand(BlgpIdx).getImm();
1061
1062 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1063 AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, MI.getOpcode());
1064 if (!AdjustedRegClassOpcode ||
1065 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1066 return;
1067
1068 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1069 int Src0Idx =
1070 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
1071 int Src1Idx =
1072 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
1073 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
1074 AdjustedRegClassOpcode->NumRegsSrcA);
1075 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
1076 AdjustedRegClassOpcode->NumRegsSrcB);
1077}
1078
1080 int FmtAIdx =
1081 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_a_fmt);
1082 if (FmtAIdx == -1)
1083 return;
1084
1085 int FmtBIdx =
1086 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_b_fmt);
1087
1088 unsigned FmtA = MI.getOperand(FmtAIdx).getImm();
1089 unsigned FmtB = MI.getOperand(FmtBIdx).getImm();
1090
1091 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1092 AMDGPU::getWMMA_F8F6F4_WithFormatArgs(FmtA, FmtB, MI.getOpcode());
1093 if (!AdjustedRegClassOpcode ||
1094 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1095 return;
1096
1097 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1098 int Src0Idx =
1099 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
1100 int Src1Idx =
1101 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
1102 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
1103 AdjustedRegClassOpcode->NumRegsSrcA);
1104 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
1105 AdjustedRegClassOpcode->NumRegsSrcB);
1106}
1107
1109 unsigned OpSel = 0;
1110 unsigned OpSelHi = 0;
1111 unsigned NegLo = 0;
1112 unsigned NegHi = 0;
1113};
1114
1115// Reconstruct values of VOP3/VOP3P operands such as op_sel.
1116// Note that these values do not affect disassembler output,
1117// so this is only necessary for consistency with src_modifiers.
1119 bool IsVOP3P = false) {
1120 VOPModifiers Modifiers;
1121 unsigned Opc = MI.getOpcode();
1122 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
1123 AMDGPU::OpName::src1_modifiers,
1124 AMDGPU::OpName::src2_modifiers};
1125 for (int J = 0; J < 3; ++J) {
1126 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
1127 if (OpIdx == -1)
1128 continue;
1129
1130 unsigned Val = MI.getOperand(OpIdx).getImm();
1131
1132 Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
1133 if (IsVOP3P) {
1134 Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
1135 Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
1136 Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
1137 } else if (J == 0) {
1138 Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
1139 }
1140 }
1141
1142 return Modifiers;
1143}
1144
1145// Instructions decode the op_sel/suffix bits into the src_modifier
1146// operands. Copy those bits into the src operands for true16 VGPRs.
1148 const unsigned Opc = MI.getOpcode();
1149 const MCRegisterClass &ConversionRC =
1150 MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
1151 constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
1152 OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
1154 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
1156 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
1158 {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
1160 for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
1161 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
1162 int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
1163 if (OpIdx == -1 || OpModsIdx == -1)
1164 continue;
1165 MCOperand &Op = MI.getOperand(OpIdx);
1166 if (!Op.isReg())
1167 continue;
1168 if (!ConversionRC.contains(Op.getReg()))
1169 continue;
1170 unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
1171 const MCOperand &OpMods = MI.getOperand(OpModsIdx);
1172 unsigned ModVal = OpMods.getImm();
1173 if (ModVal & OpSelMask) { // isHi
1174 unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
1175 Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
1176 }
1177 }
1178}
1179
1180// MAC opcodes have special old and src2 operands.
1181// src2 is tied to dst, while old is not tied (but assumed to be).
1183 constexpr int DST_IDX = 0;
1184 auto Opcode = MI.getOpcode();
1185 const auto &Desc = MCII->get(Opcode);
1186 auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);
1187
1188 if (OldIdx != -1 && Desc.getOperandConstraint(
1189 OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
1190 assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
1191 assert(Desc.getOperandConstraint(
1192 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
1194 (void)DST_IDX;
1195 return true;
1196 }
1197
1198 return false;
1199}
1200
1201// Create dummy old operand and insert dummy unused src2_modifiers
1203 assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
1204 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1206 AMDGPU::OpName::src2_modifiers);
1207}
1208
1210 unsigned Opc = MI.getOpcode();
1211
1212 int VDstInIdx =
1213 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
1214 if (VDstInIdx != -1)
1215 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
1216
1217 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1218 if (MI.getNumOperands() < DescNumOps &&
1219 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1221 auto Mods = collectVOPModifiers(MI);
1223 AMDGPU::OpName::op_sel);
1224 } else {
1225 // Insert dummy unused src modifiers.
1226 if (MI.getNumOperands() < DescNumOps &&
1227 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1229 AMDGPU::OpName::src0_modifiers);
1230
1231 if (MI.getNumOperands() < DescNumOps &&
1232 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1234 AMDGPU::OpName::src1_modifiers);
1235 }
1236}
1237
1240
1241 int VDstInIdx =
1242 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
1243 if (VDstInIdx != -1)
1244 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
1245
1246 unsigned Opc = MI.getOpcode();
1247 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1248 if (MI.getNumOperands() < DescNumOps &&
1249 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1250 auto Mods = collectVOPModifiers(MI);
1252 AMDGPU::OpName::op_sel);
1253 }
1254}
1255
1256// Given a wide tuple \p Reg check if it will overflow 256 registers.
1257// \returns \p Reg on success or NoRegister otherwise.
1259 const MCRegisterInfo &MRI) {
1260 unsigned NumRegs = RC.getSizeInBits() / 32;
1261 MCRegister Sub0 = MRI.getSubReg(Reg, AMDGPU::sub0);
1262 if (!Sub0)
1263 return Reg;
1264
1265 MCRegister BaseReg;
1266 if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(Sub0))
1267 BaseReg = AMDGPU::VGPR0;
1268 else if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Sub0))
1269 BaseReg = AMDGPU::AGPR0;
1270
1271 assert(BaseReg && "Only vector registers expected");
1272
1273 return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : MCRegister();
1274}
1275
1276// Note that before gfx10, the MIMG encoding provided no information about
1277// VADDR size. Consequently, decoded instructions always show address as if it
1278// has 1 dword, which could be not really so.
1280 auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;
1281
1282 int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1283 AMDGPU::OpName::vdst);
1284
1285 int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1286 AMDGPU::OpName::vdata);
1287 int VAddr0Idx =
1288 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
1289 AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
1290 ? AMDGPU::OpName::srsrc
1291 : AMDGPU::OpName::rsrc;
1292 int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
1293 int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1294 AMDGPU::OpName::dmask);
1295
1296 int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1297 AMDGPU::OpName::tfe);
1298 int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1299 AMDGPU::OpName::d16);
1300
1301 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
1302 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
1303 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
1304
1305 assert(VDataIdx != -1);
1306 if (BaseOpcode->BVH) {
1307 // Add A16 operand for intersect_ray instructions
1308 addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
1309 return;
1310 }
1311
1312 bool IsAtomic = (VDstIdx != -1);
1313 bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
1314 bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
1315 bool IsNSA = false;
1316 bool IsPartialNSA = false;
1317 unsigned AddrSize = Info->VAddrDwords;
1318
1319 if (isGFX10Plus()) {
1320 unsigned DimIdx =
1321 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
1322 int A16Idx =
1323 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
1324 const AMDGPU::MIMGDimInfo *Dim =
1325 AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
1326 const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
1327
1328 AddrSize =
1329 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));
1330
1331 // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
1332 // VIMAGE insts other than BVH never use vaddr4.
1333 IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
1334 Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
1335 Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
1336 if (!IsNSA) {
1337 if (!IsVSample && AddrSize > 12)
1338 AddrSize = 16;
1339 } else {
1340 if (AddrSize > Info->VAddrDwords) {
1341 if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
1342 // The NSA encoding does not contain enough operands for the
1343 // combination of base opcode / dimension. Should this be an error?
1344 return;
1345 }
1346 IsPartialNSA = true;
1347 }
1348 }
1349 }
1350
1351 unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
1352 unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);
1353
1354 bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
1355 if (D16 && AMDGPU::hasPackedD16(STI)) {
1356 DstSize = (DstSize + 1) / 2;
1357 }
1358
1359 if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
1360 DstSize += 1;
1361
1362 if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
1363 return;
1364
1365 int NewOpcode =
1366 AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
1367 if (NewOpcode == -1)
1368 return;
1369
1370 // Widen the register to the correct number of enabled channels.
1371 MCRegister NewVdata;
1372 if (DstSize != Info->VDataDwords) {
1373 auto DataRCID = MCII->getOpRegClassID(
1374 MCII->get(NewOpcode).operands()[VDataIdx], HwModeRegClass);
1375
1376 // Get first subregister of VData
1377 MCRegister Vdata0 = MI.getOperand(VDataIdx).getReg();
1378 MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
1379 Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1380
1381 const MCRegisterClass &NewRC = MRI.getRegClass(DataRCID);
1382 NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &NewRC);
1383 NewVdata = CheckVGPROverflow(NewVdata, NewRC, MRI);
1384 if (!NewVdata) {
1385 // It's possible to encode this such that the low register + enabled
1386 // components exceeds the register count.
1387 return;
1388 }
1389 }
1390
1391 // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
1392 // If using partial NSA on GFX11+ widen last address register.
1393 int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
1394 MCRegister NewVAddrSA;
1395 if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
1396 AddrSize != Info->VAddrDwords) {
1397 MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
1398 MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
1399 VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1400
1401 auto AddrRCID = MCII->getOpRegClassID(
1402 MCII->get(NewOpcode).operands()[VAddrSAIdx], HwModeRegClass);
1403
1404 const MCRegisterClass &NewRC = MRI.getRegClass(AddrRCID);
1405 NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &NewRC);
1406 NewVAddrSA = CheckVGPROverflow(NewVAddrSA, NewRC, MRI);
1407 if (!NewVAddrSA)
1408 return;
1409 }
1410
1411 MI.setOpcode(NewOpcode);
1412
1413 if (NewVdata != AMDGPU::NoRegister) {
1414 MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
1415
1416 if (IsAtomic) {
1417 // Atomic operations have an additional operand (a copy of data)
1418 MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
1419 }
1420 }
1421
1422 if (NewVAddrSA) {
1423 MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
1424 } else if (IsNSA) {
1425 assert(AddrSize <= Info->VAddrDwords);
1426 MI.erase(MI.begin() + VAddr0Idx + AddrSize,
1427 MI.begin() + VAddr0Idx + Info->VAddrDwords);
1428 }
1429}
1430
1431// Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
1432// decoder only adds to src_modifiers, so manually add the bits to the other
1433// operands.
1435 unsigned Opc = MI.getOpcode();
1436 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1437 auto Mods = collectVOPModifiers(MI, true);
1438
1439 if (MI.getNumOperands() < DescNumOps &&
1440 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
1441 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);
1442
1443 if (MI.getNumOperands() < DescNumOps &&
1444 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
1446 AMDGPU::OpName::op_sel);
1447 if (MI.getNumOperands() < DescNumOps &&
1448 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
1450 AMDGPU::OpName::op_sel_hi);
1451 if (MI.getNumOperands() < DescNumOps &&
1452 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
1454 AMDGPU::OpName::neg_lo);
1455 if (MI.getNumOperands() < DescNumOps &&
1456 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
1458 AMDGPU::OpName::neg_hi);
1459}
1460
1461// Create dummy old operand and insert optional operands
1463 unsigned Opc = MI.getOpcode();
1464 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1465
1466 if (MI.getNumOperands() < DescNumOps &&
1467 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
1468 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1469
1470 if (MI.getNumOperands() < DescNumOps &&
1471 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1473 AMDGPU::OpName::src0_modifiers);
1474
1475 if (MI.getNumOperands() < DescNumOps &&
1476 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1478 AMDGPU::OpName::src1_modifiers);
1479}
1480
1482 unsigned Opc = MI.getOpcode();
1483 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1484
1486
1487 if (MI.getNumOperands() < DescNumOps &&
1488 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1491 AMDGPU::OpName::op_sel);
1492 }
1493}
1494
1496 assert(HasLiteral && "Should have decoded a literal");
1497 insertNamedMCOperand(MI, MCOperand::createImm(Literal), AMDGPU::OpName::immX);
1498}
1499
1500const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
1501 return getContext().getRegisterInfo()->
1502 getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
1503}
1504
1505inline
1507 const Twine& ErrMsg) const {
1508 *CommentStream << "Error: " + ErrMsg;
1509
1510 // ToDo: add support for error operands to MCInst.h
1511 // return MCOperand::createError(V);
1512 return MCOperand();
1513}
1514
1518
1519inline
1521 unsigned Val) const {
1522 const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
1523 if (Val >= RegCl.getNumRegs())
1524 return errOperand(Val, Twine(getRegClassName(RegClassID)) +
1525 ": unknown register " + Twine(Val));
1526 return createRegOperand(RegCl.getRegister(Val));
1527}
1528
1529inline
1531 unsigned Val) const {
1532 // ToDo: SI/CI have 104 SGPRs, VI - 102
1533 // Valery: here we accepting as much as we can, let assembler sort it out
1534 int shift = 0;
1535 switch (SRegClassID) {
1536 case AMDGPU::SGPR_32RegClassID:
1537 case AMDGPU::TTMP_32RegClassID:
1538 break;
1539 case AMDGPU::SGPR_64RegClassID:
1540 case AMDGPU::TTMP_64RegClassID:
1541 shift = 1;
1542 break;
1543 case AMDGPU::SGPR_96RegClassID:
1544 case AMDGPU::TTMP_96RegClassID:
1545 case AMDGPU::SGPR_128RegClassID:
1546 case AMDGPU::TTMP_128RegClassID:
1547 // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
1548 // this bundle?
1549 case AMDGPU::SGPR_256RegClassID:
1550 case AMDGPU::TTMP_256RegClassID:
1551 // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
1552 // this bundle?
1553 case AMDGPU::SGPR_288RegClassID:
1554 case AMDGPU::TTMP_288RegClassID:
1555 case AMDGPU::SGPR_320RegClassID:
1556 case AMDGPU::TTMP_320RegClassID:
1557 case AMDGPU::SGPR_352RegClassID:
1558 case AMDGPU::TTMP_352RegClassID:
1559 case AMDGPU::SGPR_384RegClassID:
1560 case AMDGPU::TTMP_384RegClassID:
1561 case AMDGPU::SGPR_512RegClassID:
1562 case AMDGPU::TTMP_512RegClassID:
1563 shift = 2;
1564 break;
1565 // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
1566 // this bundle?
1567 default:
1568 llvm_unreachable("unhandled register class");
1569 }
1570
1571 if (Val % (1 << shift)) {
1572 *CommentStream << "Warning: " << getRegClassName(SRegClassID)
1573 << ": scalar reg isn't aligned " << Val;
1574 }
1575
1576 return createRegOperand(SRegClassID, Val >> shift);
1577}
1578
1580 bool IsHi) const {
1581 unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1582 return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
1583}
1584
1585// Decode Literals for insts which always have a literal in the encoding
1588 if (HasLiteral) {
1589 assert(
1591 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1592 if (Literal != Val)
1593 return errOperand(Val, "More than one unique literal is illegal");
1594 }
1595 HasLiteral = true;
1596 Literal = Val;
1597 return MCOperand::createImm(Literal);
1598}
1599
1602 if (HasLiteral) {
1603 if (Literal != Val)
1604 return errOperand(Val, "More than one unique literal is illegal");
1605 }
1606 HasLiteral = true;
1607 Literal = Val;
1608
1609 bool UseLit64 = Hi_32(Literal) == 0;
1611 LitModifier::Lit64, Literal, getContext()))
1612 : MCOperand::createImm(Literal);
1613}
1614
1617 const MCOperandInfo &OpDesc) const {
1618 // For now all literal constants are supposed to be unsigned integer
1619 // ToDo: deal with signed/unsigned 64-bit integer constants
1620 // ToDo: deal with float/double constants
1621 if (!HasLiteral) {
1622 if (Bytes.size() < 4) {
1623 return errOperand(0, "cannot read literal, inst bytes left " +
1624 Twine(Bytes.size()));
1625 }
1626 HasLiteral = true;
1627 Literal = eatBytes<uint32_t>(Bytes);
1628 }
1629
1630 // For disassembling always assume all inline constants are available.
1631 bool HasInv2Pi = true;
1632
1633 // Invalid instruction codes may contain literals for inline-only
1634 // operands, so we support them here as well.
1635 int64_t Val = Literal;
1636 bool UseLit = false;
1637 switch (OpDesc.OperandType) {
1638 default:
1639 llvm_unreachable("Unexpected operand type!");
1643 UseLit = AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
1644 break;
1647 break;
1651 UseLit = AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
1652 break;
1654 UseLit = AMDGPU::isInlinableLiteralV2F16(Val);
1655 break;
1658 break;
1660 break;
1664 UseLit = AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi);
1665 break;
1667 UseLit = AMDGPU::isInlinableLiteralV2I16(Val);
1668 break;
1678 UseLit = AMDGPU::isInlinableLiteral32(Val, HasInv2Pi);
1679 break;
1683 Val <<= 32;
1684 break;
1687 UseLit = AMDGPU::isInlinableLiteral64(Val, HasInv2Pi);
1688 break;
1690 // TODO: Disassembling V_DUAL_FMAMK_F32_X_FMAMK_F32_gfx11 hits
1691 // decoding a literal in a position of a register operand. Give
1692 // it special handling in the caller, decodeImmOperands(), instead
1693 // of quietly allowing it here.
1694 break;
1695 }
1696
1699 : MCOperand::createImm(Val);
1700}
1701
1703 assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));
1704
1705 if (!HasLiteral) {
1706 if (Bytes.size() < 8) {
1707 return errOperand(0, "cannot read literal64, inst bytes left " +
1708 Twine(Bytes.size()));
1709 }
1710 HasLiteral = true;
1711 Literal = eatBytes<uint64_t>(Bytes);
1712 }
1713
1714 bool UseLit64 = Hi_32(Literal) == 0;
1716 LitModifier::Lit64, Literal, getContext()))
1717 : MCOperand::createImm(Literal);
1718}
1719
1721 using namespace AMDGPU::EncValues;
1722
1723 assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1724 return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
1725 (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
1726 (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1727 // Cast prevents negative overflow.
1728}
1729
1730static int64_t getInlineImmVal32(unsigned Imm) {
1731 switch (Imm) {
1732 case 240:
1733 return llvm::bit_cast<uint32_t>(0.5f);
1734 case 241:
1735 return llvm::bit_cast<uint32_t>(-0.5f);
1736 case 242:
1737 return llvm::bit_cast<uint32_t>(1.0f);
1738 case 243:
1739 return llvm::bit_cast<uint32_t>(-1.0f);
1740 case 244:
1741 return llvm::bit_cast<uint32_t>(2.0f);
1742 case 245:
1743 return llvm::bit_cast<uint32_t>(-2.0f);
1744 case 246:
1745 return llvm::bit_cast<uint32_t>(4.0f);
1746 case 247:
1747 return llvm::bit_cast<uint32_t>(-4.0f);
1748 case 248: // 1 / (2 * PI)
1749 return 0x3e22f983;
1750 default:
1751 llvm_unreachable("invalid fp inline imm");
1752 }
1753}
1754
1755static int64_t getInlineImmVal64(unsigned Imm) {
1756 switch (Imm) {
1757 case 240:
1758 return llvm::bit_cast<uint64_t>(0.5);
1759 case 241:
1760 return llvm::bit_cast<uint64_t>(-0.5);
1761 case 242:
1762 return llvm::bit_cast<uint64_t>(1.0);
1763 case 243:
1764 return llvm::bit_cast<uint64_t>(-1.0);
1765 case 244:
1766 return llvm::bit_cast<uint64_t>(2.0);
1767 case 245:
1768 return llvm::bit_cast<uint64_t>(-2.0);
1769 case 246:
1770 return llvm::bit_cast<uint64_t>(4.0);
1771 case 247:
1772 return llvm::bit_cast<uint64_t>(-4.0);
1773 case 248: // 1 / (2 * PI)
1774 return 0x3fc45f306dc9c882;
1775 default:
1776 llvm_unreachable("invalid fp inline imm");
1777 }
1778}
1779
1780static int64_t getInlineImmValF16(unsigned Imm) {
1781 switch (Imm) {
1782 case 240:
1783 return 0x3800;
1784 case 241:
1785 return 0xB800;
1786 case 242:
1787 return 0x3C00;
1788 case 243:
1789 return 0xBC00;
1790 case 244:
1791 return 0x4000;
1792 case 245:
1793 return 0xC000;
1794 case 246:
1795 return 0x4400;
1796 case 247:
1797 return 0xC400;
1798 case 248: // 1 / (2 * PI)
1799 return 0x3118;
1800 default:
1801 llvm_unreachable("invalid fp inline imm");
1802 }
1803}
1804
1805static int64_t getInlineImmValBF16(unsigned Imm) {
1806 switch (Imm) {
1807 case 240:
1808 return 0x3F00;
1809 case 241:
1810 return 0xBF00;
1811 case 242:
1812 return 0x3F80;
1813 case 243:
1814 return 0xBF80;
1815 case 244:
1816 return 0x4000;
1817 case 245:
1818 return 0xC000;
1819 case 246:
1820 return 0x4080;
1821 case 247:
1822 return 0xC080;
1823 case 248: // 1 / (2 * PI)
1824 return 0x3E22;
1825 default:
1826 llvm_unreachable("invalid fp inline imm");
1827 }
1828}
1829
1830unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
1831 using namespace AMDGPU;
1832
1833 switch (Width) {
1834 case 16:
1835 case 32:
1836 return VGPR_32RegClassID;
1837 case 64:
1838 return VReg_64RegClassID;
1839 case 96:
1840 return VReg_96RegClassID;
1841 case 128:
1842 return VReg_128RegClassID;
1843 case 160:
1844 return VReg_160RegClassID;
1845 case 192:
1846 return VReg_192RegClassID;
1847 case 256:
1848 return VReg_256RegClassID;
1849 case 288:
1850 return VReg_288RegClassID;
1851 case 320:
1852 return VReg_320RegClassID;
1853 case 352:
1854 return VReg_352RegClassID;
1855 case 384:
1856 return VReg_384RegClassID;
1857 case 512:
1858 return VReg_512RegClassID;
1859 case 1024:
1860 return VReg_1024RegClassID;
1861 }
1862 llvm_unreachable("Invalid register width!");
1863}
1864
1865unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
1866 using namespace AMDGPU;
1867
1868 switch (Width) {
1869 case 16:
1870 case 32:
1871 return AGPR_32RegClassID;
1872 case 64:
1873 return AReg_64RegClassID;
1874 case 96:
1875 return AReg_96RegClassID;
1876 case 128:
1877 return AReg_128RegClassID;
1878 case 160:
1879 return AReg_160RegClassID;
1880 case 256:
1881 return AReg_256RegClassID;
1882 case 288:
1883 return AReg_288RegClassID;
1884 case 320:
1885 return AReg_320RegClassID;
1886 case 352:
1887 return AReg_352RegClassID;
1888 case 384:
1889 return AReg_384RegClassID;
1890 case 512:
1891 return AReg_512RegClassID;
1892 case 1024:
1893 return AReg_1024RegClassID;
1894 }
1895 llvm_unreachable("Invalid register width!");
1896}
1897
1898unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
1899 using namespace AMDGPU;
1900
1901 switch (Width) {
1902 case 16:
1903 case 32:
1904 return SGPR_32RegClassID;
1905 case 64:
1906 return SGPR_64RegClassID;
1907 case 96:
1908 return SGPR_96RegClassID;
1909 case 128:
1910 return SGPR_128RegClassID;
1911 case 160:
1912 return SGPR_160RegClassID;
1913 case 256:
1914 return SGPR_256RegClassID;
1915 case 288:
1916 return SGPR_288RegClassID;
1917 case 320:
1918 return SGPR_320RegClassID;
1919 case 352:
1920 return SGPR_352RegClassID;
1921 case 384:
1922 return SGPR_384RegClassID;
1923 case 512:
1924 return SGPR_512RegClassID;
1925 }
1926 llvm_unreachable("Invalid register width!");
1927}
1928
1929unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
1930 using namespace AMDGPU;
1931
1932 switch (Width) {
1933 case 16:
1934 case 32:
1935 return TTMP_32RegClassID;
1936 case 64:
1937 return TTMP_64RegClassID;
1938 case 128:
1939 return TTMP_128RegClassID;
1940 case 256:
1941 return TTMP_256RegClassID;
1942 case 288:
1943 return TTMP_288RegClassID;
1944 case 320:
1945 return TTMP_320RegClassID;
1946 case 352:
1947 return TTMP_352RegClassID;
1948 case 384:
1949 return TTMP_384RegClassID;
1950 case 512:
1951 return TTMP_512RegClassID;
1952 }
1953 llvm_unreachable("Invalid register width!");
1954}
1955
1956int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1957 using namespace AMDGPU::EncValues;
1958
1959 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1960 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1961
1962 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1963}
1964
1966 unsigned Val) const {
1967 using namespace AMDGPU::EncValues;
1968
1969 assert(Val < 1024); // enum10
1970
1971 bool IsAGPR = Val & 512;
1972 Val &= 511;
1973
1974 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1975 return createRegOperand(IsAGPR ? getAgprClassId(Width)
1976 : getVgprClassId(Width), Val - VGPR_MIN);
1977 }
1978 return decodeNonVGPRSrcOp(Inst, Width, Val & 0xFF);
1979}
1980
1982 unsigned Width,
1983 unsigned Val) const {
1984 // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
1985 // decoded earlier.
1986 assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1987 using namespace AMDGPU::EncValues;
1988
1989 if (Val <= SGPR_MAX) {
1990 // "SGPR_MIN <= Val" is always true and causes compilation warning.
1991 static_assert(SGPR_MIN == 0);
1992 return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
1993 }
1994
1995 int TTmpIdx = getTTmpIdx(Val);
1996 if (TTmpIdx >= 0) {
1997 return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
1998 }
1999
2000 if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
2001 (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
2002 Val == LITERAL_CONST)
2003 return MCOperand::createImm(Val);
2004
2005 if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
2006 return decodeLiteral64Constant();
2007 }
2008
2009 switch (Width) {
2010 case 32:
2011 case 16:
2012 return decodeSpecialReg32(Val);
2013 case 64:
2014 return decodeSpecialReg64(Val);
2015 case 96:
2016 case 128:
2017 case 256:
2018 case 512:
2019 return decodeSpecialReg96Plus(Val);
2020 default:
2021 llvm_unreachable("unexpected immediate type");
2022 }
2023}
2024
2025// Bit 0 of DstY isn't stored in the instruction, because it's always the
2026// opposite of bit 0 of DstX.
2028 unsigned Val) const {
2029 int VDstXInd =
2030 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
2031 assert(VDstXInd != -1);
2032 assert(Inst.getOperand(VDstXInd).isReg());
2033 unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
2034 Val |= ~XDstReg & 1;
2035 return createRegOperand(getVgprClassId(32), Val);
2036}
2037
// decodeSpecialReg32 (name grounded by the call sites above): map a scalar
// operand encoding that is not a plain SGPR/TTMP/inline constant to its
// 32-bit special register. Encodings 124/125 swap meaning between M0 and
// SGPR_NULL starting with GFX11.
2039 using namespace AMDGPU;
2040
2041 switch (Val) {
2042 // clang-format off
2043 case 102: return createRegOperand(FLAT_SCR_LO);
2044 case 103: return createRegOperand(FLAT_SCR_HI);
2045 case 104: return createRegOperand(XNACK_MASK_LO);
2046 case 105: return createRegOperand(XNACK_MASK_HI);
2047 case 106: return createRegOperand(VCC_LO);
2048 case 107: return createRegOperand(VCC_HI);
2049 case 108: return createRegOperand(TBA_LO);
2050 case 109: return createRegOperand(TBA_HI);
2051 case 110: return createRegOperand(TMA_LO);
2052 case 111: return createRegOperand(TMA_HI);
2053 case 124:
2054 return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
2055 case 125:
2056 return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
2057 case 126: return createRegOperand(EXEC_LO);
2058 case 127: return createRegOperand(EXEC_HI);
2059 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
2060 case 231: return createRegOperand(SRC_FLAT_SCRATCH_BASE_HI);
2061 case 235: return createRegOperand(SRC_SHARED_BASE_LO);
2062 case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
2063 case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
2064 case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
2065 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2066 case 251: return createRegOperand(SRC_VCCZ);
2067 case 252: return createRegOperand(SRC_EXECZ);
2068 case 253: return createRegOperand(SRC_SCC);
2069 case 254: return createRegOperand(LDS_DIRECT);
2070 default: break;
2071 // clang-format on
2072 }
// Anything not matched above is reported as an error operand.
2073 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2074}
2075
// decodeSpecialReg64 (name grounded by call sites above): 64-bit counterpart
// of decodeSpecialReg32; maps an encoding to a 64-bit special register pair.
// SGPR_NULL is encoding 124 on GFX11+ and 125 before that.
2077 using namespace AMDGPU;
2078
2079 switch (Val) {
2080 case 102: return createRegOperand(FLAT_SCR);
2081 case 104: return createRegOperand(XNACK_MASK);
2082 case 106: return createRegOperand(VCC);
2083 case 108: return createRegOperand(TBA);
2084 case 110: return createRegOperand(TMA);
2085 case 124:
2086 if (isGFX11Plus())
2087 return createRegOperand(SGPR_NULL);
2088 break;
2089 case 125:
2090 if (!isGFX11Plus())
2091 return createRegOperand(SGPR_NULL);
2092 break;
2093 case 126: return createRegOperand(EXEC);
2094 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
2095 case 235: return createRegOperand(SRC_SHARED_BASE);
2096 case 236: return createRegOperand(SRC_SHARED_LIMIT);
2097 case 237: return createRegOperand(SRC_PRIVATE_BASE);
2098 case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
2099 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2100 case 251: return createRegOperand(SRC_VCCZ);
2101 case 252: return createRegOperand(SRC_EXECZ);
2102 case 253: return createRegOperand(SRC_SCC);
2103 default: break;
2104 }
2105 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2106}
2107
// decodeSpecialReg96Plus (name grounded by call site above): for operands of
// width 96 bits and above, the only accepted special encoding is SGPR_NULL
// (124 on GFX11+, 125 before); everything else is an error operand.
2109 using namespace AMDGPU;
2110
2111 switch (Val) {
2112 case 124:
2113 if (isGFX11Plus())
2114 return createRegOperand(SGPR_NULL);
2115 break;
2116 case 125:
2117 if (!isGFX11Plus())
2118 return createRegOperand(SGPR_NULL);
2119 break;
2120 default:
2121 break;
2122 }
2123 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2124}
2125
// decodeSDWASrc (name grounded by decodeSDWASrc16/32 wrappers below): decode
// an SDWA source operand. On GFX9/GFX10 the 9-bit encoding partitions into
// VGPR, SGPR, TTMP, inline-constant and special-register ranges; on VI the
// value is a plain VGPR number.
2127 const unsigned Val) const {
2128 using namespace AMDGPU::SDWA;
2129 using namespace AMDGPU::EncValues;
2130
2131 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
2132 STI.hasFeature(AMDGPU::FeatureGFX10)) {
2133 // XXX: cast to int is needed to avoid stupid warning:
2134 // compare with unsigned is always true
2135 if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
2136 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
2137 return createRegOperand(getVgprClassId(Width),
2138 Val - SDWA9EncValues::SRC_VGPR_MIN);
2139 }
// SGPR range; the upper bound depends on the subtarget generation.
2140 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
2141 Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
2142 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
2143 return createSRegOperand(getSgprClassId(Width),
2144 Val - SDWA9EncValues::SRC_SGPR_MIN);
2145 }
2146 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
2147 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
2148 return createSRegOperand(getTtmpClassId(Width),
2149 Val - SDWA9EncValues::SRC_TTMP_MIN);
2150 }
2151
// Rebase onto the non-VGPR encoding space for inline constants and
// special registers.
2152 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
2153
2154 if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
2155 (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
2156 return MCOperand::createImm(SVal);
2157
2158 return decodeSpecialReg32(SVal);
2159 }
2160 if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
2161 return createRegOperand(getVgprClassId(Width), Val);
2162 llvm_unreachable("unsupported target");
2163}
2164
// 16-bit SDWA source: forwards to decodeSDWASrc with Width = 16.
2166 return decodeSDWASrc(16, Val);
2167 }
2168
// 32-bit SDWA source: forwards to decodeSDWASrc with Width = 32.
2170 return decodeSDWASrc(32, Val);
2171 }
2172
// decodeSDWAVopcDst (name grounded by the assert message): decode the VOPC
// destination of an SDWA instruction. When the VCC_MASK bit is set the
// destination is an explicit SGPR/TTMP/special register; otherwise it is
// the implicit VCC (VCC_LO in wave32 mode).
2174 using namespace AMDGPU::SDWA;
2175
2176 assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
2177 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
2178 "SDWAVopcDst should be present only on GFX9+");
2179
2180 bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);
2181
2182 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
// Strip the VCC flag bit, leaving the SGPR number.
2183 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
2184
2185 int TTmpIdx = getTTmpIdx(Val);
2186 if (TTmpIdx >= 0) {
// Register class width follows the wavefront size (32- vs 64-bit mask).
2187 auto TTmpClsId = getTtmpClassId(IsWave32 ? 32 : 64);
2188 return createSRegOperand(TTmpClsId, TTmpIdx);
2189 }
2190 if (Val > SGPR_MAX) {
2191 return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
2192 }
2193 return createSRegOperand(getSgprClassId(IsWave32 ? 32 : 64), Val);
2194 }
2195 return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
2196}
2197
// Decode a boolean (lane-mask) register operand: width follows the
// wavefront size — 32-bit in wave32 mode, 64-bit otherwise.
2199 unsigned Val) const {
2200 return STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
2201 ? decodeSrcOp(Inst, 32, Val)
2202 : decodeSrcOp(Inst, 64, Val);
2203}
2204
// Thin wrapper: decode a 32-bit source operand.
// NOTE(review): signature line lost in this listing; presumably
// decodeSplitBarrier — confirm against upstream source.
2206 unsigned Val) const {
2207 return decodeSrcOp(Inst, 32, Val);
2208}
2209
// Returns an invalid (empty) MCOperand for one condition, otherwise the raw
// immediate. NOTE(review): the signature and the guarding `if` condition
// (orig. 2210-2211) are missing from this listing — confirm upstream.
2212 return MCOperand();
2213 return MCOperand::createImm(Val);
2214}
2215
// decodeVersionImm: decode an s_version-style immediate into a symbolic
// expression (GFX version symbol OR'ed with the W64/W32/MDP bit symbols),
// falling back to a plain immediate when unused bits are set.
2217 using VersionField = AMDGPU::EncodingField<7, 0>;
2218 using W64Bit = AMDGPU::EncodingBit<13>;
2219 using W32Bit = AMDGPU::EncodingBit<14>;
2220 using MDPBit = AMDGPU::EncodingBit<15>;
// NOTE(review): the `using Encoding = ...` alias line (orig. 2221) is
// missing from this listing.
2222
2223 auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
2224
2225 // Decode into a plain immediate if any unused bits are raised.
2226 if (Encoding::encode(Version, W64, W32, MDP) != Imm)
2227 return MCOperand::createImm(Imm);
2228
// Look up the symbolic name for this version code.
2229 const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
2230 const auto *I = find_if(
2231 Versions, [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
2232 return V.Code == Version;
2233 });
2234 MCContext &Ctx = getContext();
2235 const MCExpr *E;
2236 if (I == Versions.end())
// NOTE(review): the then-branch assignment (orig. 2237) is missing from
// this listing.
2238 else
2239 E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
2240
// OR in the flag-bit symbols that were set in the encoding.
2241 if (W64)
2242 E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
2243 if (W32)
2244 E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
2245 if (MDP)
2246 E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
2247
2248 return MCOperand::createExpr(E);
2249}
2250
// Subtarget predicate helpers (isVI, isGFX90A, isGFX11, isGFX12,
// hasArchitectedFlatScratch, ...): thin wrappers over STI feature queries.
// NOTE(review): many one-line bodies here were dropped by the extraction
// (bare line numbers remain) — consult upstream for the full set.
2252 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2253}
2254
2256
2258 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2259}
2260
2262
2264
2268
2270 return STI.hasFeature(AMDGPU::FeatureGFX11);
2271}
2272
2276
2278
2280 return STI.hasFeature(AMDGPU::FeatureGFX12);
2281}
2282
2286
2288
2292
2294
2298
2300 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2301}
2302
2306
2307//===----------------------------------------------------------------------===//
2308// AMDGPU specific symbol handling
2309//===----------------------------------------------------------------------===//
2310
2311/// Print a string describing the reserved bit range specified by Mask with
2312/// offset BaseBytes for use in error comments. Mask is a single continuous
2313/// range of 1s surrounded by zeros. The format here is meant to align with the
2314/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
2315static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
2316 SmallString<32> Result;
2317 raw_svector_ostream S(Result);
2318
2319 int TrailingZeros = llvm::countr_zero(Mask);
2320 int PopCount = llvm::popcount(Mask);
2321
2322 if (PopCount == 1) {
2323 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2324 } else {
2325 S << "bits in range ("
2326 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
2327 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2328 }
2329
2330 return Result;
2331}
2332
// Helper macros used by the decodeCOMPUTE_PGM_RSRC* routines below. They
// capture FourByteBuffer, KdStream and Indent from the enclosing function.
// GET_FIELD extracts a named bitfield; PRINT_DIRECTIVE emits an .amdhsa_*
// directive; PRINT_PSEUDO_DIRECTIVE_COMMENT emits the same but as an
// assembler comment (for fields with no directive).
2333#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
2334#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2335 do { \
2336 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
2337 } while (0)
2338#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
2339 do { \
2340 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
2341 << GET_FIELD(MASK) << '\n'; \
2342 } while (0)
2343
// CHECK_RESERVED_BITS_IMPL returns a descriptive error from the enclosing
// function when any reserved bit under MASK is set in FourByteBuffer.
2344#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
2345 do { \
2346 if (FourByteBuffer & (MASK)) { \
2347 return createStringError(std::errc::invalid_argument, \
2348 "kernel descriptor " DESC \
2349 " reserved %s set" MSG, \
2350 getBitRangeFromMask((MASK), 0).c_str()); \
2351 } \
2352 } while (0)
2353
// Convenience wrappers varying the description and trailing message.
2354#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
2355#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
2356 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
2357#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
2358 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
2359#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
2360 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2361
// decodeCOMPUTE_PGM_RSRC1: print the .amdhsa_* directives encoded in the
// COMPUTE_PGM_RSRC1 word of a kernel descriptor into KdStream, returning an
// error when reserved bits are set.
2362// NOLINTNEXTLINE(readability-identifier-naming)
2364 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2365 using namespace amdhsa;
2366 StringRef Indent = "\t";
2367
2368 // We cannot accurately backward compute #VGPRs used from
2369 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
2370 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
2371 // simply calculate the inverse of what the assembler does.
2372
2373 uint32_t GranulatedWorkitemVGPRCount =
2374 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
2375
2376 uint32_t NextFreeVGPR =
2377 (GranulatedWorkitemVGPRCount + 1) *
2378 AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
2379
2380 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
2381
2382 // We cannot backward compute values used to calculate
2383 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
2384 // directives can't be computed:
2385 // .amdhsa_reserve_vcc
2386 // .amdhsa_reserve_flat_scratch
2387 // .amdhsa_reserve_xnack_mask
2388 // They take their respective default values if not specified in the assembly.
2389 //
2390 // GRANULATED_WAVEFRONT_SGPR_COUNT
2391 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
2392 //
2393 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
2394 // are set to 0. So while disassembling we consider that:
2395 //
2396 // GRANULATED_WAVEFRONT_SGPR_COUNT
2397 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
2398 //
2399 // The disassembler cannot recover the original values of those 3 directives.
2400
2401 uint32_t GranulatedWavefrontSGPRCount =
2402 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
2403
2404 if (isGFX10Plus())
2405 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2406 "must be zero on gfx10+");
2407
// NOTE(review): the multiplier expression (orig. 2409) is missing from
// this listing.
2408 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
2410
2411 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
2413 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
2414 bool ReservedXnackMask = STI.hasFeature(AMDGPU::FeatureXNACK);
2415 assert(!ReservedXnackMask || STI.hasFeature(AMDGPU::FeatureSupportsXNACK));
2416 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << ReservedXnackMask
2417 << '\n';
2418 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
2419
2420 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
2421
2422 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
2423 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
2424 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
2425 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
2426 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
2427 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
2428 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
2429 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
2430
2431 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
2432
2433 if (STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
2434 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
2435 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
2436
2437 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
2438
2439 if (STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
2440 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
2441 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
2442
2443 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
2444 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
2445
2446 // Bits [26].
2447 if (isGFX9Plus()) {
2448 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
2449 } else {
2450 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
2451 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
2452 }
2453
2454 // Bits [27].
2455 if (isGFX1250Plus()) {
2456 PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
2457 COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
2458 } else {
2459 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX120_RESERVED1,
2460 "COMPUTE_PGM_RSRC1");
2461 }
2462
2463 // Bits [28].
2464 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED2, "COMPUTE_PGM_RSRC1");
2465
2466 // Bits [29-31].
2467 if (isGFX10Plus()) {
2468 // WGP_MODE is not available on GFX1250.
2469 if (!isGFX1250Plus()) {
2470 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
2471 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
2472 }
2473 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
2474 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
2475 } else {
2476 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED3,
2477 "COMPUTE_PGM_RSRC1");
2478 }
2479
2480 if (isGFX12Plus())
2481 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
2482 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2483
2484 return true;
2485}
2486
// decodeCOMPUTE_PGM_RSRC2: print the .amdhsa_* directives encoded in the
// COMPUTE_PGM_RSRC2 word (system SGPR/VGPR setup and exception enables)
// into KdStream, returning an error when reserved bits are set.
2487// NOLINTNEXTLINE(readability-identifier-naming)
2489 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2490 using namespace amdhsa;
2491 StringRef Indent = "\t";
// NOTE(review): the `if` condition selecting between the two directive
// spellings (orig. 2492) is missing from this listing.
2493 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2494 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2495 else
2496 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2497 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2498 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2499 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2500 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2501 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2502 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2503 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2504 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2505 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2506 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2507 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2508
2509 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2510 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2511 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2512
2514 ".amdhsa_exception_fp_ieee_invalid_op",
2515 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2516 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2517 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2519 ".amdhsa_exception_fp_ieee_div_zero",
2520 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2521 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2522 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2523 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2524 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2525 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2526 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2527 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2528 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2529
2530 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2531
2532 return true;
2533}
2534
// decodeCOMPUTE_PGM_RSRC3: print the .amdhsa_* directives encoded in the
// COMPUTE_PGM_RSRC3 word into KdStream. Layout differs by generation
// (gfx90a vs gfx10+); pre-gfx9 requires the word to be all zero.
2535// NOLINTNEXTLINE(readability-identifier-naming)
2537 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2538 using namespace amdhsa;
2539 StringRef Indent = "\t";
2540 if (isGFX90A()) {
2541 KdStream << Indent << ".amdhsa_accum_offset "
2542 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2543 << '\n';
2544
2545 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2546
2547 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2548 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2549 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2550 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2551 } else if (isGFX10Plus()) {
2552 // Bits [0-3].
2553 if (!isGFX12Plus()) {
2554 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2555 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2556 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2557 } else {
2559 "SHARED_VGPR_COUNT",
2560 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2561 }
2562 } else {
2563 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2564 "COMPUTE_PGM_RSRC3",
2565 "must be zero on gfx12+");
2566 }
2567
2568 // Bits [4-11].
2569 if (isGFX11()) {
2570 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2571 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2572 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2573 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2574 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2575 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2576 } else if (isGFX12Plus()) {
2577 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2578 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2579 } else {
2580 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2581 "COMPUTE_PGM_RSRC3",
2582 "must be zero on gfx10");
2583 }
2584
2585 // Bits [12].
2586 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2587 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2588
2589 // Bits [13].
2590 if (isGFX12Plus()) {
2592 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2593 } else {
2594 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2595 "COMPUTE_PGM_RSRC3",
2596 "must be zero on gfx10 or gfx11");
2597 }
2598
2599 // Bits [14-21].
2600 if (isGFX1250Plus()) {
2601 PRINT_DIRECTIVE(".amdhsa_named_barrier_count",
2602 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
2604 "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
2606 COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
2608 "ENABLE_DIDT_THROTTLE",
2609 COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
2610 } else {
2611 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX120_RESERVED4,
2612 "COMPUTE_PGM_RSRC3",
2613 "must be zero on gfx10+");
2614 }
2615
2616 // Bits [22-30].
2617 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED5,
2618 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2619
2620 // Bits [31].
2621 if (isGFX11Plus()) {
2623 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2624 } else {
2625 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED6,
2626 "COMPUTE_PGM_RSRC3",
2627 "must be zero on gfx10");
2628 }
2629 } else if (FourByteBuffer) {
// Pre-gfx9: any nonzero content is an error.
2630 return createStringError(
2631 std::errc::invalid_argument,
2632 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2633 }
2634 return true;
2635}
2636#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2637#undef PRINT_DIRECTIVE
2638#undef GET_FIELD
2639#undef CHECK_RESERVED_BITS_IMPL
2640#undef CHECK_RESERVED_BITS
2641#undef CHECK_RESERVED_BITS_MSG
2642#undef CHECK_RESERVED_BITS_DESC
2643#undef CHECK_RESERVED_BITS_DESC_MSG
2644
2645/// Create an error object to return from onSymbolStart for reserved kernel
2646/// descriptor bits being set.
2647static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2648 const char *Msg = "") {
2649 return createStringError(
2650 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2651 getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2652}
2653
2654/// Create an error object to return from onSymbolStart for reserved kernel
2655/// descriptor bytes being set.
2656static Error createReservedKDBytesError(unsigned BaseInBytes,
2657 unsigned WidthInBytes) {
2658 // Create an error comment in the same format as the "Kernel Descriptor"
2659 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2660 return createStringError(
2661 std::errc::invalid_argument,
2662 "kernel descriptor reserved bits in range (%u:%u) set",
2663 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2664}
2665
// decodeKernelDescriptorDirective: decode the kernel-descriptor field at the
// current Cursor offset and emit the corresponding .amdhsa_* directives to
// KdStream; reserved fields must be zero. NOTE(review): the `case` labels of
// the switch (amdhsa::*_OFFSET constants) were dropped by the extraction —
// each group of statements below belongs to one such case.
2668 raw_string_ostream &KdStream) const {
// Local PRINT_DIRECTIVE variant reading bitfields from TwoByteBuffer.
2669#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2670 do { \
2671 KdStream << Indent << DIRECTIVE " " \
2672 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2673 } while (0)
2674
2675 uint16_t TwoByteBuffer = 0;
2676 uint32_t FourByteBuffer = 0;
2677
2678 StringRef ReservedBytes;
2679 StringRef Indent = "\t";
2680
2681 assert(Bytes.size() == 64);
2682 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2683
2684 switch (Cursor.tell()) {
2686 FourByteBuffer = DE.getU32(Cursor);
2687 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2688 << '\n';
2689 return true;
2690
2692 FourByteBuffer = DE.getU32(Cursor);
2693 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2694 << FourByteBuffer << '\n';
2695 return true;
2696
2698 FourByteBuffer = DE.getU32(Cursor);
2699 KdStream << Indent << ".amdhsa_kernarg_size "
2700 << FourByteBuffer << '\n';
2701 return true;
2702
2704 // 4 reserved bytes, must be 0.
2705 ReservedBytes = DE.getBytes(Cursor, 4);
2706 for (char B : ReservedBytes) {
2707 if (B != 0)
2709 }
2710 return true;
2711
2713 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2714 // So far no directive controls this for Code Object V3, so simply skip for
2715 // disassembly.
2716 DE.skip(Cursor, 8);
2717 return true;
2718
2720 // 20 reserved bytes, must be 0.
2721 ReservedBytes = DE.getBytes(Cursor, 20);
2722 for (char B : ReservedBytes) {
2723 if (B != 0)
2725 }
2726 return true;
2727
2729 FourByteBuffer = DE.getU32(Cursor);
2730 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2731
2733 FourByteBuffer = DE.getU32(Cursor);
2734 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2735
2737 FourByteBuffer = DE.getU32(Cursor);
2738 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2739
2741 using namespace amdhsa;
2742 TwoByteBuffer = DE.getU16(Cursor);
2743
2745 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2746 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2747 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2748 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2749 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2750 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2751 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2752 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2753 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2754 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2756 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2757 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2758 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2759 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2760
2761 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2762 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2764
2765 // Reserved for GFX9
2766 if (isGFX9() &&
2767 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2769 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2770 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2771 }
2772 if (isGFX10Plus()) {
2773 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2774 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2775 }
2776
2777 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2778 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2779 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2780
2781 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2782 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2784 }
2785
2786 return true;
2787
2789 using namespace amdhsa;
2790 TwoByteBuffer = DE.getU16(Cursor);
2791 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2792 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2793 KERNARG_PRELOAD_SPEC_LENGTH);
2794 }
2795
2796 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2797 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2798 KERNARG_PRELOAD_SPEC_OFFSET);
2799 }
2800 return true;
2801
2803 // 4 bytes from here are reserved, must be 0.
2804 ReservedBytes = DE.getBytes(Cursor, 4);
2805 for (char B : ReservedBytes) {
2806 if (B != 0)
2808 }
2809 return true;
2810
2811 default:
2812 llvm_unreachable("Unhandled index. Case statements cover everything.");
2813 return true;
2814 }
2815#undef PRINT_DIRECTIVE
2816}
2817
// decodeKernelDescriptor (name grounded by the call in onSymbolStart below):
// validate alignment/size of a 64-byte kernel descriptor, then walk it with
// a cursor, printing an .amdhsa_kernel block to stdout.
2819 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2820
2821 // CP microcode requires the kernel descriptor to be 64 aligned.
2822 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2823 return createStringError(std::errc::invalid_argument,
2824 "kernel descriptor must be 64-byte aligned");
2825
2826 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2827 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2828 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2829 // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
2830 // when required.
2831 if (isGFX10Plus()) {
2832 uint16_t KernelCodeProperties =
2835 EnableWavefrontSize32 =
2836 AMDHSA_BITS_GET(KernelCodeProperties,
2837 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2838 }
2839
2840 std::string Kd;
2841 raw_string_ostream KdStream(Kd);
2842 KdStream << ".amdhsa_kernel " << KdName << '\n';
2843
// Iterate the descriptor field-by-field until the cursor errors or the
// 64-byte buffer is exhausted.
2845 while (C && C.tell() < Bytes.size()) {
2846 Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2847
2848 cantFail(C.takeError());
2849
2850 if (!Res)
2851 return Res;
2852 }
2853 KdStream << ".end_amdhsa_kernel\n";
2854 outs() << KdStream.str();
2855 return true;
2856}
2857
// onSymbolStart: target hook invoked at symbol boundaries. Only kernel
// descriptors are handled: code-object-v2 HSA kernel symbols are rejected,
// v3 ".kd" object symbols are decoded; all other symbols return false.
2859 uint64_t &Size,
2860 ArrayRef<uint8_t> Bytes,
2861 uint64_t Address) const {
2862 // Right now only kernel descriptor needs to be handled.
2863 // We ignore all other symbols for target specific handling.
2864 // TODO:
2865 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2866 // Object V2 and V3 when symbols are marked protected.
2867
2868 // amd_kernel_code_t for Code Object V2.
2869 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2870 Size = 256;
2871 return createStringError(std::errc::invalid_argument,
2872 "code object v2 is not supported");
2873 }
2874
2875 // Code Object V3 kernel descriptors.
2876 StringRef Name = Symbol.Name;
2877 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2878 Size = 64; // Size = 64 regardless of success or failure.
2879 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2880 }
2881
2882 return false;
2883}
2884
// Create (or reuse) a context symbol named Id with constant value Val and
// return a reference expression to it. An existing symbol whose value does
// not evaluate to Val triggers a redefinition warning.
2885 const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2886 int64_t Val) {
2887 MCContext &Ctx = getContext();
2888 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2889 // Note: only set value to Val on a new symbol in case a disassembler
2890 // has already been initialized in this context.
2891 if (!Sym->isVariable()) {
// NOTE(review): the assignment of the symbol's value (orig. 2892) is
// missing from this listing.
2893 } else {
// Seed Res with a value guaranteed to differ from Val so a failed
// evaluation is detected below.
2894 int64_t Res = ~Val;
2895 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2896 if (!Valid || Res != Val)
2897 Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
2898 }
2899 return MCSymbolRefExpr::create(Sym, Ctx);
2900}
2901
// Predicate: true when MI is a buffer memory instruction — MUBUF/MTBUF, or
// an SMEM S_BUFFER_* form. NOTE(review): the signature line is missing from
// this listing; name inferred from the body — confirm upstream.
2903 const uint64_t TSFlags = MCII->get(MI.getOpcode()).TSFlags;
2904
2905 // Check for MUBUF and MTBUF instructions
2906 if (TSFlags & (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))
2907 return true;
2908
2909 // Check for SMEM buffer instructions (S_BUFFER_* instructions)
2910 if ((TSFlags & SIInstrFlags::SMRD) && AMDGPU::getSMEMIsBuffer(MI.getOpcode()))
2911 return true;
2912
2913 return false;
2914}
2915
2916//===----------------------------------------------------------------------===//
2917// AMDGPUSymbolizer
2918//===----------------------------------------------------------------------===//
2919
// Symbolize a branch target: look up an STT_NOTYPE symbol at Value in the
// section symbol table supplied via DisInfo; on a match attach a symbol-ref
// expression to Inst, otherwise record the address so the caller can
// synthesize a label.
2920 // Try to find symbol name for specified label
2922 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2923 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2924 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2925
2926 if (!IsBranch) {
2927 return false;
2928 }
2929
2930 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2931 if (!Symbols)
2932 return false;
2933
2934 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2935 return Val.Addr == static_cast<uint64_t>(Value) &&
2936 Val.Type == ELF::STT_NOTYPE;
2937 });
2938 if (Result != Symbols->end()) {
2939 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2940 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
// NOTE(review): the line attaching Add to Inst (orig. 2941) is missing
// from this listing.
2942 return true;
2943 }
2944 // Add to list of referenced addresses, so caller can synthesize a label.
2945 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2946 return false;
2947}
2948
// PC-relative load reference comments are not supported for AMDGPU.
2950 int64_t Value,
2951 uint64_t Address) {
2952 llvm_unreachable("unimplemented");
2953}
2954
2955//===----------------------------------------------------------------------===//
2956// Initialization
2957//===----------------------------------------------------------------------===//
2958
// Factory registered with the target registry: construct an AMDGPUSymbolizer.
// The op-info and symbol-lookup callbacks are unused.
2960 LLVMOpInfoCallback /*GetOpInfo*/,
2961 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2962 void *DisInfo,
2963 MCContext *Ctx,
2964 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2965 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2966}
2967
// Factory registered with the target registry: construct an
// AMDGPUDisassembler, handing it a fresh MCInstrInfo from the target.
2969 const MCSubtargetInfo &STI,
2970 MCContext &Ctx) {
2971 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2972}
2973
2974extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
MCDisassembler::DecodeStatus DecodeStatus
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)
static VOPModifiers collectVOPModifiers(const MCInst &MI, bool IsVOP3P=false)
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op, AMDGPU::OpName Name)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler()
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm, uint64_t Addr, const MCDisassembler *Decoder)
static SmallString< 32 > getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes)
Print a string describing the reserved bit range specified by Mask with offset BaseBytes for use in e...
#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)
static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static std::bitset< 128 > eat16Bytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize, unsigned OpWidth, unsigned Imm, unsigned EncImm, const MCDisassembler *Decoder)
static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static MCRegister CheckVGPROverflow(MCRegister Reg, const MCRegisterClass &RC, const MCRegisterInfo &MRI)
static int64_t getInlineImmValBF16(unsigned Imm)
#define DECODE_SDWA(DecName)
static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
#define DECODE_OPERAND_REG_8(RegClass)
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static int64_t getInlineImmVal32(unsigned Imm)
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
#define CHECK_RESERVED_BITS(MASK)
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define SGPR_MAX
static int64_t getInlineImmVal64(unsigned Imm)
static T eatBytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw, const MCDisassembler *Decoder)
static MCDisassembler * createAMDGPUDisassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)
static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val, uint64_t Addr, const void *Decoder)
static MCSymbolizer * createAMDGPUSymbolizer(const Triple &, LLVMOpInfoCallback, LLVMSymbolLookupCallback, void *DisInfo, MCContext *Ctx, std::unique_ptr< MCRelocationInfo > &&RelInfo)
static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static int64_t getInlineImmValF16(unsigned Imm)
#define GET_FIELD(MASK)
static std::bitset< 96 > eat12Bytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static Error createReservedKDBytesError(unsigned BaseInBytes, unsigned WidthInBytes)
Create an error object to return from onSymbolStart for reserved kernel descriptor bytes being set.
static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)
static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes, const char *Msg="")
Create an error object to return from onSymbolStart for reserved kernel descriptor bits being set.
static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI, MCOperand &MO, uint8_t NumRegs)
Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the appropriate subregister for the used format width.
This file contains declaration for AMDGPU ISA disassembler.
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
#define AMDHSA_BITS_GET(SRC, MSK)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
#define T
MachineInstr unsigned OpIdx
Interface definition for SIRegisterInfo.
MCOperand decodeNonVGPRSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const
MCOperand decodeLiteral64Constant() const
void convertVOPC64DPPInst(MCInst &MI) const
bool isBufferInstruction(const MCInst &MI) const
Check if the instruction is a buffer operation (MUBUF, MTBUF, or S_BUFFER)
void convertEXPInst(MCInst &MI) const
MCOperand decodeSpecialReg64(unsigned Val) const
const char * getRegClassName(unsigned RegClassID) const
Expected< bool > decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC1.
MCOperand decodeSplitBarrier(const MCInst &Inst, unsigned Val) const
Expected< bool > decodeKernelDescriptorDirective(DataExtractor::Cursor &Cursor, ArrayRef< uint8_t > Bytes, raw_string_ostream &KdStream) const
void convertVOPCDPPInst(MCInst &MI) const
MCOperand decodeSpecialReg96Plus(unsigned Val) const
MCOperand decodeSDWASrc32(unsigned Val) const
void setABIVersion(unsigned Version) override
ELF-specific, set the ABI version from the object header.
Expected< bool > decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC2.
unsigned getAgprClassId(unsigned Width) const
MCOperand decodeDpp8FI(unsigned Val) const
MCOperand decodeSDWASrc(unsigned Width, unsigned Val) const
void convertFMAanyK(MCInst &MI) const
DecodeStatus tryDecodeInst(const uint8_t *Table, MCInst &MI, InsnType Inst, uint64_t Address, raw_ostream &Comments) const
void convertMacDPPInst(MCInst &MI) const
MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const
void convertDPP8Inst(MCInst &MI) const
MCOperand createVGPR16Operand(unsigned RegIdx, bool IsHi) const
MCOperand errOperand(unsigned V, const Twine &ErrMsg) const
MCOperand decodeVersionImm(unsigned Imm) const
Expected< bool > decodeKernelDescriptor(StringRef KdName, ArrayRef< uint8_t > Bytes, uint64_t KdAddress) const
void convertVOP3DPPInst(MCInst &MI) const
void convertTrue16OpSel(MCInst &MI) const
MCOperand decodeSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const
MCOperand decodeLiteralConstant(const MCInstrDesc &Desc, const MCOperandInfo &OpDesc) const
Expected< bool > decodeCOMPUTE_PGM_RSRC3(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC3.
AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, MCInstrInfo const *MCII)
MCOperand decodeSpecialReg32(unsigned Val) const
MCOperand createRegOperand(MCRegister Reg) const
MCOperand decodeSDWAVopcDst(unsigned Val) const
void convertVINTERPInst(MCInst &MI) const
void convertSDWAInst(MCInst &MI) const
unsigned getSgprClassId(unsigned Width) const
static MCOperand decodeIntImmed(unsigned Imm)
void convertWMMAInst(MCInst &MI) const
MCOperand decodeBoolReg(const MCInst &Inst, unsigned Val) const
unsigned getVgprClassId(unsigned Width) const
void convertMAIInst(MCInst &MI) const
f8f6f4 instructions have different pseudos depending on the used formats.
unsigned getTtmpClassId(unsigned Width) const
DecodeStatus getInstruction(MCInst &MI, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CS) const override
Returns the disassembly of a single instruction.
MCOperand decodeMandatoryLiteral64Constant(uint64_t Imm) const
void convertMIMGInst(MCInst &MI) const
bool isMacDPP(MCInst &MI) const
int getTTmpIdx(unsigned Val) const
void convertVOP3PDPPInst(MCInst &MI) const
MCOperand createSRegOperand(unsigned SRegClassID, unsigned Val) const
MCOperand decodeSDWASrc16(unsigned Val) const
Expected< bool > onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address) const override
Used to perform separate target specific disassembly for a particular symbol.
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) override
Try to add a symbolic operand instead of Value to the MCInst.
void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, uint64_t Address) override
Try to add a comment on the PC-relative load.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
const T * data() const
Definition ArrayRef.h:139
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:186
A class representing a position in a DataExtractor, as well as any error encountered during extraction.
LLVM_ABI uint32_t getU32(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint32_t value from *offset_ptr.
LLVM_ABI uint16_t getU16(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint16_t value from *offset_ptr.
LLVM_ABI void skip(Cursor &C, uint64_t Length) const
Advance the Cursor position by the given number of bytes.
LLVM_ABI StringRef getBytes(uint64_t *OffsetPtr, uint64_t Length, Error *Err=nullptr) const
Extract a fixed number of bytes from the specified offset.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Tagged union holding either a T or a Error.
Definition Error.h:485
static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:408
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
const MCRegisterInfo * getRegisterInfo() const
Definition MCContext.h:414
Superclass for all disassemblers.
MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
MCContext & getContext() const
const MCSubtargetInfo & STI
raw_ostream * CommentStream
DecodeStatus
Ternary decode status.
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getOpcode() const
Definition MCInst.h:202
void addOperand(const MCOperand Op)
Definition MCInst.h:215
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
Interface to description of machine instruction set.
Definition MCInstrInfo.h:27
This holds information about one operand of a machine instruction, indicating the register class for register operands, etc.
Definition MCInstrDesc.h:86
uint8_t OperandType
Information about the type of the operand.
Definition MCInstrDesc.h:98
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
bool isValid() const
Definition MCInst.h:64
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getSizeInBits() const
Return the size of the physical register in bits if we are able to determine it.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc objects that represent all of the machine registers that the target has.
MCRegister getMatchingSuperReg(MCRegister Reg, unsigned SubIdx, const MCRegisterClass *RC) const
Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
Generic base class for all target subtargets.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created and uniqued by the MCContext class.
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
Symbolize and annotate disassembled instructions.
Represents a location in source code.
Definition SMLoc.h:22
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better as a string (e.g. operator+ etc).
Definition SmallString.h:26
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition Twine.h:82
LLVM Value Representation.
Definition Value.h:75
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
A raw_ostream that writes to an SmallVector or SmallString.
const char *(* LLVMSymbolLookupCallback)(void *DisInfo, uint64_t ReferenceValue, uint64_t *ReferenceType, uint64_t ReferencePC, const char **ReferenceName)
The type for the symbol lookup function.
int(* LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, uint64_t Offset, uint64_t OpSize, uint64_t InstSize, int TagType, void *TagBuf)
The type for the operand information call back function.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
ArrayRef< GFXVersion > getGFXVersions()
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
EncodingField< Bit, Bit, D > EncodingBit
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isGFX10(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
bool isVOPC64DPP(unsigned Opc)
unsigned getAMDHSACodeObjectVersion(const Module &M)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const MFMA_F8F6F4_Info * getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA, unsigned FmtB, unsigned F8F8Opcode)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX10Plus(const MCSubtargetInfo &STI)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:234
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:204
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:211
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:227
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:213
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:203
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:209
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:212
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:221
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:219
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:214
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:228
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:239
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:240
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:215
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:206
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:224
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:220
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:226
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:216
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:241
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:223
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:205
bool hasGDS(const MCSubtargetInfo &STI)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool isGFX1170(const MCSubtargetInfo &STI)
bool isGFX1250(const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_NOTYPE
Definition ELF.h:1418
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1432
@ STT_OBJECT
Definition ELF.h:1419
value_type read(const void *memory, endianness endian)
Read a value of a particular endianness from memory.
Definition Endian.h:60
uint16_t read16(const void *P, endianness E)
Definition Endian.h:409
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
LLVM_ABI raw_fd_ostream & outs()
This returns a reference to a raw_fd_ostream for standard output.
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
Op::Description Desc
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition Error.h:769
Target & getTheGCNTarget()
The target for GCN GPUs.
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Add
Sum of integers.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
std::vector< SymbolInfoTy > SectionSymbolsTy
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
static void RegisterMCSymbolizer(Target &T, Target::MCSymbolizerCtorTy Fn)
RegisterMCSymbolizer - Register an MCSymbolizer implementation for the given target.
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.