1//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//===----------------------------------------------------------------------===//
10//
11/// \file
12///
13/// This file contains the definition of the AMDGPU ISA disassembler.
14//
15//===----------------------------------------------------------------------===//
16
17// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18
18
19#include "Disassembler/AMDGPUDisassembler.h"
22#include "SIDefines.h"
23#include "SIRegisterInfo.h"
24#include "Utils/AMDGPUBaseInfo.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCContext.h"
31#include "llvm/MC/MCDecoder.h"
33#include "llvm/MC/MCExpr.h"
34#include "llvm/MC/MCInstrDesc.h"
36#include "llvm/MC/MCInstrInfo.h"
37#include "llvm/MC/MCRegisterInfo.h"
38#include "llvm/MC/MCSubtargetInfo.h"
40
41using namespace llvm;
42using namespace llvm::MCD;
43
44#define DEBUG_TYPE "amdgpu-disassembler"
45
46#define SGPR_MAX \
47 (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
48 : AMDGPU::EncValues::SGPR_MAX_SI)
49
50using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
51
52static int64_t getInlineImmValF16(unsigned Imm);
53static int64_t getInlineImmValBF16(unsigned Imm);
54static int64_t getInlineImmVal32(unsigned Imm);
55static int64_t getInlineImmVal64(unsigned Imm);
56
57AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
58 MCContext &Ctx, MCInstrInfo const *MCII)
59 : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
60 MAI(*Ctx.getAsmInfo()),
61 HwModeRegClass(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)),
62 TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
63 CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
64 // ToDo: AMDGPUDisassembler supports only VI ISA.
65 if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
66 reportFatalUsageError("disassembly not yet supported for subtarget");
67
68 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
69 createConstantSymbolExpr(Symbol, Code);
70
71 UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
72 UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
73 UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
74}
75
76void AMDGPUDisassembler::setABIVersion(unsigned Version) {
77 CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(Version);
78}
79
80static DecodeStatus
81addOperand(MCInst &Inst, const MCOperand& Opnd) {
82 Inst.addOperand(Opnd);
83 return Opnd.isValid() ?
84 MCDisassembler::Success :
85 MCDisassembler::Fail;
86}
87
88static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
89 AMDGPU::OpName Name) {
90 int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);
91 if (OpIdx != -1) {
92 auto *I = MI.begin();
93 std::advance(I, OpIdx);
94 MI.insert(I, Op);
95 }
96 return OpIdx;
97}
98
99static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
100 uint64_t Addr,
101 const MCDisassembler *Decoder) {
102 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
103
104 // Our branches take a simm16.
105 int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;
106
107 if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
108 return MCDisassembler::Success;
109 return addOperand(Inst, MCOperand::createImm(Imm));
110}
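// Worked example (illustrative only, not from the upstream file): for an
// encoded simm16 of 0xFFF0 (-16) at Addr 0x100, the symbolic target is
// SignExtend64<16>(0xFFF0) * 4 + 4 + 0x100 = -64 + 4 + 256 = 0xC4, i.e. the
// word offset is scaled to bytes and applied past the 4-byte branch itself.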
111
112static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
113 const MCDisassembler *Decoder) {
114 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
115 int64_t Offset;
116 if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
117 Offset = SignExtend64<24>(Imm);
118 } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
119 Offset = Imm & 0xFFFFF;
120 } else { // GFX9+ supports 21-bit signed offsets.
121 Offset = SignExtend64<21>(Imm);
122 }
123 return addOperand(Inst, MCOperand::createImm(Offset));
124}
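// Worked example (illustrative only): a raw Imm of 0x1FFFFF decodes to
// +2097151 on GFX12+ (24-bit signed, sign bit clear), to 0xFFFFF on VI
// (20-bit unsigned mask), and to -1 on GFX9/GFX10 (21-bit signed).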
125
126static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
127 const MCDisassembler *Decoder) {
128 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
129 return addOperand(Inst, DAsm->decodeBoolReg(Inst, Val));
130}
131
132static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
133 uint64_t Addr,
134 const MCDisassembler *Decoder) {
135 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
136 return addOperand(Inst, DAsm->decodeSplitBarrier(Inst, Val));
137}
138
139static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
140 const MCDisassembler *Decoder) {
141 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
142 return addOperand(Inst, DAsm->decodeDpp8FI(Val));
143}
144
145#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
146 static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \
147 uint64_t /*Addr*/, \
148 const MCDisassembler *Decoder) { \
149 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
150 return addOperand(Inst, DAsm->DecoderName(Imm)); \
151 }
152
153// Decoders for registers, decoding directly via RegClassID. The 8-bit Imm
154// is the register number. Used by VGPR-only and AGPR-only operands.
155#define DECODE_OPERAND_REG_8(RegClass) \
156 static DecodeStatus Decode##RegClass##RegisterClass( \
157 MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
158 const MCDisassembler *Decoder) { \
159 assert(Imm < (1 << 8) && "8-bit encoding"); \
160 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
161 return addOperand( \
162 Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm)); \
163 }
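// For reference, DECODE_OPERAND_REG_8(VReg_128) expands to roughly:
//
//   static DecodeStatus DecodeVReg_128RegisterClass(
//       MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
//       const MCDisassembler *Decoder) {
//     assert(Imm < (1 << 8) && "8-bit encoding");
//     auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
//     return addOperand(
//         Inst, DAsm->createRegOperand(AMDGPU::VReg_128RegClassID, Imm));
//   }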
164
165#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm) \
166 static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
167 const MCDisassembler *Decoder) { \
168 assert(Imm < (1 << EncSize) && #EncSize "-bit encoding"); \
169 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
170 return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm)); \
171 }
172
173static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
174 unsigned OpWidth, unsigned Imm, unsigned EncImm,
175 const MCDisassembler *Decoder) {
176 assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
177 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
178 return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));
179}
180
181// Decoders for registers. The 7-bit Imm is the register number; decodeSrcOp
182// determines the register class. Used by SGPR-only operands.
183#define DECODE_OPERAND_SREG_7(RegClass, OpWidth) \
184 DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)
185
186#define DECODE_OPERAND_SREG_8(RegClass, OpWidth) \
187 DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)
188
189// Decoder for registers. Imm(10-bit): Imm{7-0} is the register number,
190// Imm{9} is the acc bit (AGPR or VGPR); Imm{8} should be 0 (see VOP3Pe_SMFMAC).
191// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
192// Used by AV_ register classes (AGPR or VGPR only register operands).
193template <unsigned OpWidth>
194static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
195 const MCDisassembler *Decoder) {
196 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
197 Decoder);
198}
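// Bit-layout example (illustrative only): Imm = 0x205 = 0b10'0000'0101 has
// the acc bit Imm{9} set, so after OR-ing in IS_VGPR (Imm{8}) the 'enum10'
// path of decodeSrcOp selects AGPR a5; with Imm{9} clear, the same register
// number decodes as v5 instead.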
199
200// Decoder for Src(9-bit encoding) registers only.
201template <unsigned OpWidth>
202static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
203 uint64_t /* Addr */,
204 const MCDisassembler *Decoder) {
205 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
206}
207
208// Decoder for Src(9-bit encoding) AGPR; the register number is encoded in
209// 9 bits. Set Imm{9} to 1 (set acc) and decode using 'enum10' from
210// decodeSrcOp, registers only.
211template <unsigned OpWidth>
212static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
213 const MCDisassembler *Decoder) {
214 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
215}
216
217// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
218// Imm{9} is acc, registers only.
219template <unsigned OpWidth>
220static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
221 uint64_t /* Addr */,
222 const MCDisassembler *Decoder) {
223 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
224}
225
226// Decoder for RegisterOperands using 9-bit Src encoding. Operand can be
227// register from RegClass or immediate. Registers that don't belong to RegClass
228// will be decoded and InstPrinter will report warning. Immediate will be
229// decoded into constant matching the OperandType (important for floating point
230// types).
231template <unsigned OpWidth>
232static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
233 uint64_t /* Addr */,
234 const MCDisassembler *Decoder) {
235 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
236}
237
238// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
239// and decode using 'enum10' from decodeSrcOp.
240template <unsigned OpWidth>
241static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
242 uint64_t /* Addr */,
243 const MCDisassembler *Decoder) {
244 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
245}
246
247// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
248// when RegisterClass is used as an operand. Most often used for destination
249// operands.
250
251DECODE_OPERAND_REG_8(VGPR_32)
252DECODE_OPERAND_REG_8(VGPR_32_Lo128)
253DECODE_OPERAND_REG_8(VReg_64)
254DECODE_OPERAND_REG_8(VReg_96)
255DECODE_OPERAND_REG_8(VReg_128)
256DECODE_OPERAND_REG_8(VReg_192)
257DECODE_OPERAND_REG_8(VReg_256)
258DECODE_OPERAND_REG_8(VReg_288)
259DECODE_OPERAND_REG_8(VReg_320)
260DECODE_OPERAND_REG_8(VReg_352)
261DECODE_OPERAND_REG_8(VReg_384)
262DECODE_OPERAND_REG_8(VReg_512)
263DECODE_OPERAND_REG_8(VReg_1024)
264
265DECODE_OPERAND_SREG_7(SReg_32, 32)
266DECODE_OPERAND_SREG_7(SReg_32_XM0, 32)
267DECODE_OPERAND_SREG_7(SReg_32_XEXEC, 32)
268DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, 32)
269DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, 32)
270DECODE_OPERAND_SREG_7(SReg_64_XEXEC, 64)
271DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, 64)
272DECODE_OPERAND_SREG_7(SReg_96, 96)
273DECODE_OPERAND_SREG_7(SReg_128, 128)
274DECODE_OPERAND_SREG_7(SReg_128_XNULL, 128)
275DECODE_OPERAND_SREG_7(SReg_256, 256)
276DECODE_OPERAND_SREG_7(SReg_256_XNULL, 256)
277DECODE_OPERAND_SREG_7(SReg_512, 512)
278
279DECODE_OPERAND_SREG_8(SReg_64, 64)
280
281
282DECODE_OPERAND_REG_8(AReg_64)
283DECODE_OPERAND_REG_8(AReg_128)
284DECODE_OPERAND_REG_8(AReg_256)
285DECODE_OPERAND_REG_8(AReg_512)
286DECODE_OPERAND_REG_8(AReg_1024)
287
288static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
289 uint64_t /*Addr*/,
290 const MCDisassembler *Decoder) {
291 assert(isUInt<10>(Imm) && "10-bit encoding expected");
292 assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
293
294 bool IsHi = Imm & (1 << 9);
295 unsigned RegIdx = Imm & 0xff;
296 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
297 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
298}
299
300static DecodeStatus
301DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
302 const MCDisassembler *Decoder) {
303 assert(isUInt<8>(Imm) && "8-bit encoding expected");
304
305 bool IsHi = Imm & (1 << 7);
306 unsigned RegIdx = Imm & 0x7f;
307 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
308 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
309}
310
311template <unsigned OpWidth>
312static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
313 uint64_t /*Addr*/,
314 const MCDisassembler *Decoder) {
315 assert(isUInt<9>(Imm) && "9-bit encoding expected");
316
317 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
318 if (Imm & AMDGPU::EncValues::IS_VGPR) {
319 bool IsHi = Imm & (1 << 7);
320 unsigned RegIdx = Imm & 0x7f;
321 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
322 }
323 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
324}
325
326template <unsigned OpWidth>
327static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
328 uint64_t /*Addr*/,
329 const MCDisassembler *Decoder) {
330 assert(isUInt<10>(Imm) && "10-bit encoding expected");
331
332 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
333 if (Imm & AMDGPU::EncValues::IS_VGPR) {
334 bool IsHi = Imm & (1 << 9);
335 unsigned RegIdx = Imm & 0xff;
336 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
337 }
338 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
339}
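// Decode example (illustrative only): Imm = 0x305 has IS_VGPR (Imm{8}) set,
// IsHi (Imm{9}) set and RegIdx = 5, so it selects the high half v5.h; with
// IS_VGPR clear, the low 8 bits are handed to decodeNonVGPRSrcOp (SGPRs,
// inline constants, or a literal).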
340
341static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
342 uint64_t /*Addr*/,
343 const MCDisassembler *Decoder) {
344 assert(isUInt<10>(Imm) && "10-bit encoding expected");
345 assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");
346
347 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
348
349 bool IsHi = Imm & (1 << 9);
350 unsigned RegIdx = Imm & 0xff;
351 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
352}
353
354static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
355 uint64_t Addr,
356 const MCDisassembler *Decoder) {
357 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
358 return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
359}
360
361static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm,
362 uint64_t Addr,
363 const MCDisassembler *Decoder) {
364 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
365 return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
366}
367
368static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
369 uint64_t Addr, const void *Decoder) {
370 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
371 return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
372}
373
374static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
375 const MCDisassembler *Decoder) {
376 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
377 return addOperand(Inst, DAsm->decodeSrcOp(Inst, Opw, Imm | 256));
378}
379
380template <unsigned Opw>
381static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
382 uint64_t /* Addr */,
383 const MCDisassembler *Decoder) {
384 return decodeAVLdSt(Inst, Imm, Opw, Decoder);
385}
386
387static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
388 uint64_t Addr,
389 const MCDisassembler *Decoder) {
390 assert(Imm < (1 << 9) && "9-bit encoding");
391 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
392 return addOperand(Inst, DAsm->decodeSrcOp(Inst, 64, Imm));
393}
394
395#define DECODE_SDWA(DecName) \
396DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
397
398DECODE_SDWA(Src32)
399DECODE_SDWA(Src16)
400DECODE_SDWA(VopcDst)
401
402static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
403 uint64_t /* Addr */,
404 const MCDisassembler *Decoder) {
405 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
406 return addOperand(Inst, DAsm->decodeVersionImm(Imm));
407}
408
409#include "AMDGPUGenDisassemblerTables.inc"
410
411namespace {
412// Define bitwidths for various types used to instantiate the decoder.
413template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
414template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
415template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
416template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
417} // namespace
418
419//===----------------------------------------------------------------------===//
420//
421//===----------------------------------------------------------------------===//
422
423template <typename InsnType>
424DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table, MCInst &MI,
425 InsnType Inst, uint64_t Address,
426 raw_ostream &Comments) const {
427 assert(MI.getOpcode() == 0);
428 assert(MI.getNumOperands() == 0);
429 MCInst TmpInst;
430 HasLiteral = false;
431 const auto SavedBytes = Bytes;
432
433 SmallString<64> LocalComments;
434 raw_svector_ostream LocalCommentStream(LocalComments);
435 CommentStream = &LocalCommentStream;
436
437 DecodeStatus Res =
438 decodeInstruction(Table, TmpInst, Inst, Address, this, STI);
439
440 CommentStream = nullptr;
441
442 if (Res != MCDisassembler::Fail) {
443 MI = TmpInst;
444 Comments << LocalComments;
445 return MCDisassembler::Success;
446 }
447 Bytes = SavedBytes;
448 return MCDisassembler::Fail;
449}
450
451template <typename InsnType>
452DecodeStatus
453AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2,
454 MCInst &MI, InsnType Inst, uint64_t Address,
455 raw_ostream &Comments) const {
456 for (const uint8_t *T : {Table1, Table2}) {
457 if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
458 return Res;
459 }
460 return MCDisassembler::Fail;
461}
462
463template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
464 assert(Bytes.size() >= sizeof(T));
465 const auto Res =
466 support::endian::read<T, llvm::endianness::little>(Bytes.data());
467 Bytes = Bytes.slice(sizeof(T));
468 return Res;
469}
470
471static inline std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
472 using namespace llvm::support::endian;
473 assert(Bytes.size() >= 12);
474 std::bitset<96> Lo(read<uint64_t, endianness::little>(Bytes.data()));
475 Bytes = Bytes.slice(8);
476 std::bitset<96> Hi(read<uint32_t, endianness::little>(Bytes.data()));
477 Bytes = Bytes.slice(4);
478 return (Hi << 64) | Lo;
479}
480
481static inline std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
482 using namespace llvm::support::endian;
483 assert(Bytes.size() >= 16);
484 std::bitset<128> Lo(read<uint64_t, endianness::little>(Bytes.data()));
485 Bytes = Bytes.slice(8);
486 std::bitset<128> Hi(read<uint64_t, endianness::little>(Bytes.data()));
487 Bytes = Bytes.slice(8);
488 return (Hi << 64) | Lo;
489}
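// Byte-order example (illustrative only): for the byte sequence
// 00 01 02 ... 0F, Lo = 0x0706050403020100 and Hi = 0x0F0E0D0C0B0A0908, so
// (Hi << 64) | Lo reassembles the full little-endian 128-bit word.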
490
491void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
492 const MCInstrInfo &MCII) const {
493 const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
494 for (auto [OpNo, OpDesc] : enumerate(Desc.operands())) {
495 if (OpNo >= MI.getNumOperands())
496 continue;
497
498 // TODO: Fix V_DUAL_FMAMK_F32_X_FMAAK_F32_gfx12 vsrc operands,
499 // defined to take VGPR_32, but in reality allowing inline constants.
500 bool IsSrc = AMDGPU::OPERAND_SRC_FIRST <= OpDesc.OperandType &&
501 OpDesc.OperandType <= AMDGPU::OPERAND_SRC_LAST;
502 if (!IsSrc && OpDesc.OperandType != MCOI::OPERAND_REGISTER)
503 continue;
504
505 MCOperand &Op = MI.getOperand(OpNo);
506 if (!Op.isImm())
507 continue;
508 int64_t Imm = Op.getImm();
509 if (AMDGPU::EncValues::INLINE_INTEGER_C_MIN <= Imm &&
510 Imm <= AMDGPU::EncValues::INLINE_INTEGER_C_MAX) {
511 Op = decodeIntImmed(Imm);
512 continue;
513 }
514
515 if (Imm == AMDGPU::EncValues::LITERAL_CONST) {
516 Op = decodeLiteralConstant(Desc, OpDesc);
517 continue;
518 }
519
520 if (AMDGPU::EncValues::INLINE_FLOATING_C_MIN <= Imm &&
521 Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX) {
522 switch (OpDesc.OperandType) {
523 case AMDGPU::OPERAND_REG_IMM_BF16:
524 case AMDGPU::OPERAND_REG_IMM_V2BF16:
525 case AMDGPU::OPERAND_REG_INLINE_C_BF16:
526 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
527 Imm = getInlineImmValBF16(Imm);
528 break;
529 case AMDGPU::OPERAND_REG_IMM_FP16:
530 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
531 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
532 case AMDGPU::OPERAND_REG_IMM_INT16:
533 Imm = getInlineImmValF16(Imm);
534 break;
535 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
536 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
537 Imm = getInlineImmValF16(Imm);
538 break;
539 case AMDGPU::OPERAND_REG_IMM_V2FP16: {
540 // V_PK_FMAC_F16 on GFX11+ duplicates the f16 inline constant to both
541 // halves, so we need to produce the duplicated value for correct
542 // round-trip.
543 if (isGFX11Plus()) {
544 int64_t F16Val = getInlineImmValF16(Imm);
545 Imm = (F16Val << 16) | (F16Val & 0xFFFF);
546 } else {
547 Imm = getInlineImmValF16(Imm);
548 }
549 break;
550 }
551 case AMDGPU::OPERAND_REG_IMM_INT64:
552 case AMDGPU::OPERAND_REG_IMM_FP64:
553 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
554 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
555 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
556 Imm = getInlineImmVal64(Imm);
557 break;
558 default:
559 Imm = getInlineImmVal32(Imm);
560 }
561 Op.setImm(Imm);
562 }
563 }
564}
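// Encoding ranges handled above (per AMDGPU::EncValues, illustrative
// summary): raw source operands 128..208 are integer inline constants
// (e.g. 129 -> 1, 193 -> -1), 240..248 are floating-point inline constants
// remapped by the getInlineImmVal* helpers, and 255 signals that a trailing
// literal dword follows the instruction.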
565
566DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
567 ArrayRef<uint8_t> Bytes_,
568 uint64_t Address,
569 raw_ostream &CS) const {
570 unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
571 Bytes = Bytes_.slice(0, MaxInstBytesNum);
572
573 // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
574 // there are fewer bytes left). This will be overridden on success.
575 Size = std::min((size_t)4, Bytes_.size());
576
577 do {
578 // ToDo: better to switch encoding length using some bit predicate
579 // but it is unknown yet, so try all we can
580
581 // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
582 // encodings
583 if (isGFX1250Plus() && Bytes.size() >= 16) {
584 std::bitset<128> DecW = eat16Bytes(Bytes);
585 if (tryDecodeInst(DecoderTableGFX1250128, MI, DecW, Address, CS))
586 break;
587 Bytes = Bytes_.slice(0, MaxInstBytesNum);
588 }
589
590 if (isGFX11Plus() && Bytes.size() >= 12) {
591 std::bitset<96> DecW = eat12Bytes(Bytes);
592
593 if (isGFX11() &&
594 tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
595 DecW, Address, CS))
596 break;
597
598 if (isGFX1250() &&
599 tryDecodeInst(DecoderTableGFX125096, DecoderTableGFX1250_FAKE1696, MI,
600 DecW, Address, CS))
601 break;
602
603 if (isGFX12() &&
604 tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
605 DecW, Address, CS))
606 break;
607
608 if (isGFX12() &&
609 tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
610 break;
611
612 if (isGFX13() &&
613 tryDecodeInst(DecoderTableGFX1396, DecoderTableGFX13_FAKE1696, MI,
614 DecW, Address, CS))
615 break;
616
617 if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
618 // Return 8 bytes for a potential literal.
619 Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);
620
621 if (isGFX1250() &&
622 tryDecodeInst(DecoderTableGFX125096, MI, DecW, Address, CS))
623 break;
624 }
625
626 // Reinitialize Bytes
627 Bytes = Bytes_.slice(0, MaxInstBytesNum);
628
629 } else if (Bytes.size() >= 16 &&
630 STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
631 std::bitset<128> DecW = eat16Bytes(Bytes);
632 if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
633 break;
634
635 // Reinitialize Bytes
636 Bytes = Bytes_.slice(0, MaxInstBytesNum);
637 }
638
639 if (Bytes.size() >= 8) {
640 const uint64_t QW = eatBytes<uint64_t>(Bytes);
641
642 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
643 tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
644 break;
645
646 if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
647 tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
648 break;
649
650 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
651 tryDecodeInst(DecoderTableGFX95064, MI, QW, Address, CS))
652 break;
653
654 // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
655 // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
656 // table first so we print the correct name.
657 if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
658 tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
659 break;
660
661 if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
662 tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
663 break;
664
665 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
666 tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
667 break;
668
669 if ((isVI() || isGFX9()) &&
670 tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
671 break;
672
673 if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
674 break;
675
676 if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
677 break;
678
679 if (isGFX1250() &&
680 tryDecodeInst(DecoderTableGFX125064, DecoderTableGFX1250_FAKE1664, MI,
681 QW, Address, CS))
682 break;
683
684 if (isGFX12() &&
685 tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
686 Address, CS))
687 break;
688
689 if (isGFX11() &&
690 tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
691 Address, CS))
692 break;
693
694 if (isGFX11() &&
695 tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
696 break;
697
698 if (isGFX12() &&
699 tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
700 break;
701
702 if (isGFX13() &&
703 tryDecodeInst(DecoderTableGFX1364, DecoderTableGFX13_FAKE1664, MI, QW,
704 Address, CS))
705 break;
706
707 // Reinitialize Bytes
708 Bytes = Bytes_.slice(0, MaxInstBytesNum);
709 }
710
711 // Try decode 32-bit instruction
712 if (Bytes.size() >= 4) {
713 const uint32_t DW = eatBytes<uint32_t>(Bytes);
714
715 if ((isVI() || isGFX9()) &&
716 tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
717 break;
718
719 if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
720 break;
721
722 if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
723 break;
724
725 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
726 tryDecodeInst(DecoderTableGFX95032, MI, DW, Address, CS))
727 break;
728
729 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
730 tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
731 break;
732
733 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
734 tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
735 break;
736
737 if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
738 break;
739
740 if (isGFX11() &&
741 tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
742 Address, CS))
743 break;
744
745 if (isGFX1250() &&
746 tryDecodeInst(DecoderTableGFX125032, DecoderTableGFX1250_FAKE1632, MI,
747 DW, Address, CS))
748 break;
749
750 if (isGFX12() &&
751 tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
752 Address, CS))
753 break;
754
755 if (isGFX13() &&
756 tryDecodeInst(DecoderTableGFX1332, DecoderTableGFX13_FAKE1632, MI, DW,
757 Address, CS))
758 break;
759 }
760
761 return MCDisassembler::Fail;
762 } while (false);
763
764 DecodeStatus Status = MCDisassembler::Success;
765
766 decodeImmOperands(MI, *MCII);
767
768 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
769 if (isMacDPP(MI))
770 convertMacDPPInst(MI);
771
772 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
773 convertVOP3PDPPInst(MI);
774 else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
775 convertVOPCDPPInst(MI); // Special VOP3 case
776 else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
777 convertVOPC64DPPInst(MI); // Special VOP3 case
778 else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
779 -1)
780 convertDPP8Inst(MI);
781 else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
782 convertVOP3DPPInst(MI); // Regular VOP3 case
783 }
784
786
787 if (AMDGPU::isMAC(MI.getOpcode())) {
788 // Insert dummy unused src2_modifiers.
789 insertNamedMCOperand(MI, MCOperand::createImm(0),
790 AMDGPU::OpName::src2_modifiers);
791 }
792
793 if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
794 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
795 // Insert dummy unused src2_modifiers.
796 insertNamedMCOperand(MI, MCOperand::createImm(0),
797 AMDGPU::OpName::src2_modifiers);
798 }
799
800 if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
801 !AMDGPU::hasGDS(STI)) {
802 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
803 }
804
805 if (MCII->get(MI.getOpcode()).TSFlags &
806 (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
807 int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
808 AMDGPU::OpName::cpol);
809 if (CPolPos != -1) {
810 unsigned CPol =
811 (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
812 AMDGPU::CPol::GLC : 0;
813 if (MI.getNumOperands() <= (unsigned)CPolPos) {
814 insertNamedMCOperand(MI, MCOperand::createImm(CPol),
815 AMDGPU::OpName::cpol);
816 } else if (CPol) {
817 MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
818 }
819 }
820 }
821
822 if ((MCII->get(MI.getOpcode()).TSFlags &
823 (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
824 (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
825 // GFX90A lost TFE, its place is occupied by ACC.
826 int TFEOpIdx =
827 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
828 if (TFEOpIdx != -1) {
829 auto *TFEIter = MI.begin();
830 std::advance(TFEIter, TFEOpIdx);
831 MI.insert(TFEIter, MCOperand::createImm(0));
832 }
833 }
834
835 // Validate buffer instruction offsets for GFX12+ - they must not be negative.
836 if (isGFX12Plus() && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MUBUF)) {
837 int OffsetIdx =
838 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);
839 if (OffsetIdx != -1) {
840 uint32_t Imm = MI.getOperand(OffsetIdx).getImm();
841 int64_t SignedOffset = SignExtend64<24>(Imm);
842 if (SignedOffset < 0)
843 return MCDisassembler::Fail;
844 }
845 }
846
847 if (MCII->get(MI.getOpcode()).TSFlags &
848 (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
849 int SWZOpIdx =
850 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
851 if (SWZOpIdx != -1) {
852 auto *SWZIter = MI.begin();
853 std::advance(SWZIter, SWZOpIdx);
854 MI.insert(SWZIter, MCOperand::createImm(0));
855 }
856 }
857
858 const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
859 if (Desc.TSFlags & SIInstrFlags::MIMG) {
860 int VAddr0Idx =
861 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
862 int RsrcIdx =
863 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
864 unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
865 if (VAddr0Idx >= 0 && NSAArgs > 0) {
866 unsigned NSAWords = (NSAArgs + 3) / 4;
867 if (Bytes.size() < 4 * NSAWords)
868 return MCDisassembler::Fail;
869 for (unsigned i = 0; i < NSAArgs; ++i) {
870 const unsigned VAddrIdx = VAddr0Idx + 1 + i;
871 auto VAddrRCID =
872 MCII->getOpRegClassID(Desc.operands()[VAddrIdx], HwModeRegClass);
873 MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
874 }
875 Bytes = Bytes.slice(4 * NSAWords);
876 }
877
878 convertMIMGInst(MI);
879 }
880
881 if (MCII->get(MI.getOpcode()).TSFlags &
882 (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
883 convertMIMGInst(MI);
884
885 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
886 convertEXPInst(MI);
887
888 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
889 convertVINTERPInst(MI);
890
891 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
892 convertSDWAInst(MI);
893
894 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsMAI)
895 convertMAIInst(MI);
896
897 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsWMMA)
898 convertWMMAInst(MI);
899
900 int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
901 AMDGPU::OpName::vdst_in);
902 if (VDstIn_Idx != -1) {
903 int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
904 MCOI::TIED_TO);
905 if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
906 !MI.getOperand(VDstIn_Idx).isReg() ||
907 MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
908 if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
909 MI.erase(&MI.getOperand(VDstIn_Idx));
910 insertNamedMCOperand(MI,
911 MCOperand::createReg(MI.getOperand(Tied).getReg()),
912 AMDGPU::OpName::vdst_in);
913 }
914 }
915
916 bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
917 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm) && !IsSOPK)
918 convertFMAanyK(MI);
919
920 // Some VOPC instructions, e.g., v_cmpx_f_f64, use VOP3 encoding and
921 // have EXEC as implicit destination. Issue a warning if encoding for
922 // vdst is not EXEC.
923 if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) &&
924 MCII->get(MI.getOpcode()).getNumDefs() == 0 &&
925 MCII->get(MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
926 auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
927 if (Bytes_[0] != ExecEncoding)
928 CS << "Warning: vdst is not EXEC\n";
929 }
930
931 Size = MaxInstBytesNum - Bytes.size();
932 return Status;
933}
934
935void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
936 if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
937 // The MCInst still has these fields even though they are no longer encoded
938 // in the GFX11 instruction.
939 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
940 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
941 }
942}
943
944void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
946 if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
947 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
948 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
949 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
950 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
951 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
952 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
953 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
954 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
955 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
956 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
957 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
958 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
959 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
960 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
961 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
962 // The MCInst has this field that is not directly encoded in the
963 // instruction.
964 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
965 }
966}
967
968void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
969 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
970 STI.hasFeature(AMDGPU::FeatureGFX10)) {
971 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
972 // VOPC - insert clamp
973 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
974 } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
975 int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
976 if (SDst != -1) {
977 // VOPC - insert VCC register as sdst
978 insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
979 AMDGPU::OpName::sdst);
980 } else {
981 // VOP1/2 - insert omod if present in instruction
982 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
983 }
984 }
985}
986
987/// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the
988/// appropriate subregister for the used format width.
989static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI,
990 MCOperand &MO, uint8_t NumRegs) {
991 switch (NumRegs) {
992 case 4:
993 return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
994 case 6:
995 return MO.setReg(
996 MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
997 case 8:
998 if (MCRegister NewReg = MRI.getSubReg(
999 MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {
1000 MO.setReg(NewReg);
1001 }
1002 return;
1003 case 12: {
1004 // There is no 384-bit subreg index defined.
1005 MCRegister BaseReg = MRI.getSubReg(MO.getReg(), AMDGPU::sub0);
1006 MCRegister NewReg = MRI.getMatchingSuperReg(
1007 BaseReg, AMDGPU::sub0, &MRI.getRegClass(AMDGPU::VReg_384RegClassID));
1008 return MO.setReg(NewReg);
1009 }
1010 case 16:
1011 // No-op in cases where one operand is still f8/bf8.
1012 return;
1013 default:
1014 llvm_unreachable("Unexpected size for mfma/wmma f8f6f4 operand");
1015 }
1016}
1017
1018/// f8f6f4 instructions have different pseudos depending on the used formats. In
1019/// the disassembler table, we only have the variants with the largest register
1020/// classes which assume using an fp8/bf8 format for both operands. The actual
1021/// register class depends on the format in blgp and cbsz operands. Adjust the
1022/// register classes depending on the used format.
1023void AMDGPUDisassembler::convertMAIInst(MCInst &MI) const {
1024 int BlgpIdx =
1025 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);
1026 if (BlgpIdx == -1)
1027 return;
1028
1029 int CbszIdx =
1030 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);
1031
1032 unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
1033 unsigned BLGP = MI.getOperand(BlgpIdx).getImm();
1034
1035 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1036 AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, MI.getOpcode());
1037 if (!AdjustedRegClassOpcode ||
1038 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1039 return;
1040
1041 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1042 int Src0Idx =
1043 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
1044 int Src1Idx =
1045 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
1046 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
1047 AdjustedRegClassOpcode->NumRegsSrcA);
1048 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
1049 AdjustedRegClassOpcode->NumRegsSrcB);
1050}
1051
1051
1052void AMDGPUDisassembler::convertWMMAInst(MCInst &MI) const {
1053 int FmtAIdx =
1054 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_a_fmt);
1055 if (FmtAIdx == -1)
1056 return;
1057
1058 int FmtBIdx =
1059 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_b_fmt);
1060
1061 unsigned FmtA = MI.getOperand(FmtAIdx).getImm();
1062 unsigned FmtB = MI.getOperand(FmtBIdx).getImm();
1063
1064 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1065 AMDGPU::getWMMA_F8F6F4_WithFormatArgs(FmtA, FmtB, MI.getOpcode());
1066 if (!AdjustedRegClassOpcode ||
1067 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1068 return;
1069
1070 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1071 int Src0Idx =
1072 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
1073 int Src1Idx =
1074 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
1075 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
1076 AdjustedRegClassOpcode->NumRegsSrcA);
1077 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
1078 AdjustedRegClassOpcode->NumRegsSrcB);
1079}
1080
1081struct VOPModifiers {
1082 unsigned OpSel = 0;
1083 unsigned OpSelHi = 0;
1084 unsigned NegLo = 0;
1085 unsigned NegHi = 0;
1086};
1087
1088// Reconstruct values of VOP3/VOP3P operands such as op_sel.
1089// Note that these values do not affect disassembler output,
1090// so this is only necessary for consistency with src_modifiers.
1091static VOPModifiers collectVOPModifiers(const MCInst &MI,
1092 bool IsVOP3P = false) {
1093 VOPModifiers Modifiers;
1094 unsigned Opc = MI.getOpcode();
1095 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
1096 AMDGPU::OpName::src1_modifiers,
1097 AMDGPU::OpName::src2_modifiers};
1098 for (int J = 0; J < 3; ++J) {
1099 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
1100 if (OpIdx == -1)
1101 continue;
1102
1103 unsigned Val = MI.getOperand(OpIdx).getImm();
1104
1105 Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
1106 if (IsVOP3P) {
1107 Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
1108 Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
1109 Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
1110 } else if (J == 0) {
1111 Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
1112 }
1113 }
1114
1115 return Modifiers;
1116}
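// Packing example (illustrative only): if src0_modifiers and src2_modifiers
// have OP_SEL_0 set but src1_modifiers does not, Modifiers.OpSel ends up as
// 0b101; for non-VOP3P instructions, DST_OP_SEL from src0_modifiers
// contributes bit 3 of OpSel.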
1117
1118// Instructions decode the op_sel/suffix bits into the src_modifier
1119// operands. Copy those bits into the src operands for true16 VGPRs.
1120void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
1121 const unsigned Opc = MI.getOpcode();
1122 const MCRegisterClass &ConversionRC =
1123 MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
1124 constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
1125 OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
1126 SISrcMods::OP_SEL_0},
1127 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
1128 SISrcMods::OP_SEL_0},
1129 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
1130 SISrcMods::OP_SEL_0},
1131 {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
1132 SISrcMods::DST_OP_SEL}}};
1133 for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
1134 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
1135 int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
1136 if (OpIdx == -1 || OpModsIdx == -1)
1137 continue;
1138 MCOperand &Op = MI.getOperand(OpIdx);
1139 if (!Op.isReg())
1140 continue;
1141 if (!ConversionRC.contains(Op.getReg()))
1142 continue;
1143 unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
1144 const MCOperand &OpMods = MI.getOperand(OpModsIdx);
1145 unsigned ModVal = OpMods.getImm();
1146 if (ModVal & OpSelMask) { // isHi
1147 unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
1148 Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
1149 }
1150 }
1151}
1152
1153// MAC opcodes have special old and src2 operands.
1154// src2 is tied to dst, while old is not tied (but assumed to be).
1155bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
1156 constexpr int DST_IDX = 0;
1157 auto Opcode = MI.getOpcode();
1158 const auto &Desc = MCII->get(Opcode);
1159 auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);
1160
1161 if (OldIdx != -1 && Desc.getOperandConstraint(
1162 OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
1163 assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
1164 assert(Desc.getOperandConstraint(
1165 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
1166 MCOI::TIED_TO) == DST_IDX);
1167 (void)DST_IDX;
1168 return true;
1169 }
1170
1171 return false;
1172}
1173
1174// Create dummy old operand and insert dummy unused src2_modifiers
1175void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
1176 assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
1177 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1178 insertNamedMCOperand(MI, MCOperand::createImm(0),
1179 AMDGPU::OpName::src2_modifiers);
1180}
1181
1182void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
1183 unsigned Opc = MI.getOpcode();
1184
1185 int VDstInIdx =
1186 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
1187 if (VDstInIdx != -1)
1188 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
1189
1190 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1191 if (MI.getNumOperands() < DescNumOps &&
1192 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1193 convertTrue16OpSel(MI);
1194 auto Mods = collectVOPModifiers(MI);
1195 insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
1196 AMDGPU::OpName::op_sel);
1197 } else {
1198 // Insert dummy unused src modifiers.
1199 if (MI.getNumOperands() < DescNumOps &&
1200 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1202 AMDGPU::OpName::src0_modifiers);
1203
1204 if (MI.getNumOperands() < DescNumOps &&
1205 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1207 AMDGPU::OpName::src1_modifiers);
1208 }
1209}
1210
1211void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
1212 convertTrue16OpSel(MI);
1213
1214 int VDstInIdx =
1215 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
1216 if (VDstInIdx != -1)
1217 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
1218
1219 unsigned Opc = MI.getOpcode();
1220 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1221 if (MI.getNumOperands() < DescNumOps &&
1222 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1223 auto Mods = collectVOPModifiers(MI);
1224 insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
1225 AMDGPU::OpName::op_sel);
1226 }
1227}
1228
1229// Given a wide tuple \p Reg check if it will overflow 256 registers.
1230// \returns \p Reg on success or NoRegister otherwise.
1231static MCRegister CheckVGPROverflow(MCRegister Reg, const MCRegisterClass &RC,
1232 const MCRegisterInfo &MRI) {
1233 unsigned NumRegs = RC.getSizeInBits() / 32;
1234 MCRegister Sub0 = MRI.getSubReg(Reg, AMDGPU::sub0);
1235 if (!Sub0)
1236 return Reg;
1237
1238 MCRegister BaseReg;
1239 if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(Sub0))
1240 BaseReg = AMDGPU::VGPR0;
1241 else if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Sub0))
1242 BaseReg = AMDGPU::AGPR0;
1243
1244 assert(BaseReg && "Only vector registers expected");
1245
1246 return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : MCRegister();
1247}
1248
1249// Note that before gfx10, the MIMG encoding provided no information about
1250// VADDR size. Consequently, decoded instructions always show address as if it
1251 // has 1 dword, which may not actually be the case.
1252void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
1253 auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;
1254
1255 int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1256 AMDGPU::OpName::vdst);
1257
1258 int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1259 AMDGPU::OpName::vdata);
1260 int VAddr0Idx =
1261 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
1262 AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
1263 ? AMDGPU::OpName::srsrc
1264 : AMDGPU::OpName::rsrc;
1265 int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
1266 int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1267 AMDGPU::OpName::dmask);
1268
1269 int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1270 AMDGPU::OpName::tfe);
1271 int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1272 AMDGPU::OpName::d16);
1273
1274 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
1275 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
1276 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
1277
1278 assert(VDataIdx != -1);
1279 if (BaseOpcode->BVH) {
1280 // Add A16 operand for intersect_ray instructions
1281 addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
1282 return;
1283 }
1284
1285 bool IsAtomic = (VDstIdx != -1);
1286 bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
1287 bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
1288 bool IsNSA = false;
1289 bool IsPartialNSA = false;
1290 unsigned AddrSize = Info->VAddrDwords;
1291
1292 if (isGFX10Plus()) {
1293 unsigned DimIdx =
1294 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
1295 int A16Idx =
1296 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
1297 const AMDGPU::MIMGDimInfo *Dim =
1298 AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
1299 const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
1300
1301 AddrSize =
1302 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));
1303
1304 // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
1305 // VIMAGE insts other than BVH never use vaddr4.
1306 IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
1307 Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
1308 Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
1309 if (!IsNSA) {
1310 if (!IsVSample && AddrSize > 12)
1311 AddrSize = 16;
1312 } else {
1313 if (AddrSize > Info->VAddrDwords) {
1314 if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
1315 // The NSA encoding does not contain enough operands for the
1316 // combination of base opcode / dimension. Should this be an error?
1317 return;
1318 }
1319 IsPartialNSA = true;
1320 }
1321 }
1322 }
1323
1324 unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
1325 unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);
1326
1327 bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
1328 if (D16 && AMDGPU::hasPackedD16(STI)) {
1329 DstSize = (DstSize + 1) / 2;
1330 }
1331
1332 if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
1333 DstSize += 1;
1334
1335 if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
1336 return;
1337
1338 int NewOpcode =
1339 AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
1340 if (NewOpcode == -1)
1341 return;
1342
1343 // Widen the register to the correct number of enabled channels.
1344 MCRegister NewVdata;
1345 if (DstSize != Info->VDataDwords) {
1346 auto DataRCID = MCII->getOpRegClassID(
1347 MCII->get(NewOpcode).operands()[VDataIdx], HwModeRegClass);
1348
1349 // Get first subregister of VData
1350 MCRegister Vdata0 = MI.getOperand(VDataIdx).getReg();
1351 MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
1352 Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1353
1354 const MCRegisterClass &NewRC = MRI.getRegClass(DataRCID);
1355 NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &NewRC);
1356 NewVdata = CheckVGPROverflow(NewVdata, NewRC, MRI);
1357 if (!NewVdata) {
1358 // It's possible to encode this such that the low register + enabled
1359 // components exceeds the register count.
1360 return;
1361 }
1362 }
1363
1364 // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
1365 // If using partial NSA on GFX11+ widen last address register.
1366 int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
1367 MCRegister NewVAddrSA;
1368 if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
1369 AddrSize != Info->VAddrDwords) {
1370 MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
1371 MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
1372 VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1373
1374 auto AddrRCID = MCII->getOpRegClassID(
1375 MCII->get(NewOpcode).operands()[VAddrSAIdx], HwModeRegClass);
1376
1377 const MCRegisterClass &NewRC = MRI.getRegClass(AddrRCID);
1378 NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &NewRC);
1379 NewVAddrSA = CheckVGPROverflow(NewVAddrSA, NewRC, MRI);
1380 if (!NewVAddrSA)
1381 return;
1382 }
1383
1384 MI.setOpcode(NewOpcode);
1385
1386 if (NewVdata != AMDGPU::NoRegister) {
1387 MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
1388
1389 if (IsAtomic) {
1390 // Atomic operations have an additional operand (a copy of data)
1391 MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
1392 }
1393 }
1394
1395 if (NewVAddrSA) {
1396 MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
1397 } else if (IsNSA) {
1398 assert(AddrSize <= Info->VAddrDwords);
1399 MI.erase(MI.begin() + VAddr0Idx + AddrSize,
1400 MI.begin() + VAddr0Idx + Info->VAddrDwords);
1401 }
1402}
1403
1404// Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
1405// decoder only adds to src_modifiers, so manually add the bits to the other
1406// operands.
1407void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
1408 unsigned Opc = MI.getOpcode();
1409 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1410 auto Mods = collectVOPModifiers(MI, true);
1411
1412 if (MI.getNumOperands() < DescNumOps &&
1413 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
1414 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);
1415
1416 if (MI.getNumOperands() < DescNumOps &&
1417 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
1418 insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
1419 AMDGPU::OpName::op_sel);
1420 if (MI.getNumOperands() < DescNumOps &&
1421 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
1422 insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
1423 AMDGPU::OpName::op_sel_hi);
1424 if (MI.getNumOperands() < DescNumOps &&
1425 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
1426 insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
1427 AMDGPU::OpName::neg_lo);
1428 if (MI.getNumOperands() < DescNumOps &&
1429 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
1430 insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
1431 AMDGPU::OpName::neg_hi);
1432}
1433
1434// Create dummy old operand and insert optional operands
1435void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
1436 unsigned Opc = MI.getOpcode();
1437 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1438
1439 if (MI.getNumOperands() < DescNumOps &&
1440 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
1441 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1442
1443 if (MI.getNumOperands() < DescNumOps &&
1444 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1445 insertNamedMCOperand(MI, MCOperand::createImm(0),
1446 AMDGPU::OpName::src0_modifiers);
1447
1448 if (MI.getNumOperands() < DescNumOps &&
1449 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1450 insertNamedMCOperand(MI, MCOperand::createImm(0),
1451 AMDGPU::OpName::src1_modifiers);
1452}
1453
1454void AMDGPUDisassembler::convertVOPC64DPPInst(MCInst &MI) const {
1455 unsigned Opc = MI.getOpcode();
1456 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1457
1458 convertTrue16OpSel(MI);
1459
1460 if (MI.getNumOperands() < DescNumOps &&
1461 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1462 auto Mods = collectVOPModifiers(MI);
1463 insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
1464 AMDGPU::OpName::op_sel);
1465 }
1466}
1467
1468void AMDGPUDisassembler::convertFMAanyK(MCInst &MI) const {
1469 assert(HasLiteral && "Should have decoded a literal");
1470 insertNamedMCOperand(MI, MCOperand::createImm(Literal), AMDGPU::OpName::immX);
1471}
1472
1473const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
1474 return getContext().getRegisterInfo()->
1475 getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
1476}
1477
1478inline
1479MCOperand AMDGPUDisassembler::errOperand(unsigned V,
1480 const Twine& ErrMsg) const {
1481 *CommentStream << "Error: " + ErrMsg;
1482
1483 // ToDo: add support for error operands to MCInst.h
1484 // return MCOperand::createError(V);
1485 return MCOperand();
1486}
1487
1488MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
1489 return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
1490}
1491
1492inline
1493MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
1494 unsigned Val) const {
1495 const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
1496 if (Val >= RegCl.getNumRegs())
1497 return errOperand(Val, Twine(getRegClassName(RegClassID)) +
1498 ": unknown register " + Twine(Val));
1499 return createRegOperand(RegCl.getRegister(Val));
1500}
1501
1502inline
1503MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
1504 unsigned Val) const {
1505 // ToDo: SI/CI have 104 SGPRs, VI - 102
1506 // Valery: here we accept as much as we can; let the assembler sort it out
1507 int shift = 0;
1508 switch (SRegClassID) {
1509 case AMDGPU::SGPR_32RegClassID:
1510 case AMDGPU::TTMP_32RegClassID:
1511 break;
1512 case AMDGPU::SGPR_64RegClassID:
1513 case AMDGPU::TTMP_64RegClassID:
1514 shift = 1;
1515 break;
1516 case AMDGPU::SGPR_96RegClassID:
1517 case AMDGPU::TTMP_96RegClassID:
1518 case AMDGPU::SGPR_128RegClassID:
1519 case AMDGPU::TTMP_128RegClassID:
1520 // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
1521 // this bundle?
1522 case AMDGPU::SGPR_256RegClassID:
1523 case AMDGPU::TTMP_256RegClassID:
1524 // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
1525 // this bundle?
1526 case AMDGPU::SGPR_288RegClassID:
1527 case AMDGPU::TTMP_288RegClassID:
1528 case AMDGPU::SGPR_320RegClassID:
1529 case AMDGPU::TTMP_320RegClassID:
1530 case AMDGPU::SGPR_352RegClassID:
1531 case AMDGPU::TTMP_352RegClassID:
1532 case AMDGPU::SGPR_384RegClassID:
1533 case AMDGPU::TTMP_384RegClassID:
1534 case AMDGPU::SGPR_512RegClassID:
1535 case AMDGPU::TTMP_512RegClassID:
1536 shift = 2;
1537 break;
1538 // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
1539 // this bundle?
1540 default:
1541 llvm_unreachable("unhandled register class");
1542 }
1543
1544 if (Val % (1 << shift)) {
1545 *CommentStream << "Warning: " << getRegClassName(SRegClassID)
1546 << ": scalar reg isn't aligned " << Val;
1547 }
1548
1549 return createRegOperand(SRegClassID, Val >> shift);
1550}
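// Alignment example (illustrative only): for SGPR_64RegClassID the shift is
// 1, so an encoded Val of 10 selects the register pair s[10:11] (tuple index
// 5); an odd Val such as 11 triggers the misalignment warning above.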
1551
1552MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
1553 bool IsHi) const {
1554 unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1555 return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
1556}
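// Mapping example (illustrative only): RegIdx = 5 with IsHi = true yields
// VGPR_16 register index 11, i.e. the high 16-bit half v5.h; IsHi = false
// selects the low half v5.l at index 10.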
1557
1558// Decode Literals for insts which always have a literal in the encoding
1559MCOperand
1560AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
1561 if (HasLiteral) {
1562 assert(
1564 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1565 if (Literal != Val)
1566 return errOperand(Val, "More than one unique literal is illegal");
1567 }
1568 HasLiteral = true;
1569 Literal = Val;
1570 return MCOperand::createImm(Literal);
1571}
1572
1573MCOperand
1574AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const {
1575 if (HasLiteral) {
1576 if (Literal != Val)
1577 return errOperand(Val, "More than one unique literal is illegal");
1578 }
1579 HasLiteral = true;
1580 Literal = Val;
1581
1582 bool UseLit64 = Hi_32(Literal) == 0;
1583 return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
1584 LitModifier::Lit64, Literal, getContext()))
1585 : MCOperand::createImm(Literal);
1586}
1587
1588MCOperand
1589AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
1590 const MCOperandInfo &OpDesc) const {
1591 // For now all literal constants are supposed to be unsigned integer
1592 // ToDo: deal with signed/unsigned 64-bit integer constants
1593 // ToDo: deal with float/double constants
1594 if (!HasLiteral) {
1595 if (Bytes.size() < 4) {
1596 return errOperand(0, "cannot read literal, inst bytes left " +
1597 Twine(Bytes.size()));
1598 }
1599 HasLiteral = true;
1600 Literal = eatBytes<uint32_t>(Bytes);
1601 }
1602
1603 // For disassembling always assume all inline constants are available.
1604 bool HasInv2Pi = true;
1605
1606 // Invalid instruction codes may contain literals for inline-only
1607 // operands, so we support them here as well.
1608 int64_t Val = Literal;
1609 bool UseLit = false;
1610 switch (OpDesc.OperandType) {
1611 default:
1612 llvm_unreachable("Unexpected operand type!");
1613 case AMDGPU::OPERAND_REG_IMM_BF16:
1614 case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
1615 case AMDGPU::OPERAND_REG_INLINE_C_BF16:
1616 UseLit = AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
1617 break;
1618 case AMDGPU::OPERAND_REG_IMM_V2BF16:
1619 UseLit = AMDGPU::isInlinableLiteralV2BF16(Val);
1620 break;
1621 case AMDGPU::OPERAND_REG_IMM_FP16:
1622 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1623 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1624 UseLit = AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
1625 break;
1626 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1627 UseLit = AMDGPU::isInlinableLiteralV2F16(Val);
1628 break;
1629 case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
1631 break;
1632 case AMDGPU::OPERAND_KIMM16:
1633 break;
1634 case AMDGPU::OPERAND_REG_IMM_INT16:
1635 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1637 UseLit = AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi);
1638 break;
1639 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1640 UseLit = AMDGPU::isInlinableLiteralV2I16(Val);
1641 break;
1642 case AMDGPU::OPERAND_REG_IMM_FP32:
1643 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1644 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1645 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1646 case AMDGPU::OPERAND_REG_IMM_INT32:
1647 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1648 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1649 case AMDGPU::OPERAND_REG_IMM_V2FP32:
1650 case AMDGPU::OPERAND_REG_IMM_V2INT32:
1651 UseLit = AMDGPU::isInlinableLiteral32(Val, HasInv2Pi);
1652 break;
1653 case AMDGPU::OPERAND_REG_IMM_FP64:
1654 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1655 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1656 Val <<= 32;
1657 break;
1658 case AMDGPU::OPERAND_REG_IMM_INT64:
1659 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1660 UseLit = AMDGPU::isInlinableLiteral64(Val, HasInv2Pi);
1661 break;
1662 case MCOI::OPERAND_REGISTER:
1663 // TODO: Disassembling V_DUAL_FMAMK_F32_X_FMAMK_F32_gfx11 hits
1664 // decoding a literal in a position of a register operand. Give
1665 // it special handling in the caller, decodeImmOperands(), instead
1666 // of quietly allowing it here.
1667 break;
1668 }
1669
1670 return UseLit ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
1671 LitModifier::Lit, Val, getContext()))
1672 : MCOperand::createImm(Val);
1673}
1674
1675MCOperand AMDGPUDisassembler::decodeLiteral64Constant() const {
1676 assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));
1677
1678 if (!HasLiteral) {
1679 if (Bytes.size() < 8) {
1680 return errOperand(0, "cannot read literal64, inst bytes left " +
1681 Twine(Bytes.size()));
1682 }
1683 HasLiteral = true;
1684 Literal = eatBytes<uint64_t>(Bytes);
1685 }
1686
1687 bool UseLit64 = Hi_32(Literal) == 0;
1688 return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
1689 LitModifier::Lit64, Literal, getContext()))
1690 : MCOperand::createImm(Literal);
1691}
1692
1693MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
1694 using namespace AMDGPU::EncValues;
1695
1696 assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1697 return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
1698 (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
1699 (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1700 // Cast prevents negative overflow.
1701}
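// Worked values (illustrative only): Imm = 128 -> 0, Imm = 192 -> 64
// (INLINE_INTEGER_C_POSITIVE_MAX), Imm = 193 -> -1 and Imm = 208 -> -16,
// i.e. encodings 128..208 map onto the integer range -16..64.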
1702
1703static int64_t getInlineImmVal32(unsigned Imm) {
1704 switch (Imm) {
1705 case 240:
1706 return llvm::bit_cast<uint32_t>(0.5f);
1707 case 241:
1708 return llvm::bit_cast<uint32_t>(-0.5f);
1709 case 242:
1710 return llvm::bit_cast<uint32_t>(1.0f);
1711 case 243:
1712 return llvm::bit_cast<uint32_t>(-1.0f);
1713 case 244:
1714 return llvm::bit_cast<uint32_t>(2.0f);
1715 case 245:
1716 return llvm::bit_cast<uint32_t>(-2.0f);
1717 case 246:
1718 return llvm::bit_cast<uint32_t>(4.0f);
1719 case 247:
1720 return llvm::bit_cast<uint32_t>(-4.0f);
1721 case 248: // 1 / (2 * PI)
1722 return 0x3e22f983;
1723 default:
1724 llvm_unreachable("invalid fp inline imm");
1725 }
1726}
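// Sanity check (illustrative only): encoding 240 yields
// llvm::bit_cast<uint32_t>(0.5f) == 0x3F000000, while 248 is the one entry
// with no exact float constant (1/(2*pi) rounded to 0x3E22F983).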
1727
1728static int64_t getInlineImmVal64(unsigned Imm) {
1729 switch (Imm) {
1730 case 240:
1731 return llvm::bit_cast<uint64_t>(0.5);
1732 case 241:
1733 return llvm::bit_cast<uint64_t>(-0.5);
1734 case 242:
1735 return llvm::bit_cast<uint64_t>(1.0);
1736 case 243:
1737 return llvm::bit_cast<uint64_t>(-1.0);
1738 case 244:
1739 return llvm::bit_cast<uint64_t>(2.0);
1740 case 245:
1741 return llvm::bit_cast<uint64_t>(-2.0);
1742 case 246:
1743 return llvm::bit_cast<uint64_t>(4.0);
1744 case 247:
1745 return llvm::bit_cast<uint64_t>(-4.0);
1746 case 248: // 1 / (2 * PI)
1747 return 0x3fc45f306dc9c882;
1748 default:
1749 llvm_unreachable("invalid fp inline imm");
1750 }
1751}
1752
1753static int64_t getInlineImmValF16(unsigned Imm) {
1754 switch (Imm) {
1755 case 240:
1756 return 0x3800;
1757 case 241:
1758 return 0xB800;
1759 case 242:
1760 return 0x3C00;
1761 case 243:
1762 return 0xBC00;
1763 case 244:
1764 return 0x4000;
1765 case 245:
1766 return 0xC000;
1767 case 246:
1768 return 0x4400;
1769 case 247:
1770 return 0xC400;
1771 case 248: // 1 / (2 * PI)
1772 return 0x3118;
1773 default:
1774 llvm_unreachable("invalid fp inline imm");
1775 }
1776}
1777
1778static int64_t getInlineImmValBF16(unsigned Imm) {
1779 switch (Imm) {
1780 case 240:
1781 return 0x3F00;
1782 case 241:
1783 return 0xBF00;
1784 case 242:
1785 return 0x3F80;
1786 case 243:
1787 return 0xBF80;
1788 case 244:
1789 return 0x4000;
1790 case 245:
1791 return 0xC000;
1792 case 246:
1793 return 0x4080;
1794 case 247:
1795 return 0xC080;
1796 case 248: // 1 / (2 * PI)
1797 return 0x3E22;
1798 default:
1799 llvm_unreachable("invalid fp inline imm");
1800 }
1801}
1802
1803unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
1804 using namespace AMDGPU;
1805
1806 switch (Width) {
1807 case 16:
1808 case 32:
1809 return VGPR_32RegClassID;
1810 case 64:
1811 return VReg_64RegClassID;
1812 case 96:
1813 return VReg_96RegClassID;
1814 case 128:
1815 return VReg_128RegClassID;
1816 case 160:
1817 return VReg_160RegClassID;
1818 case 192:
1819 return VReg_192RegClassID;
1820 case 256:
1821 return VReg_256RegClassID;
1822 case 288:
1823 return VReg_288RegClassID;
1824 case 320:
1825 return VReg_320RegClassID;
1826 case 352:
1827 return VReg_352RegClassID;
1828 case 384:
1829 return VReg_384RegClassID;
1830 case 512:
1831 return VReg_512RegClassID;
1832 case 1024:
1833 return VReg_1024RegClassID;
1834 }
1835 llvm_unreachable("Invalid register width!");
1836}
1837
1838unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
1839 using namespace AMDGPU;
1840
1841 switch (Width) {
1842 case 16:
1843 case 32:
1844 return AGPR_32RegClassID;
1845 case 64:
1846 return AReg_64RegClassID;
1847 case 96:
1848 return AReg_96RegClassID;
1849 case 128:
1850 return AReg_128RegClassID;
1851 case 160:
1852 return AReg_160RegClassID;
1853 case 256:
1854 return AReg_256RegClassID;
1855 case 288:
1856 return AReg_288RegClassID;
1857 case 320:
1858 return AReg_320RegClassID;
1859 case 352:
1860 return AReg_352RegClassID;
1861 case 384:
1862 return AReg_384RegClassID;
1863 case 512:
1864 return AReg_512RegClassID;
1865 case 1024:
1866 return AReg_1024RegClassID;
1867 }
1868 llvm_unreachable("Invalid register width!");
1869}
1870
1871unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
1872 using namespace AMDGPU;
1873
1874 switch (Width) {
1875 case 16:
1876 case 32:
1877 return SGPR_32RegClassID;
1878 case 64:
1879 return SGPR_64RegClassID;
1880 case 96:
1881 return SGPR_96RegClassID;
1882 case 128:
1883 return SGPR_128RegClassID;
1884 case 160:
1885 return SGPR_160RegClassID;
1886 case 256:
1887 return SGPR_256RegClassID;
1888 case 288:
1889 return SGPR_288RegClassID;
1890 case 320:
1891 return SGPR_320RegClassID;
1892 case 352:
1893 return SGPR_352RegClassID;
1894 case 384:
1895 return SGPR_384RegClassID;
1896 case 512:
1897 return SGPR_512RegClassID;
1898 }
1899 llvm_unreachable("Invalid register width!");
1900}
1901
1902unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
1903 using namespace AMDGPU;
1904
1905 switch (Width) {
1906 case 16:
1907 case 32:
1908 return TTMP_32RegClassID;
1909 case 64:
1910 return TTMP_64RegClassID;
1911 case 128:
1912 return TTMP_128RegClassID;
1913 case 256:
1914 return TTMP_256RegClassID;
1915 case 288:
1916 return TTMP_288RegClassID;
1917 case 320:
1918 return TTMP_320RegClassID;
1919 case 352:
1920 return TTMP_352RegClassID;
1921 case 384:
1922 return TTMP_384RegClassID;
1923 case 512:
1924 return TTMP_512RegClassID;
1925 }
1926 llvm_unreachable("Invalid register width!");
1927}
1928
1929int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1930 using namespace AMDGPU::EncValues;
1931
1932 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1933 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1934
1935 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1936}
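// Editorial example, not part of the LLVM source: on gfx9+ the trap
// temporaries start at encoding 108 (TTMP_GFX9PLUS_MIN), so getTTmpIdx(110)
// returns 2 (ttmp2), while any Val outside [TTmpMin, TTmpMax] returns -1.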
1937
1938 MCOperand AMDGPUDisassembler::decodeSrcOp(const MCInst &Inst, unsigned Width,
1939 unsigned Val) const {
1940 using namespace AMDGPU::EncValues;
1941
1942 assert(Val < 1024); // enum10
1943
1944 bool IsAGPR = Val & 512;
1945 Val &= 511;
1946
1947 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1948 return createRegOperand(IsAGPR ? getAgprClassId(Width)
1949 : getVgprClassId(Width), Val - VGPR_MIN);
1950 }
1951 return decodeNonVGPRSrcOp(Inst, Width, Val & 0xFF);
1952}
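// Editorial example, not part of the LLVM source: in this 10-bit source
// encoding, bit 9 selects the accumulator file and the masked value 256..511
// selects a vector register, so Val = 261 (256 + 5) decodes to v5 and
// Val = 773 (512 + 256 + 5) decodes to a5; values whose low 9 bits are below
// 256 fall through to decodeNonVGPRSrcOp() below.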
1953
1954 MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const MCInst &Inst,
1955 unsigned Width,
1956 unsigned Val) const {
1957 // Cases where Val{8} is 1 (VGPR, AGPR, or true16 VGPR) should have been
1958 // decoded earlier.
1959 assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1960 using namespace AMDGPU::EncValues;
1961
1962 if (Val <= SGPR_MAX) {
1963 // "SGPR_MIN <= Val" is always true and causes compilation warning.
1964 static_assert(SGPR_MIN == 0);
1965 return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
1966 }
1967
1968 int TTmpIdx = getTTmpIdx(Val);
1969 if (TTmpIdx >= 0) {
1970 return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
1971 }
1972
1973 if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
1974 (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
1975 Val == LITERAL_CONST)
1976 return MCOperand::createImm(Val);
1977
1978 if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
1979 return decodeLiteral64Constant();
1980 }
1981
1982 switch (Width) {
1983 case 32:
1984 case 16:
1985 return decodeSpecialReg32(Val);
1986 case 64:
1987 return decodeSpecialReg64(Val);
1988 case 96:
1989 case 128:
1990 case 256:
1991 case 512:
1992 return decodeSpecialReg96Plus(Val);
1993 default:
1994 llvm_unreachable("unexpected immediate type");
1995 }
1996}
1997
1998// Bit 0 of DstY isn't stored in the instruction, because it's always the
1999// opposite of bit 0 of DstX.
2000 MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
2001 unsigned Val) const {
2002 int VDstXInd =
2003 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
2004 assert(VDstXInd != -1);
2005 assert(Inst.getOperand(VDstXInd).isReg());
2006 unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
2007 Val |= ~XDstReg & 1;
2008 return createRegOperand(getVgprClassId(32), Val);
2009}
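// Editorial example, not part of the LLVM source: if vdstX decoded to v2
// (encoding bit 0 clear), the OR above forces bit 0 of the DstY encoding, so
// a stored Val of 4 becomes v5; the two VOPD destinations always land on
// opposite parities.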
2010
2011 MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
2012 using namespace AMDGPU;
2013
2014 switch (Val) {
2015 // clang-format off
2016 case 102: return createRegOperand(FLAT_SCR_LO);
2017 case 103: return createRegOperand(FLAT_SCR_HI);
2018 case 104: return createRegOperand(XNACK_MASK_LO);
2019 case 105: return createRegOperand(XNACK_MASK_HI);
2020 case 106: return createRegOperand(VCC_LO);
2021 case 107: return createRegOperand(VCC_HI);
2022 case 108: return createRegOperand(TBA_LO);
2023 case 109: return createRegOperand(TBA_HI);
2024 case 110: return createRegOperand(TMA_LO);
2025 case 111: return createRegOperand(TMA_HI);
2026 case 124:
2027 return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
2028 case 125:
2029 return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
2030 case 126: return createRegOperand(EXEC_LO);
2031 case 127: return createRegOperand(EXEC_HI);
2032 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
2033 case 231: return createRegOperand(SRC_FLAT_SCRATCH_BASE_HI);
2034 case 235: return createRegOperand(SRC_SHARED_BASE_LO);
2035 case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
2036 case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
2037 case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
2038 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2039 case 251: return createRegOperand(SRC_VCCZ);
2040 case 252: return createRegOperand(SRC_EXECZ);
2041 case 253: return createRegOperand(SRC_SCC);
2042 case 254: return createRegOperand(LDS_DIRECT);
2043 default: break;
2044 // clang-format on
2045 }
2046 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2047}
2048
2049 MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
2050 using namespace AMDGPU;
2051
2052 switch (Val) {
2053 case 102: return createRegOperand(FLAT_SCR);
2054 case 104: return createRegOperand(XNACK_MASK);
2055 case 106: return createRegOperand(VCC);
2056 case 108: return createRegOperand(TBA);
2057 case 110: return createRegOperand(TMA);
2058 case 124:
2059 if (isGFX11Plus())
2060 return createRegOperand(SGPR_NULL);
2061 break;
2062 case 125:
2063 if (!isGFX11Plus())
2064 return createRegOperand(SGPR_NULL);
2065 break;
2066 case 126: return createRegOperand(EXEC);
2067 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
2068 case 235: return createRegOperand(SRC_SHARED_BASE);
2069 case 236: return createRegOperand(SRC_SHARED_LIMIT);
2070 case 237: return createRegOperand(SRC_PRIVATE_BASE);
2071 case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
2072 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2073 case 251: return createRegOperand(SRC_VCCZ);
2074 case 252: return createRegOperand(SRC_EXECZ);
2075 case 253: return createRegOperand(SRC_SCC);
2076 default: break;
2077 }
2078 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2079}
2080
2081 MCOperand AMDGPUDisassembler::decodeSpecialReg96Plus(unsigned Val) const {
2082 using namespace AMDGPU;
2083
2084 switch (Val) {
2085 case 124:
2086 if (isGFX11Plus())
2087 return createRegOperand(SGPR_NULL);
2088 break;
2089 case 125:
2090 if (!isGFX11Plus())
2091 return createRegOperand(SGPR_NULL);
2092 break;
2093 default:
2094 break;
2095 }
2096 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2097}
2098
2099 MCOperand AMDGPUDisassembler::decodeSDWASrc(unsigned Width,
2100 const unsigned Val) const {
2101 using namespace AMDGPU::SDWA;
2102 using namespace AMDGPU::EncValues;
2103
2104 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
2105 STI.hasFeature(AMDGPU::FeatureGFX10)) {
2106 // XXX: The cast to int is needed to avoid a compiler warning that the
2107 // comparison with an unsigned value is always true.
2108 if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
2109 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
2110 return createRegOperand(getVgprClassId(Width),
2111 Val - SDWA9EncValues::SRC_VGPR_MIN);
2112 }
2113 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
2114 Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
2115 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
2116 return createSRegOperand(getSgprClassId(Width),
2117 Val - SDWA9EncValues::SRC_SGPR_MIN);
2118 }
2119 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
2120 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
2121 return createSRegOperand(getTtmpClassId(Width),
2122 Val - SDWA9EncValues::SRC_TTMP_MIN);
2123 }
2124
2125 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
2126
2127 if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
2128 (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
2129 return MCOperand::createImm(SVal);
2130
2131 return decodeSpecialReg32(SVal);
2132 }
2133 if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
2134 return createRegOperand(getVgprClassId(Width), Val);
2135 llvm_unreachable("unsupported target");
2136}
2137
2138 MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
2139 return decodeSDWASrc(16, Val);
2140}
2141
2142 MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
2143 return decodeSDWASrc(32, Val);
2144}
2145
2146 MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
2147 using namespace AMDGPU::SDWA;
2148
2149 assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
2150 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
2151 "SDWAVopcDst should be present only on GFX9+");
2152
2153 bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);
2154
2155 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
2156 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
2157
2158 int TTmpIdx = getTTmpIdx(Val);
2159 if (TTmpIdx >= 0) {
2160 auto TTmpClsId = getTtmpClassId(IsWave32 ? 32 : 64);
2161 return createSRegOperand(TTmpClsId, TTmpIdx);
2162 }
2163 if (Val > SGPR_MAX) {
2164 return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
2165 }
2166 return createSRegOperand(getSgprClassId(IsWave32 ? 32 : 64), Val);
2167 }
2168 return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
2169}
2170
2171 MCOperand AMDGPUDisassembler::decodeBoolReg(const MCInst &Inst,
2172 unsigned Val) const {
2173 return STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
2174 ? decodeSrcOp(Inst, 32, Val)
2175 : decodeSrcOp(Inst, 64, Val);
2176}
2177
2178 MCOperand AMDGPUDisassembler::decodeSplitBarrier(const MCInst &Inst,
2179 unsigned Val) const {
2180 return decodeSrcOp(Inst, 32, Val);
2181}
2182
2183 MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
2184 if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
2185 return MCOperand();
2186 return MCOperand::createImm(Val);
2187}
2188
2189 MCOperand AMDGPUDisassembler::decodeVersionImm(unsigned Imm) const {
2190 using VersionField = AMDGPU::EncodingField<7, 0>;
2191 using W64Bit = AMDGPU::EncodingBit<13>;
2192 using W32Bit = AMDGPU::EncodingBit<14>;
2193 using MDPBit = AMDGPU::EncodingBit<15>;
2194 using Encoding = AMDGPU::EncodingFields<VersionField, W64Bit, W32Bit, MDPBit>;
2195
2196 auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
2197
2198 // Decode into a plain immediate if any unused bits are raised.
2199 if (Encoding::encode(Version, W64, W32, MDP) != Imm)
2200 return MCOperand::createImm(Imm);
2201
2202 const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
2203 const auto *I = find_if(
2204 Versions, [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
2205 return V.Code == Version;
2206 });
2207 MCContext &Ctx = getContext();
2208 const MCExpr *E;
2209 if (I == Versions.end())
2210 E = MCConstantExpr::create(Version, Ctx);
2211 else
2212 E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
2213
2214 if (W64)
2215 E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
2216 if (W32)
2217 E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
2218 if (MDP)
2219 E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
2220
2221 return MCOperand::createExpr(E);
2222}
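// Editorial example, not part of the LLVM source: an Imm whose low byte
// matches one of the getGFXVersions() codes and which has only bit 14 raised
// disassembles to the expression <version symbol> | UC_VERSION_W32_BIT; any
// raised bit outside the four fields decoded above makes the operand fall
// back to a plain immediate.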
2223
2224 bool AMDGPUDisassembler::isVI() const {
2225 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2226}
2227
2228 bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
2229
2230 bool AMDGPUDisassembler::isGFX90A() const {
2231 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2232}
2233
2234 bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
2235
2236 bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
2237
2238 bool AMDGPUDisassembler::isGFX10Plus() const {
2239 return AMDGPU::isGFX10Plus(STI);
2240 }
2241
2242 bool AMDGPUDisassembler::isGFX11() const {
2243 return STI.hasFeature(AMDGPU::FeatureGFX11);
2244}
2245
2246 bool AMDGPUDisassembler::isGFX11Plus() const {
2247 return AMDGPU::isGFX11Plus(STI);
2248 }
2249
2250 bool AMDGPUDisassembler::isGFX12() const {
2251 return STI.hasFeature(AMDGPU::FeatureGFX12);
2252}
2253
2254 bool AMDGPUDisassembler::isGFX12Plus() const {
2255 return AMDGPU::isGFX12Plus(STI);
2256 }
2257
2258 bool AMDGPUDisassembler::isGFX1250() const { return AMDGPU::isGFX1250(STI); }
2259
2260 bool AMDGPUDisassembler::isGFX1250Plus() const {
2261 return AMDGPU::isGFX1250Plus(STI);
2262 }
2263
2264 bool AMDGPUDisassembler::isGFX13() const { return AMDGPU::isGFX13(STI); }
2265
2266 bool AMDGPUDisassembler::isGFX13Plus() const {
2267 return AMDGPU::isGFX13Plus(STI);
2268 }
2269
2270 bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
2271 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2272}
2273
2274 bool AMDGPUDisassembler::hasKernargPreload() const {
2275 return AMDGPU::hasKernargPreload(STI);
2276 }
2277
2278//===----------------------------------------------------------------------===//
2279// AMDGPU specific symbol handling
2280//===----------------------------------------------------------------------===//
2281
2282/// Print a string describing the reserved bit range specified by Mask with
2283/// offset BaseBytes for use in error comments. Mask is a single continuous
2284/// range of 1s surrounded by zeros. The format here is meant to align with the
2285/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
2286static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
2287 SmallString<32> Result;
2288 raw_svector_ostream S(Result);
2289
2290 int TrailingZeros = llvm::countr_zero(Mask);
2291 int PopCount = llvm::popcount(Mask);
2292
2293 if (PopCount == 1) {
2294 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2295 } else {
2296 S << "bits in range ("
2297 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
2298 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2299 }
2300
2301 return Result;
2302}
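// Editorial example, not part of the LLVM source: getBitRangeFromMask(0x30, 0)
// produces "bits in range (5:4)" and getBitRangeFromMask(0x1, 4) produces
// "bit (32)", matching the bit positions in the AMDGPUUsage tables.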
2303
2304#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
2305#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2306 do { \
2307 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
2308 } while (0)
2309#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
2310 do { \
2311 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
2312 << GET_FIELD(MASK) << '\n'; \
2313 } while (0)
2314
2315#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
2316 do { \
2317 if (FourByteBuffer & (MASK)) { \
2318 return createStringError(std::errc::invalid_argument, \
2319 "kernel descriptor " DESC \
2320 " reserved %s set" MSG, \
2321 getBitRangeFromMask((MASK), 0).c_str()); \
2322 } \
2323 } while (0)
2324
2325#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
2326#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
2327 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
2328#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
2329 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
2330#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
2331 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2332
2333// NOLINTNEXTLINE(readability-identifier-naming)
2334 Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
2335 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2336 using namespace amdhsa;
2337 StringRef Indent = "\t";
2338
2339 // We cannot accurately backward compute #VGPRs used from
2340 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
2341 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
2342 // simply calculate the inverse of what the assembler does.
2343
2344 uint32_t GranulatedWorkitemVGPRCount =
2345 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
2346
2347 uint32_t NextFreeVGPR =
2348 (GranulatedWorkitemVGPRCount + 1) *
2349 AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
2350
2351 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
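// Editorial example, not part of the LLVM source: on a wave32 gfx10+ target
// the VGPR encoding granule is 8, so GRANULATED_WORKITEM_VGPR_COUNT == 3
// round-trips to ".amdhsa_next_free_vgpr 32", i.e. (3 + 1) * 8.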
2352
2353 // We cannot backward compute values used to calculate
2354 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
2355 // directives can't be computed:
2356 // .amdhsa_reserve_vcc
2357 // .amdhsa_reserve_flat_scratch
2358 // .amdhsa_reserve_xnack_mask
2359 // They take their respective default values if not specified in the assembly.
2360 //
2361 // GRANULATED_WAVEFRONT_SGPR_COUNT
2362 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
2363 //
2364 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
2365 // are set to 0. So while disassembling we consider that:
2366 //
2367 // GRANULATED_WAVEFRONT_SGPR_COUNT
2368 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
2369 //
2370 // The disassembler cannot recover the original values of those 3 directives.
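// Editorial example, not part of the LLVM source: assuming the usual SGPR
// encoding granule of 8, GRANULATED_WAVEFRONT_SGPR_COUNT == 5 is printed
// back as ".amdhsa_next_free_sgpr 48", i.e. (5 + 1) * 8, with the three
// unrecoverable reserve directives pinned to explicit values below.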
2371
2372 uint32_t GranulatedWavefrontSGPRCount =
2373 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
2374
2375 if (isGFX10Plus())
2376 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2377 "must be zero on gfx10+");
2378
2379 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
2380 AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
2381
2382 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
2383 if (!hasArchitectedFlatScratch())
2384 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
2385 bool ReservedXnackMask = STI.hasFeature(AMDGPU::FeatureXNACK);
2386 assert(!ReservedXnackMask || STI.hasFeature(AMDGPU::FeatureSupportsXNACK));
2387 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << ReservedXnackMask
2388 << '\n';
2389 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
2390
2391 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
2392
2393 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
2394 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
2395 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
2396 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
2397 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
2398 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
2399 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
2400 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
2401
2402 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
2403
2404 if (!isGFX12Plus())
2405 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
2406 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
2407
2408 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
2409
2410 if (!isGFX12Plus())
2411 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
2412 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
2413
2414 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
2415 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
2416
2417 // Bits [26].
2418 if (isGFX9Plus()) {
2419 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
2420 } else {
2421 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
2422 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
2423 }
2424
2425 // Bits [27].
2426 if (isGFX1250Plus()) {
2427 PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
2428 COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
2429 } else {
2430 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX120_RESERVED1,
2431 "COMPUTE_PGM_RSRC1");
2432 }
2433
2434 // Bits [28].
2435 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED2, "COMPUTE_PGM_RSRC1");
2436
2437 // Bits [29-31].
2438 if (isGFX10Plus()) {
2439 // WGP_MODE is not available on GFX1250.
2440 if (!isGFX1250Plus()) {
2441 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
2442 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
2443 }
2444 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
2445 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
2446 } else {
2447 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED3,
2448 "COMPUTE_PGM_RSRC1");
2449 }
2450
2451 if (isGFX12Plus())
2452 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
2453 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2454
2455 return true;
2456}
2457
2458// NOLINTNEXTLINE(readability-identifier-naming)
2459 Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
2460 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2461 using namespace amdhsa;
2462 StringRef Indent = "\t";
2463 if (isGFX12Plus())
2464 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2465 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2466 else
2467 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2468 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2469 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2470 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2471 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2472 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2473 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2474 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2475 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2476 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2477 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2478 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2479
2480 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2481 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2482 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2483
2485 ".amdhsa_exception_fp_ieee_invalid_op",
2486 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2487 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2488 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2490 ".amdhsa_exception_fp_ieee_div_zero",
2491 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2492 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2493 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2494 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2495 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2496 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2497 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2498 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2499 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2500
2501 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2502
2503 return true;
2504}
2505
2506// NOLINTNEXTLINE(readability-identifier-naming)
2507 Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
2508 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2509 using namespace amdhsa;
2510 StringRef Indent = "\t";
2511 if (isGFX90A()) {
2512 KdStream << Indent << ".amdhsa_accum_offset "
2513 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2514 << '\n';
2515
2516 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2517
2518 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2519 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2520 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2521 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2522 } else if (isGFX10Plus()) {
2523 // Bits [0-3].
2524 if (!isGFX12Plus()) {
2525 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2526 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2527 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2528 } else {
2530 "SHARED_VGPR_COUNT",
2531 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2532 }
2533 } else {
2534 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2535 "COMPUTE_PGM_RSRC3",
2536 "must be zero on gfx12+");
2537 }
2538
2539 // Bits [4-11].
2540 if (isGFX11()) {
2541 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2542 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2543 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2544 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2545 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2546 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2547 } else if (isGFX12Plus()) {
2548 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2549 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2550 } else {
2551 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2552 "COMPUTE_PGM_RSRC3",
2553 "must be zero on gfx10");
2554 }
2555
2556 // Bits [12].
2557 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2558 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2559
2560 // Bits [13].
2561 if (isGFX12Plus()) {
2562 PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
2563 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2564 } else {
2565 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2566 "COMPUTE_PGM_RSRC3",
2567 "must be zero on gfx10 or gfx11");
2568 }
2569
2570 // Bits [14-21].
2571 if (isGFX1250Plus()) {
2572 PRINT_DIRECTIVE(".amdhsa_named_barrier_count",
2573 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
2575 "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
2576 PRINT_PSEUDO_DIRECTIVE_COMMENT("TCP_SPLIT",
2577 COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
2579 "ENABLE_DIDT_THROTTLE",
2580 COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
2581 } else {
2582 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX120_RESERVED4,
2583 "COMPUTE_PGM_RSRC3",
2584 "must be zero on gfx10+");
2585 }
2586
2587 // Bits [22-30].
2588 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED5,
2589 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2590
2591 // Bits [31].
2592 if (isGFX11Plus()) {
2593 PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
2594 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2595 } else {
2596 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED6,
2597 "COMPUTE_PGM_RSRC3",
2598 "must be zero on gfx10");
2599 }
2600 } else if (FourByteBuffer) {
2601 return createStringError(
2602 std::errc::invalid_argument,
2603 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2604 }
2605 return true;
2606}
2607#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2608#undef PRINT_DIRECTIVE
2609#undef GET_FIELD
2610#undef CHECK_RESERVED_BITS_IMPL
2611#undef CHECK_RESERVED_BITS
2612#undef CHECK_RESERVED_BITS_MSG
2613#undef CHECK_RESERVED_BITS_DESC
2614#undef CHECK_RESERVED_BITS_DESC_MSG
2615
2616/// Create an error object to return from onSymbolStart for reserved kernel
2617/// descriptor bits being set.
2618static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2619 const char *Msg = "") {
2620 return createStringError(
2621 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2622 getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2623}
2624
2625/// Create an error object to return from onSymbolStart for reserved kernel
2626/// descriptor bytes being set.
2627static Error createReservedKDBytesError(unsigned BaseInBytes,
2628 unsigned WidthInBytes) {
2629 // Create an error comment in the same format as the "Kernel Descriptor"
2630 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2631 return createStringError(
2632 std::errc::invalid_argument,
2633 "kernel descriptor reserved bits in range (%u:%u) set",
2634 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2635}
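// Editorial example, not part of the LLVM source:
// createReservedKDBytesError(12, 4) produces "kernel descriptor reserved
// bits in range (127:96) set", covering the four reserved bytes that follow
// the kernarg size field.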
2636
2637 Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
2638 DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2639 raw_string_ostream &KdStream) const {
2640#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2641 do { \
2642 KdStream << Indent << DIRECTIVE " " \
2643 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2644 } while (0)
2645
2646 uint16_t TwoByteBuffer = 0;
2647 uint32_t FourByteBuffer = 0;
2648
2649 StringRef ReservedBytes;
2650 StringRef Indent = "\t";
2651
2652 assert(Bytes.size() == 64);
2653 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2654
2655 switch (Cursor.tell()) {
2656 case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2657 FourByteBuffer = DE.getU32(Cursor);
2658 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2659 << '\n';
2660 return true;
2661
2662 case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2663 FourByteBuffer = DE.getU32(Cursor);
2664 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2665 << FourByteBuffer << '\n';
2666 return true;
2667
2668 case amdhsa::KERNARG_SIZE_OFFSET:
2669 FourByteBuffer = DE.getU32(Cursor);
2670 KdStream << Indent << ".amdhsa_kernarg_size "
2671 << FourByteBuffer << '\n';
2672 return true;
2673
2674 case amdhsa::RESERVED0_OFFSET:
2675 // 4 reserved bytes, must be 0.
2676 ReservedBytes = DE.getBytes(Cursor, 4);
2677 for (int I = 0; I < 4; ++I) {
2678 if (ReservedBytes[I] != 0)
2679 return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4);
2680 }
2681 return true;
2682
2683 case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2684 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2685 // So far no directive controls this for Code Object V3, so simply skip for
2686 // disassembly.
2687 DE.skip(Cursor, 8);
2688 return true;
2689
2690 case amdhsa::RESERVED1_OFFSET:
2691 // 20 reserved bytes, must be 0.
2692 ReservedBytes = DE.getBytes(Cursor, 20);
2693 for (int I = 0; I < 20; ++I) {
2694 if (ReservedBytes[I] != 0)
2695 return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20);
2696 }
2697 return true;
2698
2699 case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2700 FourByteBuffer = DE.getU32(Cursor);
2701 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2702
2703 case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2704 FourByteBuffer = DE.getU32(Cursor);
2705 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2706
2707 case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2708 FourByteBuffer = DE.getU32(Cursor);
2709 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2710
2711 case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2712 using namespace amdhsa;
2713 TwoByteBuffer = DE.getU16(Cursor);
2714
2715 if (!hasArchitectedFlatScratch())
2716 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2717 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2718 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2719 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2720 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2721 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2722 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2723 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2724 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2725 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2726 if (!hasArchitectedFlatScratch())
2727 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2728 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2729 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2730 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2731
2732 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2733 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2734 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2735
2736 // Reserved for GFX9
2737 if (isGFX9() &&
2738 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2739 return createReservedKDBitsError(
2740 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2741 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2742 }
2743 if (isGFX10Plus()) {
2744 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2745 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2746 }
2747
2748 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2749 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2750 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2751
2752 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2753 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2754 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2755 }
2756
2757 return true;
2758
2759 case amdhsa::KERNARG_PRELOAD_OFFSET:
2760 using namespace amdhsa;
2761 TwoByteBuffer = DE.getU16(Cursor);
2762 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2763 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2764 KERNARG_PRELOAD_SPEC_LENGTH);
2765 }
2766
2767 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2768 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2769 KERNARG_PRELOAD_SPEC_OFFSET);
2770 }
2771 return true;
2772
2773 case amdhsa::RESERVED3_OFFSET:
2774 // 4 bytes from here are reserved, must be 0.
2775 ReservedBytes = DE.getBytes(Cursor, 4);
2776 for (int I = 0; I < 4; ++I) {
2777 if (ReservedBytes[I] != 0)
2778 return createReservedKDBytesError(amdhsa::RESERVED3_OFFSET, 4);
2779 }
2780 return true;
2781
2782 default:
2783 llvm_unreachable("Unhandled index. Case statements cover everything.");
2784 return true;
2785 }
2786#undef PRINT_DIRECTIVE
2787}
2788
2789 Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
2790 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2791
2792 // CP microcode requires the kernel descriptor to be 64-byte aligned.
2793 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2794 return createStringError(std::errc::invalid_argument,
2795 "kernel descriptor must be 64-byte aligned");
2796
2797 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2798 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2799 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2800 // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
2801 // when required.
2802 if (isGFX10Plus()) {
2803 uint16_t KernelCodeProperties =
2804 support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2805 llvm::endianness::little);
2806 EnableWavefrontSize32 =
2807 AMDHSA_BITS_GET(KernelCodeProperties,
2808 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2809 }
2810
2811 std::string Kd;
2812 raw_string_ostream KdStream(Kd);
2813 KdStream << ".amdhsa_kernel " << KdName << '\n';
2814
2816 while (C && C.tell() < Bytes.size()) {
2817 Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2818
2819 cantFail(C.takeError());
2820
2821 if (!Res)
2822 return Res;
2823 }
2824 KdStream << ".end_amdhsa_kernel\n";
2825 outs() << KdStream.str();
2826 return true;
2827}
2828
2829 Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
2830 uint64_t &Size,
2831 ArrayRef<uint8_t> Bytes,
2832 uint64_t Address) const {
2833 // Right now only kernel descriptor needs to be handled.
2834 // We ignore all other symbols for target specific handling.
2835 // TODO:
2836 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2837 // Object V2 and V3 when symbols are marked protected.
2838
2839 // amd_kernel_code_t for Code Object V2.
2840 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2841 Size = 256;
2842 return createStringError(std::errc::invalid_argument,
2843 "code object v2 is not supported");
2844 }
2845
2846 // Code Object V3 kernel descriptors.
2847 StringRef Name = Symbol.Name;
2848 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2849 Size = 64; // Size = 64 regardless of success or failure.
2850 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2851 }
2852
2853 return false;
2854}
2855
2856const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2857 int64_t Val) {
2858 MCContext &Ctx = getContext();
2859 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2860 // Note: only set the value to Val on a new symbol, in case a disassembler
2861 // has already been initialized in this context.
2862 if (!Sym->isVariable()) {
2863 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2864 } else {
2865 int64_t Res = ~Val;
2866 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2867 if (!Valid || Res != Val)
2868 Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
2869 }
2870 return MCSymbolRefExpr::create(Sym, Ctx);
2871}
2872
2873 bool AMDGPUDisassembler::isBufferInstruction(const MCInst &MI) const {
2874 const uint64_t TSFlags = MCII->get(MI.getOpcode()).TSFlags;
2875
2876 // Check for MUBUF and MTBUF instructions
2877 if (TSFlags & (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))
2878 return true;
2879
2880 // Check for SMEM buffer instructions (S_BUFFER_* instructions)
2881 if ((TSFlags & SIInstrFlags::SMRD) && AMDGPU::getSMEMIsBuffer(MI.getOpcode()))
2882 return true;
2883
2884 return false;
2885}
2886
2887//===----------------------------------------------------------------------===//
2888// AMDGPUSymbolizer
2889//===----------------------------------------------------------------------===//
2890
2891// Try to find symbol name for specified label
2892 bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2893 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2894 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2895 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2896
2897 if (!IsBranch) {
2898 return false;
2899 }
2900
2901 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2902 if (!Symbols)
2903 return false;
2904
2905 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2906 return Val.Addr == static_cast<uint64_t>(Value) &&
2907 Val.Type == ELF::STT_NOTYPE;
2908 });
2909 if (Result != Symbols->end()) {
2910 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2911 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2912 Inst.addOperand(MCOperand::createExpr(Add));
2913 return true;
2914 }
2915 // Add to list of referenced addresses, so caller can synthesize a label.
2916 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2917 return false;
2918}
2919
2920 void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2921 int64_t Value,
2922 uint64_t Address) {
2923 llvm_unreachable("unimplemented");
2924}
2925
2926//===----------------------------------------------------------------------===//
2927// Initialization
2928//===----------------------------------------------------------------------===//
2929
2930 static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
2931 LLVMOpInfoCallback /*GetOpInfo*/,
2932 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2933 void *DisInfo,
2934 MCContext *Ctx,
2935 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2936 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2937}
2938
2939 static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2940 const MCSubtargetInfo &STI,
2941 MCContext &Ctx) {
2942 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2943}
2944
2945 extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
2946 LLVMInitializeAMDGPUDisassembler() {
2947 TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
2948 createAMDGPUDisassembler);
2949 TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
2950 createAMDGPUSymbolizer);
2951 }