//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18
22#include "SIDefines.h"
23#include "SIRegisterInfo.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCContext.h"
31#include "llvm/MC/MCDecoder.h"
33#include "llvm/MC/MCExpr.h"
34#include "llvm/MC/MCInstrDesc.h"
40
using namespace llvm;
using namespace llvm::MCD;

#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

static int64_t getInlineImmValF16(unsigned Imm);
static int64_t getInlineImmValBF16(unsigned Imm);
static int64_t getInlineImmVal32(unsigned Imm);
static int64_t getInlineImmVal64(unsigned Imm);

AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx, MCInstrInfo const *MCII)
    : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
      MAI(*Ctx.getAsmInfo()),
      HwModeRegClass(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)),
      TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
    reportFatalUsageError("disassembly not yet supported for subtarget");

  for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
    createConstantSymbolExpr(Symbol, Code);

  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
}

void AMDGPUDisassembler::setABIVersion(unsigned Version) {
  CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(Version);
}
static DecodeStatus
addOperand(MCInst &Inst, const MCOperand& Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::Fail;
}

static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                AMDGPU::OpName Name) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);
  if (OpIdx != -1) {
    auto *I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}

static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

  // Our branches take a simm16.
  int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}

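// Worked example (illustrative note, not part of the original source): the
// branch target is computed relative to the PC of the *next* instruction,
// hence the "+ 4" for the 4-byte SOPP word. For an s_branch at Addr = 0x100
// with Imm = 0xFFFF, SignExtend64<16>(Imm) is -1, so the target is
// (-1 * 4) + 4 + 0x100 = 0x100 and the branch loops back onto itself.
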
static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
                                     const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  int64_t Offset;
  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else { // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  }
  return addOperand(Inst, MCOperand::createImm(Offset));
}

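// Illustrative example (not part of the original source): on a GFX9 target
// the 21-bit field value 0x1FFFFF sign-extends to -1, while on VI the same
// bits are masked to the unsigned offset 0xFFFFF; the subtarget query above
// selects which interpretation is printed.
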
static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
                                  const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeBoolReg(Inst, Val));
}

static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSplitBarrier(Inst, Val));
}

static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
                                 const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
}

#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }

// Decoder for registers, decoding directly via RegClassID. Imm (8-bit) is the
// register number. Used by VGPR-only and AGPR-only operands.
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }

#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm)                           \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));         \
  }

static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                unsigned OpWidth, unsigned Imm, unsigned EncImm,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));
}

// Decoder for registers. Imm (7-bit) is the register number; decodeSrcOp
// determines the register class. Used by SGPR-only operands.
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)

#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)

// Decoder for registers. Imm (10-bit): Imm{7-0} is the register number,
// Imm{9} is acc (agpr or vgpr), and Imm{8} should be 0 (see VOP3Pe_SMFMAC).
// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
// Used by AV_ register classes (AGPR-only or VGPR-only register operands).
template <unsigned OpWidth>
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
                     Decoder);
}

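// Illustrative example (not part of the original source, and assuming
// IS_VGPR is the Val{8} bit of the 'enum10' encoding): an AV operand value
// of 0x202 has Imm{9} (acc) set and register number 2, so it decodes as a2,
// while 0x002 with Imm{9} clear decodes as v2; OR-ing in IS_VGPR merely
// routes both through the register half of decodeSrcOp.
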
// Decoder for Src (9-bit encoding) registers only.
template <unsigned OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}

// Decoder for Src (9-bit encoding) AGPRs only. The register number is encoded
// in 9 bits; set Imm{9} to 1 (set acc) and decode using 'enum10' from
// decodeSrcOp.
template <unsigned OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}

// Decoder for 'enum10' from decodeSrcOp: Imm{0-8} is the 9-bit Src encoding
// and Imm{9} is acc. Registers only.
template <unsigned OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
}

// Decoder for RegisterOperands using the 9-bit Src encoding. The operand can
// be a register from RegClass or an immediate. Registers that don't belong to
// RegClass are still decoded, and the InstPrinter will report a warning. An
// immediate is decoded into a constant matching the OperandType (important
// for floating-point types).
template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
                                       uint64_t /* Addr */,
                                       const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}

// Decoder for Src (9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set
// acc) and decode using 'enum10' from decodeSrcOp.
template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
                                        uint64_t /* Addr */,
                                        const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}

// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
// when RegisterClass is used as an operand. Most often used for destination
// operands.

DECODE_OPERAND_REG_8(VGPR_32)
DECODE_OPERAND_REG_8(VGPR_32_Lo128)
DECODE_OPERAND_REG_8(VReg_64)
DECODE_OPERAND_REG_8(VReg_96)
DECODE_OPERAND_REG_8(VReg_128)
DECODE_OPERAND_REG_8(VReg_192)
DECODE_OPERAND_REG_8(VReg_256)
DECODE_OPERAND_REG_8(VReg_288)
DECODE_OPERAND_REG_8(VReg_320)
DECODE_OPERAND_REG_8(VReg_352)
DECODE_OPERAND_REG_8(VReg_384)
DECODE_OPERAND_REG_8(VReg_512)
DECODE_OPERAND_REG_8(VReg_1024)

DECODE_OPERAND_SREG_7(SReg_32, 32)
DECODE_OPERAND_SREG_7(SReg_32_XM0, 32)
DECODE_OPERAND_SREG_7(SReg_32_XEXEC, 32)
DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, 32)
DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, 32)
DECODE_OPERAND_SREG_7(SReg_64_XEXEC, 64)
DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, 64)
DECODE_OPERAND_SREG_7(SReg_96, 96)
DECODE_OPERAND_SREG_7(SReg_128, 128)
DECODE_OPERAND_SREG_7(SReg_128_XNULL, 128)
DECODE_OPERAND_SREG_7(SReg_256, 256)
DECODE_OPERAND_SREG_7(SReg_256_XNULL, 256)
DECODE_OPERAND_SREG_7(SReg_512, 512)

DECODE_OPERAND_SREG_8(SReg_64, 64)

DECODE_OPERAND_REG_8(AGPR_32)
DECODE_OPERAND_REG_8(AReg_64)
DECODE_OPERAND_REG_8(AReg_128)
DECODE_OPERAND_REG_8(AReg_256)
DECODE_OPERAND_REG_8(AReg_512)
DECODE_OPERAND_REG_8(AReg_1024)

static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t /*Addr*/,
                                               const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus
DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                 const MCDisassembler *Decoder) {
  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                uint64_t /*Addr*/,
                                                const MCDisassembler *Decoder) {
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
}

template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
}

static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
                                         uint64_t Addr,
                                         const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
}

static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
                                          uint64_t Addr, const void *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
}

static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
                                 const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(Inst, Opw, Imm | 256));
}

template <unsigned Opw>
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 uint64_t /* Addr */,
                                 const MCDisassembler *Decoder) {
  return decodeAVLdSt(Inst, Imm, Opw, Decoder);
}

static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  assert(Imm < (1 << 9) && "9-bit encoding");
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(Inst, 64, Imm));
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
                                     uint64_t /* Addr */,
                                     const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVersionImm(Imm));
}

409#include "AMDGPUGenDisassemblerTables.inc"
410
411namespace {
412// Define bitwidths for various types used to instantiate the decoder.
413template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
414template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
415template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
416template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
417} // namespace
418
419//===----------------------------------------------------------------------===//
420//
421//===----------------------------------------------------------------------===//
422
template <typename InsnType>
DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table, MCInst &MI,
                                               InsnType Inst, uint64_t Address,
                                               raw_ostream &Comments) const {
  assert(MI.getOpcode() == 0);
  assert(MI.getNumOperands() == 0);
  MCInst TmpInst;
  HasLiteral = false;
  const auto SavedBytes = Bytes;

  SmallString<64> LocalComments;
  raw_svector_ostream LocalCommentStream(LocalComments);
  CommentStream = &LocalCommentStream;

  DecodeStatus Res =
      decodeInstruction(Table, TmpInst, Inst, Address, this, STI);

  CommentStream = nullptr;

  if (Res != MCDisassembler::Fail) {
    MI = TmpInst;
    Comments << LocalComments;
    return MCDisassembler::Success;
  }
  Bytes = SavedBytes;
  return MCDisassembler::Fail;
}

template <typename InsnType>
DecodeStatus
AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2,
                                  MCInst &MI, InsnType Inst, uint64_t Address,
                                  raw_ostream &Comments) const {
  for (const uint8_t *T : {Table1, Table2}) {
    if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
      return Res;
  }
  return MCDisassembler::Fail;
}

template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res =
      support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}

static inline std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
  using namespace llvm::support::endian;
  assert(Bytes.size() >= 12);
  std::bitset<96> Lo(read<uint64_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  std::bitset<96> Hi(read<uint32_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(4);
  return (Hi << 64) | Lo;
}

static inline std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
  using namespace llvm::support::endian;
  assert(Bytes.size() >= 16);
  std::bitset<128> Lo(read<uint64_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  std::bitset<128> Hi(read<uint64_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  return (Hi << 64) | Lo;
}

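// Illustrative example (not part of the original source): for the byte
// stream 00 01 02 ... 0F, eat16Bytes() reads two little-endian 64-bit
// halves, Lo = 0x0706050403020100 and Hi = 0x0F0E0D0C0B0A0908, and returns
// (Hi << 64) | Lo, so bit 0 of the resulting bitset is bit 0 of byte 0 of
// the instruction stream.
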
void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
                                           const MCInstrInfo &MCII) const {
  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
  for (auto [OpNo, OpDesc] : enumerate(Desc.operands())) {
    if (OpNo >= MI.getNumOperands())
      continue;

    // TODO: Fix V_DUAL_FMAMK_F32_X_FMAAK_F32_gfx12 vsrc operands,
    // defined to take VGPR_32, but in reality allowing inline constants.
    bool IsSrc = AMDGPU::OPERAND_SRC_FIRST <= OpDesc.OperandType &&
                 OpDesc.OperandType <= AMDGPU::OPERAND_SRC_LAST;
    if (!IsSrc && OpDesc.OperandType != MCOI::OPERAND_REGISTER)
      continue;

    MCOperand &Op = MI.getOperand(OpNo);
    if (!Op.isImm())
      continue;
    int64_t Imm = Op.getImm();
    if (AMDGPU::EncValues::INLINE_INTEGER_C_MIN <= Imm &&
        Imm <= AMDGPU::EncValues::INLINE_INTEGER_C_MAX) {
      Op = decodeIntImmed(Imm);
      continue;
    }

    if (Imm == AMDGPU::EncValues::LITERAL_CONST) {
      Op = decodeLiteralConstant(Desc, OpDesc);
      continue;
    }

    if (AMDGPU::EncValues::INLINE_FLOATING_C_MIN <= Imm &&
        Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX) {
      switch (OpDesc.OperandType) {
      case AMDGPU::OPERAND_REG_IMM_BF16:
      case AMDGPU::OPERAND_REG_IMM_V2BF16:
      case AMDGPU::OPERAND_REG_INLINE_C_BF16:
      case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
        Imm = getInlineImmValBF16(Imm);
        break;
      case AMDGPU::OPERAND_REG_IMM_FP16:
      case AMDGPU::OPERAND_REG_IMM_V2FP16:
      case AMDGPU::OPERAND_REG_INLINE_C_FP16:
      case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
        Imm = getInlineImmValF16(Imm);
        break;
      case AMDGPU::OPERAND_REG_IMM_FP64:
      case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
        Imm = getInlineImmVal64(Imm);
        break;
      default:
        Imm = getInlineImmVal32(Imm);
      }
      Op.setImm(Imm);
    }
  }
}

DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &CS) const {
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
  // there are fewer bytes left). This will be overridden on success.
  Size = std::min((size_t)4, Bytes_.size());

  do {
    // ToDo: better to switch encoding length using some bit predicate
    // but it is unknown yet, so try all we can

    // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
    // encodings
    if (isGFX1250() && Bytes.size() >= 16) {
      std::bitset<128> DecW = eat16Bytes(Bytes);
      if (tryDecodeInst(DecoderTableGFX1250128, MI, DecW, Address, CS))
        break;
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    if (isGFX11Plus() && Bytes.size() >= 12) {
      std::bitset<96> DecW = eat12Bytes(Bytes);

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX1250() &&
          tryDecodeInst(DecoderTableGFX125096, DecoderTableGFX1250_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
        // Return 8 bytes for a potential literal.
        Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);

        if (isGFX1250() &&
            tryDecodeInst(DecoderTableGFX125096, MI, DecW, Address, CS))
          break;
      }

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);

    } else if (Bytes.size() >= 16 &&
               STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
      std::bitset<128> DecW = eat16Bytes(Bytes);
      if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
        break;

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
          tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
          tryDecodeInst(DecoderTableGFX95064, MI, QW, Address, CS))
        break;

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
      // table first so we print the correct name.
      if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
          tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
          tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
        break;

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
        break;

      if (isGFX1250() &&
          tryDecodeInst(DecoderTableGFX125064, DecoderTableGFX1250_FAKE1664, MI,
                        QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
        break;

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    // Try to decode a 32-bit instruction.
    if (Bytes.size() >= 4) {
      const uint32_t DW = eatBytes<uint32_t>(Bytes);

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
        break;

      if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
          tryDecodeInst(DecoderTableGFX95032, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
                        Address, CS))
        break;

      if (isGFX1250() &&
          tryDecodeInst(DecoderTableGFX125032, DecoderTableGFX1250_FAKE1632, MI,
                        DW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
                        Address, CS))
        break;
    }

    return MCDisassembler::Fail;
  } while (false);

  DecodeStatus Status = MCDisassembler::Success;

  decodeImmOperands(MI, *MCII);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
    if (isMacDPP(MI))
      convertMacDPPInst(MI);

    if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
      convertVOP3PDPPInst(MI);
    else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
      convertVOPCDPPInst(MI); // Special VOP3 case
    else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
      convertVOPC64DPPInst(MI); // Special VOP3 case
    else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
             -1)
      convertDPP8Inst(MI);
    else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
      convertVOP3DPPInst(MI); // Regular VOP3 case
  }

  convertTrue16OpSel(MI);

  if (AMDGPU::isMAC(MI.getOpcode())) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
      !AMDGPU::hasGDS(STI)) {
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);
    if (CPolPos != -1) {
      unsigned CPol =
          (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
              AMDGPU::CPol::GLC : 0;
      if (MI.getNumOperands() <= (unsigned)CPolPos) {
        insertNamedMCOperand(MI, MCOperand::createImm(CPol),
                             AMDGPU::OpName::cpol);
      } else if (CPol) {
        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
      }
    }
  }

  if ((MCII->get(MI.getOpcode()).TSFlags &
       (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
      (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
    // GFX90A lost TFE, its place is occupied by ACC.
    int TFEOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto *TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
      MI.insert(TFEIter, MCOperand::createImm(0));
    }
  }

  // Validate buffer instruction offsets for GFX12+ - must not be negative.
  if (isGFX12Plus() &&
      (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MUBUF)) {
    int OffsetIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);
    if (OffsetIdx != -1) {
      uint32_t Imm = MI.getOperand(OffsetIdx).getImm();
      int64_t SignedOffset = SignExtend64<24>(Imm);
      if (SignedOffset < 0)
        Status = MCDisassembler::SoftFail;
    }
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
    int SWZOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto *SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
      MI.insert(SWZIter, MCOperand::createImm(0));
    }
  }

  const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
  if (Desc.TSFlags & SIInstrFlags::MIMG) {
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)
        return MCDisassembler::Fail;
      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
        auto VAddrRCID =
            MCII->getOpRegClassID(Desc.operands()[VAddrIdx], HwModeRegClass);
        MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
      }
      Bytes = Bytes.slice(4 * NSAWords);
    }

    convertMIMGInst(MI);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
    convertMIMGInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
    convertEXPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
    convertVINTERPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
    convertSDWAInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsMAI)
    convertMAIInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsWMMA)
    convertWMMAInst(MI);

  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                                                              MCOI::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
                       !MI.getOperand(VDstIn_Idx).isReg() ||
                       MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
                           MCOperand::createReg(MI.getOperand(Tied).getReg()),
                           AMDGPU::OpName::vdst_in);
    }
  }

  bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
  if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm) && !IsSOPK)
    convertFMAanyK(MI);

  // Some VOPC instructions, e.g., v_cmpx_f_f64, use VOP3 encoding and
  // have EXEC as implicit destination. Issue a warning if encoding for
  // vdst is not EXEC.
  if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) &&
      MCII->get(MI.getOpcode()).getNumDefs() == 0 &&
      MCII->get(MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
    auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
    if (Bytes_[0] != ExecEncoding)
      Status = MCDisassembler::SoftFail;
  }

  Size = MaxInstBytesNum - Bytes.size();
  return Status;
}

void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
    // The MCInst still has these fields even though they are no longer encoded
    // in the GFX11 instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
  }
}
914
void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
    // The MCInst has this field that is not directly encoded in the
    // instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
  }
}

void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, MCOperand::createReg(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
}

/// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the
/// appropriate subregister for the used format width.
static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI,
                                        MCOperand &MO, uint8_t NumRegs) {
  switch (NumRegs) {
  case 4:
    return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
  case 6:
    return MO.setReg(
        MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
  case 8:
    if (MCRegister NewReg = MRI.getSubReg(
            MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {
      MO.setReg(NewReg);
    }
    return;
  case 12: {
    // There is no 384-bit subreg index defined.
    MCRegister BaseReg = MRI.getSubReg(MO.getReg(), AMDGPU::sub0);
    MCRegister NewReg = MRI.getMatchingSuperReg(
        BaseReg, AMDGPU::sub0, &MRI.getRegClass(AMDGPU::VReg_384RegClassID));
    return MO.setReg(NewReg);
  }
  case 16:
    // No-op in cases where one operand is still f8/bf8.
    return;
  default:
    llvm_unreachable("Unexpected size for mfma/wmma f8f6f4 operand");
  }
}

/// f8f6f4 instructions have different pseudos depending on the used formats.
/// In the disassembler table, we only have the variants with the largest
/// register classes, which assume an fp8/bf8 format for both operands. The
/// actual register class depends on the format in the blgp and cbsz operands.
/// Adjust the register classes depending on the used format.
void AMDGPUDisassembler::convertMAIInst(MCInst &MI) const {
  int BlgpIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);
  if (BlgpIdx == -1)
    return;

  int CbszIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);

  unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
  unsigned BLGP = MI.getOperand(BlgpIdx).getImm();

  const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
      AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, MI.getOpcode());
  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  int Src0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
  int Src1Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
                              AdjustedRegClassOpcode->NumRegsSrcA);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
                              AdjustedRegClassOpcode->NumRegsSrcB);
}

void AMDGPUDisassembler::convertWMMAInst(MCInst &MI) const {
  int FmtAIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_a_fmt);
  if (FmtAIdx == -1)
    return;

  int FmtBIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_b_fmt);

  unsigned FmtA = MI.getOperand(FmtAIdx).getImm();
  unsigned FmtB = MI.getOperand(FmtBIdx).getImm();

  const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
      AMDGPU::getWMMA_F8F6F4_WithFormatArgs(FmtA, FmtB, MI.getOpcode());
  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  int Src0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
  int Src1Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
                              AdjustedRegClassOpcode->NumRegsSrcA);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
                              AdjustedRegClassOpcode->NumRegsSrcB);
}

struct VOPModifiers {
  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;
};

// Reconstruct values of VOP3/VOP3P operands such as op_sel.
// Note that these values do not affect disassembler output,
// so this is only necessary for consistency with src_modifiers.
static VOPModifiers collectVOPModifiers(const MCInst &MI,
                                        bool IsVOP3P = false) {
  VOPModifiers Modifiers;
  unsigned Opc = MI.getOpcode();
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    if (OpIdx == -1)
      continue;

    unsigned Val = MI.getOperand(OpIdx).getImm();

    Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
    if (IsVOP3P) {
      Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
      Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
      Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
    } else if (J == 0) {
      Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
    }
  }

  return Modifiers;
}

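// Illustrative example (not part of the original source): for a VOP3P
// instruction whose src1_modifiers immediate has SISrcMods::OP_SEL_0 and
// SISrcMods::NEG set, the J == 1 iteration above sets bit 1 of both
// Modifiers.OpSel and Modifiers.NegLo, reassembling the packed op_sel and
// neg_lo operands bit by bit from the per-source modifier words.
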
// Instructions decode the op_sel/suffix bits into the src_modifier
// operands. Copy those bits into the src operands for true16 VGPRs.
void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MCRegisterClass &ConversionRC =
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
  constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
      OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::DST_OP_SEL}}};
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
    int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
    if (OpIdx == -1 || OpModsIdx == -1)
      continue;
    MCOperand &Op = MI.getOperand(OpIdx);
    if (!Op.isReg())
      continue;
    if (!ConversionRC.contains(Op.getReg()))
      continue;
    unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) { // isHi
      unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
      Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
    }
  }
}

// MAC opcodes have special old and src2 operands.
// src2 is tied to dst, while old is not tied (but assumed to be).
bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  if (OldIdx != -1 && Desc.getOperandConstraint(
                          OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
    assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
    assert(Desc.getOperandConstraint(
               AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
               MCOI::OperandConstraint::TIED_TO) == DST_IDX);
    (void)DST_IDX;
    return true;
  }

  return false;
}

// Create a dummy old operand and insert dummy unused src2_modifiers.
void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
  insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
  insertNamedMCOperand(MI, MCOperand::createImm(0),
                       AMDGPU::OpName::src2_modifiers);
}

void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    convertTrue16OpSel(MI);
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  } else {
    // Insert dummy unused src modifiers.
    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src0_modifiers);

    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src1_modifiers);
  }
}
1181
void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
  convertTrue16OpSel(MI);

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}

// Given a wide register tuple \p Reg, check whether it would overflow the
// 256-register file.
// \returns \p Reg on success or NoRegister otherwise.
static MCRegister CheckVGPROverflow(MCRegister Reg, const MCRegisterClass &RC,
                                    const MCRegisterInfo &MRI) {
  unsigned NumRegs = RC.getSizeInBits() / 32;
  MCRegister Sub0 = MRI.getSubReg(Reg, AMDGPU::sub0);
  if (!Sub0)
    return Reg;

  MCRegister BaseReg;
  if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(Sub0))
    BaseReg = AMDGPU::VGPR0;
  else if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Sub0))
    BaseReg = AMDGPU::AGPR0;

  assert(BaseReg && "Only vector registers expected");

  return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : MCRegister();
}

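// Worked example (illustrative, not part of the original source): a decoded
// 128-bit tuple (NumRegs = 4) whose first subregister is v254 would cover
// v254..v257; since 254 + 4 > 256, the function returns MCRegister() and the
// caller treats the encoding as invalid.
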
// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show the address as
// if it had 1 dword, which may not really be the case.
void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
                                  ? AMDGPU::OpName::srsrc
                                  : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  assert(VDataIdx != -1);
  if (BaseOpcode->BVH) {
    // Add A16 operand for intersect_ray instructions
    addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
    return;
  }

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
  bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
  bool IsNSA = false;
  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (isGFX10Plus()) {
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    AddrSize =
        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));

    // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
    // VIMAGE insts other than BVH never use vaddr4.
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsNSA) {
      if (!IsVSample && AddrSize > 12)
        AddrSize = 16;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
          // The NSA encoding does not contain enough operands for the
          // combination of base opcode / dimension. Should this be an error?
          return;
        }
        IsPartialNSA = true;
      }
    }
  }

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
    DstSize += 1;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return;

  int NewOpcode =
      AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
  if (NewOpcode == -1)
    return;

  // Widen the register to the correct number of enabled channels.
  MCRegister NewVdata;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->getOpRegClassID(
        MCII->get(NewOpcode).operands()[VDataIdx], HwModeRegClass);

    // Get first subregister of VData
    MCRegister Vdata0 = MI.getOperand(VDataIdx).getReg();
    MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

    const MCRegisterClass &NewRC = MRI.getRegClass(DataRCID);
    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &NewRC);
    NewVdata = CheckVGPROverflow(NewVdata, NewRC, MRI);
    if (!NewVdata) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
      return;
    }
  }

  // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
  // If using partial NSA on GFX11+, widen the last address register.
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  MCRegister NewVAddrSA;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->getOpRegClassID(
        MCII->get(NewOpcode).operands()[VAddrSAIdx], HwModeRegClass);

    const MCRegisterClass &NewRC = MRI.getRegClass(AddrRCID);
    NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &NewRC);
    NewVAddrSA = CheckVGPROverflow(NewVAddrSA, NewRC, MRI);
    if (!NewVAddrSA)
      return;
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data)
      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
    }
  }

  if (NewVAddrSA) {
    MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
  } else if (IsNSA) {
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }
}

// Opsel and neg bits are used both in src_modifiers and in standalone
// operands. The autogenerated decoder only adds them to src_modifiers, so
// manually add the bits to the other operands.
void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  auto Mods = collectVOPModifiers(MI, true);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
                         AMDGPU::OpName::neg_hi);
}

// Create a dummy old operand and insert optional operands.
void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
    insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);
}

void AMDGPUDisassembler::convertVOPC64DPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  convertTrue16OpSel(MI);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}

1440 assert(HasLiteral && "Should have decoded a literal");
1441 insertNamedMCOperand(MI, MCOperand::createImm(Literal), AMDGPU::OpName::immX);
1442}
1443
const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accept as much as we can; let the assembler sort it out
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
                                                  bool IsHi) const {
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
  return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
}

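// Illustrative example (not part of the original source): the VGPR_16 class
// interleaves low and high halves, so RegIdx = 3 with IsHi = false maps to
// index 6 (the low half, v3.l) while IsHi = true maps to index 7 (v3.h).
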
// Decode literals for instructions which always have a literal in the
// encoding.
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
  if (HasLiteral) {
    assert(AMDGPU::hasVOPD(STI) &&
           "Should only decode multiple kimm with VOPD, check VSrc operand types");
    if (Literal != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;
  return MCOperand::createImm(Literal);
}

MCOperand
AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const {
  if (HasLiteral) {
    if (Literal != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;

  bool UseLit64 = Hi_32(Literal) == 0;
  return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
                        LitModifier::Lit64, Literal, getContext()))
                  : MCOperand::createImm(Literal);
}

MCOperand
AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
                                          const MCOperandInfo &OpDesc) const {
  // For now all literal constants are assumed to be unsigned integers.
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                           Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint32_t>(Bytes);
  }

  // For disassembling always assume all inline constants are available.
  bool HasInv2Pi = true;

  // Invalid instruction codes may contain literals for inline-only
  // operands, so we support them here as well.
  int64_t Val = Literal;
  bool UseLit = false;
  switch (OpDesc.OperandType) {
  default:
    llvm_unreachable("Unexpected operand type!");
  case AMDGPU::OPERAND_REG_IMM_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
    UseLit = AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
    break;
  case AMDGPU::OPERAND_REG_IMM_V2BF16:
    break;
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    UseLit = AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
    break;
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    UseLit = AMDGPU::isInlinableLiteralV2F16(Val);
    break;
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
    break;
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    UseLit = AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi);
    break;
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
    UseLit = AMDGPU::isInlinableLiteralV2I16(Val);
    break;
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_KIMM32:
    UseLit = AMDGPU::isInlinableLiteral32(Val, HasInv2Pi);
    break;
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    Val <<= 32;
    break;
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    UseLit = AMDGPU::isInlinableLiteral64(Val, HasInv2Pi);
    break;
  case MCOI::OPERAND_REGISTER:
    // TODO: Disassembling V_DUAL_FMAMK_F32_X_FMAMK_F32_gfx11 hits
    // decoding a literal in a position of a register operand. Give
    // it special handling in the caller, decodeImmOperands(), instead
    // of quietly allowing it here.
    break;
  }

  return UseLit ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
                      LitModifier::Lit, Val, getContext()))
                : MCOperand::createImm(Val);
}

MCOperand AMDGPUDisassembler::decodeLiteral64Constant() const {
  assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));

  if (!HasLiteral) {
    if (Bytes.size() < 8) {
      return errOperand(0, "cannot read literal64, inst bytes left " +
                           Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint64_t>(Bytes);
  }

  bool UseLit64 = Hi_32(Literal) == 0;
  return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
                        LitModifier::Lit64, Literal, getContext()))
                  : MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
  // Cast prevents negative overflow.
}

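// Illustrative example (not part of the original source, assuming the usual
// enum values INLINE_INTEGER_C_MIN = 128 and
// INLINE_INTEGER_C_POSITIVE_MAX = 192): an encoding of 129 decodes to the
// immediate 1 (129 - 128), while 193 decodes to -1 (192 - 193).
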
static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint32_t>(0.5f);
  case 241:
    return llvm::bit_cast<uint32_t>(-0.5f);
  case 242:
    return llvm::bit_cast<uint32_t>(1.0f);
  case 243:
    return llvm::bit_cast<uint32_t>(-1.0f);
  case 244:
    return llvm::bit_cast<uint32_t>(2.0f);
  case 245:
    return llvm::bit_cast<uint32_t>(-2.0f);
  case 246:
    return llvm::bit_cast<uint32_t>(4.0f);
  case 247:
    return llvm::bit_cast<uint32_t>(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

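// Illustrative sanity check (not part of the original source): 0.5f has the
// IEEE-754 single-precision bit pattern 0x3F000000, so the inline-constant
// encoding 240 decodes to exactly that literal value; likewise 248 yields
// the single-precision bits of 1/(2*pi), 0x3E22F983.
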
static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint64_t>(0.5);
  case 241:
    return llvm::bit_cast<uint64_t>(-0.5);
  case 242:
    return llvm::bit_cast<uint64_t>(1.0);
  case 243:
    return llvm::bit_cast<uint64_t>(-1.0);
  case 244:
    return llvm::bit_cast<uint64_t>(2.0);
  case 245:
    return llvm::bit_cast<uint64_t>(-2.0);
  case 246:
    return llvm::bit_cast<uint64_t>(4.0);
  case 247:
    return llvm::bit_cast<uint64_t>(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3800;
  case 241:
    return 0xB800;
  case 242:
    return 0x3C00;
  case 243:
    return 0xBC00;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4400;
  case 247:
    return 0xC400;
  case 248: // 1 / (2 * PI)
    return 0x3118;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValBF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3F00;
  case 241:
    return 0xBF00;
  case 242:
    return 0x3F80;
  case 243:
    return 0xBF80;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4080;
  case 247:
    return 0xC080;
  case 248: // 1 / (2 * PI)
    return 0x3E22;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return VGPR_32RegClassID;
  case 64:
    return VReg_64RegClassID;
  case 96:
    return VReg_96RegClassID;
  case 128:
    return VReg_128RegClassID;
  case 160:
    return VReg_160RegClassID;
  case 192:
    return VReg_192RegClassID;
  case 256:
    return VReg_256RegClassID;
  case 288:
    return VReg_288RegClassID;
  case 320:
    return VReg_320RegClassID;
  case 352:
    return VReg_352RegClassID;
  case 384:
    return VReg_384RegClassID;
  case 512:
    return VReg_512RegClassID;
  case 1024:
    return VReg_1024RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}

unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return AGPR_32RegClassID;
  case 64:
    return AReg_64RegClassID;
  case 96:
    return AReg_96RegClassID;
  case 128:
    return AReg_128RegClassID;
  case 160:
    return AReg_160RegClassID;
  case 256:
    return AReg_256RegClassID;
  case 288:
    return AReg_288RegClassID;
  case 320:
    return AReg_320RegClassID;
  case 352:
    return AReg_352RegClassID;
  case 384:
    return AReg_384RegClassID;
  case 512:
    return AReg_512RegClassID;
  case 1024:
    return AReg_1024RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}

1839unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
1840 using namespace AMDGPU;
1841
1842 switch (Width) {
1843 case 16:
1844 case 32:
1845 return SGPR_32RegClassID;
1846 case 64:
1847 return SGPR_64RegClassID;
1848 case 96:
1849 return SGPR_96RegClassID;
1850 case 128:
1851 return SGPR_128RegClassID;
1852 case 160:
1853 return SGPR_160RegClassID;
1854 case 256:
1855 return SGPR_256RegClassID;
1856 case 288:
1857 return SGPR_288RegClassID;
1858 case 320:
1859 return SGPR_320RegClassID;
1860 case 352:
1861 return SGPR_352RegClassID;
1862 case 384:
1863 return SGPR_384RegClassID;
1864 case 512:
1865 return SGPR_512RegClassID;
1866 }
1867 llvm_unreachable("Invalid register width!");
1868}
1869
1870unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
1871 using namespace AMDGPU;
1872
1873 switch (Width) {
1874 case 16:
1875 case 32:
1876 return TTMP_32RegClassID;
1877 case 64:
1878 return TTMP_64RegClassID;
1879 case 128:
1880 return TTMP_128RegClassID;
1881 case 256:
1882 return TTMP_256RegClassID;
1883 case 288:
1884 return TTMP_288RegClassID;
1885 case 320:
1886 return TTMP_320RegClassID;
1887 case 352:
1888 return TTMP_352RegClassID;
1889 case 384:
1890 return TTMP_384RegClassID;
1891 case 512:
1892 return TTMP_512RegClassID;
1893 }
1894 llvm_unreachable("Invalid register width!");
1895}
1896
1897int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1898 using namespace AMDGPU::EncValues;
1899
1900 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1901 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1902
1903 return (TTmpMin <= Val && Val <= TTmpMax) ? Val - TTmpMin : -1;
1904}
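// ---- [Editorial sketch: not part of AMDGPUDisassembler.cpp] ----
// With the SIDefines.h values TTMP_VI_MIN = 112, TTMP_GFX9PLUS_MIN = 108 and
// a shared maximum of 123, the same encoding can name different trap
// temporaries on different subtargets:
static int exampleTTmpIdx(unsigned Val, bool IsGFX9Plus) {
  unsigned Min = IsGFX9Plus ? 108 : 112;
  return (Min <= Val && Val <= 123) ? static_cast<int>(Val - Min) : -1;
}
// exampleTTmpIdx(112, true) == 4, exampleTTmpIdx(112, false) == 0, and
// exampleTTmpIdx(110, false) == -1 (not a ttmp on VI).
// ---- [End editorial sketch] ----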
1905
1906 MCOperand AMDGPUDisassembler::decodeSrcOp(const MCInst &Inst, unsigned Width,
1907 unsigned Val) const {
1908 using namespace AMDGPU::EncValues;
1909
1910 assert(Val < 1024); // enum10
1911
1912 bool IsAGPR = Val & 512;
1913 Val &= 511;
1914
1915 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1916 return createRegOperand(IsAGPR ? getAgprClassId(Width)
1917 : getVgprClassId(Width), Val - VGPR_MIN);
1918 }
1919 return decodeNonVGPRSrcOp(Inst, Width, Val & 0xFF);
1920}
1921
1922 MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const MCInst &Inst,
1923 unsigned Width,
1924 unsigned Val) const {
1925 // Cases where Val{8} is 1 (VGPR, AGPR, or true16 VGPR) should have been
1926 // decoded earlier.
1927 assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1928 using namespace AMDGPU::EncValues;
1929
1930 if (Val <= SGPR_MAX) {
1931 // "SGPR_MIN <= Val" is always true and causes compilation warning.
1932 static_assert(SGPR_MIN == 0);
1933 return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
1934 }
1935
1936 int TTmpIdx = getTTmpIdx(Val);
1937 if (TTmpIdx >= 0) {
1938 return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
1939 }
1940
1941 if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
1942 (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
1943 Val == LITERAL_CONST)
1944 return MCOperand::createImm(Val);
1945
1946 if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
1947 return decodeLiteral64Constant();
1948 }
1949
1950 switch (Width) {
1951 case 32:
1952 case 16:
1953 return decodeSpecialReg32(Val);
1954 case 64:
1955 return decodeSpecialReg64(Val);
1956 case 96:
1957 case 128:
1958 case 256:
1959 case 512:
1960 return decodeSpecialReg96Plus(Val);
1961 default:
1962 llvm_unreachable("unexpected immediate type");
1963 }
1964}
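// ---- [Editorial sketch: not part of AMDGPUDisassembler.cpp] ----
// For orientation, the 8-bit value handled above (bit 8 already stripped)
// subdivides roughly as follows; the numbers are the GFX10 EncValues ranges
// and are shown for illustration only:
enum class ExampleSrcKind { SGPR, TTmp, InlineInt, InlineFP, Literal, Special };
static ExampleSrcKind exampleClassifySrc(unsigned Val) {
  if (Val <= 105) return ExampleSrcKind::SGPR;               // s0..s105
  if (Val >= 108 && Val <= 123) return ExampleSrcKind::TTmp; // ttmp0..ttmp15
  if (Val >= 128 && Val <= 208) return ExampleSrcKind::InlineInt;
  if (Val >= 240 && Val <= 248) return ExampleSrcKind::InlineFP;
  if (Val == 255) return ExampleSrcKind::Literal;            // literal follows
  return ExampleSrcKind::Special;                            // vcc, exec, m0, ...
}
// ---- [End editorial sketch] ----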
1965
1966// Bit 0 of DstY isn't stored in the instruction, because it's always the
1967// opposite of bit 0 of DstX.
1968 MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
1969 unsigned Val) const {
1970 int VDstXInd =
1971 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
1972 assert(VDstXInd != -1);
1973 assert(Inst.getOperand(VDstXInd).isReg());
1974 unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
1975 Val |= ~XDstReg & 1;
1976 return createRegOperand(getVgprClassId(32), Val);
1977}
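// ---- [Editorial sketch: not part of AMDGPUDisassembler.cpp] ----
// Only bits [8:1] of DstY are encoded; the decode above reconstitutes bit 0
// as the complement of DstX's bit 0, so the two VOPD destinations always
// have opposite parity:
static unsigned exampleRecoverDstY(unsigned EncodedY, unsigned XDstEnc) {
  return EncodedY | (~XDstEnc & 1u); // EncodedY arrives with bit 0 clear
}
// ---- [End editorial sketch] ----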
1978
1979 MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
1980 using namespace AMDGPU;
1981
1982 switch (Val) {
1983 // clang-format off
1984 case 102: return createRegOperand(FLAT_SCR_LO);
1985 case 103: return createRegOperand(FLAT_SCR_HI);
1986 case 104: return createRegOperand(XNACK_MASK_LO);
1987 case 105: return createRegOperand(XNACK_MASK_HI);
1988 case 106: return createRegOperand(VCC_LO);
1989 case 107: return createRegOperand(VCC_HI);
1990 case 108: return createRegOperand(TBA_LO);
1991 case 109: return createRegOperand(TBA_HI);
1992 case 110: return createRegOperand(TMA_LO);
1993 case 111: return createRegOperand(TMA_HI);
1994 case 124:
1995 return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
1996 case 125:
1997 return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
1998 case 126: return createRegOperand(EXEC_LO);
1999 case 127: return createRegOperand(EXEC_HI);
2000 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
2001 case 231: return createRegOperand(SRC_FLAT_SCRATCH_BASE_HI);
2002 case 235: return createRegOperand(SRC_SHARED_BASE_LO);
2003 case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
2004 case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
2005 case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
2006 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2007 case 251: return createRegOperand(SRC_VCCZ);
2008 case 252: return createRegOperand(SRC_EXECZ);
2009 case 253: return createRegOperand(SRC_SCC);
2010 case 254: return createRegOperand(LDS_DIRECT);
2011 default: break;
2012 // clang-format on
2013 }
2014 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2015}
2016
2017 MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
2018 using namespace AMDGPU;
2019
2020 switch (Val) {
2021 case 102: return createRegOperand(FLAT_SCR);
2022 case 104: return createRegOperand(XNACK_MASK);
2023 case 106: return createRegOperand(VCC);
2024 case 108: return createRegOperand(TBA);
2025 case 110: return createRegOperand(TMA);
2026 case 124:
2027 if (isGFX11Plus())
2028 return createRegOperand(SGPR_NULL);
2029 break;
2030 case 125:
2031 if (!isGFX11Plus())
2032 return createRegOperand(SGPR_NULL);
2033 break;
2034 case 126: return createRegOperand(EXEC);
2035 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
2036 case 235: return createRegOperand(SRC_SHARED_BASE);
2037 case 236: return createRegOperand(SRC_SHARED_LIMIT);
2038 case 237: return createRegOperand(SRC_PRIVATE_BASE);
2039 case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
2040 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2041 case 251: return createRegOperand(SRC_VCCZ);
2042 case 252: return createRegOperand(SRC_EXECZ);
2043 case 253: return createRegOperand(SRC_SCC);
2044 default: break;
2045 }
2046 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2047}
2048
2049 MCOperand AMDGPUDisassembler::decodeSpecialReg96Plus(unsigned Val) const {
2050 using namespace AMDGPU;
2051
2052 switch (Val) {
2053 case 124:
2054 if (isGFX11Plus())
2055 return createRegOperand(SGPR_NULL);
2056 break;
2057 case 125:
2058 if (!isGFX11Plus())
2059 return createRegOperand(SGPR_NULL);
2060 break;
2061 default:
2062 break;
2063 }
2064 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2065}
2066
2067 MCOperand AMDGPUDisassembler::decodeSDWASrc(unsigned Width,
2068 const unsigned Val) const {
2069 using namespace AMDGPU::SDWA;
2070 using namespace AMDGPU::EncValues;
2071
2072 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
2073 STI.hasFeature(AMDGPU::FeatureGFX10)) {
2074 // XXX: cast to int is needed to avoid a spurious warning:
2075 // comparison with unsigned is always true
2076 if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
2077 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
2078 return createRegOperand(getVgprClassId(Width),
2079 Val - SDWA9EncValues::SRC_VGPR_MIN);
2080 }
2081 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
2082 Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
2083 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
2084 return createSRegOperand(getSgprClassId(Width),
2085 Val - SDWA9EncValues::SRC_SGPR_MIN);
2086 }
2087 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
2088 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
2089 return createSRegOperand(getTtmpClassId(Width),
2090 Val - SDWA9EncValues::SRC_TTMP_MIN);
2091 }
2092
2093 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
2094
2095 if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
2096 (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
2097 return MCOperand::createImm(SVal);
2098
2099 return decodeSpecialReg32(SVal);
2100 }
2101 if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
2102 return createRegOperand(getVgprClassId(Width), Val);
2103 llvm_unreachable("unsupported target");
2104}
2105
2106 MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
2107 return decodeSDWASrc(16, Val);
2108}
2109
2110 MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
2111 return decodeSDWASrc(32, Val);
2112}
2113
2114 MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
2115 using namespace AMDGPU::SDWA;
2116
2117 assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
2118 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
2119 "SDWAVopcDst should be present only on GFX9+");
2120
2121 bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);
2122
2123 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
2124 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
2125
2126 int TTmpIdx = getTTmpIdx(Val);
2127 if (TTmpIdx >= 0) {
2128 auto TTmpClsId = getTtmpClassId(IsWave32 ? 32 : 64);
2129 return createSRegOperand(TTmpClsId, TTmpIdx);
2130 }
2131 if (Val > SGPR_MAX) {
2132 return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
2133 }
2134 return createSRegOperand(getSgprClassId(IsWave32 ? 32 : 64), Val);
2135 }
2136 return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
2137}
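// ---- [Editorial sketch: not part of AMDGPUDisassembler.cpp] ----
// Assuming the SDWA9EncValues masks VOPC_DST_VCC_MASK = 0x80 and
// VOPC_DST_SGPR_MASK = 0x7F, the selection above reduces to: bit 7 clear
// means "write VCC", bit 7 set means "write the SGPR encoded in bits [6:0]":
static bool exampleSDWAVopcWritesVCC(unsigned Val) {
  return (Val & 0x80) == 0;
}
// ---- [End editorial sketch] ----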
2138
2139 MCOperand AMDGPUDisassembler::decodeBoolReg(const MCInst &Inst,
2140 unsigned Val) const {
2141 return STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
2142 ? decodeSrcOp(Inst, 32, Val)
2143 : decodeSrcOp(Inst, 64, Val);
2144}
2145
2146 MCOperand AMDGPUDisassembler::decodeSplitBarrier(const MCInst &Inst,
2147 unsigned Val) const {
2148 return decodeSrcOp(Inst, 32, Val);
2149}
2150
2151 MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
2152 if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
2153 return MCOperand();
2154 return MCOperand::createImm(Val);
2155}
2156
2157 MCOperand AMDGPUDisassembler::decodeVersionImm(unsigned Imm) const {
2158 using VersionField = AMDGPU::EncodingField<7, 0>;
2159 using W64Bit = AMDGPU::EncodingBit<13>;
2160 using W32Bit = AMDGPU::EncodingBit<14>;
2161 using MDPBit = AMDGPU::EncodingBit<15>;
2162 using Encoding = AMDGPU::EncodingFields<VersionField, W64Bit, W32Bit, MDPBit>;
2163
2164 auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
2165
2166 // Decode into a plain immediate if any unused bits are raised.
2167 if (Encoding::encode(Version, W64, W32, MDP) != Imm)
2168 return MCOperand::createImm(Imm);
2169
2170 const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
2171 const auto *I = find_if(
2172 Versions, [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
2173 return V.Code == Version;
2174 });
2175 MCContext &Ctx = getContext();
2176 const MCExpr *E;
2177 if (I == Versions.end())
2178 E = MCConstantExpr::create(Version, Ctx);
2179 else
2180 E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
2181
2182 if (W64)
2183 E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
2184 if (W32)
2185 E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
2186 if (MDP)
2187 E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
2188
2189 return MCOperand::createExpr(E);
2190}
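// ---- [Editorial sketch: not part of AMDGPUDisassembler.cpp] ----
// The decode above only symbolises an immediate that survives a round trip
// through the declared fields (version in bits [7:0], W64/W32/MDP in bits
// 13/14/15); for these pure bit-masks the round-trip test is equivalent to:
static bool exampleIsSymbolisableVersion(unsigned Imm) {
  const unsigned Known = 0xFFu | (1u << 13) | (1u << 14) | (1u << 15);
  return (Imm & ~Known) == 0; // false iff an unused bit (8..12, 16+) is set
}
// ---- [End editorial sketch] ----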
2191
2192 bool AMDGPUDisassembler::isVI() const {
2193 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2194}
2195
2196 bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
2197
2198 bool AMDGPUDisassembler::isGFX90A() const {
2199 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2200}
2201
2202 bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
2203
2204 bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
2205
2206 bool AMDGPUDisassembler::isGFX10Plus() const {
2207 return AMDGPU::isGFX10Plus(STI);
2208}
2209
2210 bool AMDGPUDisassembler::isGFX11() const {
2211 return STI.hasFeature(AMDGPU::FeatureGFX11);
2212}
2213
2214 bool AMDGPUDisassembler::isGFX11Plus() const {
2215 return AMDGPU::isGFX11Plus(STI);
2216}
2217
2218 bool AMDGPUDisassembler::isGFX12() const {
2219 return STI.hasFeature(AMDGPU::FeatureGFX12);
2220}
2221
2222 bool AMDGPUDisassembler::isGFX12Plus() const {
2223 return AMDGPU::isGFX12Plus(STI);
2224}
2225
2226 bool AMDGPUDisassembler::isGFX1250() const { return AMDGPU::isGFX1250(STI); }
2227
2228 bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
2229 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2230}
2231
2232 bool AMDGPUDisassembler::hasKernargPreload() const {
2233 return AMDGPU::hasKernargPreload(STI);
2234}
2235
2236//===----------------------------------------------------------------------===//
2237// AMDGPU specific symbol handling
2238//===----------------------------------------------------------------------===//
2239
2240/// Print a string describing the reserved bit range specified by Mask with
2241/// offset BaseBytes for use in error comments. Mask is a single continuous
2242/// range of 1s surrounded by zeros. The format here is meant to align with the
2243/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
2244static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
2245 SmallString<32> Result;
2246 raw_svector_ostream S(Result);
2247
2248 int TrailingZeros = llvm::countr_zero(Mask);
2249 int PopCount = llvm::popcount(Mask);
2250
2251 if (PopCount == 1) {
2252 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2253 } else {
2254 S << "bits in range ("
2255 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
2256 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2257 }
2258
2259 return Result;
2260}
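// ---- [Editorial example: not part of AMDGPUDisassembler.cpp] ----
// Sample outputs, matching the AMDGPUUsage.html table convention:
//   getBitRangeFromMask(0x00000004, 0) -> "bit (2)"
//   getBitRangeFromMask(0x00000006, 4) -> "bits in range (34:33)"
// ---- [End editorial example] ----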
2261
2262#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
2263#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2264 do { \
2265 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
2266 } while (0)
2267#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
2268 do { \
2269 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
2270 << GET_FIELD(MASK) << '\n'; \
2271 } while (0)
2272
2273#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
2274 do { \
2275 if (FourByteBuffer & (MASK)) { \
2276 return createStringError(std::errc::invalid_argument, \
2277 "kernel descriptor " DESC \
2278 " reserved %s set" MSG, \
2279 getBitRangeFromMask((MASK), 0).c_str()); \
2280 } \
2281 } while (0)
2282
2283#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
2284#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
2285 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
2286#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
2287 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
2288#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
2289 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2290
2291// NOLINTNEXTLINE(readability-identifier-naming)
2292 Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
2293 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2294 using namespace amdhsa;
2295 StringRef Indent = "\t";
2296
2297 // We cannot accurately backward compute #VGPRs used from
2298 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
2299 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
2300 // simply calculate the inverse of what the assembler does.
2301
2302 uint32_t GranulatedWorkitemVGPRCount =
2303 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
2304
2305 uint32_t NextFreeVGPR =
2306 (GranulatedWorkitemVGPRCount + 1) *
2307 AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
2308
2309 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
2310
2311 // We cannot backward compute values used to calculate
2312 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for the following
2313 // directives can't be computed:
2314 // .amdhsa_reserve_vcc
2315 // .amdhsa_reserve_flat_scratch
2316 // .amdhsa_reserve_xnack_mask
2317 // They take their respective default values if not specified in the assembly.
2318 //
2319 // GRANULATED_WAVEFRONT_SGPR_COUNT
2320 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
2321 //
2322 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
2323 // are set to 0. So while disassembling we consider that:
2324 //
2325 // GRANULATED_WAVEFRONT_SGPR_COUNT
2326 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
2327 //
2328 // The disassembler cannot recover the original values of those 3 directives.
2329
2330 uint32_t GranulatedWavefrontSGPRCount =
2331 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
2332
2333 if (isGFX10Plus())
2334 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2335 "must be zero on gfx10+");
2336
2337 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
2338 AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
2339
2340 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
2341 if (!hasArchitectedFlatScratch())
2342 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
2343 bool ReservedXnackMask = STI.hasFeature(AMDGPU::FeatureXNACK);
2344 assert(!ReservedXnackMask || STI.hasFeature(AMDGPU::FeatureSupportsXNACK));
2345 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << ReservedXnackMask
2346 << '\n';
2347 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
2348
2349 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
2350
2351 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
2352 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
2353 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
2354 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
2355 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
2356 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
2357 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
2358 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
2359
2360 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
2361
2362 if (!isGFX12Plus())
2363 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
2364 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
2365
2366 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
2367
2368 if (!isGFX12Plus())
2369 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
2370 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
2371
2372 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
2373 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
2374
2375 // Bits [26].
2376 if (isGFX9Plus()) {
2377 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
2378 } else {
2379 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
2380 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
2381 }
2382
2383 // Bits [27].
2384 if (isGFX1250()) {
2385 PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
2386 COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
2387 } else {
2388 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX120_RESERVED1,
2389 "COMPUTE_PGM_RSRC1");
2390 }
2391
2392 // Bits [28].
2393 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED2, "COMPUTE_PGM_RSRC1");
2394
2395 // Bits [29-31].
2396 if (isGFX10Plus()) {
2397 // WGP_MODE is not available on GFX1250.
2398 if (!isGFX1250()) {
2399 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
2400 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
2401 }
2402 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
2403 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
2404 } else {
2405 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED3,
2406 "COMPUTE_PGM_RSRC1");
2407 }
2408
2409 if (isGFX12Plus())
2410 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
2411 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2412
2413 return true;
2414}
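// ---- [Editorial sketch: not part of AMDGPUDisassembler.cpp] ----
// The .amdhsa_next_free_vgpr inverse used above: the assembler encodes
// GRANULATED = ceil(NextFreeVGPR / Granule) - 1, where Granule is the VGPR
// encoding granule (e.g. 4 or 8 depending on subtarget and wave size), so
// (GRANULATED + 1) * Granule is the largest NextFreeVGPR that re-encodes to
// the same field value:
static unsigned exampleNextFreeVGPR(unsigned Granulated, unsigned Granule) {
  return (Granulated + 1) * Granule;
}
// With Granule == 8, GRANULATED == 3 round-trips to .amdhsa_next_free_vgpr 32.
// ---- [End editorial sketch] ----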
2415
2416// NOLINTNEXTLINE(readability-identifier-naming)
2417 Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
2418 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2419 using namespace amdhsa;
2420 StringRef Indent = "\t";
2421 if (hasArchitectedFlatScratch())
2422 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2423 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2424 else
2425 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2426 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2427 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2428 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2429 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2430 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2431 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2432 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2433 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2434 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2435 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2436 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2437
2438 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2439 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2440 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2441
2442 PRINT_DIRECTIVE(
2443 ".amdhsa_exception_fp_ieee_invalid_op",
2444 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2445 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2446 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2447 PRINT_DIRECTIVE(
2448 ".amdhsa_exception_fp_ieee_div_zero",
2449 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2450 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2451 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2452 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2453 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2454 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2455 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2456 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2457 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2458
2459 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2460
2461 return true;
2462}
2463
2464// NOLINTNEXTLINE(readability-identifier-naming)
2465 Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
2466 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2467 using namespace amdhsa;
2468 StringRef Indent = "\t";
2469 if (isGFX90A()) {
2470 KdStream << Indent << ".amdhsa_accum_offset "
2471 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2472 << '\n';
2473
2474 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2475
2476 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2477 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2478 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2479 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2480 } else if (isGFX10Plus()) {
2481 // Bits [0-3].
2482 if (!isGFX12Plus()) {
2483 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2484 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2485 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2486 } else {
2487 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2488 "SHARED_VGPR_COUNT",
2489 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2490 }
2491 } else {
2492 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2493 "COMPUTE_PGM_RSRC3",
2494 "must be zero on gfx12+");
2495 }
2496
2497 // Bits [4-11].
2498 if (isGFX11()) {
2499 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2500 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2501 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2502 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2503 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2504 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2505 } else if (isGFX12Plus()) {
2506 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2507 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2508 } else {
2509 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2510 "COMPUTE_PGM_RSRC3",
2511 "must be zero on gfx10");
2512 }
2513
2514 // Bits [12].
2515 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2516 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2517
2518 // Bits [13].
2519 if (isGFX12Plus()) {
2520 PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
2521 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2522 } else {
2523 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2524 "COMPUTE_PGM_RSRC3",
2525 "must be zero on gfx10 or gfx11");
2526 }
2527
2528 // Bits [14-21].
2529 if (isGFX1250()) {
2530 PRINT_DIRECTIVE(".amdhsa_named_barrier_count",
2531 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
2532 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2533 "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
2534 PRINT_PSEUDO_DIRECTIVE_COMMENT("TCP_SPLIT",
2535 COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
2536 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2537 "ENABLE_DIDT_THROTTLE",
2538 COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
2539 } else {
2540 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX120_RESERVED4,
2541 "COMPUTE_PGM_RSRC3",
2542 "must be zero on gfx10+");
2543 }
2544
2545 // Bits [22-30].
2546 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED5,
2547 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2548
2549 // Bits [31].
2550 if (isGFX11Plus()) {
2551 PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
2552 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2553 } else {
2554 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED6,
2555 "COMPUTE_PGM_RSRC3",
2556 "must be zero on gfx10");
2557 }
2558 } else if (FourByteBuffer) {
2559 return createStringError(
2560 std::errc::invalid_argument,
2561 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2562 }
2563 return true;
2564}
2565#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2566#undef PRINT_DIRECTIVE
2567#undef GET_FIELD
2568#undef CHECK_RESERVED_BITS_IMPL
2569#undef CHECK_RESERVED_BITS
2570#undef CHECK_RESERVED_BITS_MSG
2571#undef CHECK_RESERVED_BITS_DESC
2572#undef CHECK_RESERVED_BITS_DESC_MSG
2573
2574/// Create an error object to return from onSymbolStart for reserved kernel
2575/// descriptor bits being set.
2576static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2577 const char *Msg = "") {
2578 return createStringError(
2579 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2580 getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2581}
2582
2583/// Create an error object to return from onSymbolStart for reserved kernel
2584/// descriptor bytes being set.
2585static Error createReservedKDBytesError(unsigned BaseInBytes,
2586 unsigned WidthInBytes) {
2587 // Create an error comment in the same format as the "Kernel Descriptor"
2588 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2589 return createStringError(
2590 std::errc::invalid_argument,
2591 "kernel descriptor reserved bits in range (%u:%u) set",
2592 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2593}
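// ---- [Editorial example: not part of AMDGPUDisassembler.cpp] ----
// createReservedKDBytesError(12, 4) formats to:
//   "kernel descriptor reserved bits in range (127:96) set"
// i.e. bytes 12..15 expressed as bits, matching the kernel-descriptor table
// in https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor.
// ---- [End editorial example] ----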
2594
2595 Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
2596 DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2597 raw_string_ostream &KdStream) const {
2598#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2599 do { \
2600 KdStream << Indent << DIRECTIVE " " \
2601 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2602 } while (0)
2603
2604 uint16_t TwoByteBuffer = 0;
2605 uint32_t FourByteBuffer = 0;
2606
2607 StringRef ReservedBytes;
2608 StringRef Indent = "\t";
2609
2610 assert(Bytes.size() == 64);
2611 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2612
2613 switch (Cursor.tell()) {
2614 case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2615 FourByteBuffer = DE.getU32(Cursor);
2616 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2617 << '\n';
2618 return true;
2619
2620 case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2621 FourByteBuffer = DE.getU32(Cursor);
2622 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2623 << FourByteBuffer << '\n';
2624 return true;
2625
2626 case amdhsa::KERNARG_SIZE_OFFSET:
2627 FourByteBuffer = DE.getU32(Cursor);
2628 KdStream << Indent << ".amdhsa_kernarg_size "
2629 << FourByteBuffer << '\n';
2630 return true;
2631
2632 case amdhsa::RESERVED0_OFFSET:
2633 // 4 reserved bytes, must be 0.
2634 ReservedBytes = DE.getBytes(Cursor, 4);
2635 for (int I = 0; I < 4; ++I) {
2636 if (ReservedBytes[I] != 0)
2637 return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4);
2638 }
2639 return true;
2640
2641 case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2642 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2643 // So far no directive controls this for Code Object V3, so simply skip for
2644 // disassembly.
2645 DE.skip(Cursor, 8);
2646 return true;
2647
2648 case amdhsa::RESERVED1_OFFSET:
2649 // 20 reserved bytes, must be 0.
2650 ReservedBytes = DE.getBytes(Cursor, 20);
2651 for (int I = 0; I < 20; ++I) {
2652 if (ReservedBytes[I] != 0)
2653 return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20);
2654 }
2655 return true;
2656
2657 case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2658 FourByteBuffer = DE.getU32(Cursor);
2659 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2660
2661 case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2662 FourByteBuffer = DE.getU32(Cursor);
2663 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2664
2665 case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2666 FourByteBuffer = DE.getU32(Cursor);
2667 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2668
2669 case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2670 using namespace amdhsa;
2671 TwoByteBuffer = DE.getU16(Cursor);
2672
2673 if (!hasArchitectedFlatScratch())
2674 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2675 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2676 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2677 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2678 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2679 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2680 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2681 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2682 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2683 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2684 if (!hasArchitectedFlatScratch())
2685 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2686 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2687 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2688 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2689
2690 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2691 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2692 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2693
2694 // Reserved for GFX9
2695 if (isGFX9() &&
2696 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2697 return createReservedKDBitsError(
2698 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2699 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2700 }
2701 if (isGFX10Plus()) {
2702 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2703 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2704 }
2705
2706 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2707 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2708 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2709
2710 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2711 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2712 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2713 }
2714
2715 return true;
2716
2717 case amdhsa::KERNARG_PRELOAD_OFFSET:
2718 using namespace amdhsa;
2719 TwoByteBuffer = DE.getU16(Cursor);
2720 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2721 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2722 KERNARG_PRELOAD_SPEC_LENGTH);
2723 }
2724
2725 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2726 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2727 KERNARG_PRELOAD_SPEC_OFFSET);
2728 }
2729 return true;
2730
2731 case amdhsa::RESERVED3_OFFSET:
2732 // 4 bytes from here are reserved, must be 0.
2733 ReservedBytes = DE.getBytes(Cursor, 4);
2734 for (int I = 0; I < 4; ++I) {
2735 if (ReservedBytes[I] != 0)
2736 return createReservedKDBytesError(amdhsa::RESERVED3_OFFSET, 4);
2737 }
2738 return true;
2739
2740 default:
2741 llvm_unreachable("Unhandled index. Case statements cover everything.");
2742 return true;
2743 }
2744#undef PRINT_DIRECTIVE
2745}
2746
2747 Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
2748 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2749
2750 // CP microcode requires the kernel descriptor to be 64-byte aligned.
2751 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2752 return createStringError(std::errc::invalid_argument,
2753 "kernel descriptor must be 64-byte aligned");
2754
2755 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2756 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2757 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2758 // order. Work around this by first looking up .amdhsa_wavefront_size32 here
2759 // when required.
2760 if (isGFX10Plus()) {
2761 uint16_t KernelCodeProperties =
2762 support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2763 llvm::endianness::little);
2764 EnableWavefrontSize32 =
2765 AMDHSA_BITS_GET(KernelCodeProperties,
2766 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2767 }
2768
2769 std::string Kd;
2770 raw_string_ostream KdStream(Kd);
2771 KdStream << ".amdhsa_kernel " << KdName << '\n';
2772
2773 DataExtractor::Cursor C(0);
2774 while (C && C.tell() < Bytes.size()) {
2775 Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2776
2777 cantFail(C.takeError());
2778
2779 if (!Res)
2780 return Res;
2781 }
2782 KdStream << ".end_amdhsa_kernel\n";
2783 outs() << KdStream.str();
2784 return true;
2785}
2786
2787 Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
2788 uint64_t &Size,
2789 ArrayRef<uint8_t> Bytes,
2790 uint64_t Address) const {
2791 // Right now only the kernel descriptor needs to be handled.
2792 // We ignore all other symbols for target-specific handling.
2793 // TODO:
2794 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2795 // Object V2 and V3 when symbols are marked protected.
2796
2797 // amd_kernel_code_t for Code Object V2.
2798 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2799 Size = 256;
2800 return createStringError(std::errc::invalid_argument,
2801 "code object v2 is not supported");
2802 }
2803
2804 // Code Object V3 kernel descriptors.
2805 StringRef Name = Symbol.Name;
2806 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2807 Size = 64; // Size = 64 regardless of success or failure.
2808 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2809 }
2810
2811 return false;
2812}
2813
2814const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2815 int64_t Val) {
2816 MCContext &Ctx = getContext();
2817 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2818 // Note: only set the value to Val on a new symbol, in case a disassembler
2819 // has already been initialized in this context.
2820 if (!Sym->isVariable()) {
2821 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2822 } else {
2823 int64_t Res = ~Val;
2824 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2825 if (!Valid || Res != Val)
2826 Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
2827 }
2828 return MCSymbolRefExpr::create(Sym, Ctx);
2829}
2830
2831 bool AMDGPUDisassembler::isBufferInstruction(const MCInst &MI) const {
2832 const uint64_t TSFlags = MCII->get(MI.getOpcode()).TSFlags;
2833
2834 // Check for MUBUF and MTBUF instructions
2835 if (TSFlags & (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))
2836 return true;
2837
2838 // Check for SMEM buffer instructions (S_BUFFER_* instructions)
2839 if ((TSFlags & SIInstrFlags::SMRD) && AMDGPU::getSMEMIsBuffer(MI.getOpcode()))
2840 return true;
2841
2842 return false;
2843}
2844
2845//===----------------------------------------------------------------------===//
2846// AMDGPUSymbolizer
2847//===----------------------------------------------------------------------===//
2848
2849 // Try to find the symbol name for the specified label.
2850 bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2851 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2852 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2853 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2854
2855 if (!IsBranch) {
2856 return false;
2857 }
2858
2859 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2860 if (!Symbols)
2861 return false;
2862
2863 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2864 return Val.Addr == static_cast<uint64_t>(Value) &&
2865 Val.Type == ELF::STT_NOTYPE;
2866 });
2867 if (Result != Symbols->end()) {
2868 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2869 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2870 Inst.addOperand(MCOperand::createExpr(Add));
2871 return true;
2872 }
2873 // Add to list of referenced addresses, so caller can synthesize a label.
2874 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2875 return false;
2876}
2877
2878 void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2879 int64_t Value,
2880 uint64_t Address) {
2881 llvm_unreachable("unimplemented");
2882}
2883
2884//===----------------------------------------------------------------------===//
2885// Initialization
2886//===----------------------------------------------------------------------===//
2887
2888 static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
2889 LLVMOpInfoCallback /*GetOpInfo*/,
2890 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2891 void *DisInfo,
2892 MCContext *Ctx,
2893 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2894 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2895}
2896
2897 static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2898 const MCSubtargetInfo &STI,
2899 MCContext &Ctx) {
2900 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2901}
2902
2903 extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
2904 LLVMInitializeAMDGPUDisassembler() {
2905 TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
2906 createAMDGPUDisassembler);
2907 TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
2908 createAMDGPUSymbolizer);
2909}