//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?

#include "Disassembler/AMDGPUDisassembler.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDecoder.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"

using namespace llvm;
using namespace llvm::MCD;

#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

static int64_t getInlineImmValF16(unsigned Imm);
static int64_t getInlineImmValBF16(unsigned Imm);
static int64_t getInlineImmVal32(unsigned Imm);
static int64_t getInlineImmVal64(unsigned Imm);

AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx, MCInstrInfo const *MCII)
    : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
      MAI(*Ctx.getAsmInfo()),
      HwModeRegClass(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)),
      TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
    reportFatalUsageError("disassembly not yet supported for subtarget");

  for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
    createConstantSymbolExpr(Symbol, Code);

  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
}

void AMDGPUDisassembler::setABIVersion(unsigned Version) {
  CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(Version);
}

inline static MCDisassembler::DecodeStatus
addOperand(MCInst &Inst, const MCOperand &Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ? MCDisassembler::Success : MCDisassembler::Fail;
}

static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                AMDGPU::OpName Name) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);
  if (OpIdx != -1) {
    auto *I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}

static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

  // Our branches take a simm16.
  int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}

static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
                                     const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  int64_t Offset;
  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else { // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  }
  return addOperand(Inst, MCOperand::createImm(Offset));
}
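
// Illustrative (not from the original source): Imm = 0x1FFFFF decodes to -1 on
// GFX9/GFX10 (21-bit signed), to 0xFFFFF on VI (20-bit unsigned, the top bit
// is masked off), and to 0x1FFFFF on GFX12+ (still positive in 24 bits).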

static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
                                  const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeBoolReg(Inst, Val));
}

static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSplitBarrier(Inst, Val));
}

static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
                                 const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
}

#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }
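
// For reference, a hand-expanded sketch (not in the original source):
// DECODE_OPERAND(decodeSDWASrc32, decodeSDWASrc32) produces a static decoder
// equivalent to:
//   static DecodeStatus decodeSDWASrc32(MCInst &Inst, unsigned Imm, uint64_t,
//                                       const MCDisassembler *Decoder) {
//     auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
//     return addOperand(Inst, DAsm->decodeSDWASrc32(Imm));
//   }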

// Decoder for registers, decoded directly using RegClassID. The 8-bit Imm is
// the register number. Used by VGPR-only and AGPR-only operands.
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }

#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm)                           \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));         \
  }

static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                unsigned OpWidth, unsigned Imm, unsigned EncImm,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));
}

// Decoder for registers. The 7-bit Imm is the register number; decodeSrcOp
// supplies the register class. Used by SGPR-only operands.
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)

#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)

// Decoder for registers. Imm(10-bit): Imm{7-0} is the register number,
// Imm{9} is acc (AGPR or VGPR), and Imm{8} should be 0 (see VOP3Pe_SMFMAC).
// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
// Used by AV_ register classes (AGPR-only or VGPR-only register operands).
template <unsigned OpWidth>
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
                     Decoder);
}

// Decoder for Src (9-bit encoding) registers only.
template <unsigned OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}

// Decoder for Src (9-bit encoding) AGPRs. The register number is encoded in
// 9 bits; set Imm{9} to 1 (set acc) and decode using 'enum10' from
// decodeSrcOp. Registers only.
template <unsigned OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}

// Decoder for 'enum10' from decodeSrcOp: Imm{0-8} is the 9-bit Src encoding
// and Imm{9} is acc. Registers only.
template <unsigned OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
}

// Decoder for RegisterOperands using the 9-bit Src encoding. The operand can
// be a register from RegClass or an immediate. Registers that don't belong to
// RegClass will still be decoded, and the InstPrinter will report a warning.
// An immediate will be decoded into a constant matching the OperandType
// (important for floating-point types).
template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
                                       uint64_t /* Addr */,
                                       const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}

// Decoder for Src (9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set
// acc) and decode using 'enum10' from decodeSrcOp.
template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
                                        uint64_t /* Addr */,
                                        const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}
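
// Summary of the source-operand encodings above (illustrative, reconstructed
// from the decoders rather than copied from hardware documentation):
//   8-bit:  Imm{7-0} is a VGPR or AGPR number relative to its register file.
//   9-bit:  Imm{8} (IS_VGPR) selects a VGPR; otherwise the value is an SGPR,
//           TTMP, special register, inline constant, or literal marker
//           (see decodeSrcOp / decodeNonVGPRSrcOp below).
//   10-bit: Imm{9} ("acc") selects AGPR over VGPR; Imm{8-0} as in 9-bit.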

// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
// when RegisterClass is used as an operand. Most often used for destination
// operands.

DECODE_OPERAND_REG_8(VGPR_32)
DECODE_OPERAND_REG_8(VGPR_32_Lo128)
DECODE_OPERAND_REG_8(VReg_64)
DECODE_OPERAND_REG_8(VReg_96)
DECODE_OPERAND_REG_8(VReg_128)
DECODE_OPERAND_REG_8(VReg_192)
DECODE_OPERAND_REG_8(VReg_256)
DECODE_OPERAND_REG_8(VReg_288)
DECODE_OPERAND_REG_8(VReg_320)
DECODE_OPERAND_REG_8(VReg_352)
DECODE_OPERAND_REG_8(VReg_384)
DECODE_OPERAND_REG_8(VReg_512)
DECODE_OPERAND_REG_8(VReg_1024)

DECODE_OPERAND_SREG_7(SReg_32, 32)
DECODE_OPERAND_SREG_7(SReg_32_XM0, 32)
DECODE_OPERAND_SREG_7(SReg_32_XEXEC, 32)
DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, 32)
DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, 32)
DECODE_OPERAND_SREG_7(SReg_64_XEXEC, 64)
DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, 64)
DECODE_OPERAND_SREG_7(SReg_96, 96)
DECODE_OPERAND_SREG_7(SReg_128, 128)
DECODE_OPERAND_SREG_7(SReg_128_XNULL, 128)
DECODE_OPERAND_SREG_7(SReg_256, 256)
DECODE_OPERAND_SREG_7(SReg_256_XNULL, 256)
DECODE_OPERAND_SREG_7(SReg_512, 512)

DECODE_OPERAND_SREG_8(SReg_64, 64)

DECODE_OPERAND_REG_8(AGPR_32)
DECODE_OPERAND_REG_8(AReg_64)
DECODE_OPERAND_REG_8(AReg_128)
DECODE_OPERAND_REG_8(AReg_256)
DECODE_OPERAND_REG_8(AReg_512)
DECODE_OPERAND_REG_8(AReg_1024)

static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t /*Addr*/,
                                               const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus
DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                 const MCDisassembler *Decoder) {
  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                uint64_t /*Addr*/,
                                                const MCDisassembler *Decoder) {
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
}

template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
}

static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
                                         uint64_t Addr,
                                         const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
}

static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
                                          uint64_t Addr, const void *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
}

static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
                                 const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(Inst, Opw, Imm | 256));
}

template <unsigned Opw>
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 uint64_t /* Addr */,
                                 const MCDisassembler *Decoder) {
  return decodeAVLdSt(Inst, Imm, Opw, Decoder);
}

static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  assert(Imm < (1 << 9) && "9-bit encoding");
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(Inst, 64, Imm));
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
                                     uint64_t /* Addr */,
                                     const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVersionImm(Imm));
}

#include "AMDGPUGenDisassemblerTables.inc"

namespace {
// Define bitwidths for various types used to instantiate the decoder.
template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
} // namespace

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

template <typename InsnType>
DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table, MCInst &MI,
                                               InsnType Inst, uint64_t Address,
                                               raw_ostream &Comments) const {
  assert(MI.getOpcode() == 0);
  assert(MI.getNumOperands() == 0);
  MCInst TmpInst;
  HasLiteral = false;
  const auto SavedBytes = Bytes;

  SmallString<64> LocalComments;
  raw_svector_ostream LocalCommentStream(LocalComments);
  CommentStream = &LocalCommentStream;

  DecodeStatus Res =
      decodeInstruction(Table, TmpInst, Inst, Address, this, STI);

  CommentStream = nullptr;

  if (Res != MCDisassembler::Fail) {
    MI = TmpInst;
    Comments << LocalComments;
    return MCDisassembler::Success;
  }
  Bytes = SavedBytes;
  return MCDisassembler::Fail;
}

template <typename InsnType>
DecodeStatus
AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2,
                                  MCInst &MI, InsnType Inst, uint64_t Address,
                                  raw_ostream &Comments) const {
  for (const uint8_t *T : {Table1, Table2}) {
    if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
      return Res;
  }
  return MCDisassembler::Fail;
}

template <typename T> static inline T eatBytes(ArrayRef<uint8_t> &Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res =
      support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}
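
// Usage sketch (illustrative): given Bytes = {0x01, 0x00, 0x00, 0x00, ...},
// eatBytes<uint32_t>(Bytes) returns 1 (little-endian) and advances Bytes by
// four, so repeated calls walk the instruction stream word by word.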

static inline std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
  using namespace llvm::support::endian;
  assert(Bytes.size() >= 12);
  std::bitset<96> Lo(read<uint64_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  std::bitset<96> Hi(read<uint32_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(4);
  return (Hi << 64) | Lo;
}

static inline std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
  using namespace llvm::support::endian;
  assert(Bytes.size() >= 16);
  std::bitset<128> Lo(read<uint64_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  std::bitset<128> Hi(read<uint64_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  return (Hi << 64) | Lo;
}
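
// Illustrative: both helpers place the earlier dwords in the low bits. For
// eat16Bytes, the byte stream b0..b15 becomes bits [63:0] = b7..b0 and bits
// [127:64] = b15..b8, matching the little-endian instruction encoding.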

void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
                                           const MCInstrInfo &MCII) const {
  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
  for (auto [OpNo, OpDesc] : enumerate(Desc.operands())) {
    if (OpNo >= MI.getNumOperands())
      continue;

    // TODO: Fix V_DUAL_FMAMK_F32_X_FMAAK_F32_gfx12 vsrc operands,
    // defined to take VGPR_32, but in reality allowing inline constants.
    bool IsSrc = AMDGPU::OPERAND_SRC_FIRST <= OpDesc.OperandType &&
                 OpDesc.OperandType <= AMDGPU::OPERAND_SRC_LAST;
    if (!IsSrc && OpDesc.OperandType != MCOI::OPERAND_REGISTER)
      continue;

    MCOperand &Op = MI.getOperand(OpNo);
    if (!Op.isImm())
      continue;
    int64_t Imm = Op.getImm();
    if (AMDGPU::EncValues::INLINE_INTEGER_C_MIN <= Imm &&
        Imm <= AMDGPU::EncValues::INLINE_INTEGER_C_MAX) {
      Op = decodeIntImmed(Imm);
      continue;
    }

    if (Imm == AMDGPU::EncValues::LITERAL_CONST) {
      Op = decodeLiteralConstant(
          Desc, OpDesc, OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64);
      continue;
    }

    if (AMDGPU::EncValues::INLINE_FLOATING_C_MIN <= Imm &&
        Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX) {
      switch (OpDesc.OperandType) {
      case AMDGPU::OPERAND_REG_IMM_BF16:
      case AMDGPU::OPERAND_REG_IMM_V2BF16:
      case AMDGPU::OPERAND_REG_INLINE_C_BF16:
      case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
        Imm = getInlineImmValBF16(Imm);
        break;
      case AMDGPU::OPERAND_REG_IMM_FP16:
      case AMDGPU::OPERAND_REG_IMM_V2FP16:
      case AMDGPU::OPERAND_REG_INLINE_C_FP16:
      case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
        Imm = getInlineImmValF16(Imm);
        break;
      case AMDGPU::OPERAND_REG_IMM_FP64:
      case AMDGPU::OPERAND_REG_IMM_INT64:
      case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      case AMDGPU::OPERAND_REG_INLINE_C_INT64:
        Imm = getInlineImmVal64(Imm);
        break;
      default:
        Imm = getInlineImmVal32(Imm);
      }
      Op.setImm(Imm);
    }
  }
}

DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &CS) const {
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
  // there are fewer bytes left). This will be overridden on success.
  Size = std::min((size_t)4, Bytes_.size());

  do {
    // ToDo: better to switch encoding length using some bit predicate
    // but it is unknown yet, so try all we can

    // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
    // encodings
    if (isGFX1250() && Bytes.size() >= 16) {
      std::bitset<128> DecW = eat16Bytes(Bytes);
      if (tryDecodeInst(DecoderTableGFX1250128, MI, DecW, Address, CS))
        break;
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    if (isGFX11Plus() && Bytes.size() >= 12) {
      std::bitset<96> DecW = eat12Bytes(Bytes);

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX1250() &&
          tryDecodeInst(DecoderTableGFX125096, DecoderTableGFX1250_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
        // Return 8 bytes for a potential literal.
        Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);

        if (isGFX1250() &&
            tryDecodeInst(DecoderTableGFX125096, MI, DecW, Address, CS))
          break;
      }

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);

    } else if (Bytes.size() >= 16 &&
               STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
      std::bitset<128> DecW = eat16Bytes(Bytes);
      if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
        break;

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
          tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
          tryDecodeInst(DecoderTableGFX95064, MI, QW, Address, CS))
        break;

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
      // table first so we print the correct name.
      if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
          tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
          tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
        break;

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
        break;

      if (isGFX1250() &&
          tryDecodeInst(DecoderTableGFX125064, DecoderTableGFX1250_FAKE1664, MI,
                        QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
        break;

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    // Try decode 32-bit instruction
    if (Bytes.size() >= 4) {
      const uint32_t DW = eatBytes<uint32_t>(Bytes);

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
        break;

      if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
          tryDecodeInst(DecoderTableGFX95032, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
                        Address, CS))
        break;

      if (isGFX1250() &&
          tryDecodeInst(DecoderTableGFX125032, DecoderTableGFX1250_FAKE1632, MI,
                        DW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
                        Address, CS))
        break;
    }

    return MCDisassembler::Fail;
  } while (false);

  DecodeStatus Status = MCDisassembler::Success;

  decodeImmOperands(MI, *MCII);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
    if (isMacDPP(MI))
      convertMacDPPInst(MI);

    if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
      convertVOP3PDPPInst(MI);
    else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
      convertVOPCDPPInst(MI); // Special VOP3 case
    else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
      convertVOPC64DPPInst(MI); // Special VOP3 case
    else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
             -1)
      convertDPP8Inst(MI);
    else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
      convertVOP3DPPInst(MI); // Regular VOP3 case
  }

  convertTrue16OpSel(MI);

  if (AMDGPU::isMAC(MI.getOpcode())) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
      !AMDGPU::hasGDS(STI)) {
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);
    if (CPolPos != -1) {
      unsigned CPol =
          (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
              AMDGPU::CPol::GLC : 0;
      if (MI.getNumOperands() <= (unsigned)CPolPos) {
        insertNamedMCOperand(MI, MCOperand::createImm(CPol),
                             AMDGPU::OpName::cpol);
      } else if (CPol) {
        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
      }
    }
  }

  if ((MCII->get(MI.getOpcode()).TSFlags &
       (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) &&
      (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
    // GFX90A lost TFE, its place is occupied by ACC.
    int TFEOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto *TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
      MI.insert(TFEIter, MCOperand::createImm(0));
    }
  }

  // Validate buffer instruction offsets for GFX12+ - must not be negative.
  if (isGFX12Plus() && (MCII->get(MI.getOpcode()).TSFlags &
                        (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
    int OffsetIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);
    if (OffsetIdx != -1) {
      uint32_t Imm = MI.getOperand(OffsetIdx).getImm();
      int64_t SignedOffset = SignExtend64<24>(Imm);
      if (SignedOffset < 0)
        return MCDisassembler::Fail;
    }
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
    int SWZOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto *SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
      MI.insert(SWZIter, MCOperand::createImm(0));
    }
  }

  const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
  if (Desc.TSFlags & SIInstrFlags::MIMG) {
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)
        return MCDisassembler::Fail;
      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
        auto VAddrRCID =
            MCII->getOpRegClassID(Desc.operands()[VAddrIdx], HwModeRegClass);
        MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
      }
      Bytes = Bytes.slice(4 * NSAWords);
    }

    convertMIMGInst(MI);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
    convertMIMGInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
    convertEXPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
    convertVINTERPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
    convertSDWAInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsMAI)
    convertMAIInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsWMMA)
    convertWMMAInst(MI);

  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                                                              MCOI::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
                       !MI.getOperand(VDstIn_Idx).isReg() ||
                       MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
                           MCOperand::createReg(MI.getOperand(Tied).getReg()),
                           AMDGPU::OpName::vdst_in);
    }
  }

  bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
  if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm) && !IsSOPK)
    convertFMAanyK(MI);

  // Some VOPC instructions, e.g., v_cmpx_f_f64, use VOP3 encoding and
  // have EXEC as implicit destination. Issue a warning if encoding for
  // vdst is not EXEC.
  if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) &&
      MCII->get(MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
    auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
    if (Bytes_[0] != ExecEncoding)
      Status = MCDisassembler::SoftFail;
  }

  Size = MaxInstBytesNum - Bytes.size();
  return Status;
}

void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
    // The MCInst still has these fields even though they are no longer encoded
    // in the GFX11 instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
  }
}

void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
    // The MCInst has this field that is not directly encoded in the
    // instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
  }
}

void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
}

/// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the
/// appropriate subregister for the used format width.
static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI,
                                        MCOperand &MO, uint8_t NumRegs) {
  switch (NumRegs) {
  case 4:
    return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
  case 6:
    return MO.setReg(
        MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
  case 8:
    if (MCRegister NewReg = MRI.getSubReg(
            MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {
      MO.setReg(NewReg);
    }
    return;
  case 12: {
    // There is no 384-bit subreg index defined.
    MCRegister BaseReg = MRI.getSubReg(MO.getReg(), AMDGPU::sub0);
    MCRegister NewReg = MRI.getMatchingSuperReg(
        BaseReg, AMDGPU::sub0, &MRI.getRegClass(AMDGPU::VReg_384RegClassID));
    return MO.setReg(NewReg);
  }
  case 16:
    // No-op in cases where one operand is still f8/bf8.
    return;
  default:
    llvm_unreachable("Unexpected size for mfma/wmma f8f6f4 operand");
  }
}

/// f8f6f4 instructions have different pseudos depending on the used formats.
/// In the disassembler table, we only have the variants with the largest
/// register classes, which assume an fp8/bf8 format for both operands. The
/// actual register class depends on the format in the blgp and cbsz operands.
/// Adjust the register classes depending on the used format.
void AMDGPUDisassembler::convertMAIInst(MCInst &MI) const {
  int BlgpIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);
  if (BlgpIdx == -1)
    return;

  int CbszIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);

  unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
  unsigned BLGP = MI.getOperand(BlgpIdx).getImm();

  const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
      AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, MI.getOpcode());
  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  int Src0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
  int Src1Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
                              AdjustedRegClassOpcode->NumRegsSrcA);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
                              AdjustedRegClassOpcode->NumRegsSrcB);
}

void AMDGPUDisassembler::convertWMMAInst(MCInst &MI) const {
  int FmtAIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_a_fmt);
  if (FmtAIdx == -1)
    return;

  int FmtBIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_b_fmt);

  unsigned FmtA = MI.getOperand(FmtAIdx).getImm();
  unsigned FmtB = MI.getOperand(FmtBIdx).getImm();

  const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
      AMDGPU::getWMMA_F8F6F4_WithFormatArgs(FmtA, FmtB, MI.getOpcode());
  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  int Src0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
  int Src1Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
                              AdjustedRegClassOpcode->NumRegsSrcA);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
                              AdjustedRegClassOpcode->NumRegsSrcB);
}

struct VOPModifiers {
  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;
};

// Reconstruct values of VOP3/VOP3P operands such as op_sel.
// Note that these values do not affect disassembler output,
// so this is only necessary for consistency with src_modifiers.
static VOPModifiers collectVOPModifiers(const MCInst &MI,
                                        bool IsVOP3P = false) {
  VOPModifiers Modifiers;
  unsigned Opc = MI.getOpcode();
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    if (OpIdx == -1)
      continue;

    unsigned Val = MI.getOperand(OpIdx).getImm();

    Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
    if (IsVOP3P) {
      Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
      Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
      Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
    } else if (J == 0) {
      Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
    }
  }

  return Modifiers;
}
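
// Illustrative: if src0_modifiers and src2_modifiers both have OP_SEL_0 set,
// the loop above yields Modifiers.OpSel = 0b101; for non-VOP3P instructions,
// bit 3 additionally mirrors DST_OP_SEL taken from src0_modifiers.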

// Instructions decode the op_sel/suffix bits into the src_modifier
// operands. Copy those bits into the src operands for true16 VGPRs.
void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MCRegisterClass &ConversionRC =
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
  constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
      OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::DST_OP_SEL}}};
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
    int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
    if (OpIdx == -1 || OpModsIdx == -1)
      continue;
    MCOperand &Op = MI.getOperand(OpIdx);
    if (!Op.isReg())
      continue;
    if (!ConversionRC.contains(Op.getReg()))
      continue;
    unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) { // isHi
      unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
      Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
    }
  }
}

// MAC opcodes have special old and src2 operands.
// src2 is tied to dst, while old is not tied (but assumed to be).
bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  if (OldIdx != -1 && Desc.getOperandConstraint(
                          OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
    assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
    assert(Desc.getOperandConstraint(
               AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
               MCOI::OperandConstraint::TIED_TO) == DST_IDX);
    (void)DST_IDX;
    return true;
  }

  return false;
}

// Create dummy old operand and insert dummy unused src2_modifiers
void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
  insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
  insertNamedMCOperand(MI, MCOperand::createImm(0),
                       AMDGPU::OpName::src2_modifiers);
}

void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  } else {
    // Insert dummy unused src modifiers.
    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src0_modifiers);

    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src1_modifiers);
  }
}

void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
  convertTrue16OpSel(MI);

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}

// Given a wide tuple \p Reg, check whether it overflows the 256-register file.
// \returns \p Reg on success or NoRegister otherwise.
static unsigned CheckVGPROverflow(unsigned Reg, const MCRegisterClass &RC,
                                  const MCRegisterInfo &MRI) {
  unsigned NumRegs = RC.getSizeInBits() / 32;
  MCRegister Sub0 = MRI.getSubReg(Reg, AMDGPU::sub0);
  if (!Sub0)
    return Reg;

  MCRegister BaseReg;
  if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(Sub0))
    BaseReg = AMDGPU::VGPR0;
  else if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Sub0))
    BaseReg = AMDGPU::AGPR0;

  assert(BaseReg && "Only vector registers expected");

  return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : AMDGPU::NoRegister;
}
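
// Illustrative: a 128-bit tuple starting at v254 spans v254..v257, so
// Sub0 - BaseReg + NumRegs = 254 + 4 = 258 > 256 and the function returns
// NoRegister; the same tuple starting at v252 (252 + 4 = 256) is accepted.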

// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show address as if it
// has 1 dword, which may not actually be the case.
void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
                                  ? AMDGPU::OpName::srsrc
                                  : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  assert(VDataIdx != -1);
  if (BaseOpcode->BVH) {
    // Add A16 operand for intersect_ray instructions
    addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
    return;
  }

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
  bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
  bool IsNSA = false;
  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (isGFX10Plus()) {
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    AddrSize =
        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));

    // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
    // VIMAGE insts other than BVH never use vaddr4.
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsNSA) {
      if (!IsVSample && AddrSize > 12)
        AddrSize = 16;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
          // The NSA encoding does not contain enough operands for the
          // combination of base opcode / dimension. Should this be an error?
          return;
        }
        IsPartialNSA = true;
      }
    }
  }

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
    DstSize += 1;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return;

  int NewOpcode =
      AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
  if (NewOpcode == -1)
    return;

  // Widen the register to the correct number of enabled channels.
  MCRegister NewVdata;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->getOpRegClassID(
        MCII->get(NewOpcode).operands()[VDataIdx], HwModeRegClass);

    // Get first subregister of VData
    MCRegister Vdata0 = MI.getOperand(VDataIdx).getReg();
    MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

    const MCRegisterClass &NewRC = MRI.getRegClass(DataRCID);
    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &NewRC);
    NewVdata = CheckVGPROverflow(NewVdata, NewRC, MRI);
    if (!NewVdata) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
      return;
    }
  }

  // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
  // If using partial NSA on GFX11+ widen last address register.
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  MCRegister NewVAddrSA;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->getOpRegClassID(
        MCII->get(NewOpcode).operands()[VAddrSAIdx], HwModeRegClass);

    const MCRegisterClass &NewRC = MRI.getRegClass(AddrRCID);
    NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &NewRC);
    NewVAddrSA = CheckVGPROverflow(NewVAddrSA, NewRC, MRI);
    if (!NewVAddrSA)
      return;
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data)
      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
    }
  }

  if (NewVAddrSA) {
    MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
  } else if (IsNSA) {
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }
}

// Opsel and neg bits are used in src_modifiers and standalone operands. The
// auto-generated decoder only adds to src_modifiers, so manually add the bits
// to the other operands.
void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  auto Mods = collectVOPModifiers(MI, true);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
                         AMDGPU::OpName::neg_hi);
}

// Create dummy old operand and insert optional operands
void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
    insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);
}

void AMDGPUDisassembler::convertVOPC64DPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  convertTrue16OpSel(MI);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}

void AMDGPUDisassembler::convertFMAanyK(MCInst &MI) const {
  assert(HasLiteral && "Should have decoded a literal");
  insertNamedMCOperand(MI, MCOperand::createImm(Literal), AMDGPU::OpName::immX);
}

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accept as much as we can; let the assembler sort it out
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
                                                  bool IsHi) const {
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
  return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
}

// Decode Literals for insts which always have a literal in the encoding
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
  if (HasLiteral) {
    assert(
        AMDGPU::hasVOPD(STI) &&
        "Should only decode multiple kimm with VOPD, check VSrc operand types");
    if (Literal != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;
  return MCOperand::createImm(Literal);
}

MCOperand
AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const {
  if (HasLiteral) {
    if (Literal64 != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Literal64 = Val;

  bool UseLit64 = Hi_32(Literal64) == 0;
  return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
                        LitModifier::Lit64, Literal64, getContext()))
                  : MCOperand::createImm(Literal64);
}

MCOperand AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
                                                    const MCOperandInfo &OpDesc,
                                                    bool ExtendFP64) const {
  // For now all literal constants are supposed to be unsigned integers.
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                           Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = Literal64 = eatBytes<uint32_t>(Bytes);
    if (ExtendFP64)
      Literal64 <<= 32;
  }

  int64_t Val = ExtendFP64 ? Literal64 : Literal;

  bool CanUse64BitLiterals =
      STI.hasFeature(AMDGPU::Feature64BitLiterals) &&
      !(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));

  bool UseLit64 = false;
  if (CanUse64BitLiterals) {
    if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 ||
        OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT64)
      UseLit64 = false;
    else if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64 ||
             OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP64 ||
             OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP64)
      UseLit64 = Hi_32(Literal64) == 0;
  }

  return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
                        LitModifier::Lit64, Val, getContext()))
                  : MCOperand::createImm(Val);
}

MCOperand
AMDGPUDisassembler::decodeLiteral64Constant(const MCInst &Inst) const {
  assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));

  if (!HasLiteral) {
    if (Bytes.size() < 8) {
      return errOperand(0, "cannot read literal64, inst bytes left " +
                           Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal64 = eatBytes<uint64_t>(Bytes);
  }

  bool UseLit64 = false;
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  const MCOperandInfo &OpDesc = Desc.operands()[Inst.getNumOperands()];
  if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 ||
      OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT64) {
    UseLit64 = false;
  } else {
    assert(OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64 ||
           OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP64 ||
           OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP64);
    UseLit64 = Hi_32(Literal64) == 0;
  }

  return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
                        LitModifier::Lit64, Literal64, getContext()))
                  : MCOperand::createImm(Literal64);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  // The casts prevent negative overflow.
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
}
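
// Worked example (illustrative): with INLINE_INTEGER_C_MIN = 128 and
// INLINE_INTEGER_C_POSITIVE_MAX = 192, Imm = 129 decodes to 1 and
// Imm = 193 decodes to 192 - 193 = -1, covering the inline range [-16, 64].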

static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint32_t>(0.5f);
  case 241:
    return llvm::bit_cast<uint32_t>(-0.5f);
  case 242:
    return llvm::bit_cast<uint32_t>(1.0f);
  case 243:
    return llvm::bit_cast<uint32_t>(-1.0f);
  case 244:
    return llvm::bit_cast<uint32_t>(2.0f);
  case 245:
    return llvm::bit_cast<uint32_t>(-2.0f);
  case 246:
    return llvm::bit_cast<uint32_t>(4.0f);
  case 247:
    return llvm::bit_cast<uint32_t>(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint64_t>(0.5);
  case 241:
    return llvm::bit_cast<uint64_t>(-0.5);
  case 242:
    return llvm::bit_cast<uint64_t>(1.0);
  case 243:
    return llvm::bit_cast<uint64_t>(-1.0);
  case 244:
    return llvm::bit_cast<uint64_t>(2.0);
  case 245:
    return llvm::bit_cast<uint64_t>(-2.0);
  case 246:
    return llvm::bit_cast<uint64_t>(4.0);
  case 247:
    return llvm::bit_cast<uint64_t>(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3800;
  case 241:
    return 0xB800;
  case 242:
    return 0x3C00;
  case 243:
    return 0xBC00;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4400;
  case 247:
    return 0xC400;
  case 248: // 1 / (2 * PI)
    return 0x3118;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValBF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3F00;
  case 241:
    return 0xBF00;
  case 242:
    return 0x3F80;
  case 243:
    return 0xBF80;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4080;
  case 247:
    return 0xC080;
  case 248: // 1 / (2 * PI)
    return 0x3E22;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}
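
// Illustrative cross-check: encoding 242 (+1.0) yields the same value in each
// width's native bit pattern: bit_cast of 1.0f (0x3F800000) for 32-bit,
// bit_cast of 1.0 for 64-bit, 0x3C00 for f16, and 0x3F80 for bf16.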

unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return VGPR_32RegClassID;
  case 64:
    return VReg_64RegClassID;
  case 96:
    return VReg_96RegClassID;
  case 128:
    return VReg_128RegClassID;
  case 160:
    return VReg_160RegClassID;
  case 192:
    return VReg_192RegClassID;
  case 256:
    return VReg_256RegClassID;
  case 288:
    return VReg_288RegClassID;
  case 320:
    return VReg_320RegClassID;
  case 352:
    return VReg_352RegClassID;
  case 384:
    return VReg_384RegClassID;
  case 512:
    return VReg_512RegClassID;
  case 1024:
    return VReg_1024RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}

unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return AGPR_32RegClassID;
  case 64:
    return AReg_64RegClassID;
  case 96:
    return AReg_96RegClassID;
  case 128:
    return AReg_128RegClassID;
  case 160:
    return AReg_160RegClassID;
  case 256:
    return AReg_256RegClassID;
  case 288:
    return AReg_288RegClassID;
  case 320:
    return AReg_320RegClassID;
  case 352:
    return AReg_352RegClassID;
  case 384:
    return AReg_384RegClassID;
  case 512:
    return AReg_512RegClassID;
  case 1024:
    return AReg_1024RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}

unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return SGPR_32RegClassID;
  case 64:
    return SGPR_64RegClassID;
  case 96:
    return SGPR_96RegClassID;
  case 128:
    return SGPR_128RegClassID;
  case 160:
    return SGPR_160RegClassID;
  case 256:
    return SGPR_256RegClassID;
  case 288:
    return SGPR_288RegClassID;
  case 320:
    return SGPR_320RegClassID;
  case 352:
    return SGPR_352RegClassID;
  case 384:
    return SGPR_384RegClassID;
  case 512:
    return SGPR_512RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}

unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return TTMP_32RegClassID;
  case 64:
    return TTMP_64RegClassID;
  case 128:
    return TTMP_128RegClassID;
  case 256:
    return TTMP_256RegClassID;
  case 288:
    return TTMP_288RegClassID;
  case 320:
    return TTMP_320RegClassID;
  case 352:
    return TTMP_352RegClassID;
  case 384:
    return TTMP_384RegClassID;
  case 512:
    return TTMP_512RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}

int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
}
1874
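// Illustrative note (editor addition, not part of the original file):
// getTTmpIdx maps the raw encoding window onto zero-based ttmp indices. On
// GFX9+, TTMP_GFX9PLUS_MIN decodes to ttmp0 and TTMP_GFX9PLUS_MIN + 5 to
// ttmp5; any value outside [TTmpMin, TTmpMax] yields -1 so callers can fall
// through to other operand kinds.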
1875 MCOperand AMDGPUDisassembler::decodeSrcOp(const MCInst &Inst, unsigned Width,
1876 unsigned Val) const {
1877 using namespace AMDGPU::EncValues;
1878
1879 assert(Val < 1024); // enum10
1880
1881 bool IsAGPR = Val & 512;
1882 Val &= 511;
1883
1884 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1885 return createRegOperand(IsAGPR ? getAgprClassId(Width)
1886 : getVgprClassId(Width), Val - VGPR_MIN);
1887 }
1888 return decodeNonVGPRSrcOp(Inst, Width, Val & 0xFF);
1889}
1890
1891 MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const MCInst &Inst,
1892 unsigned Width,
1893 unsigned Val) const {
1894 // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
1895 // decoded earlier.
1896 assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1897 using namespace AMDGPU::EncValues;
1898
1899 if (Val <= SGPR_MAX) {
1900 // "SGPR_MIN <= Val" is always true and causes compilation warning.
1901 static_assert(SGPR_MIN == 0);
1902 return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
1903 }
1904
1905 int TTmpIdx = getTTmpIdx(Val);
1906 if (TTmpIdx >= 0) {
1907 return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
1908 }
1909
1910 if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
1911 (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
1912 Val == LITERAL_CONST)
1913 return MCOperand::createImm(Val);
1914
1915 if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
1916 return decodeLiteral64Constant(Inst);
1917 }
1918
1919 switch (Width) {
1920 case 32:
1921 case 16:
1922 return decodeSpecialReg32(Val);
1923 case 64:
1924 return decodeSpecialReg64(Val);
1925 case 96:
1926 case 128:
1927 case 256:
1928 case 512:
1929 return decodeSpecialReg96Plus(Val);
1930 default:
1931 llvm_unreachable("unexpected immediate type");
1932 }
1933}
1934
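// Illustrative note (editor addition, not part of the original file): the
// 9-bit source encoding decoded above fans out roughly as follows for a
// 32-bit operand (exact bounds live in AMDGPU::EncValues):
//   bit 8 set      -> VGPR or AGPR file (handled first, in decodeSrcOp)
//   0 .. SGPR_MAX  -> SGPRs
//   ttmp window    -> trap temporaries via getTTmpIdx
//   inline ranges  -> integer/float inline constants, kept as plain imms
//   LITERAL_CONST  -> a literal dword follows the instruction
//   anything else  -> special registers (vcc, exec, m0, ...) decoded below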
1935// Bit 0 of DstY isn't stored in the instruction, because it's always the
1936// opposite of bit 0 of DstX.
1937 MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
1938 unsigned Val) const {
1939 int VDstXInd =
1940 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
1941 assert(VDstXInd != -1);
1942 assert(Inst.getOperand(VDstXInd).isReg());
1943 unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
1944 Val |= ~XDstReg & 1;
1945 return createRegOperand(getVgprClassId(32), Val);
1946}
1947
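// Worked example (editor addition, not part of the original file): if vdstX
// decoded to v4 (encoding 4, bit 0 clear), then Val |= ~4 & 1 forces bit 0 of
// DstY on, so an encoded Val of 6 becomes v7; with vdstX = v5 (bit 0 set),
// ~5 & 1 == 0 and the same Val stays even, decoding to v6.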
1948 MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
1949 using namespace AMDGPU;
1950
1951 switch (Val) {
1952 // clang-format off
1953 case 102: return createRegOperand(FLAT_SCR_LO);
1954 case 103: return createRegOperand(FLAT_SCR_HI);
1955 case 104: return createRegOperand(XNACK_MASK_LO);
1956 case 105: return createRegOperand(XNACK_MASK_HI);
1957 case 106: return createRegOperand(VCC_LO);
1958 case 107: return createRegOperand(VCC_HI);
1959 case 108: return createRegOperand(TBA_LO);
1960 case 109: return createRegOperand(TBA_HI);
1961 case 110: return createRegOperand(TMA_LO);
1962 case 111: return createRegOperand(TMA_HI);
1963 case 124:
1964 return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
1965 case 125:
1966 return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
1967 case 126: return createRegOperand(EXEC_LO);
1968 case 127: return createRegOperand(EXEC_HI);
1969 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
1970 case 231: return createRegOperand(SRC_FLAT_SCRATCH_BASE_HI);
1971 case 235: return createRegOperand(SRC_SHARED_BASE_LO);
1972 case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
1973 case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
1974 case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
1975 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1976 case 251: return createRegOperand(SRC_VCCZ);
1977 case 252: return createRegOperand(SRC_EXECZ);
1978 case 253: return createRegOperand(SRC_SCC);
1979 case 254: return createRegOperand(LDS_DIRECT);
1980 default: break;
1981 // clang-format on
1982 }
1983 return errOperand(Val, "unknown operand encoding " + Twine(Val));
1984}
1985
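// Illustrative note (editor addition, not part of the original file):
// encodings 124 and 125 swapped meanings at GFX11. Pre-GFX11, 124 is m0 and
// 125 is null; on GFX11+ it is the other way around, so the same bit pattern
// must be decoded against the subtarget, as the two cases above do.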
1986 MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
1987 using namespace AMDGPU;
1988
1989 switch (Val) {
1990 case 102: return createRegOperand(FLAT_SCR);
1991 case 104: return createRegOperand(XNACK_MASK);
1992 case 106: return createRegOperand(VCC);
1993 case 108: return createRegOperand(TBA);
1994 case 110: return createRegOperand(TMA);
1995 case 124:
1996 if (isGFX11Plus())
1997 return createRegOperand(SGPR_NULL);
1998 break;
1999 case 125:
2000 if (!isGFX11Plus())
2001 return createRegOperand(SGPR_NULL);
2002 break;
2003 case 126: return createRegOperand(EXEC);
2004 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
2005 case 235: return createRegOperand(SRC_SHARED_BASE);
2006 case 236: return createRegOperand(SRC_SHARED_LIMIT);
2007 case 237: return createRegOperand(SRC_PRIVATE_BASE);
2008 case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
2009 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2010 case 251: return createRegOperand(SRC_VCCZ);
2011 case 252: return createRegOperand(SRC_EXECZ);
2012 case 253: return createRegOperand(SRC_SCC);
2013 default: break;
2014 }
2015 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2016}
2017
2018 MCOperand AMDGPUDisassembler::decodeSpecialReg96Plus(unsigned Val) const {
2019 using namespace AMDGPU;
2020
2021 switch (Val) {
2022 case 124:
2023 if (isGFX11Plus())
2024 return createRegOperand(SGPR_NULL);
2025 break;
2026 case 125:
2027 if (!isGFX11Plus())
2028 return createRegOperand(SGPR_NULL);
2029 break;
2030 default:
2031 break;
2032 }
2033 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2034}
2035
2036 MCOperand AMDGPUDisassembler::decodeSDWASrc(unsigned Width,
2037 const unsigned Val) const {
2038 using namespace AMDGPU::SDWA;
2039 using namespace AMDGPU::EncValues;
2040
2041 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
2042 STI.hasFeature(AMDGPU::FeatureGFX10)) {
2043 // XXX: cast to int is needed to avoid stupid warning:
2044 // compare with unsigned is always true
2045 if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
2046 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
2047 return createRegOperand(getVgprClassId(Width),
2048 Val - SDWA9EncValues::SRC_VGPR_MIN);
2049 }
2050 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
2051 Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
2052 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
2053 return createSRegOperand(getSgprClassId(Width),
2054 Val - SDWA9EncValues::SRC_SGPR_MIN);
2055 }
2056 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
2057 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
2058 return createSRegOperand(getTtmpClassId(Width),
2059 Val - SDWA9EncValues::SRC_TTMP_MIN);
2060 }
2061
2062 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
2063
2064 if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
2065 (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
2066 return MCOperand::createImm(SVal);
2067
2068 return decodeSpecialReg32(SVal);
2069 }
2070 if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
2071 return createRegOperand(getVgprClassId(Width), Val);
2072 llvm_unreachable("unsupported target");
2073}
2074
2075 MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
2076 return decodeSDWASrc(16, Val);
2077}
2078
2079 MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
2080 return decodeSDWASrc(32, Val);
2081}
2082
2083 MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
2084 using namespace AMDGPU::SDWA;
2085
2086 assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
2087 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
2088 "SDWAVopcDst should be present only on GFX9+");
2089
2090 bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);
2091
2092 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
2093 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
2094
2095 int TTmpIdx = getTTmpIdx(Val);
2096 if (TTmpIdx >= 0) {
2097 auto TTmpClsId = getTtmpClassId(IsWave32 ? 32 : 64);
2098 return createSRegOperand(TTmpClsId, TTmpIdx);
2099 }
2100 if (Val > SGPR_MAX) {
2101 return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
2102 }
2103 return createSRegOperand(getSgprClassId(IsWave32 ? 32 : 64), Val);
2104 }
2105 return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
2106}
2107
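// Illustrative note (editor addition, not part of the original file): a VOPC
// destination with VOPC_DST_VCC_MASK clear is the implicit vcc (vcc_lo under
// wave32). With the mask set, the remaining bits name an explicit SGPR or
// ttmp destination, sized 32 or 64 bits to match the wavefront so the whole
// compare mask fits.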
2108 MCOperand AMDGPUDisassembler::decodeBoolReg(const MCInst &Inst,
2109 unsigned Val) const {
2110 return STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
2111 ? decodeSrcOp(Inst, 32, Val)
2112 : decodeSrcOp(Inst, 64, Val);
2113}
2114
2115 MCOperand AMDGPUDisassembler::decodeSplitBarrier(const MCInst &Inst,
2116 unsigned Val) const {
2117 return decodeSrcOp(Inst, 32, Val);
2118}
2119
2120 MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
2121 if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
2122 return MCOperand();
2123 return MCOperand::createImm(Val);
2124}
2125
2126 MCOperand AMDGPUDisassembler::decodeVersionImm(unsigned Imm) const {
2127 using VersionField = AMDGPU::EncodingField<7, 0>;
2128 using W64Bit = AMDGPU::EncodingBit<13>;
2129 using W32Bit = AMDGPU::EncodingBit<14>;
2130 using MDPBit = AMDGPU::EncodingBit<15>;
2131 using Encoding = AMDGPU::EncodingFields<VersionField, W64Bit, W32Bit, MDPBit>;
2132
2133 auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
2134
2135 // Decode into a plain immediate if any unused bits are set.
2136 if (Encoding::encode(Version, W64, W32, MDP) != Imm)
2137 return MCOperand::createImm(Imm);
2138
2139 const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
2140 const auto *I = find_if(
2141 Versions, [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
2142 return V.Code == Version;
2143 });
2144 MCContext &Ctx = getContext();
2145 const MCExpr *E;
2146 if (I == Versions.end())
2147 E = MCConstantExpr::create(Version, Ctx);
2148 else
2149 E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
2150
2151 if (W64)
2152 E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
2153 if (W32)
2154 E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
2155 if (MDP)
2156 E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
2157
2158 return MCOperand::createExpr(E);
2159}
2160
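// Worked example (editor addition, not part of the original file; the
// version code 9 is a made-up value): an s_version immediate of 0x2009
// decodes as VersionField = 9 with W64 set, printing as the symbolic
// expression UC_VERSION_<gfx> | UC_VERSION_W64_BIT. An immediate such as
// 0x0109 sets a bit outside the decoded fields, fails the encode()
// round-trip above, and is emitted as a plain literal instead.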
2161 bool AMDGPUDisassembler::isVI() const {
2162 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2163}
2164
2165 bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
2166
2167 bool AMDGPUDisassembler::isGFX90A() const {
2168 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2169}
2170
2171 bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
2172
2173 bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
2174
2175 bool AMDGPUDisassembler::isGFX10Plus() const {
2176 return AMDGPU::isGFX10Plus(STI);
2177 }
2178
2179 bool AMDGPUDisassembler::isGFX11() const {
2180 return STI.hasFeature(AMDGPU::FeatureGFX11);
2181}
2182
2183 bool AMDGPUDisassembler::isGFX11Plus() const {
2184 return AMDGPU::isGFX11Plus(STI);
2185 }
2186
2187 bool AMDGPUDisassembler::isGFX12() const {
2188 return STI.hasFeature(AMDGPU::FeatureGFX12);
2189}
2190
2191 bool AMDGPUDisassembler::isGFX12Plus() const {
2192 return AMDGPU::isGFX12Plus(STI);
2193 }
2194
2195 bool AMDGPUDisassembler::isGFX1250() const { return AMDGPU::isGFX1250(STI); }
2196
2197 bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
2198 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2199}
2200
2201 bool AMDGPUDisassembler::hasKernargPreload() const {
2202 return AMDGPU::hasKernargPreload(STI);
2203 }
2204
2205//===----------------------------------------------------------------------===//
2206// AMDGPU specific symbol handling
2207//===----------------------------------------------------------------------===//
2208
2209/// Print a string describing the reserved bit range specified by Mask with
2210/// offset BaseBytes for use in error comments. Mask is a single continuous
2211/// range of 1s surrounded by zeros. The format here is meant to align with the
2212/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
2213static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
2214 SmallString<32> Result;
2215 raw_svector_ostream S(Result);
2216
2217 int TrailingZeros = llvm::countr_zero(Mask);
2218 int PopCount = llvm::popcount(Mask);
2219
2220 if (PopCount == 1) {
2221 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2222 } else {
2223 S << "bits in range ("
2224 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
2225 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2226 }
2227
2228 return Result;
2229}
2230
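// Illustrative sketch (editor addition, not part of the original file;
// assumes <cassert>): the formatting produced by getBitRangeFromMask above.
static void checkBitRangeFormatting() {
  assert(getBitRangeFromMask(0x00000400u, 0).str() == "bit (10)");
  // Two bits at offset 6 in the dword starting at byte 4 -> bits 39:38.
  assert(getBitRangeFromMask(0x000000c0u, 4).str() == "bits in range (39:38)");
}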
2231#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
2232#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2233 do { \
2234 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
2235 } while (0)
2236#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
2237 do { \
2238 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
2239 << GET_FIELD(MASK) << '\n'; \
2240 } while (0)
2241
2242#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
2243 do { \
2244 if (FourByteBuffer & (MASK)) { \
2245 return createStringError(std::errc::invalid_argument, \
2246 "kernel descriptor " DESC \
2247 " reserved %s set" MSG, \
2248 getBitRangeFromMask((MASK), 0).c_str()); \
2249 } \
2250 } while (0)
2251
2252#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
2253#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
2254 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
2255#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
2256 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
2257#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
2258 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2259
2260// NOLINTNEXTLINE(readability-identifier-naming)
2261 Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
2262 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2263 using namespace amdhsa;
2264 StringRef Indent = "\t";
2265
2266 // We cannot accurately backward compute #VGPRs used from
2267 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
2268 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
2269 // simply calculate the inverse of what the assembler does.
2270
2271 uint32_t GranulatedWorkitemVGPRCount =
2272 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
2273
2274 uint32_t NextFreeVGPR =
2275 (GranulatedWorkitemVGPRCount + 1) *
2276 AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
2277
2278 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
2279
2280 // We cannot backward compute values used to calculate
2281 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for the
2282 // following directives can't be computed:
2283 // .amdhsa_reserve_vcc
2284 // .amdhsa_reserve_flat_scratch
2285 // .amdhsa_reserve_xnack_mask
2286 // They take their respective default values if not specified in the assembly.
2287 //
2288 // GRANULATED_WAVEFRONT_SGPR_COUNT
2289 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
2290 //
2291 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
2292 // are set to 0. So while disassembling we consider that:
2293 //
2294 // GRANULATED_WAVEFRONT_SGPR_COUNT
2295 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
2296 //
2297 // The disassembler cannot recover the original values of those 3 directives.
2298
2299 uint32_t GranulatedWavefrontSGPRCount =
2300 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
2301
2302 if (isGFX10Plus())
2303 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2304 "must be zero on gfx10+");
2305
2306 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
2307 AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
2308
2309 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
2310 if (!hasArchitectedFlatScratch())
2311 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
2312 bool ReservedXnackMask = STI.hasFeature(AMDGPU::FeatureXNACK);
2313 assert(!ReservedXnackMask || STI.hasFeature(AMDGPU::FeatureSupportsXNACK));
2314 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << ReservedXnackMask
2315 << '\n';
2316 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
2317
2318 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
2319
2320 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
2321 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
2322 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
2323 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
2324 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
2325 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
2326 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
2327 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
2328
2329 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
2330
2331 if (!isGFX12Plus())
2332 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
2333 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
2334
2335 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
2336
2337 if (!isGFX12Plus())
2338 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
2339 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
2340
2341 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
2342 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
2343
2344 // Bits [26].
2345 if (isGFX9Plus()) {
2346 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
2347 } else {
2348 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
2349 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
2350 }
2351
2352 // Bits [27].
2353 if (isGFX1250()) {
2354 PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
2355 COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
2356 } else {
2357 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX120_RESERVED1,
2358 "COMPUTE_PGM_RSRC1");
2359 }
2360
2361 // Bits [28].
2362 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED2, "COMPUTE_PGM_RSRC1");
2363
2364 // Bits [29-31].
2365 if (isGFX10Plus()) {
2366 // WGP_MODE is not available on GFX1250.
2367 if (!isGFX1250()) {
2368 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
2369 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
2370 }
2371 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
2372 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
2373 } else {
2374 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED3,
2375 "COMPUTE_PGM_RSRC1");
2376 }
2377
2378 if (isGFX12Plus())
2379 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
2380 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2381
2382 return true;
2383}
2384
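// Worked example (editor addition, not part of the original file; a granule
// of 4 is typical for wave64 targets): GRANULATED_WORKITEM_VGPR_COUNT = 10
// prints as .amdhsa_next_free_vgpr (10 + 1) * 4 = 44. Reassembling 44 gives
// ceil(44 / 4) - 1 = 10 again, although the kernel's true count may have
// been anywhere in 41..44: the field round-trips, not the original value.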
2385// NOLINTNEXTLINE(readability-identifier-naming)
2386 Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
2387 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2388 using namespace amdhsa;
2389 StringRef Indent = "\t";
2390 if (hasArchitectedFlatScratch())
2391 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2392 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2393 else
2394 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2395 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2396 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2397 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2398 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2399 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2400 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2401 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2402 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2403 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2404 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2405 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2406
2407 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2408 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2409 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2410
2411 PRINT_DIRECTIVE(
2412 ".amdhsa_exception_fp_ieee_invalid_op",
2413 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2414 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2415 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2416 PRINT_DIRECTIVE(
2417 ".amdhsa_exception_fp_ieee_div_zero",
2418 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2419 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2420 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2421 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2422 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2423 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2424 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2425 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2426 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2427
2428 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2429
2430 return true;
2431}
2432
2433// NOLINTNEXTLINE(readability-identifier-naming)
2434 Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
2435 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2436 using namespace amdhsa;
2437 StringRef Indent = "\t";
2438 if (isGFX90A()) {
2439 KdStream << Indent << ".amdhsa_accum_offset "
2440 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2441 << '\n';
2442
2443 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2444
2445 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2446 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2447 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2448 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2449 } else if (isGFX10Plus()) {
2450 // Bits [0-3].
2451 if (!isGFX12Plus()) {
2452 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2453 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2454 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2455 } else {
2456 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2457 "SHARED_VGPR_COUNT",
2458 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2459 }
2460 } else {
2461 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2462 "COMPUTE_PGM_RSRC3",
2463 "must be zero on gfx12+");
2464 }
2465
2466 // Bits [4-11].
2467 if (isGFX11()) {
2468 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2469 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2470 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2471 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2472 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2473 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2474 } else if (isGFX12Plus()) {
2475 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2476 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2477 } else {
2478 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2479 "COMPUTE_PGM_RSRC3",
2480 "must be zero on gfx10");
2481 }
2482
2483 // Bits [12].
2484 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2485 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2486
2487 // Bits [13].
2488 if (isGFX12Plus()) {
2489 PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
2490 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2491 } else {
2492 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2493 "COMPUTE_PGM_RSRC3",
2494 "must be zero on gfx10 or gfx11");
2495 }
2496
2497 // Bits [14-21].
2498 if (isGFX1250()) {
2499 PRINT_DIRECTIVE(".amdhsa_named_barrier_count",
2500 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
2501 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2502 "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
2503 PRINT_PSEUDO_DIRECTIVE_COMMENT("TCP_SPLIT",
2504 COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
2505 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2506 "ENABLE_DIDT_THROTTLE",
2507 COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
2508 } else {
2509 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX120_RESERVED4,
2510 "COMPUTE_PGM_RSRC3",
2511 "must be zero on gfx10+");
2512 }
2513
2514 // Bits [22-30].
2515 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED5,
2516 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2517
2518 // Bits [31].
2519 if (isGFX11Plus()) {
2520 PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
2521 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2522 } else {
2523 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED6,
2524 "COMPUTE_PGM_RSRC3",
2525 "must be zero on gfx10");
2526 }
2527 } else if (FourByteBuffer) {
2528 return createStringError(
2529 std::errc::invalid_argument,
2530 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2531 }
2532 return true;
2533}
2534#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2535#undef PRINT_DIRECTIVE
2536#undef GET_FIELD
2537#undef CHECK_RESERVED_BITS_IMPL
2538#undef CHECK_RESERVED_BITS
2539#undef CHECK_RESERVED_BITS_MSG
2540#undef CHECK_RESERVED_BITS_DESC
2541#undef CHECK_RESERVED_BITS_DESC_MSG
2542
2543/// Create an error object to return from onSymbolStart for reserved kernel
2544/// descriptor bits being set.
2545static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2546 const char *Msg = "") {
2547 return createStringError(
2548 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2549 getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2550}
2551
2552/// Create an error object to return from onSymbolStart for reserved kernel
2553/// descriptor bytes being set.
2554static Error createReservedKDBytesError(unsigned BaseInBytes,
2555 unsigned WidthInBytes) {
2556 // Create an error comment in the same format as the "Kernel Descriptor"
2557 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2558 return createStringError(
2559 std::errc::invalid_argument,
2560 "kernel descriptor reserved bits in range (%u:%u) set",
2561 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2562}
2563
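// Worked example (editor addition, not part of the original file; assumes
// the RESERVED1 field starts at kernel-descriptor byte 24):
// createReservedKDBytesError(24, 20) formats as
// "kernel descriptor reserved bits in range (351:192) set", i.e.
// (24 + 20) * 8 - 1 down to 24 * 8, matching the AMDGPUUsage table layout.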
2564 Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
2565 DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2566 raw_string_ostream &KdStream) const {
2567#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2568 do { \
2569 KdStream << Indent << DIRECTIVE " " \
2570 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2571 } while (0)
2572
2573 uint16_t TwoByteBuffer = 0;
2574 uint32_t FourByteBuffer = 0;
2575
2576 StringRef ReservedBytes;
2577 StringRef Indent = "\t";
2578
2579 assert(Bytes.size() == 64);
2580 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2581
2582 switch (Cursor.tell()) {
2583 case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2584 FourByteBuffer = DE.getU32(Cursor);
2585 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2586 << '\n';
2587 return true;
2588
2589 case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2590 FourByteBuffer = DE.getU32(Cursor);
2591 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2592 << FourByteBuffer << '\n';
2593 return true;
2594
2595 case amdhsa::KERNARG_SIZE_OFFSET:
2596 FourByteBuffer = DE.getU32(Cursor);
2597 KdStream << Indent << ".amdhsa_kernarg_size "
2598 << FourByteBuffer << '\n';
2599 return true;
2600
2601 case amdhsa::RESERVED0_OFFSET:
2602 // 4 reserved bytes, must be 0.
2603 ReservedBytes = DE.getBytes(Cursor, 4);
2604 for (int I = 0; I < 4; ++I) {
2605 if (ReservedBytes[I] != 0)
2606 return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4);
2607 }
2608 return true;
2609
2610 case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2611 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2612 // So far no directive controls this for Code Object V3, so simply skip for
2613 // disassembly.
2614 DE.skip(Cursor, 8);
2615 return true;
2616
2617 case amdhsa::RESERVED1_OFFSET:
2618 // 20 reserved bytes, must be 0.
2619 ReservedBytes = DE.getBytes(Cursor, 20);
2620 for (int I = 0; I < 20; ++I) {
2621 if (ReservedBytes[I] != 0)
2622 return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20);
2623 }
2624 return true;
2625
2626 case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2627 FourByteBuffer = DE.getU32(Cursor);
2628 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2629
2630 case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2631 FourByteBuffer = DE.getU32(Cursor);
2632 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2633
2634 case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2635 FourByteBuffer = DE.getU32(Cursor);
2636 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2637
2638 case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2639 using namespace amdhsa;
2640 TwoByteBuffer = DE.getU16(Cursor);
2641
2642 if (!hasArchitectedFlatScratch())
2643 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2644 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2645 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2646 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2647 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2648 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2649 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2650 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2651 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2652 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2653 if (!hasArchitectedFlatScratch())
2654 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2655 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2656 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2657 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2658
2659 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2660 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2661 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2662
2663 // Reserved for GFX9
2664 if (isGFX9() &&
2665 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2666 return createReservedKDBitsError(
2667 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2668 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2669 }
2670 if (isGFX10Plus()) {
2671 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2672 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2673 }
2674
2675 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2676 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2677 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2678
2679 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2680 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2681 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2682 }
2683
2684 return true;
2685
2686 case amdhsa::KERNARG_PRELOAD_OFFSET:
2687 using namespace amdhsa;
2688 TwoByteBuffer = DE.getU16(Cursor);
2689 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2690 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2691 KERNARG_PRELOAD_SPEC_LENGTH);
2692 }
2693
2694 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2695 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2696 KERNARG_PRELOAD_SPEC_OFFSET);
2697 }
2698 return true;
2699
2700 case amdhsa::RESERVED3_OFFSET:
2701 // 4 bytes from here are reserved, must be 0.
2702 ReservedBytes = DE.getBytes(Cursor, 4);
2703 for (int I = 0; I < 4; ++I) {
2704 if (ReservedBytes[I] != 0)
2705 return createReservedKDBytesError(amdhsa::RESERVED3_OFFSET, 4);
2706 }
2707 return true;
2708
2709 default:
2710 llvm_unreachable("Unhandled index. Case statements cover everything.");
2711 return true;
2712 }
2713#undef PRINT_DIRECTIVE
2714}
2715
2716 Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
2717 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2718
2719 // CP microcode requires the kernel descriptor to be 64-byte aligned.
2720 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2721 return createStringError(std::errc::invalid_argument,
2722 "kernel descriptor must be 64-byte aligned");
2723
2724 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2725 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2726 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2727 // order. Work around this by first looking up .amdhsa_wavefront_size32 here
2728 // when required.
2729 if (isGFX10Plus()) {
2730 uint16_t KernelCodeProperties =
2731 support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2732 llvm::endianness::little);
2733 EnableWavefrontSize32 =
2734 AMDHSA_BITS_GET(KernelCodeProperties,
2735 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2736 }
2737
2738 std::string Kd;
2739 raw_string_ostream KdStream(Kd);
2740 KdStream << ".amdhsa_kernel " << KdName << '\n';
2741
2742 DataExtractor::Cursor C(0);
2743 while (C && C.tell() < Bytes.size()) {
2744 Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2745
2746 cantFail(C.takeError());
2747
2748 if (!Res)
2749 return Res;
2750 }
2751 KdStream << ".end_amdhsa_kernel\n";
2752 outs() << KdStream.str();
2753 return true;
2754}
2755
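// Illustrative note (editor addition, not part of the original file):
// decodeKernelDescriptor walks the 64-byte blob field by field through the
// directive decoder above, so a valid descriptor prints as a reassemblable
// block:
//   .amdhsa_kernel foo
//       .amdhsa_group_segment_fixed_size 0
//       ...
//   .end_amdhsa_kernel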
2756 Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
2757 uint64_t &Size,
2758 ArrayRef<uint8_t> Bytes,
2759 uint64_t Address) const {
2760 // Right now only the kernel descriptor needs to be handled.
2761 // We ignore all other symbols for target-specific handling.
2762 // TODO:
2763 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2764 // Object V2 and V3 when symbols are marked protected.
2765
2766 // amd_kernel_code_t for Code Object V2.
2767 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2768 Size = 256;
2769 return createStringError(std::errc::invalid_argument,
2770 "code object v2 is not supported");
2771 }
2772
2773 // Code Object V3 kernel descriptors.
2774 StringRef Name = Symbol.Name;
2775 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2776 Size = 64; // Size = 64 regardless of success or failure.
2777 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2778 }
2779
2780 return false;
2781}
2782
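// Worked example (editor addition, not part of the original file): a kernel
// "foo" carries an STT_OBJECT symbol "foo.kd". onSymbolStart fixes Size to
// the 64-byte descriptor and strips the ".kd" suffix, so the block above is
// printed as ".amdhsa_kernel foo". Code object v2's 256-byte
// amd_kernel_code_t is rejected outright.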
2783const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2784 int64_t Val) {
2785 MCContext &Ctx = getContext();
2786 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2787 // Note: only set the value to Val on a new symbol, in case a disassembler
2788 // has already been initialized in this context.
2789 if (!Sym->isVariable()) {
2790 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2791 } else {
2792 int64_t Res = ~Val;
2793 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2794 if (!Valid || Res != Val)
2795 Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
2796 }
2797 return MCSymbolRefExpr::create(Sym, Ctx);
2798}
2799
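// Illustrative note (editor addition, not part of the original file):
// createConstantSymbolExpr above makes symbol creation idempotent per
// MCContext. A second disassembler in the same context reuses the
// UC_VERSION_* symbols silently when the values agree, and only warns
// (keeping the first definition) when they conflict.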
2800 bool AMDGPUDisassembler::isBufferInstruction(const MCInst &MI) const {
2801 const uint64_t TSFlags = MCII->get(MI.getOpcode()).TSFlags;
2802
2803 // Check for MUBUF and MTBUF instructions
2804 if (TSFlags & (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))
2805 return true;
2806
2807 // Check for SMEM buffer instructions (S_BUFFER_* instructions)
2808 if ((TSFlags & SIInstrFlags::SMRD) && AMDGPU::getSMEMIsBuffer(MI.getOpcode()))
2809 return true;
2810
2811 return false;
2812}
2813
2814//===----------------------------------------------------------------------===//
2815// AMDGPUSymbolizer
2816//===----------------------------------------------------------------------===//
2817
2818// Try to find symbol name for specified label
2819 bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2820 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2821 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2822 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2823
2824 if (!IsBranch) {
2825 return false;
2826 }
2827
2828 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2829 if (!Symbols)
2830 return false;
2831
2832 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2833 return Val.Addr == static_cast<uint64_t>(Value) &&
2834 Val.Type == ELF::STT_NOTYPE;
2835 });
2836 if (Result != Symbols->end()) {
2837 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2838 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2839 Inst.addOperand(MCOperand::createExpr(Add));
2840 return true;
2841 }
2842 // Add to the list of referenced addresses so the caller can synthesize a label.
2843 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2844 return false;
2845}
2846
2847 void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2848 int64_t Value,
2849 uint64_t Address) {
2850 llvm_unreachable("unimplemented");
2851}
2852
2853//===----------------------------------------------------------------------===//
2854// Initialization
2855//===----------------------------------------------------------------------===//
2856
2857 static MCSymbolizer *createAMDGPUSymbolizer(const Triple & /*TT*/,
2858 LLVMOpInfoCallback /*GetOpInfo*/,
2859 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2860 void *DisInfo,
2861 MCContext *Ctx,
2862 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2863 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2864}
2865
2866 static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2867 const MCSubtargetInfo &STI,
2868 MCContext &Ctx) {
2869 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2870}
2871
2872 extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
2873 LLVMInitializeAMDGPUDisassembler() {
2874 TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
2875 createAMDGPUDisassembler);
2876 TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
2877 createAMDGPUSymbolizer);
2878 }