//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?

#include "Disassembler/AMDGPUDisassembler.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx, MCInstrInfo const *MCII)
    : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
      MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
    report_fatal_error("Disassembly not yet supported for subtarget");

  for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
    createConstantSymbolExpr(Symbol, Code);

  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
}

void AMDGPUDisassembler::setABIVersion(unsigned Version) {
  CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(Version);
}

inline static MCDisassembler::DecodeStatus
addOperand(MCInst &Inst, const MCOperand& Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::Fail;
}

static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                uint16_t NameIdx) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
  if (OpIdx != -1) {
    auto I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}

static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);

  // Our branches take a simm16, but we need two extra bits to account for the
  // factor of 4.
  APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}
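// Worked example (illustrative): a raw simm16 of 0xFFFF is scaled by 4 to
// 0x3FFFC, which the 18-bit APInt interprets as -4; adding the +4 PC bias and
// the instruction address then yields a branch to the instruction itself.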

static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
                                     const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  int64_t Offset;
  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else { // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  }
  return addOperand(Inst, MCOperand::createImm(Offset));
}
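// For instance (illustrative), Imm = 0x1FFFFF sign-extends from bit 20 to -1
// on GFX9/10/11, while on VI the same bits are masked down to the unsigned
// value 0xFFFFF.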

static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
                                  const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeBoolReg(Val));
}

static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
}

static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
                                 const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
}

#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }
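// For example, the DECODE_SDWA macro defined further below expands through
// DECODE_OPERAND, so DECODE_SDWA(Src32) produces a static decodeSDWASrc32
// trampoline that forwards to AMDGPUDisassembler::decodeSDWASrc32.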

// Decoder for registers; decodes directly using RegClassID. Imm (8-bit) is
// the register number. Used by VGPR-only and AGPR-only operands.
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }

#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral,         \
                     ImmWidth)                                                 \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst,                                                    \
                      DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm,   \
                                        MandatoryLiteral, ImmWidth));          \
  }

static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                AMDGPUDisassembler::OpWidthTy OpWidth,
                                unsigned Imm, unsigned EncImm,
                                bool MandatoryLiteral, unsigned ImmWidth,
                                AMDGPU::OperandSemantics Sema,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral,
                                            ImmWidth, Sema));
}

// Decoder for registers. Imm (7-bit) is the register number; uses decodeSrcOp
// to get the register class. Used by SGPR-only operands.
#define DECODE_OPERAND_REG_7(RegClass, OpWidth)                                \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)

// Decoder for registers. Imm (10-bit): Imm{7-0} is the register number,
// Imm{9} is acc (AGPR or VGPR), and Imm{8} should be 0 (see VOP3Pe_SMFMAC).
// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
// Used by AV_ register classes (AGPR-or-VGPR-only register operands).
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
                     false, 0, AMDGPU::OperandSemantics::INT, Decoder);
}
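// Summary of the 'enum10' layout used here (illustrative): bits {8-0} are the
// standard 9-bit source encoding, in which values 256-511 select v0-v255, and
// bit 9 redirects that VGPR range to the AGPRs. So an EncImm of 256 + 5 names
// v5, while 512 + 256 + 5 names a5.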

// Decoder for Src (9-bit encoding) registers only.
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

// Decoder for Src (9-bit encoding) AGPRs only. The register number is encoded
// in 9 bits; set Imm{9} to 1 (set acc) and decode using 'enum10' from
// decodeSrcOp.
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

// Decoder for 'enum10' from decodeSrcOp: Imm{8-0} is the 9-bit Src encoding
// and Imm{9} is acc. Registers only.
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

// Decoder for RegisterOperands using the 9-bit Src encoding. The operand can
// be a register from RegClass or an immediate. Registers that don't belong to
// RegClass will still be decoded, and the InstPrinter will report a warning.
// An immediate is decoded into a constant of size ImmWidth, which should match
// the width of the immediate used by the OperandType (important for
// floating-point types).
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
                                       uint64_t /* Addr */,
                                       const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}

// Decoder for Src (9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set
// acc) and decode using 'enum10' from decodeSrcOp.
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
                                        uint64_t /* Addr */,
                                        const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}

template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImmDeferred9(MCInst &Inst, unsigned Imm,
                                               uint64_t /* Addr */,
                                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}

// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
// when RegisterClass is used as an operand. Most often used for destination
// operands.

DECODE_OPERAND_REG_8(VGPR_32)
DECODE_OPERAND_REG_8(VGPR_32_Lo128)
DECODE_OPERAND_REG_8(VReg_64)
DECODE_OPERAND_REG_8(VReg_96)
DECODE_OPERAND_REG_8(VReg_128)
DECODE_OPERAND_REG_8(VReg_256)
DECODE_OPERAND_REG_8(VReg_288)
DECODE_OPERAND_REG_8(VReg_352)
DECODE_OPERAND_REG_8(VReg_384)
DECODE_OPERAND_REG_8(VReg_512)
DECODE_OPERAND_REG_8(VReg_1024)

DECODE_OPERAND_REG_7(SReg_32, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
DECODE_OPERAND_REG_7(SReg_64, OPW64)
DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
DECODE_OPERAND_REG_7(SReg_96, OPW96)
DECODE_OPERAND_REG_7(SReg_128, OPW128)
DECODE_OPERAND_REG_7(SReg_256, OPW256)
DECODE_OPERAND_REG_7(SReg_512, OPW512)

DECODE_OPERAND_REG_8(AGPR_32)
DECODE_OPERAND_REG_8(AReg_64)
DECODE_OPERAND_REG_8(AReg_128)
DECODE_OPERAND_REG_8(AReg_256)
DECODE_OPERAND_REG_8(AReg_512)
DECODE_OPERAND_REG_8(AReg_1024)

static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t /*Addr*/,
                                               const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus
DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                 const MCDisassembler *Decoder) {
  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                uint64_t /*Addr*/,
                                                const MCDisassembler *Decoder) {
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  bool IsVGPR = Imm & (1 << 8);
  if (IsVGPR) {
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
                                                   Imm & 0xFF, false, 16));
}

static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  bool IsVGPR = Imm & (1 << 8);
  if (IsVGPR) {
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
                                                   Imm & 0xFF, false, 16));
}

static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
                                         uint64_t Addr,
                                         const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
                                          uint64_t Addr, const void *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
}

static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
                          const MCRegisterInfo *MRI) {
  if (OpIdx < 0)
    return false;

  const MCOperand &Op = Inst.getOperand(OpIdx);
  if (!Op.isReg())
    return false;

  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
}

static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 AMDGPUDisassembler::OpWidthTy Opw,
                                 const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  if (!DAsm->isGFX90A()) {
    Imm &= 511;
  } else {
    // If an atomic has both vdata and vdst, their register classes are tied.
    // The bit is decoded along with the vdst, the first operand. We need to
    // change the register class to AGPR if vdst was an AGPR.
    // If a DS instruction has both data0 and data1, their register classes
    // are also tied.
    unsigned Opc = Inst.getOpcode();
    uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
    uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
                                                        : AMDGPU::OpName::vdata;
    const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
    int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataNameIdx);
    if ((int)Inst.getNumOperands() == DataIdx) {
      int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (IsAGPROperand(Inst, DstIdx, MRI))
        Imm |= 512;
    }

    if (TSFlags & SIInstrFlags::DS) {
      int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
      if ((int)Inst.getNumOperands() == Data2Idx &&
          IsAGPROperand(Inst, DataIdx, MRI))
        Imm |= 512;
    }
  }
  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
}

template <AMDGPUDisassembler::OpWidthTy Opw>
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 uint64_t /* Addr */,
                                 const MCDisassembler *Decoder) {
  return decodeAVLdSt(Inst, Imm, Opw, Decoder);
}

static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  assert(Imm < (1 << 9) && "9-bit encoding");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst,
                    DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64,
                                      AMDGPU::OperandSemantics::FP64));
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
                                     uint64_t /* Addr */,
                                     const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVersionImm(Imm));
}

#include "AMDGPUGenDisassemblerTables.inc"

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res =
      support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}

static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
  assert(Bytes.size() >= 12);
  uint64_t Lo =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi =
      support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(4);
  return DecoderUInt128(Lo, Hi);
}
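// The 96-bit read above serves the wide GFX11+ encodings: the low 8 bytes and
// the following 4 bytes are combined little-endian into one 128-bit decoder
// word whose top 32 bits stay zero.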

DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &CS) const {
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  // In case the opcode is not recognized we'll assume a Size of 4 bytes
  // (unless there are fewer bytes left). This will be overridden on success.
  Size = std::min((size_t)4, Bytes_.size());

  do {
    // ToDo: better to switch encoding length using some bit predicate
    // but it is unknown yet, so try all we can

    // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
    // encodings
    if (isGFX11Plus() && Bytes.size() >= 12) {
      DecoderUInt128 DecW = eat12Bytes(Bytes);

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
        break;
    }

    // Reinitialize Bytes
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
          tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
        break;

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
      // table first so we print the correct name.
      if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
          tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
          tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
        break;

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
        break;
    }

    // Reinitialize Bytes
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    // Try to decode a 32-bit instruction
    if (Bytes.size() >= 4) {
      const uint32_t DW = eatBytes<uint32_t>(Bytes);

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
        break;

      if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
                        Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
                        Address, CS))
        break;
    }

    return MCDisassembler::Fail;
  } while (false);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
    if (isMacDPP(MI))
      convertMacDPPInst(MI);

    if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
      convertVOP3PDPPInst(MI);
    else if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) ||
             AMDGPU::isVOPC64DPP(MI.getOpcode()))
      convertVOPCDPPInst(MI); // Special VOP3 case
    else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
             -1)
      convertDPP8Inst(MI);
    else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
      convertVOP3DPPInst(MI); // Regular VOP3 case
  }

  if (AMDGPU::isMAC(MI.getOpcode())) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
      !AMDGPU::hasGDS(STI)) {
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);
    if (CPolPos != -1) {
      unsigned CPol =
          (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
              AMDGPU::CPol::GLC : 0;
      if (MI.getNumOperands() <= (unsigned)CPolPos) {
        insertNamedMCOperand(MI, MCOperand::createImm(CPol),
                             AMDGPU::OpName::cpol);
      } else if (CPol) {
        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
      }
    }
  }

  if ((MCII->get(MI.getOpcode()).TSFlags &
       (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
      (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
    // GFX90A lost TFE, its place is occupied by ACC.
    int TFEOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
      MI.insert(TFEIter, MCOperand::createImm(0));
    }
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
    int SWZOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
      MI.insert(SWZIter, MCOperand::createImm(0));
    }
  }

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG) {
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)
        return MCDisassembler::Fail;
      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
        auto VAddrRCID =
            MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
        MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
      }
      Bytes = Bytes.slice(4 * NSAWords);
    }

    convertMIMGInst(MI);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
    convertMIMGInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
    convertEXPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
    convertVINTERPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
    convertSDWAInst(MI);

  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                                                              MCOI::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
         !MI.getOperand(VDstIn_Idx).isReg() ||
         MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
                           MCOperand::createReg(MI.getOperand(Tied).getReg()),
                           AMDGPU::OpName::vdst_in);
    }
  }

  int ImmLitIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
  bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
  if (ImmLitIdx != -1 && !IsSOPK)
    convertFMAanyK(MI, ImmLitIdx);

  Size = MaxInstBytesNum - Bytes.size();
  return MCDisassembler::Success;
}

void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
    // The MCInst still has these fields even though they are no longer encoded
    // in the GFX11 instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
  }
}

void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx12) {
    // The MCInst has this field that is not directly encoded in the
    // instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
  }
}

void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
}

struct VOPModifiers {
  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;
};

// Reconstruct values of VOP3/VOP3P operands such as op_sel.
// Note that these values do not affect disassembler output,
// so this is only necessary for consistency with src_modifiers.
static VOPModifiers collectVOPModifiers(const MCInst &MI,
                                        bool IsVOP3P = false) {
  VOPModifiers Modifiers;
  unsigned Opc = MI.getOpcode();
  const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
                        AMDGPU::OpName::src1_modifiers,
                        AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    if (OpIdx == -1)
      continue;

    unsigned Val = MI.getOperand(OpIdx).getImm();

    Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
    if (IsVOP3P) {
      Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
      Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
      Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
    } else if (J == 0) {
      Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
    }
  }

  return Modifiers;
}
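// For example (illustrative): if only src1_modifiers has OP_SEL_0 set, the
// loop above yields Modifiers.OpSel == 0b010, i.e. bit J of the reassembled
// op_sel operand mirrors source J's modifier bit.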

// Instructions decode the op_sel/suffix bits into the src_modifier
// operands. Copy those bits into the src operands for true16 VGPRs.
void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MCRegisterClass &ConversionRC =
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
  constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
      {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
        SISrcMods::DST_OP_SEL}}};
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
    int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
    if (OpIdx == -1 || OpModsIdx == -1)
      continue;
    MCOperand &Op = MI.getOperand(OpIdx);
    if (!Op.isReg())
      continue;
    if (!ConversionRC.contains(Op.getReg()))
      continue;
    unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) { // isHi
      unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
      Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
    }
  }
}

// MAC opcodes have special old and src2 operands.
// src2 is tied to dst, while old is not tied (but assumed to be).
bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  if (OldIdx != -1 && Desc.getOperandConstraint(
                          OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
    assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
    assert(Desc.getOperandConstraint(
               AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
               MCOI::OperandConstraint::TIED_TO) == DST_IDX);
    (void)DST_IDX;
    return true;
  }

  return false;
}

// Create dummy old operand and insert dummy unused src2_modifiers
void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
  insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
  insertNamedMCOperand(MI, MCOperand::createImm(0),
                       AMDGPU::OpName::src2_modifiers);
}

void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    convertTrue16OpSel(MI);
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  } else {
    // Insert dummy unused src modifiers.
    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src0_modifiers);

    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src1_modifiers);
  }
}

void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
  convertTrue16OpSel(MI);

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}

// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show address as if it
// has 1 dword, which may not really be the case.
void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  int RsrcOpName = (TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
                                                  : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  assert(VDataIdx != -1);
  if (BaseOpcode->BVH) {
    // Add A16 operand for intersect_ray instructions
    addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
    return;
  }

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
  bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
  bool IsNSA = false;
  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (isGFX10Plus()) {
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    AddrSize =
        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));

    // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
    // VIMAGE insts other than BVH never use vaddr4.
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsNSA) {
      if (!IsVSample && AddrSize > 12)
        AddrSize = 16;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
          // The NSA encoding does not contain enough operands for the
          // combination of base opcode / dimension. Should this be an error?
          return;
        }
        IsPartialNSA = true;
      }
    }
  }

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
    DstSize += 1;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return;

  int NewOpcode =
      AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
  if (NewOpcode == -1)
    return;

  // Widen the register to the correct number of enabled channels.
  unsigned NewVdata = AMDGPU::NoRegister;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;

    // Get first subregister of VData
    unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
    unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                       &MRI.getRegClass(DataRCID));
    if (NewVdata == AMDGPU::NoRegister) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
      return;
    }
  }

  // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
  // If using partial NSA on GFX11+ widen last address register.
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  unsigned NewVAddrSA = AMDGPU::NoRegister;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    unsigned VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
    NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
                                         &MRI.getRegClass(AddrRCID));
    if (!NewVAddrSA)
      return;
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data)
      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
    }
  }

  if (NewVAddrSA) {
    MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
  } else if (IsNSA) {
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }
}
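// Worked example (illustrative): for a load with DMask = 0b1011, DstSize
// starts at popcount = 3 dwords; with d16 on a packed-d16 target it shrinks to
// (3 + 1) / 2 = 2 dwords, and an enabled tfe then adds one dword back.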

// op_sel and neg bits are used in src_modifiers and standalone operands. The
// autogenerated decoder only adds them to src_modifiers, so manually add the
// bits to the other operands.
void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  auto Mods = collectVOPModifiers(MI, true);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
                         AMDGPU::OpName::neg_hi);
}

// Create dummy old operand and insert optional operands
void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
    insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);
}

void AMDGPUDisassembler::convertFMAanyK(MCInst &MI, int ImmLitIdx) const {
  assert(HasLiteral && "Should have decoded a literal");
  const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
  unsigned DescNumOps = Desc.getNumOperands();
  insertNamedMCOperand(MI, MCOperand::createImm(Literal),
                       AMDGPU::OpName::immDeferred);
  assert(DescNumOps == MI.getNumOperands());
  for (unsigned I = 0; I < DescNumOps; ++I) {
    auto &Op = MI.getOperand(I);
    auto OpType = Desc.operands()[I].OperandType;
    bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
                         OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
    if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
        IsDeferredOp)
      Op.setImm(Literal);
  }
}

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accept as much as we can; let the assembler sort it out
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
    // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
    // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
    // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
    // this bundle?
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
                                                  bool IsHi) const {
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
  return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
}

// Decode Literals for insts which always have a literal in the encoding
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
  if (HasLiteral) {
    assert(
        AMDGPU::hasVOPD(STI) &&
        "Should only decode multiple kimm with VOPD, check VSrc operand types");
    if (Literal != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;
  return MCOperand::createImm(Literal);
}
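// VOPD packs two operations into a single instruction but still allows only
// one shared literal, which is why a second, different kimm value above is
// rejected as an error.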

MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
  // For now all literal constants are supposed to be unsigned integer
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                        Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = Literal64 = eatBytes<uint32_t>(Bytes);
    if (ExtendFP64)
      Literal64 <<= 32;
  }
  return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
  // Cast prevents negative overflow.
}
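// Illustrative mapping, assuming the usual encoding range of 128-208: an Imm
// of 129 decodes to the inline integer 1, while 193 decodes to -1 via the
// second branch (192 - 193).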

static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint32_t>(0.5f);
  case 241:
    return llvm::bit_cast<uint32_t>(-0.5f);
  case 242:
    return llvm::bit_cast<uint32_t>(1.0f);
  case 243:
    return llvm::bit_cast<uint32_t>(-1.0f);
  case 244:
    return llvm::bit_cast<uint32_t>(2.0f);
  case 245:
    return llvm::bit_cast<uint32_t>(-2.0f);
  case 246:
    return llvm::bit_cast<uint32_t>(4.0f);
  case 247:
    return llvm::bit_cast<uint32_t>(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}
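// The bit_cast calls above return the IEEE-754 single-precision bit patterns,
// e.g. 0.5f becomes 0x3F000000; only the 1/(2*PI) case is spelled out in hex
// because it is a rounded transcendental constant.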

static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint64_t>(0.5);
  case 241:
    return llvm::bit_cast<uint64_t>(-0.5);
  case 242:
    return llvm::bit_cast<uint64_t>(1.0);
  case 243:
    return llvm::bit_cast<uint64_t>(-1.0);
  case 244:
    return llvm::bit_cast<uint64_t>(2.0);
  case 245:
    return llvm::bit_cast<uint64_t>(-2.0);
  case 246:
    return llvm::bit_cast<uint64_t>(4.0);
  case 247:
    return llvm::bit_cast<uint64_t>(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3800;
  case 241:
    return 0xB800;
  case 242:
    return 0x3C00;
  case 243:
    return 0xBC00;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4400;
  case 247:
    return 0xC400;
  case 248: // 1 / (2 * PI)
    return 0x3118;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValBF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3F00;
  case 241:
    return 0xBF00;
  case 242:
    return 0x3F80;
  case 243:
    return 0xBF80;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4080;
  case 247:
    return 0xC080;
  case 248: // 1 / (2 * PI)
    return 0x3E22;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal16(unsigned Imm, AMDGPU::OperandSemantics Sema) {
  return (Sema == AMDGPU::OperandSemantics::BF16) ? getInlineImmValBF16(Imm)
                                                  : getInlineImmValF16(Imm);
}

MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm,
                                            AMDGPU::OperandSemantics Sema) {
  assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN &&
         Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);

  // ToDo: case 248: 1/(2*PI) - is allowed only on VI
  // ImmWidth 0 is a default case where operand should not allow immediates.
  // Imm value is still decoded into 32 bit immediate operand, inst printer will
  // use it to print verbose error message.
  switch (ImmWidth) {
  case 0:
  case 32:
    return MCOperand::createImm(getInlineImmVal32(Imm));
  case 64:
    return MCOperand::createImm(getInlineImmVal64(Imm));
  case 16:
    return MCOperand::createImm(getInlineImmVal16(Imm, Sema));
  default:
    llvm_unreachable("implement me");
  }
}

unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return VGPR_32RegClassID;
  case OPW64:
  case OPWV232: return VReg_64RegClassID;
  case OPW96: return VReg_96RegClassID;
  case OPW128: return VReg_128RegClassID;
  case OPW160: return VReg_160RegClassID;
  case OPW256: return VReg_256RegClassID;
  case OPW288: return VReg_288RegClassID;
  case OPW320: return VReg_320RegClassID;
  case OPW352: return VReg_352RegClassID;
  case OPW384: return VReg_384RegClassID;
  case OPW512: return VReg_512RegClassID;
  case OPW1024: return VReg_1024RegClassID;
  }
}

unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return AGPR_32RegClassID;
  case OPW64:
  case OPWV232: return AReg_64RegClassID;
  case OPW96: return AReg_96RegClassID;
  case OPW128: return AReg_128RegClassID;
  case OPW160: return AReg_160RegClassID;
  case OPW256: return AReg_256RegClassID;
  case OPW288: return AReg_288RegClassID;
  case OPW320: return AReg_320RegClassID;
  case OPW352: return AReg_352RegClassID;
  case OPW384: return AReg_384RegClassID;
  case OPW512: return AReg_512RegClassID;
  case OPW1024: return AReg_1024RegClassID;
  }
}

unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return SGPR_32RegClassID;
  case OPW64:
  case OPWV232: return SGPR_64RegClassID;
  case OPW96: return SGPR_96RegClassID;
  case OPW128: return SGPR_128RegClassID;
  case OPW160: return SGPR_160RegClassID;
  case OPW256: return SGPR_256RegClassID;
  case OPW288: return SGPR_288RegClassID;
  case OPW320: return SGPR_320RegClassID;
  case OPW352: return SGPR_352RegClassID;
  case OPW384: return SGPR_384RegClassID;
  case OPW512: return SGPR_512RegClassID;
  }
}

unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return TTMP_32RegClassID;
  case OPW64:
  case OPWV232: return TTMP_64RegClassID;
  case OPW128: return TTMP_128RegClassID;
  case OPW256: return TTMP_256RegClassID;
  case OPW288: return TTMP_288RegClassID;
  case OPW320: return TTMP_320RegClassID;
  case OPW352: return TTMP_352RegClassID;
  case OPW384: return TTMP_384RegClassID;
  case OPW512: return TTMP_512RegClassID;
  }
}

int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
}
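// For example (illustrative, assuming TTMP_GFX9PLUS_MIN == 108): on GFX9+ an
// encoding value of 110 maps to ttmp index 2, while anything outside the ttmp
// window yields -1.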

MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
                                          bool MandatoryLiteral,
                                          unsigned ImmWidth,
                                          AMDGPU::OperandSemantics Sema) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 1024); // enum10

  bool IsAGPR = Val & 512;
  Val &= 511;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(IsAGPR ? getAgprClassId(Width)
                                   : getVgprClassId(Width), Val - VGPR_MIN);
  }
  return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
                            Sema);
}

MCOperand
AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val,
                                       bool MandatoryLiteral, unsigned ImmWidth,
                                       AMDGPU::OperandSemantics Sema) const {
  // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
  // decoded earlier.
  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
  using namespace AMDGPU::EncValues;

  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes compilation warning.
    static_assert(SGPR_MIN == 0);
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
    return decodeIntImmed(Val);

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
    return decodeFPImmed(ImmWidth, Val, Sema);

  if (Val == LITERAL_CONST) {
    if (MandatoryLiteral)
      // Keep a sentinel value for deferred setting
      return MCOperand::createImm(LITERAL_CONST);
    else
      return decodeLiteralConstant(Sema == AMDGPU::OperandSemantics::FP64);
  }

  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return decodeSpecialReg32(Val);
  case OPW64:
  case OPWV232:
    return decodeSpecialReg64(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}

// Bit 0 of DstY isn't stored in the instruction, because it's always the
// opposite of bit 0 of DstX.
MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
                                               unsigned Val) const {
  int VDstXInd =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
  assert(VDstXInd != -1);
  assert(Inst.getOperand(VDstXInd).isReg());
  unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
  Val |= ~XDstReg & 1;
  auto Width = llvm::AMDGPUDisassembler::OPW32;
  return createRegOperand(getVgprClassId(Width), Val);
}
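// Illustrative consequence: if DstX decoded to an even VGPR such as v4, the OR
// above sets bit 0 of Val, so DstY always decodes with the opposite parity,
// e.g. an odd register.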

MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  // clang-format off
  case 102: return createRegOperand(FLAT_SCR_LO);
  case 103: return createRegOperand(FLAT_SCR_HI);
  case 104: return createRegOperand(XNACK_MASK_LO);
  case 105: return createRegOperand(XNACK_MASK_HI);
  case 106: return createRegOperand(VCC_LO);
  case 107: return createRegOperand(VCC_HI);
  case 108: return createRegOperand(TBA_LO);
  case 109: return createRegOperand(TBA_HI);
  case 110: return createRegOperand(TMA_LO);
  case 111: return createRegOperand(TMA_HI);
  case 124:
    return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
  case 125:
    return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
  case 126: return createRegOperand(EXEC_LO);
  case 127: return createRegOperand(EXEC_HI);
  case 235: return createRegOperand(SRC_SHARED_BASE_LO);
  case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
  case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  case 254: return createRegOperand(LDS_DIRECT);
  default: break;
  // clang-format on
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR);
  case 104: return createRegOperand(XNACK_MASK);
  case 106: return createRegOperand(VCC);
  case 108: return createRegOperand(TBA);
  case 110: return createRegOperand(TMA);
  case 124:
    if (isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 125:
    if (!isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 126: return createRegOperand(EXEC);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand
AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, const unsigned Val,
                                  unsigned ImmWidth,
                                  AMDGPU::OperandSemantics Sema) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    // XXX: cast to int is needed to avoid a spurious warning:
    // comparison with unsigned is always true
    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
      return decodeIntImmed(SVal);

    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
      return decodeFPImmed(ImmWidth, SVal, Sema);

    return decodeSpecialReg32(SVal);
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    return createRegOperand(getVgprClassId(Width), Val);
  }
  llvm_unreachable("unsupported target");
}

MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(OPW16, Val, 16, AMDGPU::OperandSemantics::FP16);
}

MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(OPW32, Val, 32, AMDGPU::OperandSemantics::FP32);
}

MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  using namespace AMDGPU::SDWA;

  assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
          STI.hasFeature(AMDGPU::FeatureGFX10)) &&
         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

    int TTmpIdx = getTTmpIdx(Val);
    if (TTmpIdx >= 0) {
      auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
      return createSRegOperand(TTmpClsId, TTmpIdx);
    } else if (Val > SGPR_MAX) {
      return IsWave64 ? decodeSpecialReg64(Val)
                      : decodeSpecialReg32(Val);
    } else {
      return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
    }
  } else {
    return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
  }
}

MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
  return STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
             ? decodeSrcOp(OPW64, Val)
             : decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
  if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
    return MCOperand();
  return MCOperand::createImm(Val);
}
1744
1746 using VersionField = AMDGPU::EncodingField<7, 0>;
1747 using W64Bit = AMDGPU::EncodingBit<13>;
1748 using W32Bit = AMDGPU::EncodingBit<14>;
1749 using MDPBit = AMDGPU::EncodingBit<15>;
1751
1752 auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
1753
1754 // Decode into a plain immediate if any unused bits are raised.
1755 if (Encoding::encode(Version, W64, W32, MDP) != Imm)
1756 return MCOperand::createImm(Imm);
1757
1758 const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
1759 auto I = find_if(Versions,
1761 return V.Code == Version;
1762 });
1763 MCContext &Ctx = getContext();
1764 const MCExpr *E;
1765 if (I == Versions.end())
1766 E = MCConstantExpr::create(Version, Ctx);
1767 else
1768 E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
1769
1770 if (W64)
1771 E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
1772 if (W32)
1773 E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
1774 if (MDP)
1775 E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
1776
1777 return MCOperand::createExpr(E);
1778}
1779
1780bool AMDGPUDisassembler::isVI() const {
1781 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
1782}
1783
1784bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
1785
1786bool AMDGPUDisassembler::isGFX90A() const {
1787 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
1788}
1789
1790bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
1791
1792bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
1793
1794bool AMDGPUDisassembler::isGFX10Plus() const {
1795 return AMDGPU::isGFX10Plus(STI);
1796}
1797
1798bool AMDGPUDisassembler::isGFX11() const {
1799 return STI.hasFeature(AMDGPU::FeatureGFX11);
1800}
1801
1802bool AMDGPUDisassembler::isGFX11Plus() const {
1803 return AMDGPU::isGFX11Plus(STI);
1804}
1805
1806bool AMDGPUDisassembler::isGFX12() const {
1807 return STI.hasFeature(AMDGPU::FeatureGFX12);
1808}
1809
1810bool AMDGPUDisassembler::isGFX12Plus() const {
1811 return AMDGPU::isGFX12Plus(STI);
1812}
1813
1814bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
1815 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
1816}
1817
1818bool AMDGPUDisassembler::hasKernargPreload() const {
1819 return AMDGPU::hasKernargPreload(STI);
1820}
1821
1822//===----------------------------------------------------------------------===//
1823// AMDGPU specific symbol handling
1824//===----------------------------------------------------------------------===//
1825
1826/// Print a string describing the reserved bit range specified by Mask with
1827/// offset BaseBytes for use in error comments. Mask is a single continuous
1828/// range of 1s surrounded by zeros. The format here is meant to align with the
1829/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
1830static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
1831 SmallString<32> Result;
1832 raw_svector_ostream S(Result);
1833
1834 int TrailingZeros = llvm::countr_zero(Mask);
1835 int PopCount = llvm::popcount(Mask);
1836
1837 if (PopCount == 1) {
1838 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
1839 } else {
1840 S << "bits in range ("
1841 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
1842 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
1843 }
1844
1845 return Result;
1846}
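// Worked examples (derived from the arithmetic above):
//   getBitRangeFromMask(0x1, 0) -> "bit (0)"
//   getBitRangeFromMask(0x6, 0) -> "bits in range (2:1)"
//   getBitRangeFromMask(0xC0, 4) -> "bits in range (39:38)" (4-byte offset)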
1847
1848#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
1849#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
1850 do { \
1851 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
1852 } while (0)
1853#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
1854 do { \
1855 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
1856 << GET_FIELD(MASK) << '\n'; \
1857 } while (0)
1858
1859#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
1860 do { \
1861 if (FourByteBuffer & (MASK)) { \
1862 return createStringError(std::errc::invalid_argument, \
1863 "kernel descriptor " DESC \
1864 " reserved %s set" MSG, \
1865 getBitRangeFromMask((MASK), 0).c_str()); \
1866 } \
1867 } while (0)
1868
1869#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
1870#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
1871 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
1872#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
1873 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
1874#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
1875 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
1876
1877// NOLINTNEXTLINE(readability-identifier-naming)
1878Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
1879 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1880 using namespace amdhsa;
1881 StringRef Indent = "\t";
1882
1883 // We cannot accurately backward compute #VGPRs used from
1884 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
1885 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
1886 // simply calculate the inverse of what the assembler does.
1887
1888 uint32_t GranulatedWorkitemVGPRCount =
1889 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
1890
1891 uint32_t NextFreeVGPR =
1892 (GranulatedWorkitemVGPRCount + 1) *
1893 AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
1894
1895 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
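// Worked example (illustrative; the granule value depends on the subtarget
// and wavefront size): with a VGPR encoding granule of 4, an encoded
// GRANULATED_WORKITEM_VGPR_COUNT of 10 prints as
// .amdhsa_next_free_vgpr (10 + 1) * 4 = 44.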
1896
1897 // We cannot backward compute values used to calculate
1898 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
1899 // directives can't be computed:
1900 // .amdhsa_reserve_vcc
1901 // .amdhsa_reserve_flat_scratch
1902 // .amdhsa_reserve_xnack_mask
1903 // They take their respective default values if not specified in the assembly.
1904 //
1905 // GRANULATED_WAVEFRONT_SGPR_COUNT
1906 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
1907 //
1908 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
1909 // are set to 0. So while disassembling we consider that:
1910 //
1911 // GRANULATED_WAVEFRONT_SGPR_COUNT
1912 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
1913 //
1914 // The disassembler cannot recover the original values of those 3 directives.
1915
1916 uint32_t GranulatedWavefrontSGPRCount =
1917 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
1918
1919 if (isGFX10Plus())
1920 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
1921 "must be zero on gfx10+");
1922
1923 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
1924 AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
1925
1926 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
1927 if (!hasArchitectedFlatScratch())
1928 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
1929 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
1930 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
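// Worked example (illustrative, assuming an SGPR encoding granule of 8):
// GRANULATED_WAVEFRONT_SGPR_COUNT == 3 prints as
// .amdhsa_next_free_sgpr (3 + 1) * 8 = 32, with the three unrecoverable
// reserve directives pinned to 0 as explained above.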
1931
1932 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
1933
1934 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
1935 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
1936 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
1937 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
1938 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
1939 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
1940 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
1941 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
1942
1943 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
1944
1945 if (!isGFX12Plus())
1946 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
1947 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
1948
1949 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
1950
1951 if (!isGFX12Plus())
1952 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
1953 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
1954
1955 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
1956 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
1957
1958 if (isGFX9Plus())
1959 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
1960
1961 if (!isGFX9Plus())
1962 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
1963 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
1964
1965 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED1, "COMPUTE_PGM_RSRC1");
1966
1967 if (!isGFX10Plus())
1968 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2,
1969 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");
1970
1971 if (isGFX10Plus()) {
1972 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
1973 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
1974 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
1975 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
1976 }
1977
1978 if (isGFX12Plus())
1979 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
1980 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
1981
1982 return true;
1983}
1984
1985// NOLINTNEXTLINE(readability-identifier-naming)
1986Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
1987 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1988 using namespace amdhsa;
1989 StringRef Indent = "\t";
1990 if (hasArchitectedFlatScratch())
1991 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
1992 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
1993 else
1994 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
1995 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
1996 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
1997 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
1998 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
1999 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2000 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2001 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2002 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2003 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2004 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2005 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2006
2007 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2008 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2009 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2010
2012 ".amdhsa_exception_fp_ieee_invalid_op",
2013 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2014 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2015 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2017 ".amdhsa_exception_fp_ieee_div_zero",
2018 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2019 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2020 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2021 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2022 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2023 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2024 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2025 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2026 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2027
2028 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2029
2030 return true;
2031}
2032
2033// NOLINTNEXTLINE(readability-identifier-naming)
2034Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
2035 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2036 using namespace amdhsa;
2037 StringRef Indent = "\t";
2038 if (isGFX90A()) {
2039 KdStream << Indent << ".amdhsa_accum_offset "
2040 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2041 << '\n';
2042
2043 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2044
2045 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2046 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2047 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2048 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2049 } else if (isGFX10Plus()) {
2050 // Bits [0-3].
2051 if (!isGFX12Plus()) {
2052 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2053 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2054 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2055 } else {
2057 "SHARED_VGPR_COUNT",
2058 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2059 }
2060 } else {
2061 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2062 "COMPUTE_PGM_RSRC3",
2063 "must be zero on gfx12+");
2064 }
2065
2066 // Bits [4-11].
2067 if (isGFX11()) {
2068 PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
2069 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2070 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2071 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2072 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2073 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2074 } else if (isGFX12Plus()) {
2076 "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2077 } else {
2078 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2079 "COMPUTE_PGM_RSRC3",
2080 "must be zero on gfx10");
2081 }
2082
2083 // Bits [12].
2084 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2085 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2086
2087 // Bits [13].
2088 if (isGFX12Plus()) {
2089 PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
2090 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2091 } else {
2092 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2093 "COMPUTE_PGM_RSRC3",
2094 "must be zero on gfx10 or gfx11");
2095 }
2096
2097 // Bits [14-30].
2098 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4,
2099 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2100
2101 // Bits [31].
2102 if (isGFX11Plus()) {
2103 PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
2104 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2105 } else {
2106 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED5,
2107 "COMPUTE_PGM_RSRC3",
2108 "must be zero on gfx10");
2109 }
2110 } else if (FourByteBuffer) {
2111 return createStringError(
2112 std::errc::invalid_argument,
2113 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2114 }
2115 return true;
2116}
2117#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2118#undef PRINT_DIRECTIVE
2119#undef GET_FIELD
2120#undef CHECK_RESERVED_BITS_IMPL
2121#undef CHECK_RESERVED_BITS
2122#undef CHECK_RESERVED_BITS_MSG
2123#undef CHECK_RESERVED_BITS_DESC
2124#undef CHECK_RESERVED_BITS_DESC_MSG
2125
2126/// Create an error object to return from onSymbolStart for reserved kernel
2127/// descriptor bits being set.
2128static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2129 const char *Msg = "") {
2130 return createStringError(
2131 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2132 getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2133}
2134
2135/// Create an error object to return from onSymbolStart for reserved kernel
2136/// descriptor bytes being set.
2137static Error createReservedKDBytesError(unsigned BaseInBytes,
2138 unsigned WidthInBytes) {
2139 // Create an error comment in the same format as the "Kernel Descriptor"
2140 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2141 return createStringError(
2142 std::errc::invalid_argument,
2143 "kernel descriptor reserved bits in range (%u:%u) set",
2144 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2145}
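// Worked example (derived from the format string above):
// createReservedKDBytesError(12, 4) produces
// "kernel descriptor reserved bits in range (127:96) set".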
2146
2147Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
2148 DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2149 raw_string_ostream &KdStream) const {
2150#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2151 do { \
2152 KdStream << Indent << DIRECTIVE " " \
2153 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2154 } while (0)
2155
2156 uint16_t TwoByteBuffer = 0;
2157 uint32_t FourByteBuffer = 0;
2158
2159 StringRef ReservedBytes;
2160 StringRef Indent = "\t";
2161
2162 assert(Bytes.size() == 64);
2163 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2164
2165 switch (Cursor.tell()) {
2166 case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2167 FourByteBuffer = DE.getU32(Cursor);
2168 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2169 << '\n';
2170 return true;
2171
2172 case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2173 FourByteBuffer = DE.getU32(Cursor);
2174 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2175 << FourByteBuffer << '\n';
2176 return true;
2177
2178 case amdhsa::KERNARG_SIZE_OFFSET:
2179 FourByteBuffer = DE.getU32(Cursor);
2180 KdStream << Indent << ".amdhsa_kernarg_size "
2181 << FourByteBuffer << '\n';
2182 return true;
2183
2184 case amdhsa::RESERVED0_OFFSET:
2185 // 4 reserved bytes, must be 0.
2186 ReservedBytes = DE.getBytes(Cursor, 4);
2187 for (int I = 0; I < 4; ++I) {
2188 if (ReservedBytes[I] != 0)
2189 return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4);
2190 }
2191 return true;
2192
2193 case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2194 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2195 // So far no directive controls this for Code Object V3, so simply skip for
2196 // disassembly.
2197 DE.skip(Cursor, 8);
2198 return true;
2199
2200 case amdhsa::RESERVED1_OFFSET:
2201 // 20 reserved bytes, must be 0.
2202 ReservedBytes = DE.getBytes(Cursor, 20);
2203 for (int I = 0; I < 20; ++I) {
2204 if (ReservedBytes[I] != 0)
2205 return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20);
2206 }
2207 return true;
2208
2209 case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2210 FourByteBuffer = DE.getU32(Cursor);
2211 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2212
2213 case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2214 FourByteBuffer = DE.getU32(Cursor);
2215 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2216
2217 case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2218 FourByteBuffer = DE.getU32(Cursor);
2219 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2220
2221 case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2222 using namespace amdhsa;
2223 TwoByteBuffer = DE.getU16(Cursor);
2224
2225 if (!hasArchitectedFlatScratch())
2226 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2227 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2228 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2229 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2230 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2231 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2232 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2233 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2234 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2235 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2236 if (!hasArchitectedFlatScratch())
2237 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2238 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2239 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2240 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2241
2242 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2243 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2244 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2245
2246 // Reserved for GFX9
2247 if (isGFX9() &&
2248 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2249 return createReservedKDBitsError(
2250 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2251 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2252 } else if (isGFX10Plus()) {
2253 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2254 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2255 }
2256
2257 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2258 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2259 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2260
2261 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2262 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2263 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2264 }
2265
2266 return true;
2267
2268 case amdhsa::KERNARG_PRELOAD_OFFSET:
2269 using namespace amdhsa;
2270 TwoByteBuffer = DE.getU16(Cursor);
2271 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2272 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2273 KERNARG_PRELOAD_SPEC_LENGTH);
2274 }
2275
2276 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2277 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2278 KERNARG_PRELOAD_SPEC_OFFSET);
2279 }
2280 return true;
2281
2282 case amdhsa::RESERVED3_OFFSET:
2283 // 4 bytes from here are reserved, must be 0.
2284 ReservedBytes = DE.getBytes(Cursor, 4);
2285 for (int I = 0; I < 4; ++I) {
2286 if (ReservedBytes[I] != 0)
2287 return createReservedKDBytesError(amdhsa::RESERVED3_OFFSET, 4);
2288 }
2289 return true;
2290
2291 default:
2292 llvm_unreachable("Unhandled index. Case statements cover everything.");
2293 return true;
2294 }
2295#undef PRINT_DIRECTIVE
2296}
2297
2298Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
2299 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2300
2301 // CP microcode requires the kernel descriptor to be 64 bytes in size and
2301 // 64-byte aligned.
2302 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2303 return createStringError(std::errc::invalid_argument,
2304 "kernel descriptor must be 64-byte aligned");
2305
2306 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2307 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2308 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2309 // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
2310 // when required.
2311 if (isGFX10Plus()) {
2312 uint16_t KernelCodeProperties =
2313 support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2314 llvm::endianness::little);
2315 EnableWavefrontSize32 =
2316 AMDHSA_BITS_GET(KernelCodeProperties,
2317 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2318 }
2319
2320 std::string Kd;
2321 raw_string_ostream KdStream(Kd);
2322 KdStream << ".amdhsa_kernel " << KdName << '\n';
2323
2324 DataExtractor::Cursor C(0);
2325 while (C && C.tell() < Bytes.size()) {
2326 Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2327
2328 cantFail(C.takeError());
2329
2330 if (!Res)
2331 return Res;
2332 }
2333 KdStream << ".end_amdhsa_kernel\n";
2334 outs() << KdStream.str();
2335 return true;
2336}
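// Illustrative output shape (an abridged sketch; the exact directives depend
// on the subtarget and descriptor contents):
//   .amdhsa_kernel my_kernel
//           .amdhsa_group_segment_fixed_size 0
//           .amdhsa_private_segment_fixed_size 0
//           .amdhsa_kernarg_size 8
//           ...
//   .end_amdhsa_kernel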
2337
2338Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
2339 uint64_t &Size,
2340 ArrayRef<uint8_t> Bytes,
2341 uint64_t Address) const {
2342 // Right now only kernel descriptor needs to be handled.
2343 // We ignore all other symbols for target specific handling.
2344 // TODO:
2345 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2346 // Object V2 and V3 when symbols are marked protected.
2347
2348 // amd_kernel_code_t for Code Object V2.
2349 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2350 Size = 256;
2351 return createStringError(std::errc::invalid_argument,
2352 "code object v2 is not supported");
2353 }
2354
2355 // Code Object V3 kernel descriptors.
2356 StringRef Name = Symbol.Name;
2357 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2358 Size = 64; // Size = 64 regardless of success or failure.
2359 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2360 }
2361
2362 return false;
2363}
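// Illustrative dispatch (a sketch of the checks above): an STT_OBJECT symbol
// named "my_kernel.kd" sets Size to 64 and is decoded via
// decodeKernelDescriptor("my_kernel", ...); an STT_AMDGPU_HSA_KERNEL symbol
// is rejected as code object v2; any other symbol returns false so generic
// instruction disassembly proceeds.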
2364
2365const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2366 int64_t Val) {
2367 MCContext &Ctx = getContext();
2368 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2369 assert(!Sym->isVariable());
2370 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2371 return MCSymbolRefExpr::create(Sym, Ctx);
2372}
2373
2374//===----------------------------------------------------------------------===//
2375// AMDGPUSymbolizer
2376//===----------------------------------------------------------------------===//
2377
2378// Try to find symbol name for specified label
2379bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2380 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2381 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2382 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2383
2384 if (!IsBranch) {
2385 return false;
2386 }
2387
2388 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2389 if (!Symbols)
2390 return false;
2391
2392 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2393 return Val.Addr == static_cast<uint64_t>(Value) &&
2394 Val.Type == ELF::STT_NOTYPE;
2395 });
2396 if (Result != Symbols->end()) {
2397 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2398 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2399 Inst.addOperand(MCOperand::createExpr(Add));
2400 return true;
2401 }
2402 // Add to list of referenced addresses, so caller can synthesize a label.
2403 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2404 return false;
2405}
2406
2407void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2408 int64_t Value,
2409 uint64_t Address) {
2410 llvm_unreachable("unimplemented");
2411}
2412
2413//===----------------------------------------------------------------------===//
2414// Initialization
2415//===----------------------------------------------------------------------===//
2416
2417static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
2418 LLVMOpInfoCallback /*GetOpInfo*/,
2419 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2420 void *DisInfo,
2421 MCContext *Ctx,
2422 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2423 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2424}
2425
2426static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2427 const MCSubtargetInfo &STI,
2428 MCContext &Ctx) {
2429 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2430}
2431
2432extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler() {
2433 TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
2434 createAMDGPUDisassembler);
2435 TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
2436 createAMDGPUSymbolizer);
2437}