//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?

#include "Disassembler/AMDGPUDisassembler.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/DisassemblerTypes.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-disassembler"

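// The highest scalar source encoding differs between generations: gfx10+
// exposes more SGPRs than the SI-era targets, so pick the bound from the
// subtarget.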
#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

static const MCSubtargetInfo &addDefaultWaveSize(const MCSubtargetInfo &STI,
                                                 MCContext &Ctx) {
  if (!STI.hasFeature(AMDGPU::FeatureWavefrontSize64) &&
      !STI.hasFeature(AMDGPU::FeatureWavefrontSize32)) {
    MCSubtargetInfo &STICopy = Ctx.getSubtargetCopy(STI);
    // If there is no default wave size it must be a generation before gfx10:
    // those targets have FeatureWavefrontSize64 in their definition already.
    // For gfx10+, set wave32 as the default.
    STICopy.ToggleFeature(AMDGPU::FeatureWavefrontSize32);
    return STICopy;
  }

  return STI;
}

AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx, MCInstrInfo const *MCII)
    : MCDisassembler(addDefaultWaveSize(STI, Ctx), Ctx), MCII(MCII),
      MRI(*Ctx.getRegisterInfo()), MAI(*Ctx.getAsmInfo()),
      TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
    report_fatal_error("Disassembly not yet supported for subtarget");

  for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
    createConstantSymbolExpr(Symbol, Code);

  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
}

void AMDGPUDisassembler::setABIVersion(unsigned Version) {
  CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(Version);
}

static DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::Fail;
}

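// Insert Op at the position that the operand named NameIdx occupies in MI's
// operand list, returning that index, or -1 if this opcode has no operand
// with that name.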
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                uint16_t NameIdx) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
  if (OpIdx != -1) {
    auto I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}

static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);

  // Our branches take a simm16, but we need two extra bits to account for the
  // factor of 4.
  APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();
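  // e.g. Imm = 0 resolves to Addr + 4 (the instruction following the branch),
  // while Imm = 0xFFFF (simm16 -1) resolves back to Addr itself.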

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}

static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
                                     const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  int64_t Offset;
  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else { // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  }
  return addOperand(Inst, MCOperand::createImm(Offset));
}

static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
                                  const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeBoolReg(Val));
}

static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
}

static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
                                 const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
}

#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }

// Decoder for registers, decoded directly from the 8-bit register number in
// the encoding. Used by VGPR-only and AGPR-only operands.
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }

#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral,         \
                     ImmWidth)                                                 \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst,                                                    \
                      DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm,   \
                                        MandatoryLiteral, ImmWidth));          \
  }

static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                AMDGPUDisassembler::OpWidthTy OpWidth,
                                unsigned Imm, unsigned EncImm,
                                bool MandatoryLiteral, unsigned ImmWidth,
                                AMDGPU::OperandSemantics Sema,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral,
                                            ImmWidth, Sema));
}

// Decoder for registers. Imm(7-bit) is the register number; decodeSrcOp
// supplies the register class. Used by SGPR-only operands.
#define DECODE_OPERAND_REG_7(RegClass, OpWidth)                                \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)

// Decoder for registers. Imm(10-bit): Imm{7-0} is the register number,
// Imm{9} is acc (AGPR or VGPR), and Imm{8} should be 0 (see VOP3Pe_SMFMAC).
// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
// Used by AV_ register classes (AGPR- or VGPR-only register operands).
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
                     false, 0, AMDGPU::OperandSemantics::INT, Decoder);
}

// Decoder for Src(9-bit encoding) registers only.
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

// Decoder for Src(9-bit encoding) AGPR registers only. The register number is
// encoded in 9 bits; set Imm{9} to 1 (set acc) and decode using 'enum10' from
// decodeSrcOp.
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

// Decoder for 'enum10' from decodeSrcOp: Imm{0-8} is the 9-bit Src encoding
// and Imm{9} is acc. Registers only.
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

// Decoder for RegisterOperands using the 9-bit Src encoding. The operand can
// be a register from RegClass or an immediate. Registers that don't belong to
// RegClass are still decoded, and the InstPrinter will report a warning. An
// immediate is decoded into a constant of size ImmWidth, which should match
// the width of the immediate used by the OperandType (important for
// floating-point types).
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
                                       uint64_t /* Addr */,
                                       const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}

// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
// and decode using 'enum10' from decodeSrcOp.
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
                                        uint64_t /* Addr */,
                                        const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}

template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImmDeferred9(MCInst &Inst, unsigned Imm,
                                               uint64_t /* Addr */,
                                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}

// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
// when RegisterClass is used as an operand. Most often used for destination
// operands.

DECODE_OPERAND_REG_8(VGPR_32)
DECODE_OPERAND_REG_8(VGPR_32_Lo128)
DECODE_OPERAND_REG_8(VReg_64)
DECODE_OPERAND_REG_8(VReg_96)
DECODE_OPERAND_REG_8(VReg_128)
DECODE_OPERAND_REG_8(VReg_256)
DECODE_OPERAND_REG_8(VReg_288)
DECODE_OPERAND_REG_8(VReg_352)
DECODE_OPERAND_REG_8(VReg_384)
DECODE_OPERAND_REG_8(VReg_512)
DECODE_OPERAND_REG_8(VReg_1024)

DECODE_OPERAND_REG_7(SReg_32, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
DECODE_OPERAND_REG_7(SReg_64, OPW64)
DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
DECODE_OPERAND_REG_7(SReg_96, OPW96)
DECODE_OPERAND_REG_7(SReg_128, OPW128)
DECODE_OPERAND_REG_7(SReg_256, OPW256)
DECODE_OPERAND_REG_7(SReg_512, OPW512)

DECODE_OPERAND_REG_8(AGPR_32)
DECODE_OPERAND_REG_8(AReg_64)
DECODE_OPERAND_REG_8(AReg_128)
DECODE_OPERAND_REG_8(AReg_256)
DECODE_OPERAND_REG_8(AReg_512)
DECODE_OPERAND_REG_8(AReg_1024)

static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t /*Addr*/,
                                               const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus
DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                 const MCDisassembler *Decoder) {
  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                uint64_t /*Addr*/,
                                                const MCDisassembler *Decoder) {
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  bool IsVGPR = Imm & (1 << 8);
  if (IsVGPR) {
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
                                                   Imm & 0xFF, false, 16));
}

static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  bool IsVGPR = Imm & (1 << 8);
  if (IsVGPR) {
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
                                                   Imm & 0xFF, false, 16));
}

static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
                                         uint64_t Addr,
                                         const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
                                          uint64_t Addr, const void *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
}

static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
                          const MCRegisterInfo *MRI) {
  if (OpIdx < 0)
    return false;

  const MCOperand &Op = Inst.getOperand(OpIdx);
  if (!Op.isReg())
    return false;

  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
}

static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 AMDGPUDisassembler::OpWidthTy Opw,
                                 const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  if (!DAsm->isGFX90A()) {
    Imm &= 511;
  } else {
    // If an atomic has both vdata and vdst, their register classes are tied.
    // The bit is decoded along with vdst, the first operand, so we need to
    // change the register class to AGPR if vdst was an AGPR.
    // If a DS instruction has both data0 and data1, their register classes
    // are also tied.
    unsigned Opc = Inst.getOpcode();
    uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
    uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
                                                        : AMDGPU::OpName::vdata;
    const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
    int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataNameIdx);
    if ((int)Inst.getNumOperands() == DataIdx) {
      int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (IsAGPROperand(Inst, DstIdx, MRI))
        Imm |= 512;
    }

    if (TSFlags & SIInstrFlags::DS) {
      int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
      if ((int)Inst.getNumOperands() == Data2Idx &&
          IsAGPROperand(Inst, DataIdx, MRI))
        Imm |= 512;
    }
  }
  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
}

template <AMDGPUDisassembler::OpWidthTy Opw>
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 uint64_t /* Addr */,
                                 const MCDisassembler *Decoder) {
  return decodeAVLdSt(Inst, Imm, Opw, Decoder);
}

static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  assert(Imm < (1 << 9) && "9-bit encoding");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst,
                    DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64,
                                      AMDGPU::OperandSemantics::FP64));
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
                                     uint64_t /* Addr */,
                                     const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVersionImm(Imm));
}

#include "AMDGPUGenDisassemblerTables.inc"

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res =
      support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}

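// Consume 12 bytes and return them as a 128-bit value whose upper 32 bits are
// zero; this feeds the 96-bit wide encodings tried below for GFX11+.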
static DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
  assert(Bytes.size() >= 12);
  uint64_t Lo =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi =
      support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(4);
  return DecoderUInt128(Lo, Hi);
}

DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &CS) const {
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  // In case the opcode is not recognized we'll assume a Size of 4 bytes
  // (unless there are fewer bytes left). This will be overridden on success.
  Size = std::min((size_t)4, Bytes_.size());

  do {
    // ToDo: it would be better to switch the encoding length using some bit
    // predicate, but that is not known yet, so try everything we can.

    // Try to decode DPP and SDWA first to solve the conflict with VOP1 and
    // VOP2 encodings.
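    // Decoder table names encode the subtarget namespace and the encoding
    // width in bits, e.g. DecoderTableGFX1196 holds the 96-bit GFX11
    // encodings.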
    if (isGFX11Plus() && Bytes.size() >= 12) {
      DecoderUInt128 DecW = eat12Bytes(Bytes);

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
        break;

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
          tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
        break;

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
      // table first so we print the correct name.
      if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
          tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
          tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
        break;

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
        break;

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    // Try to decode a 32-bit instruction.
    if (Bytes.size() >= 4) {
      const uint32_t DW = eatBytes<uint32_t>(Bytes);

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
        break;

      if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
                        Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
                        Address, CS))
        break;
    }

    return MCDisassembler::Fail;
  } while (false);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
    if (isMacDPP(MI))
      convertMacDPPInst(MI);

    if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
      convertVOP3PDPPInst(MI);
    else if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) ||
             AMDGPU::isVOPC64DPP(MI.getOpcode()))
      convertVOPCDPPInst(MI); // Special VOP3 case
    else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
             -1)
      convertDPP8Inst(MI);
    else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
      convertVOP3DPPInst(MI); // Regular VOP3 case
  }

  if (AMDGPU::isMAC(MI.getOpcode())) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
      !AMDGPU::hasGDS(STI)) {
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);
    if (CPolPos != -1) {
      unsigned CPol =
          (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
              AMDGPU::CPol::GLC : 0;
      if (MI.getNumOperands() <= (unsigned)CPolPos) {
        insertNamedMCOperand(MI, MCOperand::createImm(CPol),
                             AMDGPU::OpName::cpol);
      } else if (CPol) {
        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
      }
    }
  }

  if ((MCII->get(MI.getOpcode()).TSFlags &
       (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
      (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
    // GFX90A lost TFE, its place is occupied by ACC.
    int TFEOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
      MI.insert(TFEIter, MCOperand::createImm(0));
    }
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
    int SWZOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
      MI.insert(SWZIter, MCOperand::createImm(0));
    }
  }

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG) {
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)
        return MCDisassembler::Fail;
      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
        auto VAddrRCID =
            MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
        MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
      }
      Bytes = Bytes.slice(4 * NSAWords);
    }

    convertMIMGInst(MI);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
    convertMIMGInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
    convertEXPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
    convertVINTERPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
    convertSDWAInst(MI);

  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                                                              MCOI::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
                       !MI.getOperand(VDstIn_Idx).isReg() ||
                       MI.getOperand(VDstIn_Idx).getReg() !=
                           MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
                           MCOperand::createReg(MI.getOperand(Tied).getReg()),
                           AMDGPU::OpName::vdst_in);
    }
  }

  int ImmLitIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
  bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
  if (ImmLitIdx != -1 && !IsSOPK)
    convertFMAanyK(MI, ImmLitIdx);

  Size = MaxInstBytesNum - Bytes.size();
  return MCDisassembler::Success;
}

void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
    // The MCInst still has these fields even though they are no longer encoded
    // in the GFX11 instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
  }
}

void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx12) {
    // The MCInst has this field that is not directly encoded in the
    // instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
  }
}

void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
}

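// op_sel/op_sel_hi/neg_lo/neg_hi bit masks accumulated from the per-source
// src_modifiers operands.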
struct VOPModifiers {
  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;
};

// Reconstruct values of VOP3/VOP3P operands such as op_sel.
// Note that these values do not affect disassembler output,
// so this is only necessary for consistency with src_modifiers.
static VOPModifiers collectVOPModifiers(const MCInst &MI,
                                        bool IsVOP3P = false) {
  VOPModifiers Modifiers;
  unsigned Opc = MI.getOpcode();
  const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
                        AMDGPU::OpName::src1_modifiers,
                        AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    if (OpIdx == -1)
      continue;

    unsigned Val = MI.getOperand(OpIdx).getImm();

    Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
    if (IsVOP3P) {
      Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
      Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
      Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
    } else if (J == 0) {
      Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
    }
  }

  return Modifiers;
}

// Instructions decode the op_sel/suffix bits into the src_modifiers
// operands. Copy those bits into the src operands for true16 VGPRs.
void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MCRegisterClass &ConversionRC =
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
  constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
      {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
        SISrcMods::DST_OP_SEL}}};
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
    int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
    if (OpIdx == -1 || OpModsIdx == -1)
      continue;
    MCOperand &Op = MI.getOperand(OpIdx);
    if (!Op.isReg())
      continue;
    if (!ConversionRC.contains(Op.getReg()))
      continue;
    unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) { // isHi
      unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
      Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
    }
  }
}

// MAC opcodes have special old and src2 operands.
// src2 is tied to dst, while old is not tied (but assumed to be).
bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  if (OldIdx != -1 && Desc.getOperandConstraint(
                          OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
    assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
    assert(Desc.getOperandConstraint(
               AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
               MCOI::OperandConstraint::TIED_TO) == DST_IDX);
    (void)DST_IDX;
    return true;
  }

  return false;
}

// Create a dummy old operand and insert dummy unused src2_modifiers.
void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
  insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
  insertNamedMCOperand(MI, MCOperand::createImm(0),
                       AMDGPU::OpName::src2_modifiers);
}

void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    convertTrue16OpSel(MI);
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  } else {
    // Insert dummy unused src modifiers.
    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src0_modifiers);

    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src1_modifiers);
  }
}

void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
  convertTrue16OpSel(MI);

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}

// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show the address as
// if it had 1 dword, which may not really be the case.
void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  int RsrcOpName = (TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
                                                  : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  assert(VDataIdx != -1);
  if (BaseOpcode->BVH) {
    // Add A16 operand for intersect_ray instructions
    addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
    return;
  }

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
  bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
  bool IsNSA = false;
  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (isGFX10Plus()) {
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    AddrSize =
        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));

    // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
    // VIMAGE insts other than BVH never use vaddr4.
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsNSA) {
      if (!IsVSample && AddrSize > 12)
        AddrSize = 16;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
          // The NSA encoding does not contain enough operands for the
          // combination of base opcode / dimension. Should this be an error?
          return;
        }
        IsPartialNSA = true;
      }
    }
  }

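  // e.g. dmask = 0b1011 enables three channels, so a non-gather load returns
  // 3 dwords; gather4 always returns 4, and packed D16 halves the count,
  // rounding up.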
  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
    DstSize += 1;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return;

  int NewOpcode =
      AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
  if (NewOpcode == -1)
    return;

  // Widen the register to the correct number of enabled channels.
  unsigned NewVdata = AMDGPU::NoRegister;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;

    // Get first subregister of VData
    unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
    unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0) ? VdataSub0 : Vdata0;

    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                       &MRI.getRegClass(DataRCID));
    if (NewVdata == AMDGPU::NoRegister) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
      return;
    }
  }

  // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
  // If using partial NSA on GFX11+, widen the last address register.
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  unsigned NewVAddrSA = AMDGPU::NoRegister;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    unsigned VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
    NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
                                         &MRI.getRegClass(AddrRCID));
    if (!NewVAddrSA)
      return;
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data)
      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
    }
  }

  if (NewVAddrSA) {
    MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
  } else if (IsNSA) {
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }
}

// Opsel and neg bits are used in src_modifiers and standalone operands. The
// autogenerated decoder only adds to src_modifiers, so manually add the bits
// to the other operands.
void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  auto Mods = collectVOPModifiers(MI, true);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
                         AMDGPU::OpName::neg_hi);
}

// Create a dummy old operand and insert optional operands.
void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
    insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);
}

void AMDGPUDisassembler::convertFMAanyK(MCInst &MI, int ImmLitIdx) const {
  assert(HasLiteral && "Should have decoded a literal");
  const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
  unsigned DescNumOps = Desc.getNumOperands();
  insertNamedMCOperand(MI, MCOperand::createImm(Literal),
                       AMDGPU::OpName::immDeferred);
  assert(DescNumOps == MI.getNumOperands());
  for (unsigned I = 0; I < DescNumOps; ++I) {
    auto &Op = MI.getOperand(I);
    auto OpType = Desc.operands()[I].OperandType;
    bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
                         OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
    if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
        IsDeferredOp)
      Op.setImm(Literal);
  }
}

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
      getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accept as much as we can; let the assembler sort it out
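  // The encoded Val indexes 32-bit SGPRs, so for wider classes the shift
  // below converts it to a class-relative index; e.g. s[2:3] is encoded as
  // Val = 2 but is register 1 of the 64-bit class.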
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
                                                  bool IsHi) const {
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
  return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
}

// Decode Literals for insts which always have a literal in the encoding
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
  if (HasLiteral) {
    assert(
        AMDGPU::hasVOPD(STI) &&
        "Should only decode multiple kimm with VOPD, check VSrc operand types");
    if (Literal != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;
  return MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
  // For now all literal constants are supposed to be unsigned integer
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
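  // For a 64-bit FP operand the 32-bit literal occupies the high dword, e.g.
  // the double 1.0 is encoded as the literal 0x3FF00000 and widened to
  // 0x3FF0000000000000 below.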
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                           Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = Literal64 = eatBytes<uint32_t>(Bytes);
    if (ExtendFP64)
      Literal64 <<= 32;
  }
  return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
}

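// Inline integer encodings: 128..192 decode to 0..64, and 193..208 decode to
// -1..-16.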
MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
      (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
      (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
  // Cast prevents negative overflow.
}

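// Inline FP constant encodings 240..247 map to +-0.5, +-1.0, +-2.0 and +-4.0;
// 248 is 1/(2*pi). Each helper below returns the raw bit pattern for one
// operand width.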
static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint32_t>(0.5f);
  case 241:
    return llvm::bit_cast<uint32_t>(-0.5f);
  case 242:
    return llvm::bit_cast<uint32_t>(1.0f);
  case 243:
    return llvm::bit_cast<uint32_t>(-1.0f);
  case 244:
    return llvm::bit_cast<uint32_t>(2.0f);
  case 245:
    return llvm::bit_cast<uint32_t>(-2.0f);
  case 246:
    return llvm::bit_cast<uint32_t>(4.0f);
  case 247:
    return llvm::bit_cast<uint32_t>(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint64_t>(0.5);
  case 241:
    return llvm::bit_cast<uint64_t>(-0.5);
  case 242:
    return llvm::bit_cast<uint64_t>(1.0);
  case 243:
    return llvm::bit_cast<uint64_t>(-1.0);
  case 244:
    return llvm::bit_cast<uint64_t>(2.0);
  case 245:
    return llvm::bit_cast<uint64_t>(-2.0);
  case 246:
    return llvm::bit_cast<uint64_t>(4.0);
  case 247:
    return llvm::bit_cast<uint64_t>(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3800;
  case 241:
    return 0xB800;
  case 242:
    return 0x3C00;
  case 243:
    return 0xBC00;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4400;
  case 247:
    return 0xC400;
  case 248: // 1 / (2 * PI)
    return 0x3118;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValBF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3F00;
  case 241:
    return 0xBF00;
  case 242:
    return 0x3F80;
  case 243:
    return 0xBF80;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4080;
  case 247:
    return 0xC080;
  case 248: // 1 / (2 * PI)
    return 0x3E22;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal16(unsigned Imm, AMDGPU::OperandSemantics Sema) {
  return (Sema == AMDGPU::OperandSemantics::BF16) ? getInlineImmValBF16(Imm)
                                                  : getInlineImmValF16(Imm);
}

MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm,
                                            AMDGPU::OperandSemantics Sema) {
  assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN &&
         Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);

  // ToDo: case 248: 1/(2*PI) - is allowed only on VI
  // ImmWidth 0 is a default case where the operand should not allow
  // immediates. The Imm value is still decoded into a 32-bit immediate
  // operand; the inst printer will use it to print a verbose error message.
  switch (ImmWidth) {
  case 0:
  case 32:
    return MCOperand::createImm(getInlineImmVal32(Imm));
  case 64:
    return MCOperand::createImm(getInlineImmVal64(Imm));
  case 16:
    return MCOperand::createImm(getInlineImmVal16(Imm, Sema));
  default:
    llvm_unreachable("implement me");
  }
}

unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return VGPR_32RegClassID;
  case OPW64:
  case OPWV232: return VReg_64RegClassID;
  case OPW96: return VReg_96RegClassID;
  case OPW128: return VReg_128RegClassID;
  case OPW160: return VReg_160RegClassID;
  case OPW256: return VReg_256RegClassID;
  case OPW288: return VReg_288RegClassID;
  case OPW320: return VReg_320RegClassID;
  case OPW352: return VReg_352RegClassID;
  case OPW384: return VReg_384RegClassID;
  case OPW512: return VReg_512RegClassID;
  case OPW1024: return VReg_1024RegClassID;
  }
}

unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return AGPR_32RegClassID;
  case OPW64:
  case OPWV232: return AReg_64RegClassID;
  case OPW96: return AReg_96RegClassID;
  case OPW128: return AReg_128RegClassID;
  case OPW160: return AReg_160RegClassID;
  case OPW256: return AReg_256RegClassID;
  case OPW288: return AReg_288RegClassID;
  case OPW320: return AReg_320RegClassID;
  case OPW352: return AReg_352RegClassID;
  case OPW384: return AReg_384RegClassID;
  case OPW512: return AReg_512RegClassID;
  case OPW1024: return AReg_1024RegClassID;
  }
}

unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return SGPR_32RegClassID;
  case OPW64:
  case OPWV232: return SGPR_64RegClassID;
  case OPW96: return SGPR_96RegClassID;
  case OPW128: return SGPR_128RegClassID;
  case OPW160: return SGPR_160RegClassID;
  case OPW256: return SGPR_256RegClassID;
  case OPW288: return SGPR_288RegClassID;
  case OPW320: return SGPR_320RegClassID;
  case OPW352: return SGPR_352RegClassID;
  case OPW384: return SGPR_384RegClassID;
  case OPW512: return SGPR_512RegClassID;
  }
}

unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return TTMP_32RegClassID;
  case OPW64:
  case OPWV232: return TTMP_64RegClassID;
  case OPW128: return TTMP_128RegClassID;
  case OPW256: return TTMP_256RegClassID;
  case OPW288: return TTMP_288RegClassID;
  case OPW320: return TTMP_320RegClassID;
  case OPW352: return TTMP_352RegClassID;
  case OPW384: return TTMP_384RegClassID;
  case OPW512: return TTMP_512RegClassID;
  }
}

int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax) ? Val - TTmpMin : -1;
}

MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
                                          bool MandatoryLiteral,
                                          unsigned ImmWidth,
                                          AMDGPU::OperandSemantics Sema) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 1024); // enum10

  bool IsAGPR = Val & 512;
  Val &= 511;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(IsAGPR ? getAgprClassId(Width)
                                   : getVgprClassId(Width), Val - VGPR_MIN);
  }
  return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
                            Sema);
}

MCOperand
AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val,
                                       bool MandatoryLiteral, unsigned ImmWidth,
                                       AMDGPU::OperandSemantics Sema) const {
  // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
  // decoded earlier.
  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
  using namespace AMDGPU::EncValues;

  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes compilation warning.
    static_assert(SGPR_MIN == 0);
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
    return decodeIntImmed(Val);

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
    return decodeFPImmed(ImmWidth, Val, Sema);

  if (Val == LITERAL_CONST) {
    if (MandatoryLiteral)
      // Keep a sentinel value for deferred setting
      return MCOperand::createImm(LITERAL_CONST);
    return decodeLiteralConstant(Sema == AMDGPU::OperandSemantics::FP64);
  }

  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return decodeSpecialReg32(Val);
  case OPW64:
  case OPWV232:
    return decodeSpecialReg64(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}

// Bit 0 of DstY isn't stored in the instruction, because it's always the
// opposite of bit 0 of DstX.
MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
                                               unsigned Val) const {
  int VDstXInd =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
  assert(VDstXInd != -1);
  assert(Inst.getOperand(VDstXInd).isReg());
  unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
  Val |= ~XDstReg & 1;
  auto Width = llvm::AMDGPUDisassembler::OPW32;
  return createRegOperand(getVgprClassId(Width), Val);
}

MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  // clang-format off
  case 102: return createRegOperand(FLAT_SCR_LO);
  case 103: return createRegOperand(FLAT_SCR_HI);
  case 104: return createRegOperand(XNACK_MASK_LO);
  case 105: return createRegOperand(XNACK_MASK_HI);
  case 106: return createRegOperand(VCC_LO);
  case 107: return createRegOperand(VCC_HI);
  case 108: return createRegOperand(TBA_LO);
  case 109: return createRegOperand(TBA_HI);
  case 110: return createRegOperand(TMA_LO);
  case 111: return createRegOperand(TMA_HI);
  case 124:
    return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
  case 125:
    return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
  case 126: return createRegOperand(EXEC_LO);
  case 127: return createRegOperand(EXEC_HI);
  case 235: return createRegOperand(SRC_SHARED_BASE_LO);
  case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
  case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  case 254: return createRegOperand(LDS_DIRECT);
  default: break;
  // clang-format on
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR);
  case 104: return createRegOperand(XNACK_MASK);
  case 106: return createRegOperand(VCC);
  case 108: return createRegOperand(TBA);
  case 110: return createRegOperand(TMA);
  case 124:
    if (isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 125:
    if (!isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 126: return createRegOperand(EXEC);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand
AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, const unsigned Val,
                                  unsigned ImmWidth,
                                  AMDGPU::OperandSemantics Sema) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    // XXX: the cast to int is needed to avoid a warning:
    // comparison with unsigned is always true
    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
      return decodeIntImmed(SVal);

    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
      return decodeFPImmed(ImmWidth, SVal, Sema);

    return decodeSpecialReg32(SVal);
  }
  if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
    return createRegOperand(getVgprClassId(Width), Val);
  llvm_unreachable("unsupported target");
}

MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(OPW16, Val, 16, AMDGPU::OperandSemantics::FP16);
}

MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(OPW32, Val, 32, AMDGPU::OperandSemantics::FP32);
}

MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  using namespace AMDGPU::SDWA;

  assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
          STI.hasFeature(AMDGPU::FeatureGFX10)) &&
         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

    int TTmpIdx = getTTmpIdx(Val);
    if (TTmpIdx >= 0) {
      auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
      return createSRegOperand(TTmpClsId, TTmpIdx);
    }
    if (Val > SGPR_MAX) {
      return IsWave64 ? decodeSpecialReg64(Val) : decodeSpecialReg32(Val);
    }
    return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
  }
  return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
}

MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
  return STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
             ? decodeSrcOp(OPW64, Val)
             : decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
  if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
    return MCOperand();
  return MCOperand::createImm(Val);
}

1759 using VersionField = AMDGPU::EncodingField<7, 0>;
1760 using W64Bit = AMDGPU::EncodingBit<13>;
1761 using W32Bit = AMDGPU::EncodingBit<14>;
1762 using MDPBit = AMDGPU::EncodingBit<15>;
1763 using Encoding = AMDGPU::EncodingFields<VersionField, W64Bit, W32Bit, MDPBit>;
1764
1765 auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
1766
1767 // Decode into a plain immediate if any unused bits are set.
1768 if (Encoding::encode(Version, W64, W32, MDP) != Imm)
1769 return MCOperand::createImm(Imm);
1770
1771 const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
1772 auto I = find_if(Versions,
1773 [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
1774 return V.Code == Version;
1775 });
1776 MCContext &Ctx = getContext();
1777 const MCExpr *E;
1778 if (I == Versions.end())
1779 E = MCConstantExpr::create(Version, Ctx);
1780 else
1781 E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
1782
1783 if (W64)
1784 E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
1785 if (W32)
1786 E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
1787 if (MDP)
1788 E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
1789
1790 return MCOperand::createExpr(E);
1791}
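// Illustrative example: an Imm equal to a known version code with only the
// W32 bit (bit 14) added survives the round-trip check and prints as an
// expression such as UC_VERSION_GFX11 | UC_VERSION_W32_BIT (symbol names as
// registered in the constructor), whereas Imm = 0x0100 sets an unused bit
// in [8, 12], fails the round-trip, and prints as a plain immediate.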
1792
1793bool AMDGPUDisassembler::isVI() const {
1794 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
1795}
1796
1797bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
1798
1799bool AMDGPUDisassembler::isGFX90A() const {
1800 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
1801}
1802
1803bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
1804
1805bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
1806
1807bool AMDGPUDisassembler::isGFX10Plus() const {
1808 return AMDGPU::isGFX10Plus(STI);
1809}
1810
1811bool AMDGPUDisassembler::isGFX11() const {
1812 return STI.hasFeature(AMDGPU::FeatureGFX11);
1813}
1814
1815bool AMDGPUDisassembler::isGFX11Plus() const {
1816 return AMDGPU::isGFX11Plus(STI);
1817}
1818
1819bool AMDGPUDisassembler::isGFX12() const {
1820 return STI.hasFeature(AMDGPU::FeatureGFX12);
1821}
1822
1823bool AMDGPUDisassembler::isGFX12Plus() const {
1824 return AMDGPU::isGFX12Plus(STI);
1825}
1826
1827bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
1828 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
1829}
1830
1831bool AMDGPUDisassembler::hasKernargPreload() const {
1832 return AMDGPU::hasKernargPreload(STI);
1833}
1834
1835//===----------------------------------------------------------------------===//
1836// AMDGPU specific symbol handling
1837//===----------------------------------------------------------------------===//
1838
1839/// Print a string describing the reserved bit range specified by Mask with
1840/// offset BaseBytes for use in error comments. Mask is a single continuous
1841/// range of 1s surrounded by zeros. The format here is meant to align with the
1842/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
1843static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
1844 SmallString<32> Result;
1845 raw_svector_ostream S(Result);
1846
1847 int TrailingZeros = llvm::countr_zero(Mask);
1848 int PopCount = llvm::popcount(Mask);
1849
1850 if (PopCount == 1) {
1851 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
1852 } else {
1853 S << "bits in range ("
1854 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
1855 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
1856 }
1857
1858 return Result;
1859}
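// Illustrative examples: getBitRangeFromMask(0x2, 0) yields "bit (1)";
// getBitRangeFromMask(0x3F0, 4) yields "bits in range (41:36)", since the
// mask covers bits 4..9 and the 4-byte base offset contributes 32 bits.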
1860
1861#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
1862#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
1863 do { \
1864 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
1865 } while (0)
1866#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
1867 do { \
1868 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
1869 << GET_FIELD(MASK) << '\n'; \
1870 } while (0)
1871
1872#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
1873 do { \
1874 if (FourByteBuffer & (MASK)) { \
1875 return createStringError(std::errc::invalid_argument, \
1876 "kernel descriptor " DESC \
1877 " reserved %s set" MSG, \
1878 getBitRangeFromMask((MASK), 0).c_str()); \
1879 } \
1880 } while (0)
1881
1882#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
1883#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
1884 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
1885#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
1886 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
1887#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
1888 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
1889
1890// NOLINTNEXTLINE(readability-identifier-naming)
1891Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
1892 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1893 using namespace amdhsa;
1894 StringRef Indent = "\t";
1895
1896 // We cannot accurately backward compute #VGPRs used from
1897 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
1898 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
1899 // simply calculate the inverse of what the assembler does.
1900
1901 uint32_t GranulatedWorkitemVGPRCount =
1902 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
1903
1904 uint32_t NextFreeVGPR =
1905 (GranulatedWorkitemVGPRCount + 1) *
1906 AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
1907
1908 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
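// Illustrative example (assuming a VGPR encoding granule of 4, the wave64
// value): GRANULATED_WORKITEM_VGPR_COUNT = 3 prints
// .amdhsa_next_free_vgpr 16, and reassembling computes 16 / 4 - 1 = 3,
// reproducing the original field.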
1909
1910 // We cannot backward compute values used to calculate
1911 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values of the following
1912 // directives can't be computed:
1913 // .amdhsa_reserve_vcc
1914 // .amdhsa_reserve_flat_scratch
1915 // .amdhsa_reserve_xnack_mask
1916 // They take their respective default values if not specified in the assembly.
1917 //
1918 // GRANULATED_WAVEFRONT_SGPR_COUNT
1919 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
1920 //
1921 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
1922 // are set to 0. So while disassembling we consider that:
1923 //
1924 // GRANULATED_WAVEFRONT_SGPR_COUNT
1925 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
1926 //
1927 // The disassembler cannot recover the original values of those 3 directives.
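// Illustrative example (assuming the SGPR encoding granule of 8):
// GRANULATED_WAVEFRONT_SGPR_COUNT = 11 prints .amdhsa_next_free_sgpr 96,
// even if the original assembly used a smaller .amdhsa_next_free_sgpr plus
// reserved VCC / flat-scratch / XNACK registers summing to the same granule
// count.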
1928
1929 uint32_t GranulatedWavefrontSGPRCount =
1930 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
1931
1932 if (isGFX10Plus())
1933 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
1934 "must be zero on gfx10+");
1935
1936 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
1937 AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
1938
1939 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
1940 if (!hasArchitectedFlatScratch())
1941 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
1942 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
1943 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
1944
1945 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
1946
1947 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
1948 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
1949 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
1950 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
1951 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
1952 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
1953 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
1954 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
1955
1956 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
1957
1958 if (!isGFX12Plus())
1959 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
1960 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
1961
1962 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
1963
1964 if (!isGFX12Plus())
1965 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
1966 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
1967
1968 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
1969 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
1970
1971 if (isGFX9Plus())
1972 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
1973
1974 if (!isGFX9Plus())
1975 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
1976 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
1977
1978 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED1, "COMPUTE_PGM_RSRC1");
1979
1980 if (!isGFX10Plus())
1981 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2,
1982 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");
1983
1984 if (isGFX10Plus()) {
1985 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
1986 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
1987 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
1988 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
1989 }
1990
1991 if (isGFX12Plus())
1992 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
1993 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
1994
1995 return true;
1996}
1997
1998// NOLINTNEXTLINE(readability-identifier-naming)
1999Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
2000 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2001 using namespace amdhsa;
2002 StringRef Indent = "\t";
2003 if (hasArchitectedFlatScratch())
2004 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2005 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2006 else
2007 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2008 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2009 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2010 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2011 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2012 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2013 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2014 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2015 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2016 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2017 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2018 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2019
2020 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2021 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2022 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2023
2025 ".amdhsa_exception_fp_ieee_invalid_op",
2026 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2027 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2028 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2030 ".amdhsa_exception_fp_ieee_div_zero",
2031 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2032 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2033 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2034 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2035 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2036 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2037 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2038 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2039 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2040
2041 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2042
2043 return true;
2044}
2045
2046// NOLINTNEXTLINE(readability-identifier-naming)
2047Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
2048 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2049 using namespace amdhsa;
2050 StringRef Indent = "\t";
2051 if (isGFX90A()) {
2052 KdStream << Indent << ".amdhsa_accum_offset "
2053 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2054 << '\n';
2055
2056 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2057
2058 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2059 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2060 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2061 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2062 } else if (isGFX10Plus()) {
2063 // Bits [0-3].
2064 if (!isGFX12Plus()) {
2065 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2066 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2067 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2068 } else {
2070 "SHARED_VGPR_COUNT",
2071 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2072 }
2073 } else {
2074 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2075 "COMPUTE_PGM_RSRC3",
2076 "must be zero on gfx12+");
2077 }
2078
2079 // Bits [4-11].
2080 if (isGFX11()) {
2081 PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
2082 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2083 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2084 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2085 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2086 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2087 } else if (isGFX12Plus()) {
2089 "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2090 } else {
2091 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2092 "COMPUTE_PGM_RSRC3",
2093 "must be zero on gfx10");
2094 }
2095
2096 // Bits [12].
2097 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2098 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2099
2100 // Bits [13].
2101 if (isGFX12Plus()) {
2102 PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
2103 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2104 } else {
2105 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2106 "COMPUTE_PGM_RSRC3",
2107 "must be zero on gfx10 or gfx11");
2108 }
2109
2110 // Bits [14-30].
2111 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4,
2112 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2113
2114 // Bits [31].
2115 if (isGFX11Plus()) {
2116 PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
2117 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2118 } else {
2119 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED5,
2120 "COMPUTE_PGM_RSRC3",
2121 "must be zero on gfx10");
2122 }
2123 } else if (FourByteBuffer) {
2124 return createStringError(
2125 std::errc::invalid_argument,
2126 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2127 }
2128 return true;
2129}
2130#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2131#undef PRINT_DIRECTIVE
2132#undef GET_FIELD
2133#undef CHECK_RESERVED_BITS_IMPL
2134#undef CHECK_RESERVED_BITS
2135#undef CHECK_RESERVED_BITS_MSG
2136#undef CHECK_RESERVED_BITS_DESC
2137#undef CHECK_RESERVED_BITS_DESC_MSG
2138
2139/// Create an error object to return from onSymbolStart for reserved kernel
2140/// descriptor bits being set.
2141static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2142 const char *Msg = "") {
2143 return createStringError(
2144 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2145 getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2146}
2147
2148/// Create an error object to return from onSymbolStart for reserved kernel
2149/// descriptor bytes being set.
2150static Error createReservedKDBytesError(unsigned BaseInBytes,
2151 unsigned WidthInBytes) {
2152 // Create an error comment in the same format as the "Kernel Descriptor"
2153 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2154 return createStringError(
2155 std::errc::invalid_argument,
2156 "kernel descriptor reserved bits in range (%u:%u) set",
2157 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2158}
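// Illustrative example: for the 4 reserved bytes at descriptor offset 12,
// createReservedKDBytesError(12, 4) produces "kernel descriptor reserved
// bits in range (127:96) set", matching the layout table in AMDGPUUsage.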
2159
2160Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
2161 DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2162 raw_string_ostream &KdStream) const {
2163#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2164 do { \
2165 KdStream << Indent << DIRECTIVE " " \
2166 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2167 } while (0)
2168
2169 uint16_t TwoByteBuffer = 0;
2170 uint32_t FourByteBuffer = 0;
2171
2172 StringRef ReservedBytes;
2173 StringRef Indent = "\t";
2174
2175 assert(Bytes.size() == 64);
2176 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2177
2178 switch (Cursor.tell()) {
2179 case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2180 FourByteBuffer = DE.getU32(Cursor);
2181 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2182 << '\n';
2183 return true;
2184
2185 case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2186 FourByteBuffer = DE.getU32(Cursor);
2187 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2188 << FourByteBuffer << '\n';
2189 return true;
2190
2191 case amdhsa::KERNARG_SIZE_OFFSET:
2192 FourByteBuffer = DE.getU32(Cursor);
2193 KdStream << Indent << ".amdhsa_kernarg_size "
2194 << FourByteBuffer << '\n';
2195 return true;
2196
2197 case amdhsa::RESERVED0_OFFSET:
2198 // 4 reserved bytes, must be 0.
2199 ReservedBytes = DE.getBytes(Cursor, 4);
2200 for (int I = 0; I < 4; ++I) {
2201 if (ReservedBytes[I] != 0)
2202 return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4);
2203 }
2204 return true;
2205
2206 case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2207 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2208 // So far no directive controls this for Code Object V3, so simply skip for
2209 // disassembly.
2210 DE.skip(Cursor, 8);
2211 return true;
2212
2213 case amdhsa::RESERVED1_OFFSET:
2214 // 20 reserved bytes, must be 0.
2215 ReservedBytes = DE.getBytes(Cursor, 20);
2216 for (int I = 0; I < 20; ++I) {
2217 if (ReservedBytes[I] != 0)
2218 return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20);
2219 }
2220 return true;
2221
2222 case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2223 FourByteBuffer = DE.getU32(Cursor);
2224 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2225
2226 case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2227 FourByteBuffer = DE.getU32(Cursor);
2228 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2229
2230 case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2231 FourByteBuffer = DE.getU32(Cursor);
2232 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2233
2234 case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2235 using namespace amdhsa;
2236 TwoByteBuffer = DE.getU16(Cursor);
2237
2238 if (!hasArchitectedFlatScratch())
2239 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2240 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2241 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2242 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2243 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2244 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2245 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2246 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2247 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2248 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2249 if (!hasArchitectedFlatScratch())
2250 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2251 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2252 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2253 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2254
2255 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2256 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2257 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2258
2259 // Reserved for GFX9
2260 if (isGFX9() &&
2261 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2262 return createReservedKDBitsError(
2263 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2264 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2265 }
2266 if (isGFX10Plus()) {
2267 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2268 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2269 }
2270
2271 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2272 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2273 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2274
2275 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2276 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2277 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2278 }
2279
2280 return true;
2281
2282 case amdhsa::KERNARG_PRELOAD_OFFSET:
2283 using namespace amdhsa;
2284 TwoByteBuffer = DE.getU16(Cursor);
2285 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2286 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2287 KERNARG_PRELOAD_SPEC_LENGTH);
2288 }
2289
2290 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2291 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2292 KERNARG_PRELOAD_SPEC_OFFSET);
2293 }
2294 return true;
2295
2296 case amdhsa::RESERVED3_OFFSET:
2297 // 4 bytes from here are reserved, must be 0.
2298 ReservedBytes = DE.getBytes(Cursor, 4);
2299 for (int I = 0; I < 4; ++I) {
2300 if (ReservedBytes[I] != 0)
2301 return createReservedKDBytesError(amdhsa::RESERVED3_OFFSET, 4);
2302 }
2303 return true;
2304
2305 default:
2306 llvm_unreachable("Unhandled index. Case statements cover everything.");
2307 return true;
2308 }
2309#undef PRINT_DIRECTIVE
2310}
2311
2312Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
2313 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2314
2315 // CP microcode requires the kernel descriptor to be 64-byte aligned.
2316 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2317 return createStringError(std::errc::invalid_argument,
2318 "kernel descriptor must be 64-byte aligned");
2319
2320 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2321 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2322 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2323 // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
2324 // when required.
2325 if (isGFX10Plus()) {
2326 uint16_t KernelCodeProperties =
2327 support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2328 llvm::endianness::little);
2329 EnableWavefrontSize32 =
2330 AMDHSA_BITS_GET(KernelCodeProperties,
2331 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2332 }
2333
2334 std::string Kd;
2335 raw_string_ostream KdStream(Kd);
2336 KdStream << ".amdhsa_kernel " << KdName << '\n';
2337
2338 DataExtractor::Cursor C(0);
2339 while (C && C.tell() < Bytes.size()) {
2340 Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2341
2342 cantFail(C.takeError());
2343
2344 if (!Res)
2345 return Res;
2346 }
2347 KdStream << ".end_amdhsa_kernel\n";
2348 outs() << KdStream.str();
2349 return true;
2350}
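// Illustrative output of a successful decode (kernel name and values are
// hypothetical):
//   .amdhsa_kernel foo
//       .amdhsa_group_segment_fixed_size 0
//       .amdhsa_private_segment_fixed_size 0
//       .amdhsa_kernarg_size 64
//       ...
//   .end_amdhsa_kernel
// with one directive emitted per decodeKernelDescriptorDirective call.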
2351
2352Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
2353 uint64_t &Size,
2354 ArrayRef<uint8_t> Bytes,
2355 uint64_t Address) const {
2356 // Right now only the kernel descriptor needs to be handled.
2357 // All other symbols are ignored for target-specific handling.
2358 // TODO:
2359 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2360 // Object V2 and V3 when symbols are marked protected.
2361
2362 // amd_kernel_code_t for Code Object V2.
2363 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2364 Size = 256;
2365 return createStringError(std::errc::invalid_argument,
2366 "code object v2 is not supported");
2367 }
2368
2369 // Code Object V3 kernel descriptors.
2370 StringRef Name = Symbol.Name;
2371 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2372 Size = 64; // Size = 64 regardless of success or failure.
2373 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2374 }
2375
2376 return false;
2377}
2378
2379const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2380 int64_t Val) {
2381 MCContext &Ctx = getContext();
2382 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2383 // Note: only set the value to Val on a new symbol, in case a disassembler
2384 // has already been initialized in this context.
2385 if (!Sym->isVariable()) {
2386 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2387 } else {
2388 int64_t Res = ~Val;
2389 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2390 if (!Valid || Res != Val)
2391 Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
2392 }
2393 return MCSymbolRefExpr::create(Sym, Ctx);
2394}
2395
2396//===----------------------------------------------------------------------===//
2397// AMDGPUSymbolizer
2398//===----------------------------------------------------------------------===//
2399
2400// Try to find a symbol name for the specified label.
2401bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2402 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2403 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2404 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2405
2406 if (!IsBranch) {
2407 return false;
2408 }
2409
2410 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2411 if (!Symbols)
2412 return false;
2413
2414 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2415 return Val.Addr == static_cast<uint64_t>(Value) &&
2416 Val.Type == ELF::STT_NOTYPE;
2417 });
2418 if (Result != Symbols->end()) {
2419 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2420 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2421 Inst.addOperand(MCOperand::createExpr(Add));
2422 return true;
2423 }
2424 // Add to list of referenced addresses, so caller can synthesize a label.
2425 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2426 return false;
2427}
2428
2429void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2430 int64_t Value,
2431 uint64_t Address) {
2432 llvm_unreachable("unimplemented");
2433}
2434
2435//===----------------------------------------------------------------------===//
2436// Initialization
2437//===----------------------------------------------------------------------===//
2438
2439static MCSymbolizer *createAMDGPUSymbolizer(const Triple & /*TT*/,
2440 LLVMOpInfoCallback /*GetOpInfo*/,
2441 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2442 void *DisInfo,
2443 MCContext *Ctx,
2444 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2445 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2446}
2447
2448static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2449 const MCSubtargetInfo &STI,
2450 MCContext &Ctx) {
2451 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2452}
2453
2454extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler() {
2455 TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
2456 createAMDGPUDisassembler);
2457 TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
2458 createAMDGPUSymbolizer);
2459}