//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSymbol.h"
#include <optional>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;
    LitModifier Lit = LitModifier::None;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
    bool isForcedLit64() const { return Lit == LitModifier::Lit64; }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers())
        return getFPModifiersOperand();
      if (hasIntModifiers())
        return getIntModifiersOperand();
      return 0;
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };
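
  // Illustrative sketch (an added note, not from the original source): for a
  // source operand written as "-|v0|", the parser records Abs = Neg = true,
  // so getModifiersOperand() yields SISrcMods::ABS | SISrcMods::NEG, the
  // value emitted into the matching *_modifiers operand of the MCInst.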

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTySMEMOffsetMod,
    ImmTyCPol,
    ImmTyTFE,
    ImmTyIsAsync,
    ImmTyD16,
    ImmTyClamp,
    ImmTyOModSI,
    ImmTySDWADstSel,
    ImmTySDWASrc0Sel,
    ImmTySDWASrc1Sel,
    ImmTySDWADstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyDone,
    ImmTyRowEn,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyWaitEvent,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyInterpAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyIndexKey8bit,
    ImmTyIndexKey16bit,
    ImmTyIndexKey32bit,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFI,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
    ImmTyWaitVAVDst,
    ImmTyWaitVMVSrc,
    ImmTyBitOp3,
    ImmTyMatrixAFMT,
    ImmTyMatrixBFMT,
    ImmTyMatrixAScale,
    ImmTyMatrixBScale,
    ImmTyMatrixAScaleFmt,
    ImmTyMatrixBScaleFmt,
    ImmTyMatrixAReuse,
    ImmTyMatrixBReuse,
    ImmTyScaleSel,
    ImmTyByteSel,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    MCRegister RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

  // The index of the associated MCInst operand.
  mutable int MCOpIdx = -1;

public:
  bool isToken() const override { return Kind == Token; }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
    return isRegOrImmWithInputMods(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
    return isRegOrImmWithInputMods(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrInlineImmWithFP64InputMods() const {
    return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }

  bool isVRegWithFP32InputMods() const {
    return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
  }

  bool isVRegWithFP64InputMods() const {
    return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
  }

  bool isPackedFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
  }

  bool isPackedVGPRFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isAV_LdSt_32_Align2_RegOp() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::AGPR_32RegClassID);
  }

  bool isVRegWithInputMods() const;
  template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
  template <bool IsFake16> bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
  bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
  bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
  bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
  bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
  bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
  bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
  bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
  bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
  bool isDone() const { return isImmTy(ImmTyDone); }
  bool isRowEn() const { return isImmTy(ImmTyRowEn); }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrc_b32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b16();
  }

  bool isSSrc_b64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
           (((const MCTargetAsmParser *)AsmParser)
                ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
            isExpr());
  }

  bool isSSrc_f32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }

  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }

  bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrc_b32();
  }

  bool isSSrcOrLds_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrc_b32_Lo256() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
  }

  bool isVCSrc_b64_Lo256() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
  }

  bool isVCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcT_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcFake16B16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrc_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrc_v2b16() const { return isVCSrc_b16(); }

  bool isVCSrc_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrc_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcTBF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
  }

  bool isVCSrcT_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcT_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcTBF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcFake16BF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcFake16F16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrc_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
  }

  bool isVCSrc_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }

  bool isVCSrc_v2f16() const { return isVCSrc_f16(); }

  bool isVSrc_b32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }

  bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }

  bool isVSrcT_b16_Lo128() const {
    return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcFake16_b16_Lo128() const {
    return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }

  bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }

  bool isVCSrcV2FP32() const { return isVCSrc_f64(); }

  bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }

  bool isVCSrc_v2b32() const { return isVCSrc_b64(); }

  bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }

  bool isVSrc_f32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }

  bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }

  bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }

  bool isVSrcT_bf16_Lo128() const {
    return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcT_f16_Lo128() const {
    return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcFake16_bf16_Lo128() const {
    return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcFake16_f16_Lo128() const {
    return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }

  bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }

  bool isVSrc_v2bf16() const {
    return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
  }

  bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }

  bool isVSrc_v2f16_splat() const { return isVSrc_v2f16(); }

  bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
  }

  bool isVISrc_64_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
  }

  bool isVISrc_64_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_256_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_512_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512_b32();
  }

  bool isVISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024_b32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128_b32();
  }

  bool isVISrc_128_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
  }

  bool isVISrc_128_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128_f16() || isVISrc_128_b32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512_b32();
  }

  bool isAISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024_b32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isKImmFP64() const { return isLiteralImm(MVT::f64); }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isWaitEvent() const;
  bool isSplitBarrier() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return [this, P]() { return P(*this); };
  }

  StringRef getToken() const {
    assert(isToken());
    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  MCRegister getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  int getMCOpIdx() const { return MCOpIdx; }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  bool isForcedLit64() const {
    return isImmLiteral() && getModifiers().isForcedLit64();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }
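
  // Note on ordering (added commentary): the helper above first appends the
  // modifiers immediate (e.g. src0_modifiers) to the MCInst and only then the
  // register or immediate value, so the two always travel as an adjacent pair.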

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    // clang-format off
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTyIndexKey8bit: OS << "index_key"; break;
    case ImmTyIndexKey16bit: OS << "index_key"; break;
    case ImmTyIndexKey32bit: OS << "index_key"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyIsAsync: OS << "IsAsync"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClamp: OS << "Clamp"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyDone: OS << "Done"; break;
    case ImmTyRowEn: OS << "RowEn"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyWaitEvent: OS << "WaitEvent"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
    case ImmTyBitOp3: OS << "BitOp3"; break;
    case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
    case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
    case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
    case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
    case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
    case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
    case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
    case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
    case ImmTyScaleSel: OS << "ScaleSel" ; break;
    case ImmTyByteSel: OS << "ByteSel" ; break;
    }
    // clang-format on
  }

  void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
         << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr ";
      MAI.printExpr(OS, *Expr);
      OS << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      MCRegister Reg, SMLoc S, SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = Reg;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
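
  // Typical call pattern during parsing (a hypothetical sketch, not a quote
  // from this file):
  //   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
  //                                               AMDGPUOperand::ImmTyOffset));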
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// TODO: define GET_SUBTARGET_FEATURE_NAME
#define GET_REGISTER_MATCHER
#include "AMDGPUGenAsmMatcher.inc"
#undef GET_REGISTER_MATCHER
#undef GET_SUBTARGET_FEATURE_NAME

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

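  // Worked example for the dispatch below (added commentary): the operand
  // "s[2:3]" arrives as IS_SGPR with DwordRegIndex = 2 and RegWidth = 64;
  // divideCeil(64, 32) == 2, so usesSgprAt(3) runs and .kernel.sgpr_count
  // becomes 4.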
  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  const unsigned HwMode;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

  /// Get size of register operand
  unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
    assert(OpNo < Desc.NumOperands);
    int16_t RCID = MII.getOpRegClassID(Desc.operands()[OpNo], HwMode);
    return getRegBitWidth(RCID) / 8;
  }
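
  // E.g. (illustrative): for an operand constrained to a 128-bit class such
  // as VReg_128, getRegBitWidth(RCID) returns 128, so this reports 16 bytes.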

private:
  void createConstantSymbol(StringRef Id, int64_t Val);

  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
                          const MCExpr *FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,
                          const MCExpr *NextFreeVGPR, SMRange VGPRRange,
                          const MCExpr *NextFreeSGPR, SMRange SGPRRange,
                          const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
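  // Worked example (an added sketch; allocation granule sizes vary by
  // subtarget): with an assumed 4-VGPR granule, NextFreeVGPR = 10 rounds up
  // to 12 registers and encodes as 12 / 4 - 1 = 2 blocks.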
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSACodeObjectVersion();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
                             RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                             unsigned &RegWidth,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                             unsigned &RegWidth,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                          unsigned &RegWidth,
                          SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
  MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
                           unsigned SubReg, unsigned RegWidth, SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic);

public:
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII)
      : MCTargetAsmParser(STI, MII), Parser(_Parser),
        HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
      createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
      createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
    } else {
      createConstantSymbol(".option.machine_version_major", ISA.Major);
      createConstantSymbol(".option.machine_version_minor", ISA.Minor);
      createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
    }
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);
    } else
      KernelScope.initialize(getContext());

    for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
      createConstantSymbol(Symbol, Code);

    createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
    createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
    createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasA16() const { return AMDGPU::hasA16(getSTI()); }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean it.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(getSTI());
  }

  bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }

  bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }

  bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }

  bool isGFX1250Plus() const { return AMDGPU::isGFX1250Plus(getSTI()); }

  bool isGFX13() const { return AMDGPU::isGFX13(getSTI()); }

  bool isGFX13Plus() const { return AMDGPU::isGFX13Plus(getSTI()); }

  bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }

  bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool has64BitLiterals() const {
    return getFeatureBits()[AMDGPU::Feature64BitLiterals];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasTrue16Insts() const {
    return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  bool hasGloballyAddressableScratch() const {
    return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
  }

  unsigned getNSAMaxSize(bool HasSampler = false) const {
    return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
  }

  unsigned getMaxNumUserSGPRs() const {
    return AMDGPU::getMaxNumUserSGPRs(getSTI());
  }

  bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  MCContext &getContext() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser *>(this)->MCTargetAsmParser::getContext();
  }

  const MCRegisterInfo *getMRI() const {
    return getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  // FIXME: This should not be used. Instead, should use queries derived from
  // getAvailableFeatures().
  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
  ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                               SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
                           OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);

  ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);

  ParseStatus
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     std::function<bool(int64_t &)> ConvertResult = nullptr);

  ParseStatus parseOperandArrayWithPrefix(
      const char *Prefix, OperandVector &Operands,
      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
      bool (*ConvertResult)(int64_t &) = nullptr);

  ParseStatus
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                bool IgnoreNegative = false);
  unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
  ParseStatus parseCPol(OperandVector &Operands);
  ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
  ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
  ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
                                    SMLoc &StringLoc);
  ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
                                         StringRef Name,
                                         ArrayRef<const char *> Ids,
                                         int64_t &IntVal);
  ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
                                         StringRef Name,
                                         ArrayRef<const char *> Ids,
                                         AMDGPUOperand::ImmTy Type);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
                       LitModifier Lit = LitModifier::None);
  ParseStatus parseReg(OperandVector &Operands);
  ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
                            LitModifier Lit = LitModifier::None);
  ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                           bool AllowImm = true);
  ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                            bool AllowImm = true);
  ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
  ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
  ParseStatus parseVReg32OrOff(OperandVector &Operands);
  ParseStatus tryParseIndexKey(OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy);
  ParseStatus parseIndexKey8bit(OperandVector &Operands);
  ParseStatus parseIndexKey16bit(OperandVector &Operands);
  ParseStatus parseIndexKey32bit(OperandVector &Operands);
  ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
                                AMDGPUOperand::ImmTy Type);
  ParseStatus parseMatrixAFMT(OperandVector &Operands);
  ParseStatus parseMatrixBFMT(OperandVector &Operands);
  ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
                                  AMDGPUOperand::ImmTy Type);
  ParseStatus parseMatrixAScale(OperandVector &Operands);
  ParseStatus parseMatrixBScale(OperandVector &Operands);
  ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
                                     AMDGPUOperand::ImmTy Type);
  ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
  ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);

  ParseStatus parseDfmtNfmt(int64_t &Format);
  ParseStatus parseUfmt(int64_t &Format);
  ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
                                       int64_t &Format);
  ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
                                         int64_t &Format);
  ParseStatus parseFORMAT(OperandVector &Operands);
  ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
  ParseStatus parseNumericFormat(int64_t &Format);
  ParseStatus parseFlatOffset(OperandVector &Operands);
  ParseStatus parseR128A16(OperandVector &Operands);
  ParseStatus parseBLGP(OperandVector &Operands);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  ParseStatus parseSWaitCnt(OperandVector &Operands);

  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
  ParseStatus parseDepCtr(OperandVector &Operands);

  bool parseDelay(int64_t &Delay);
  ParseStatus parseSDelayALU(OperandVector &Operands);

  ParseStatus parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Val;
    bool IsSymbolic = false;
    bool IsDefined = false;

    constexpr OperandInfoTy(int64_t Val) : Val(Val) {}
  };

  struct StructuredOpField : OperandInfoTy {
    StringLiteral Id;
    StringLiteral Desc;
    unsigned Width;
    bool IsDefined = false;

    constexpr StructuredOpField(StringLiteral Id, StringLiteral Desc,
                                unsigned Width, int64_t Default)
        : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
    virtual ~StructuredOpField() = default;

    bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
      Parser.Error(Loc, "invalid " + Desc + ": " + Err);
      return false;
    }

    virtual bool validate(AMDGPUAsmParser &Parser) const {
      if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
        return Error(Parser, "not supported on this GPU");
      if (!isUIntN(Width, Val))
        return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
      return true;
    }
  };

  ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
  bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
                             OperandInfoTy &Width);

  const AMDGPUOperand &findMCOperand(const OperandVector &Operands,
                                     int MCOpIdx) const;

  static SMLoc getLaterLoc(SMLoc a, SMLoc b);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
  SMLoc getBLGPLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
                  const OperandVector &Operands) const;
  SMLoc getInstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
                           const OperandVector &Operands);
  bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
                                                      bool AsVOPD3);
  bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
  bool tryVOPD(const MCInst &Inst);
  bool tryVOPD3(const MCInst &Inst);
  bool tryAnotherVOPDEncoding(const MCInst &Inst);

  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
  bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1873 bool validateTensorR128(const MCInst &Inst);
1874 bool validateMIMGMSAA(const MCInst &Inst);
1875 bool validateOpSel(const MCInst &Inst);
1876 bool validateTrue16OpSel(const MCInst &Inst);
1877 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1878 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1879 bool validateVccOperand(MCRegister Reg) const;
1880 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1881 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1882 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1883 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1884 bool validateAGPRLdSt(const MCInst &Inst) const;
1885 bool validateVGPRAlign(const MCInst &Inst) const;
1886 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1887 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1888 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1889 bool validateDivScale(const MCInst &Inst);
1890 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1891 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1892 SMLoc IDLoc);
1893 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1894 const unsigned CPol);
1895 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1896 bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
1897 bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1898 unsigned getConstantBusLimit(unsigned Opcode) const;
1899 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1900 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1901 MCRegister findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1902
1903 bool isSupportedMnemo(StringRef Mnemo,
1904 const FeatureBitset &FBS);
1905 bool isSupportedMnemo(StringRef Mnemo,
1906 const FeatureBitset &FBS,
1907 ArrayRef<unsigned> Variants);
1908 bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
1909
1910 bool isId(const StringRef Id) const;
1911 bool isId(const AsmToken &Token, const StringRef Id) const;
1912 bool isToken(const AsmToken::TokenKind Kind) const;
1913 StringRef getId() const;
1914 bool trySkipId(const StringRef Id);
1915 bool trySkipId(const StringRef Pref, const StringRef Id);
1916 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1917 bool trySkipToken(const AsmToken::TokenKind Kind);
1918 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1919 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1920 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1921
1922 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1923 AsmToken::TokenKind getTokenKind() const;
1924 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1925 bool parseExpr(OperandVector &Operands);
1926 StringRef getTokenStr() const;
1927 AsmToken peekToken(bool ShouldSkipSpace = true);
1928 AsmToken getToken() const;
1929 SMLoc getLoc() const;
1930 void lex();
1931
1932public:
1933 void onBeginOfFile() override;
1934 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1935
1936 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1937
1938 ParseStatus parseExpTgt(OperandVector &Operands);
1939 ParseStatus parseSendMsg(OperandVector &Operands);
1940 ParseStatus parseWaitEvent(OperandVector &Operands);
1941 ParseStatus parseInterpSlot(OperandVector &Operands);
1942 ParseStatus parseInterpAttr(OperandVector &Operands);
1943 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1944 ParseStatus parseBoolReg(OperandVector &Operands);
1945
1946 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1947 const unsigned MaxVal, const Twine &ErrMsg,
1948 SMLoc &Loc);
1949 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1950 const unsigned MinVal,
1951 const unsigned MaxVal,
1952 const StringRef ErrMsg);
1953 ParseStatus parseSwizzle(OperandVector &Operands);
1954 bool parseSwizzleOffset(int64_t &Imm);
1955 bool parseSwizzleMacro(int64_t &Imm);
1956 bool parseSwizzleQuadPerm(int64_t &Imm);
1957 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1958 bool parseSwizzleBroadcast(int64_t &Imm);
1959 bool parseSwizzleSwap(int64_t &Imm);
1960 bool parseSwizzleReverse(int64_t &Imm);
1961 bool parseSwizzleFFT(int64_t &Imm);
1962 bool parseSwizzleRotate(int64_t &Imm);
1963
1964 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1965 int64_t parseGPRIdxMacro();
1966
1967 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1968 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1969
1970 ParseStatus parseOModSI(OperandVector &Operands);
1971
1972 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1973 OptionalImmIndexMap &OptionalIdx);
1974 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1975 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1976 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1977 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1978 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1979
1980 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1981 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1982 OptionalImmIndexMap &OptionalIdx);
1983 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1984 OptionalImmIndexMap &OptionalIdx);
1985
1986 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1987 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1988 void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);
1989
1990 bool parseDimId(unsigned &Encoding);
1991 ParseStatus parseDim(OperandVector &Operands);
1992 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1993 ParseStatus parseDPP8(OperandVector &Operands);
1994 ParseStatus parseDPPCtrl(OperandVector &Operands);
1995 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1996 int64_t parseDPPCtrlSel(StringRef Ctrl);
1997 int64_t parseDPPCtrlPerm();
1998 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1999 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
2000 cvtDPP(Inst, Operands, true);
2001 }
2002 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
2003 bool IsDPP8 = false);
2004 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
2005 cvtVOP3DPP(Inst, Operands, true);
2006 }
2007
2008 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
2009 AMDGPUOperand::ImmTy Type);
2010 ParseStatus parseSDWADstUnused(OperandVector &Operands);
2011 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
2012 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
2013 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
2014 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
2015 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
2016 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
2017 uint64_t BasicInstType,
2018 bool SkipDstVcc = false,
2019 bool SkipSrcVcc = false);
2020
2021 ParseStatus parseEndpgm(OperandVector &Operands);
2022
2023 ParseStatus parseVOPD(OperandVector &Operands);
2024};
2025
2026} // end anonymous namespace
2027
2028 // May be called with an integer type of equivalent bitwidth.
2029static const fltSemantics *getFltSemantics(unsigned Size) {
2030 switch (Size) {
2031 case 4:
2032 return &APFloat::IEEEsingle();
2033 case 8:
2034 return &APFloat::IEEEdouble();
2035 case 2:
2036 return &APFloat::IEEEhalf();
2037 default:
2038 llvm_unreachable("unsupported fp type");
2039 }
2040}
2041
2042 static const fltSemantics *getFltSemantics(MVT VT) {
2043 return getFltSemantics(VT.getSizeInBits() / 8);
2044}
2045
2046 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
2047 switch (OperandType) {
2048 // When a floating-point immediate is used as an operand of type i16, the
2049 // 32-bit representation of the constant truncated to the 16 LSBs should be used.
2064 return &APFloat::IEEEsingle();
2071 return &APFloat::IEEEdouble();
2079 return &APFloat::IEEEhalf();
2084 return &APFloat::BFloat();
2085 default:
2086 llvm_unreachable("unsupported fp type");
2087 }
2088}
2089
2090//===----------------------------------------------------------------------===//
2091// Operand
2092//===----------------------------------------------------------------------===//
2093
2094static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2095 bool Lost;
2096
2097 // Convert literal to single precision
2098 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
2099 APFloat::rmNearestTiesToEven,
2100 &Lost);
2101 // We allow precision loss but not overflow or underflow
2102 if (Status != APFloat::opOK &&
2103 Lost &&
2104 ((Status & APFloat::opOverflow) != 0 ||
2105 (Status & APFloat::opUnderflow) != 0)) {
2106 return false;
2107 }
2108
2109 return true;
2110}
2111
2112static bool isSafeTruncation(int64_t Val, unsigned Size) {
2113 return isUIntN(Size, Val) || isIntN(Size, Val);
2114}
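// A short worked illustration of the predicate above (values are examples,
// not from the original source): a value is "safe" if it is representable in
// Size bits either zero- or sign-extended:
//   isSafeTruncation(0xFFFF, 16)  -> true  (valid as a 16-bit unsigned)
//   isSafeTruncation(-1, 16)      -> true  (valid as a 16-bit signed)
//   isSafeTruncation(0x1FFFF, 16) -> false (fits neither interpretation)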
2115
2116static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2117 if (VT.getScalarType() == MVT::i16)
2118 return isInlinableLiteral32(Val, HasInv2Pi);
2119
2120 if (VT.getScalarType() == MVT::f16)
2121 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2122
2123 assert(VT.getScalarType() == MVT::bf16);
2124
2125 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2126}
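// Illustrative consequence of the dispatch above (assuming the standard
// AMDGPU inline-constant tables): for an i16 operand the 32-bit integer rules
// apply, so plain integers in [-16, 64] are inlinable, while f16 and bf16
// operands are checked against their own FP16/BF16 inline tables instead.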
2127
2128bool AMDGPUOperand::isInlinableImm(MVT type) const {
2129
2130 // This is a hack to enable named inline values like
2131 // shared_base with both 32-bit and 64-bit operands.
2132 // Note that these values are defined as
2133 // 32-bit operands only.
2134 if (isInlineValue()) {
2135 return true;
2136 }
2137
2138 if (!isImmTy(ImmTyNone)) {
2139 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2140 return false;
2141 }
2142
2143 if (getModifiers().Lit != LitModifier::None)
2144 return false;
2145
2146 // TODO: We should avoid using host float here. It would be better to
2147 // check the float bit values which is what a few other places do.
2148 // We've had bot failures before due to weird NaN support on mips hosts.
2149
2150 APInt Literal(64, Imm.Val);
2151
2152 if (Imm.IsFPImm) { // We got fp literal token
2153 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2154 return AMDGPU::isInlinableLiteral64(Imm.Val,
2155 AsmParser->hasInv2PiInlineImm());
2156 }
2157
2158 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2159 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2160 return false;
2161
2162 if (type.getScalarSizeInBits() == 16) {
2163 bool Lost = false;
2164 switch (type.getScalarType().SimpleTy) {
2165 default:
2166 llvm_unreachable("unknown 16-bit type");
2167 case MVT::bf16:
2168 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2169 &Lost);
2170 break;
2171 case MVT::f16:
2172 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2173 &Lost);
2174 break;
2175 case MVT::i16:
2176 FPLiteral.convert(APFloatBase::IEEEsingle(),
2177 APFloat::rmNearestTiesToEven, &Lost);
2178 break;
2179 }
2180 // We need to use the 32-bit representation here because when a
2181 // floating-point inline constant is used as an i16 operand, its 32-bit
2182 // representation will be used. We will need the 32-bit value to check if
2183 // it is an FP inline constant.
2184 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2185 return isInlineableLiteralOp16(ImmVal, type,
2186 AsmParser->hasInv2PiInlineImm());
2187 }
2188
2189 // Check if single precision literal is inlinable
2190 return AMDGPU::isInlinableLiteral32(
2191 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2192 AsmParser->hasInv2PiInlineImm());
2193 }
2194
2195 // We got int literal token.
2196 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2197 return AMDGPU::isInlinableLiteral64(Imm.Val,
2198 AsmParser->hasInv2PiInlineImm());
2199 }
2200
2201 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2202 return false;
2203 }
2204
2205 if (type.getScalarSizeInBits() == 16) {
2206 return isInlineableLiteralOp16(
2207 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2208 type, AsmParser->hasInv2PiInlineImm());
2209 }
2210
2211 return AMDGPU::isInlinableLiteral32(
2212 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2213 AsmParser->hasInv2PiInlineImm());
2214}
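// For intuition, plausible outcomes of isInlinableImm (examples assumed, not
// from the source): the fp literal 0.5 used with an f32 operand is an inline
// constant, while 0.1 is not and must be emitted as a 32-bit literal; the
// integer 64 is inlinable, but 65 is not.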
2215
2216bool AMDGPUOperand::isLiteralImm(MVT type) const {
2217 // Check that this immediate can be added as literal
2218 if (!isImmTy(ImmTyNone)) {
2219 return false;
2220 }
2221
2222 bool Allow64Bit =
2223 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2224
2225 if (!Imm.IsFPImm) {
2226 // We got int literal token.
2227
2228 if (type == MVT::f64 && hasFPModifiers()) {
2229 // FP modifiers cannot be applied to integer literals while preserving the
2230 // same semantics for VOP1/2/C and VOP3, because of integer truncation. To
2231 // avoid ambiguity, these cases are disabled.
2232 return false;
2233 }
2234
2235 unsigned Size = type.getSizeInBits();
2236 if (Size == 64) {
2237 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2238 return true;
2239 Size = 32;
2240 }
2241
2242 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2243 // types.
2244 return isSafeTruncation(Imm.Val, Size);
2245 }
2246
2247 // We got fp literal token
2248 if (type == MVT::f64) { // Expected 64-bit fp operand
2249 // The low 32 bits of the literal would be set to zeroes, but such literals are accepted.
2250 return true;
2251 }
2252
2253 if (type == MVT::i64) { // Expected 64-bit int operand
2254 // We don't allow fp literals in 64-bit integer instructions. It is
2255 // unclear how we should encode them.
2256 return false;
2257 }
2258
2259 // We allow fp literals with f16x2 operands assuming that the specified
2260 // literal goes into the lower half and the upper half is zero. We also
2261 // require that the literal may be losslessly converted to f16.
2262 //
2263 // For i16x2 operands, we assume that the specified literal is encoded as a
2264 // single-precision float. This is pretty odd, but it matches SP3 and what
2265 // happens in hardware.
2266 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2267 : (type == MVT::v2i16) ? MVT::f32
2268 : (type == MVT::v2f32) ? MVT::f32
2269 : type;
2270
2271 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2272 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2273}
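// Plausible examples of the packed-operand rules above (assumed, not from the
// source): "1.5" with a v2f16 operand is accepted because 1.5 converts to f16
// losslessly and occupies the low half; "1.0e10" is rejected because it
// overflows f16; with a v2i16 operand the same literal is checked as an f32.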
2274
2275bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2276 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2277}
2278
2279bool AMDGPUOperand::isVRegWithInputMods() const {
2280 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2281 // GFX90A allows DPP on 64-bit operands.
2282 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2283 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2284}
2285
2286template <bool IsFake16>
2287bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2288 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2289 : AMDGPU::VGPR_16_Lo128RegClassID);
2290}
2291
2292template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2293 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2294 : AMDGPU::VGPR_16RegClassID);
2295}
2296
2297bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2298 if (AsmParser->isVI())
2299 return isVReg32();
2300 if (AsmParser->isGFX9Plus())
2301 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2302 return false;
2303}
2304
2305bool AMDGPUOperand::isSDWAFP16Operand() const {
2306 return isSDWAOperand(MVT::f16);
2307}
2308
2309bool AMDGPUOperand::isSDWAFP32Operand() const {
2310 return isSDWAOperand(MVT::f32);
2311}
2312
2313bool AMDGPUOperand::isSDWAInt16Operand() const {
2314 return isSDWAOperand(MVT::i16);
2315}
2316
2317bool AMDGPUOperand::isSDWAInt32Operand() const {
2318 return isSDWAOperand(MVT::i32);
2319}
2320
2321bool AMDGPUOperand::isBoolReg() const {
2322 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2323 (AsmParser->isWave32() && isSCSrc_b32()));
2324}
2325
2326uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2327{
2328 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2329 assert(Size == 2 || Size == 4 || Size == 8);
2330
2331 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2332
2333 if (Imm.Mods.Abs) {
2334 Val &= ~FpSignMask;
2335 }
2336 if (Imm.Mods.Neg) {
2337 Val ^= FpSignMask;
2338 }
2339
2340 return Val;
2341}
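// A minimal sketch of the bit manipulation above for Size == 4, where the
// sign mask is bit 31 (values illustrative):
//   abs: 0xBF800000 /* -1.0f */ & ~0x80000000 -> 0x3F800000 /*  1.0f */
//   neg: 0x3F800000 /*  1.0f */ ^  0x80000000 -> 0xBF800000 /* -1.0f */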
2342
2343void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2344 MCOpIdx = Inst.getNumOperands();
2345
2346 if (isExpr()) {
2347 Inst.addOperand(MCOperand::createExpr(Expr));
2348 return;
2349 }
2350
2351 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2352 Inst.getNumOperands())) {
2353 addLiteralImmOperand(Inst, Imm.Val,
2354 ApplyModifiers &
2355 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2356 } else {
2357 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2358 Inst.addOperand(MCOperand::createImm(Imm.Val));
2359 }
2360}
2361
2362void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2363 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2364 auto OpNum = Inst.getNumOperands();
2365 // Check that this operand accepts literals
2366 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2367
2368 if (ApplyModifiers) {
2369 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2370 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2371 Val = applyInputFPModifiers(Val, Size);
2372 }
2373
2374 APInt Literal(64, Val);
2375 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2376
2377 bool CanUse64BitLiterals =
2378 AsmParser->has64BitLiterals() &&
2379 !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
2380 LitModifier Lit = getModifiers().Lit;
2381 MCContext &Ctx = AsmParser->getContext();
2382
2383 if (Imm.IsFPImm) { // We got fp literal token
2384 switch (OpTy) {
2390 if (Lit == LitModifier::None &&
2391 AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2392 AsmParser->hasInv2PiInlineImm())) {
2393 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2394 return;
2395 }
2396
2397 // Non-inlineable
2398 if (AMDGPU::isSISrcFPOperand(InstDesc,
2399 OpNum)) { // Expected 64-bit fp operand
2400 bool HasMandatoryLiteral =
2401 AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
2402 // For fp operands we check if low 32 bits are zeros
2403 if (Literal.getLoBits(32) != 0 &&
2404 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2405 !HasMandatoryLiteral) {
2406 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
2407 Inst.getLoc(),
2408 "Can't encode literal as exact 64-bit floating-point operand. "
2409 "Low 32-bits will be set to zero");
2410 Val &= 0xffffffff00000000u;
2411 }
2412
2413 if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 ||
2414 OpTy == AMDGPU::OPERAND_REG_INLINE_C_FP64 ||
2415 OpTy == AMDGPU::OPERAND_REG_INLINE_AC_FP64)) {
2416 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2417 (isInt<32>(Val) || isUInt<32>(Val))) {
2418 // The floating-point operand will be verbalized as an
2419 // integer one. If that integer happens to fit 32 bits, on
2420 // re-assembling it will be interpreted as the high half of
2421 // the actual value, so we have to wrap it into lit64().
2422 Lit = LitModifier::Lit64;
2423 } else if (Lit == LitModifier::Lit) {
2424 // For FP64 operands lit() specifies the high half of the value.
2425 Val = Hi_32(Val);
2426 }
2427 }
2428 break;
2429 }
2430
2431 // We don't allow fp literals in 64-bit integer instructions. It is
2432 // unclear how we should encode them. This case should be checked earlier
2433 // in predicate methods (isLiteralImm())
2434 llvm_unreachable("fp literal in 64-bit integer instruction.");
2435
2436 case AMDGPU::OPERAND_KIMM64:
2437 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2438 (isInt<32>(Val) || isUInt<32>(Val)))
2439 Lit = LitModifier::Lit64;
2440 break;
2441
2446 if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
2447 Literal == 0x3fc45f306725feed) {
2448 // This is the 1/(2*pi) which is going to be truncated to bf16 with the
2449 // loss of precision. The constant represents the idiomatic fp32 value of
2450 // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
2451 // bits. Prevent rounding below.
2452 Inst.addOperand(MCOperand::createImm(0x3e22));
2453 return;
2454 }
2455 [[fallthrough]];
2456
2478 bool lost;
2479 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2480 // Convert literal to single precision
2481 FPLiteral.convert(*getOpFltSemantics(OpTy),
2482 APFloat::rmNearestTiesToEven, &lost);
2483 // We allow precision lost but not overflow or underflow. This should be
2484 // checked earlier in isLiteralImm()
2485
2486 Val = FPLiteral.bitcastToAPInt().getZExtValue();
2487 break;
2488 }
2489 default:
2490 llvm_unreachable("invalid operand size");
2491 }
2492
2493 if (Lit != LitModifier::None) {
2494 Inst.addOperand(
2495 MCOperand::createExpr(AMDGPUMCExpr::createLit(Lit, Val, Ctx)));
2496 } else {
2497 Inst.addOperand(MCOperand::createImm(Val));
2498 }
2499 return;
2500 }
2501
2502 // We got int literal token.
2503 // Only sign extend inline immediates.
2504 switch (OpTy) {
2519 break;
2520
2521 case AMDGPU::OPERAND_REG_IMM_INT64:
2522 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2523 if (Lit == LitModifier::None &&
2524 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2525 Inst.addOperand(MCOperand::createImm(Val));
2526 return;
2527 }
2528
2529 // When the 32 MSBs are not zero (which effectively means the value can't
2530 // be safely truncated to uint32_t), if the target doesn't support 64-bit
2531 // literals, or the lit modifier is explicitly used, we need to truncate
2532 // the value to its 32 LSBs.
2533 if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit)
2534 Val = Lo_32(Val);
2535 break;
2536
2537 case AMDGPU::OPERAND_REG_IMM_FP64:
2538 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2539 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2540 if (Lit == LitModifier::None &&
2541 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2542 Inst.addOperand(MCOperand::createImm(Val));
2543 return;
2544 }
2545
2546 // If the target doesn't support 64-bit literals, we need to use the
2547 // constant as the high 32 MSBs of a double-precision floating point value.
2548 if (!AsmParser->has64BitLiterals()) {
2549 Val = static_cast<uint64_t>(Val) << 32;
2550 } else {
2551 // Now that the target supports 64-bit literals, there are two cases
2552 // where we still want to use src_literal encoding:
2553 // 1) explicitly forced by using lit modifier;
2554 // 2) the value is a valid 32-bit representation (signed or unsigned),
2555 // meanwhile not forced by lit64 modifier.
2556 if (Lit == LitModifier::Lit ||
2557 (Lit != LitModifier::Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
2558 Val = static_cast<uint64_t>(Val) << 32;
2559 }
2560
2561 // For FP64 operands lit() specifies the high half of the value.
2562 if (Lit == LitModifier::Lit)
2563 Val = Hi_32(Val);
2564 break;
2565
2577 break;
2578
2579 case AMDGPU::OPERAND_KIMM64:
2580 if ((isInt<32>(Val) || isUInt<32>(Val)) && Lit != LitModifier::Lit64)
2581 Val <<= 32;
2582 break;
2583
2584 default:
2585 llvm_unreachable("invalid operand type");
2586 }
2587
2588 if (Lit != LitModifier::None) {
2589 Inst.addOperand(
2590 MCOperand::createExpr(AMDGPUMCExpr::createLit(Lit, Val, Ctx)));
2591 } else {
2592 Inst.addOperand(MCOperand::createImm(Val));
2593 }
2594}
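// In assembly terms, the lit()/lit64() handling above supports forms such as
// (syntax illustrative): "v_mov_b32 v0, lit(1)", which forces a 32-bit
// literal encoding for a value that would otherwise be emitted as an inline
// constant, and "lit64(...)", which forces a full 64-bit literal on targets
// with 64-bit literal support.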
2595
2596void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2597 MCOpIdx = Inst.getNumOperands();
2598 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2599}
2600
2601bool AMDGPUOperand::isInlineValue() const {
2602 return isRegKind() && ::isInlineValue(getReg());
2603}
2604
2605//===----------------------------------------------------------------------===//
2606// AsmParser
2607//===----------------------------------------------------------------------===//
2608
2609void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2610 // TODO: make those pre-defined variables read-only.
2611 // Currently there is no suitable machinery in core llvm-mc for this.
2612 // MCSymbol::isRedefinable is intended for another purpose, and
2613 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
2614 MCContext &Ctx = getContext();
2615 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2616 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2617}
2618
2619static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2620 if (Is == IS_VGPR) {
2621 switch (RegWidth) {
2622 default: return -1;
2623 case 32:
2624 return AMDGPU::VGPR_32RegClassID;
2625 case 64:
2626 return AMDGPU::VReg_64RegClassID;
2627 case 96:
2628 return AMDGPU::VReg_96RegClassID;
2629 case 128:
2630 return AMDGPU::VReg_128RegClassID;
2631 case 160:
2632 return AMDGPU::VReg_160RegClassID;
2633 case 192:
2634 return AMDGPU::VReg_192RegClassID;
2635 case 224:
2636 return AMDGPU::VReg_224RegClassID;
2637 case 256:
2638 return AMDGPU::VReg_256RegClassID;
2639 case 288:
2640 return AMDGPU::VReg_288RegClassID;
2641 case 320:
2642 return AMDGPU::VReg_320RegClassID;
2643 case 352:
2644 return AMDGPU::VReg_352RegClassID;
2645 case 384:
2646 return AMDGPU::VReg_384RegClassID;
2647 case 512:
2648 return AMDGPU::VReg_512RegClassID;
2649 case 1024:
2650 return AMDGPU::VReg_1024RegClassID;
2651 }
2652 } else if (Is == IS_TTMP) {
2653 switch (RegWidth) {
2654 default: return -1;
2655 case 32:
2656 return AMDGPU::TTMP_32RegClassID;
2657 case 64:
2658 return AMDGPU::TTMP_64RegClassID;
2659 case 128:
2660 return AMDGPU::TTMP_128RegClassID;
2661 case 256:
2662 return AMDGPU::TTMP_256RegClassID;
2663 case 512:
2664 return AMDGPU::TTMP_512RegClassID;
2665 }
2666 } else if (Is == IS_SGPR) {
2667 switch (RegWidth) {
2668 default: return -1;
2669 case 32:
2670 return AMDGPU::SGPR_32RegClassID;
2671 case 64:
2672 return AMDGPU::SGPR_64RegClassID;
2673 case 96:
2674 return AMDGPU::SGPR_96RegClassID;
2675 case 128:
2676 return AMDGPU::SGPR_128RegClassID;
2677 case 160:
2678 return AMDGPU::SGPR_160RegClassID;
2679 case 192:
2680 return AMDGPU::SGPR_192RegClassID;
2681 case 224:
2682 return AMDGPU::SGPR_224RegClassID;
2683 case 256:
2684 return AMDGPU::SGPR_256RegClassID;
2685 case 288:
2686 return AMDGPU::SGPR_288RegClassID;
2687 case 320:
2688 return AMDGPU::SGPR_320RegClassID;
2689 case 352:
2690 return AMDGPU::SGPR_352RegClassID;
2691 case 384:
2692 return AMDGPU::SGPR_384RegClassID;
2693 case 512:
2694 return AMDGPU::SGPR_512RegClassID;
2695 }
2696 } else if (Is == IS_AGPR) {
2697 switch (RegWidth) {
2698 default: return -1;
2699 case 32:
2700 return AMDGPU::AGPR_32RegClassID;
2701 case 64:
2702 return AMDGPU::AReg_64RegClassID;
2703 case 96:
2704 return AMDGPU::AReg_96RegClassID;
2705 case 128:
2706 return AMDGPU::AReg_128RegClassID;
2707 case 160:
2708 return AMDGPU::AReg_160RegClassID;
2709 case 192:
2710 return AMDGPU::AReg_192RegClassID;
2711 case 224:
2712 return AMDGPU::AReg_224RegClassID;
2713 case 256:
2714 return AMDGPU::AReg_256RegClassID;
2715 case 288:
2716 return AMDGPU::AReg_288RegClassID;
2717 case 320:
2718 return AMDGPU::AReg_320RegClassID;
2719 case 352:
2720 return AMDGPU::AReg_352RegClassID;
2721 case 384:
2722 return AMDGPU::AReg_384RegClassID;
2723 case 512:
2724 return AMDGPU::AReg_512RegClassID;
2725 case 1024:
2726 return AMDGPU::AReg_1024RegClassID;
2727 }
2728 }
2729 return -1;
2730}
2731
2732 static MCRegister getSpecialRegForName(StringRef RegName) {
2733 return StringSwitch<MCRegister>(RegName)
2734 .Case("exec", AMDGPU::EXEC)
2735 .Case("vcc", AMDGPU::VCC)
2736 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2737 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2738 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2739 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2740 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2741 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2742 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2743 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2744 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2745 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2746 .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2747 .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2748 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2749 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2750 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2751 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2752 .Case("m0", AMDGPU::M0)
2753 .Case("vccz", AMDGPU::SRC_VCCZ)
2754 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2755 .Case("execz", AMDGPU::SRC_EXECZ)
2756 .Case("src_execz", AMDGPU::SRC_EXECZ)
2757 .Case("scc", AMDGPU::SRC_SCC)
2758 .Case("src_scc", AMDGPU::SRC_SCC)
2759 .Case("tba", AMDGPU::TBA)
2760 .Case("tma", AMDGPU::TMA)
2761 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2762 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2763 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2764 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2765 .Case("vcc_lo", AMDGPU::VCC_LO)
2766 .Case("vcc_hi", AMDGPU::VCC_HI)
2767 .Case("exec_lo", AMDGPU::EXEC_LO)
2768 .Case("exec_hi", AMDGPU::EXEC_HI)
2769 .Case("tma_lo", AMDGPU::TMA_LO)
2770 .Case("tma_hi", AMDGPU::TMA_HI)
2771 .Case("tba_lo", AMDGPU::TBA_LO)
2772 .Case("tba_hi", AMDGPU::TBA_HI)
2773 .Case("pc", AMDGPU::PC_REG)
2774 .Case("null", AMDGPU::SGPR_NULL)
2775 .Default(AMDGPU::NoRegister);
2776}
2777
2778bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2779 SMLoc &EndLoc, bool RestoreOnFailure) {
2780 auto R = parseRegister();
2781 if (!R) return true;
2782 assert(R->isReg());
2783 RegNo = R->getReg();
2784 StartLoc = R->getStartLoc();
2785 EndLoc = R->getEndLoc();
2786 return false;
2787}
2788
2789bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2790 SMLoc &EndLoc) {
2791 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2792}
2793
2794ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2795 SMLoc &EndLoc) {
2796 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2797 bool PendingErrors = getParser().hasPendingError();
2798 getParser().clearPendingErrors();
2799 if (PendingErrors)
2800 return ParseStatus::Failure;
2801 if (Result)
2802 return ParseStatus::NoMatch;
2803 return ParseStatus::Success;
2804}
2805
2806bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2807 RegisterKind RegKind,
2808 MCRegister Reg1, SMLoc Loc) {
2809 switch (RegKind) {
2810 case IS_SPECIAL:
2811 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2812 Reg = AMDGPU::EXEC;
2813 RegWidth = 64;
2814 return true;
2815 }
2816 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2817 Reg = AMDGPU::FLAT_SCR;
2818 RegWidth = 64;
2819 return true;
2820 }
2821 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2822 Reg = AMDGPU::XNACK_MASK;
2823 RegWidth = 64;
2824 return true;
2825 }
2826 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2827 Reg = AMDGPU::VCC;
2828 RegWidth = 64;
2829 return true;
2830 }
2831 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2832 Reg = AMDGPU::TBA;
2833 RegWidth = 64;
2834 return true;
2835 }
2836 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2837 Reg = AMDGPU::TMA;
2838 RegWidth = 64;
2839 return true;
2840 }
2841 Error(Loc, "register does not fit in the list");
2842 return false;
2843 case IS_VGPR:
2844 case IS_SGPR:
2845 case IS_AGPR:
2846 case IS_TTMP:
2847 if (Reg1 != Reg + RegWidth / 32) {
2848 Error(Loc, "registers in a list must have consecutive indices");
2849 return false;
2850 }
2851 RegWidth += 32;
2852 return true;
2853 default:
2854 llvm_unreachable("unexpected register kind");
2855 }
2856}
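// Examples of the list-growing rules above (illustrative): [exec_lo, exec_hi]
// coalesces into the 64-bit exec pair; [v0, v1, v2] widens a VGPR tuple by 32
// bits per element; [v0, v2] is rejected because each added register must
// have index Reg + RegWidth / 32.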
2857
2858struct RegInfo {
2859 StringLiteral Name;
2860 RegisterKind Kind;
2861};
2862
2863static constexpr RegInfo RegularRegisters[] = {
2864 {{"v"}, IS_VGPR},
2865 {{"s"}, IS_SGPR},
2866 {{"ttmp"}, IS_TTMP},
2867 {{"acc"}, IS_AGPR},
2868 {{"a"}, IS_AGPR},
2869};
2870
2871static bool isRegularReg(RegisterKind Kind) {
2872 return Kind == IS_VGPR ||
2873 Kind == IS_SGPR ||
2874 Kind == IS_TTMP ||
2875 Kind == IS_AGPR;
2876}
2877
2878 static const RegInfo* getRegularRegInfo(StringRef Str) {
2879 for (const RegInfo &Reg : RegularRegisters)
2880 if (Str.starts_with(Reg.Name))
2881 return &Reg;
2882 return nullptr;
2883}
2884
2885static bool getRegNum(StringRef Str, unsigned& Num) {
2886 return !Str.getAsInteger(10, Num);
2887}
2888
2889bool
2890AMDGPUAsmParser::isRegister(const AsmToken &Token,
2891 const AsmToken &NextToken) const {
2892
2893 // A list of consecutive registers: [s0,s1,s2,s3]
2894 if (Token.is(AsmToken::LBrac))
2895 return true;
2896
2897 if (!Token.is(AsmToken::Identifier))
2898 return false;
2899
2900 // A single register like s0 or a range of registers like s[0:1]
2901
2902 StringRef Str = Token.getString();
2903 const RegInfo *Reg = getRegularRegInfo(Str);
2904 if (Reg) {
2905 StringRef RegName = Reg->Name;
2906 StringRef RegSuffix = Str.substr(RegName.size());
2907 if (!RegSuffix.empty()) {
2908 RegSuffix.consume_back(".l");
2909 RegSuffix.consume_back(".h");
2910 unsigned Num;
2911 // A single register with an index: rXX
2912 if (getRegNum(RegSuffix, Num))
2913 return true;
2914 } else {
2915 // A range of registers: r[XX:YY].
2916 if (NextToken.is(AsmToken::LBrac))
2917 return true;
2918 }
2919 }
2920
2921 return getSpecialRegForName(Str).isValid();
2922}
2923
2924bool
2925AMDGPUAsmParser::isRegister()
2926{
2927 return isRegister(getToken(), peekToken());
2928}
2929
2930MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2931 unsigned SubReg, unsigned RegWidth,
2932 SMLoc Loc) {
2933 assert(isRegularReg(RegKind));
2934
2935 unsigned AlignSize = 1;
2936 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2937 // SGPR and TTMP registers must be aligned.
2938 // Max required alignment is 4 dwords.
2939 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2940 }
2941
2942 if (RegNum % AlignSize != 0) {
2943 Error(Loc, "invalid register alignment");
2944 return MCRegister();
2945 }
2946
2947 unsigned RegIdx = RegNum / AlignSize;
2948 int RCID = getRegClass(RegKind, RegWidth);
2949 if (RCID == -1) {
2950 Error(Loc, "invalid or unsupported register size");
2951 return MCRegister();
2952 }
2953
2954 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2955 const MCRegisterClass RC = TRI->getRegClass(RCID);
2956 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
2957 Error(Loc, "register index is out of range");
2958 return AMDGPU::NoRegister;
2959 }
2960
2961 if (RegKind == IS_VGPR && !isGFX1250Plus() && RegIdx + RegWidth / 32 > 256) {
2962 Error(Loc, "register index is out of range");
2963 return MCRegister();
2964 }
2965
2966 MCRegister Reg = RC.getRegister(RegIdx);
2967
2968 if (SubReg) {
2969 Reg = TRI->getSubReg(Reg, SubReg);
2970
2971 // Currently all regular registers have their .l and .h subregisters, so
2972 // we should never need to generate an error here.
2973 assert(Reg && "Invalid subregister!");
2974 }
2975
2976 return Reg;
2977}
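// Worked examples of the alignment and class selection above (illustrative):
// s[2:3] is a 64-bit SGPR tuple with an even start index, so it is accepted;
// s[1:2] fails the "RegNum % AlignSize" check because 64-bit SGPR tuples must
// start at an even index; v[0:2] selects VReg_96 with no alignment demand.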
2978
2979bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
2980 unsigned &SubReg) {
2981 int64_t RegLo, RegHi;
2982 if (!skipToken(AsmToken::LBrac, "missing register index"))
2983 return false;
2984
2985 SMLoc FirstIdxLoc = getLoc();
2986 SMLoc SecondIdxLoc;
2987
2988 if (!parseExpr(RegLo))
2989 return false;
2990
2991 if (trySkipToken(AsmToken::Colon)) {
2992 SecondIdxLoc = getLoc();
2993 if (!parseExpr(RegHi))
2994 return false;
2995 } else {
2996 RegHi = RegLo;
2997 }
2998
2999 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
3000 return false;
3001
3002 if (!isUInt<32>(RegLo)) {
3003 Error(FirstIdxLoc, "invalid register index");
3004 return false;
3005 }
3006
3007 if (!isUInt<32>(RegHi)) {
3008 Error(SecondIdxLoc, "invalid register index");
3009 return false;
3010 }
3011
3012 if (RegLo > RegHi) {
3013 Error(FirstIdxLoc, "first register index should not exceed second index");
3014 return false;
3015 }
3016
3017 if (RegHi == RegLo) {
3018 StringRef RegSuffix = getTokenStr();
3019 if (RegSuffix == ".l") {
3020 SubReg = AMDGPU::lo16;
3021 lex();
3022 } else if (RegSuffix == ".h") {
3023 SubReg = AMDGPU::hi16;
3024 lex();
3025 }
3026 }
3027
3028 Num = static_cast<unsigned>(RegLo);
3029 RegWidth = 32 * ((RegHi - RegLo) + 1);
3030
3031 return true;
3032}
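// Plausible inputs for the range parser above: "[0:3]" yields Num = 0 and
// RegWidth = 128; "[5]" is a single 32-bit register (RegHi defaults to
// RegLo); "[2:2].l" additionally selects the lo16 subregister of the single
// register in the range.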
3033
3034MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3035 unsigned &RegNum,
3036 unsigned &RegWidth,
3037 SmallVectorImpl<AsmToken> &Tokens) {
3038 assert(isToken(AsmToken::Identifier));
3039 MCRegister Reg = getSpecialRegForName(getTokenStr());
3040 if (Reg) {
3041 RegNum = 0;
3042 RegWidth = 32;
3043 RegKind = IS_SPECIAL;
3044 Tokens.push_back(getToken());
3045 lex(); // skip register name
3046 }
3047 return Reg;
3048}
3049
3050MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3051 unsigned &RegNum,
3052 unsigned &RegWidth,
3053 SmallVectorImpl<AsmToken> &Tokens) {
3054 assert(isToken(AsmToken::Identifier));
3055 StringRef RegName = getTokenStr();
3056 auto Loc = getLoc();
3057
3058 const RegInfo *RI = getRegularRegInfo(RegName);
3059 if (!RI) {
3060 Error(Loc, "invalid register name");
3061 return MCRegister();
3062 }
3063
3064 Tokens.push_back(getToken());
3065 lex(); // skip register name
3066
3067 RegKind = RI->Kind;
3068 StringRef RegSuffix = RegName.substr(RI->Name.size());
3069 unsigned SubReg = NoSubRegister;
3070 if (!RegSuffix.empty()) {
3071 if (RegSuffix.consume_back(".l"))
3072 SubReg = AMDGPU::lo16;
3073 else if (RegSuffix.consume_back(".h"))
3074 SubReg = AMDGPU::hi16;
3075
3076 // Single 32-bit register: vXX.
3077 if (!getRegNum(RegSuffix, RegNum)) {
3078 Error(Loc, "invalid register index");
3079 return MCRegister();
3080 }
3081 RegWidth = 32;
3082 } else {
3083 // Range of registers: v[XX:YY]. ":YY" is optional.
3084 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3085 return MCRegister();
3086 }
3087
3088 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3089}
3090
3091MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3092 unsigned &RegNum, unsigned &RegWidth,
3093 SmallVectorImpl<AsmToken> &Tokens) {
3094 MCRegister Reg;
3095 auto ListLoc = getLoc();
3096
3097 if (!skipToken(AsmToken::LBrac,
3098 "expected a register or a list of registers")) {
3099 return MCRegister();
3100 }
3101
3102 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3103
3104 auto Loc = getLoc();
3105 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3106 return MCRegister();
3107 if (RegWidth != 32) {
3108 Error(Loc, "expected a single 32-bit register");
3109 return MCRegister();
3110 }
3111
3112 for (; trySkipToken(AsmToken::Comma); ) {
3113 RegisterKind NextRegKind;
3114 MCRegister NextReg;
3115 unsigned NextRegNum, NextRegWidth;
3116 Loc = getLoc();
3117
3118 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3119 NextRegNum, NextRegWidth,
3120 Tokens)) {
3121 return MCRegister();
3122 }
3123 if (NextRegWidth != 32) {
3124 Error(Loc, "expected a single 32-bit register");
3125 return MCRegister();
3126 }
3127 if (NextRegKind != RegKind) {
3128 Error(Loc, "registers in a list must be of the same kind");
3129 return MCRegister();
3130 }
3131 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3132 return MCRegister();
3133 }
3134
3135 if (!skipToken(AsmToken::RBrac,
3136 "expected a comma or a closing square bracket")) {
3137 return MCRegister();
3138 }
3139
3140 if (isRegularReg(RegKind))
3141 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3142
3143 return Reg;
3144}
3145
3146bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3147 MCRegister &Reg, unsigned &RegNum,
3148 unsigned &RegWidth,
3149 SmallVectorImpl<AsmToken> &Tokens) {
3150 auto Loc = getLoc();
3151 Reg = MCRegister();
3152
3153 if (isToken(AsmToken::Identifier)) {
3154 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3155 if (!Reg)
3156 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3157 } else {
3158 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3159 }
3160
3161 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3162 if (!Reg) {
3163 assert(Parser.hasPendingError());
3164 return false;
3165 }
3166
3167 if (!subtargetHasRegister(*TRI, Reg)) {
3168 if (Reg == AMDGPU::SGPR_NULL) {
3169 Error(Loc, "'null' operand is not supported on this GPU");
3170 } else {
3172 " register not available on this GPU");
3173 }
3174 return false;
3175 }
3176
3177 return true;
3178}
3179
3180bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3181 MCRegister &Reg, unsigned &RegNum,
3182 unsigned &RegWidth,
3183 bool RestoreOnFailure /*=false*/) {
3184 Reg = MCRegister();
3185
3187 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3188 if (RestoreOnFailure) {
3189 while (!Tokens.empty()) {
3190 getLexer().UnLex(Tokens.pop_back_val());
3191 }
3192 }
3193 return true;
3194 }
3195 return false;
3196}
3197
3198std::optional<StringRef>
3199AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3200 switch (RegKind) {
3201 case IS_VGPR:
3202 return StringRef(".amdgcn.next_free_vgpr");
3203 case IS_SGPR:
3204 return StringRef(".amdgcn.next_free_sgpr");
3205 default:
3206 return std::nullopt;
3207 }
3208}
3209
3210void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3211 auto SymbolName = getGprCountSymbolName(RegKind);
3212 assert(SymbolName && "initializing invalid register kind");
3213 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3214 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3215 Sym->setRedefinable(true);
3216}
3217
3218bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3219 unsigned DwordRegIndex,
3220 unsigned RegWidth) {
3221 // Symbols are only defined for GCN targets
3222 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3223 return true;
3224
3225 auto SymbolName = getGprCountSymbolName(RegKind);
3226 if (!SymbolName)
3227 return true;
3228 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3229
3230 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3231 int64_t OldCount;
3232
3233 if (!Sym->isVariable())
3234 return !Error(getLoc(),
3235 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3236 if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
3237 return !Error(
3238 getLoc(),
3239 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3240
3241 if (OldCount <= NewMax)
3242 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3243
3244 return true;
3245}
3246
3247std::unique_ptr<AMDGPUOperand>
3248AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3249 const auto &Tok = getToken();
3250 SMLoc StartLoc = Tok.getLoc();
3251 SMLoc EndLoc = Tok.getEndLoc();
3252 RegisterKind RegKind;
3253 MCRegister Reg;
3254 unsigned RegNum, RegWidth;
3255
3256 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3257 return nullptr;
3258 }
3259 if (isHsaAbi(getSTI())) {
3260 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3261 return nullptr;
3262 } else
3263 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3264 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3265}
3266
3267ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3268 bool HasSP3AbsModifier, LitModifier Lit) {
3269 // TODO: add syntactic sugar for 1/(2*PI)
3270
3271 if (isRegister() || isModifier())
3272 return ParseStatus::NoMatch;
3273
3274 if (Lit == LitModifier::None) {
3275 if (trySkipId("lit"))
3276 Lit = LitModifier::Lit;
3277 else if (trySkipId("lit64"))
3278 Lit = LitModifier::Lit64;
3279
3280 if (Lit != LitModifier::None) {
3281 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3282 return ParseStatus::Failure;
3283 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
3284 if (S.isSuccess() &&
3285 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3286 return ParseStatus::Failure;
3287 return S;
3288 }
3289 }
3290
3291 const auto& Tok = getToken();
3292 const auto& NextTok = peekToken();
3293 bool IsReal = Tok.is(AsmToken::Real);
3294 SMLoc S = getLoc();
3295 bool Negate = false;
3296
3297 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3298 lex();
3299 IsReal = true;
3300 Negate = true;
3301 }
3302
3303 AMDGPUOperand::Modifiers Mods;
3304 Mods.Lit = Lit;
3305
3306 if (IsReal) {
3307 // Floating-point expressions are not supported.
3308 // We can only allow floating-point literals with an
3309 // optional sign.
3310
3311 StringRef Num = getTokenStr();
3312 lex();
3313
3314 APFloat RealVal(APFloat::IEEEdouble());
3315 auto roundMode = APFloat::rmNearestTiesToEven;
3316 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3317 return ParseStatus::Failure;
3318 if (Negate)
3319 RealVal.changeSign();
3320
3321 Operands.push_back(
3322 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3323 AMDGPUOperand::ImmTyNone, true));
3324 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3325 Op.setModifiers(Mods);
3326
3327 return ParseStatus::Success;
3328
3329 } else {
3330 int64_t IntVal;
3331 const MCExpr *Expr;
3332 SMLoc S = getLoc();
3333
3334 if (HasSP3AbsModifier) {
3335 // This is a workaround for handling expressions
3336 // as arguments of SP3 'abs' modifier, for example:
3337 // |1.0|
3338 // |-1|
3339 // |1+x|
3340 // This syntax is not compatible with syntax of standard
3341 // MC expressions (due to the trailing '|').
3342 SMLoc EndLoc;
3343 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3344 return ParseStatus::Failure;
3345 } else {
3346 if (Parser.parseExpression(Expr))
3347 return ParseStatus::Failure;
3348 }
3349
3350 if (Expr->evaluateAsAbsolute(IntVal)) {
3351 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3352 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3353 Op.setModifiers(Mods);
3354 } else {
3355 if (Lit != LitModifier::None)
3356 return ParseStatus::NoMatch;
3357 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3358 }
3359
3360 return ParseStatus::Success;
3361 }
3362
3363 return ParseStatus::NoMatch;
3364}
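// Plausible operand forms accepted by parseImm above: "0.5" and "-0.5" as fp
// literals, "1+2" as an absolute integer expression, and "lit(0.5)" or
// "lit64(...)" wrapping a nested parseImm call with the literal modifier set.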
3365
3366ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3367 if (!isRegister())
3368 return ParseStatus::NoMatch;
3369
3370 if (auto R = parseRegister()) {
3371 assert(R->isReg());
3372 Operands.push_back(std::move(R));
3373 return ParseStatus::Success;
3374 }
3375 return ParseStatus::Failure;
3376}
3377
3378ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3379 bool HasSP3AbsMod, LitModifier Lit) {
3380 ParseStatus Res = parseReg(Operands);
3381 if (!Res.isNoMatch())
3382 return Res;
3383 if (isModifier())
3384 return ParseStatus::NoMatch;
3385 return parseImm(Operands, HasSP3AbsMod, Lit);
3386}
3387
3388bool
3389AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3390 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3391 const auto &str = Token.getString();
3392 return str == "abs" || str == "neg" || str == "sext";
3393 }
3394 return false;
3395}
3396
3397bool
3398AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3399 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3400}
3401
3402bool
3403AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3404 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3405}
3406
3407bool
3408AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3409 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3410}
3411
3412// Check if this is an operand modifier or an opcode modifier
3413// which may look like an expression but it is not. We should
3414// avoid parsing these modifiers as expressions. Currently
3415// recognized sequences are:
3416// |...|
3417// abs(...)
3418// neg(...)
3419// sext(...)
3420// -reg
3421// -|...|
3422// -abs(...)
3423// name:...
3424//
3425bool
3426AMDGPUAsmParser::isModifier() {
3427
3428 AsmToken Tok = getToken();
3429 AsmToken NextToken[2];
3430 peekTokens(NextToken);
3431
3432 return isOperandModifier(Tok, NextToken[0]) ||
3433 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3434 isOpcodeModifierWithVal(Tok, NextToken[0]);
3435}
3436
3437// Check if the current token is an SP3 'neg' modifier.
3438// Currently this modifier is allowed in the following context:
3439//
3440// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3441// 2. Before an 'abs' modifier: -abs(...)
3442// 3. Before an SP3 'abs' modifier: -|...|
3443//
3444// In all other cases "-" is handled as a part
3445// of an expression that follows the sign.
3446//
3447// Note: When "-" is followed by an integer literal,
3448// this is interpreted as integer negation rather
3449// than a floating-point NEG modifier applied to N.
3450 // Besides being counter-intuitive, such use of a floating-point
3451 // NEG modifier would have resulted in a different meaning
3452 // of integer literals used with VOP1/2/C and VOP3,
3453// for example:
3454// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3455// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3456// Negative fp literals with preceding "-" are
3457// handled likewise for uniformity
3458//
3459bool
3460AMDGPUAsmParser::parseSP3NegModifier() {
3461
3462 AsmToken NextToken[2];
3463 peekTokens(NextToken);
3464
3465 if (isToken(AsmToken::Minus) &&
3466 (isRegister(NextToken[0], NextToken[1]) ||
3467 NextToken[0].is(AsmToken::Pipe) ||
3468 isId(NextToken[0], "abs"))) {
3469 lex();
3470 return true;
3471 }
3472
3473 return false;
3474}
3475
3476ParseStatus
3477AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3478 bool AllowImm) {
3479 bool Neg, SP3Neg;
3480 bool Abs, SP3Abs;
3481 SMLoc Loc;
3482
3483 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3484 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3485 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3486
3487 SP3Neg = parseSP3NegModifier();
3488
3489 Loc = getLoc();
3490 Neg = trySkipId("neg");
3491 if (Neg && SP3Neg)
3492 return Error(Loc, "expected register or immediate");
3493 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3494 return ParseStatus::Failure;
3495
3496 Abs = trySkipId("abs");
3497 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3498 return ParseStatus::Failure;
3499
3500 LitModifier Lit = LitModifier::None;
3501 if (trySkipId("lit")) {
3502 Lit = LitModifier::Lit;
3503 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3504 return ParseStatus::Failure;
3505 } else if (trySkipId("lit64")) {
3506 Lit = LitModifier::Lit64;
3507 if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
3508 return ParseStatus::Failure;
3509 if (!has64BitLiterals())
3510 return Error(Loc, "lit64 is not supported on this GPU");
3511 }
3512
3513 Loc = getLoc();
3514 SP3Abs = trySkipToken(AsmToken::Pipe);
3515 if (Abs && SP3Abs)
3516 return Error(Loc, "expected register or immediate");
3517
3518 ParseStatus Res;
3519 if (AllowImm) {
3520 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3521 } else {
3522 Res = parseReg(Operands);
3523 }
3524 if (!Res.isSuccess())
3525 return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
3526 ? ParseStatus::Failure
3527 : Res;
3528
3529 if (Lit != LitModifier::None && !Operands.back()->isImm())
3530 Error(Loc, "expected immediate with lit modifier");
3531
3532 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3533 return ParseStatus::Failure;
3534 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3535 return ParseStatus::Failure;
3536 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3537 return ParseStatus::Failure;
3538 if (Lit != LitModifier::None &&
3539 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3540 return ParseStatus::Failure;
3541
3542 AMDGPUOperand::Modifiers Mods;
3543 Mods.Abs = Abs || SP3Abs;
3544 Mods.Neg = Neg || SP3Neg;
3545 Mods.Lit = Lit;
3546
3547 if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
3548 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3549 if (Op.isExpr())
3550 return Error(Op.getStartLoc(), "expected an absolute expression");
3551 Op.setModifiers(Mods);
3552 }
3553 return ParseStatus::Success;
3554}
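// Accepted spellings under the rules above (illustrative): "v0", "-v0",
// "|v0|", "abs(v0)", "-|v0|", "-abs(v0)", "neg(v0)", and "lit(1.0)". The
// ambiguous doubled sign "--1" is explicitly rejected in favor of "neg(-1)".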
3555
3556ParseStatus
3557AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3558 bool AllowImm) {
3559 bool Sext = trySkipId("sext");
3560 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3561 return ParseStatus::Failure;
3562
3563 ParseStatus Res;
3564 if (AllowImm) {
3565 Res = parseRegOrImm(Operands);
3566 } else {
3567 Res = parseReg(Operands);
3568 }
3569 if (!Res.isSuccess())
3570 return Sext ? ParseStatus::Failure : Res;
3571
3572 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3573 return ParseStatus::Failure;
3574
3575 AMDGPUOperand::Modifiers Mods;
3576 Mods.Sext = Sext;
3577
3578 if (Mods.hasIntModifiers()) {
3579 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3580 if (Op.isExpr())
3581 return Error(Op.getStartLoc(), "expected an absolute expression");
3582 Op.setModifiers(Mods);
3583 }
3584
3585 return ParseStatus::Success;
3586}
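// Illustrative use of the integer path above: "sext(v1)" consumes the
// wrapper, parses register v1, and marks the operand with the Sext modifier;
// a bare "v1" leaves the modifiers empty.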
3587
3588ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3589 return parseRegOrImmWithFPInputMods(Operands, false);
3590}
3591
3592ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3593 return parseRegOrImmWithIntInputMods(Operands, false);
3594}
3595
3596ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3597 auto Loc = getLoc();
3598 if (trySkipId("off")) {
3599 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3600 AMDGPUOperand::ImmTyOff, false));
3601 return ParseStatus::Success;
3602 }
3603
3604 if (!isRegister())
3605 return ParseStatus::NoMatch;
3606
3607 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3608 if (Reg) {
3609 Operands.push_back(std::move(Reg));
3610 return ParseStatus::Success;
3611 }
3612
3613 return ParseStatus::Failure;
3614}
3615
3616unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3617 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3618
3619 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3620 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3621 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3622 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3623 return Match_InvalidOperand;
3624
3625 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3626 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3627 // v_mac_f32/16 allow only dst_sel == DWORD;
3628 auto OpNum =
3629 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3630 const auto &Op = Inst.getOperand(OpNum);
3631 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3632 return Match_InvalidOperand;
3633 }
3634 }
3635
3636 // Asm can first try to match VOPD or VOPD3. By failing early here with
3637 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3638 // Checking later during validateInstruction does not give a chance to retry
3639 // parsing as a different encoding.
3640 if (tryAnotherVOPDEncoding(Inst))
3641 return Match_InvalidOperand;
3642
3643 return Match_Success;
3644}
3645
3645
3646 static ArrayRef<unsigned> getAllVariants() {
3647 static const unsigned Variants[] = {
3648 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3649 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3650 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3651 };
3652
3653 return ArrayRef(Variants);
3654 }
3655
3656// What asm variants we should check
3657ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3658 if (isForcedDPP() && isForcedVOP3()) {
3659 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3660 return ArrayRef(Variants);
3661 }
3662 if (getForcedEncodingSize() == 32) {
3663 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3664 return ArrayRef(Variants);
3665 }
3666
3667 if (isForcedVOP3()) {
3668 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3669 return ArrayRef(Variants);
3670 }
3671
3672 if (isForcedSDWA()) {
3673 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3674 AMDGPUAsmVariants::SDWA9};
3675 return ArrayRef(Variants);
3676 }
3677
3678 if (isForcedDPP()) {
3679 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3680 return ArrayRef(Variants);
3681 }
3682
3683 return getAllVariants();
3684}
3685
3686StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3687 if (isForcedDPP() && isForcedVOP3())
3688 return "e64_dpp";
3689
3690 if (getForcedEncodingSize() == 32)
3691 return "e32";
3692
3693 if (isForcedVOP3())
3694 return "e64";
3695
3696 if (isForcedSDWA())
3697 return "sdwa";
3698
3699 if (isForcedDPP())
3700 return "dpp";
3701
3702 return "";
3703}
3704
3705MCRegister
3706AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3707 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3708 for (MCPhysReg Reg : Desc.implicit_uses()) {
3709 switch (Reg) {
3710 case AMDGPU::FLAT_SCR:
3711 case AMDGPU::VCC:
3712 case AMDGPU::VCC_LO:
3713 case AMDGPU::VCC_HI:
3714 case AMDGPU::M0:
3715 return Reg;
3716 default:
3717 break;
3718 }
3719 }
3720 return MCRegister();
3721}
3722
3723 // NB: This code is correct only when used to check constant
3724 // bus limitations because GFX7 supports no f16 inline constants.
3725// Note that there are no cases when a GFX7 opcode violates
3726// constant bus limitations due to the use of an f16 constant.
3727bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3728 unsigned OpIdx) const {
3729 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3730
3731 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3732 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3733 return false;
3734 }
3735
3736 const MCOperand &MO = Inst.getOperand(OpIdx);
3737
3738 int64_t Val = MO.isImm() ? MO.getImm() : getLitValue(MO.getExpr());
3739 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3740
3741 switch (OpSize) { // expected operand size
3742 case 8:
3743 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3744 case 4:
3745 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3746 case 2: {
3747 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3748 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3749 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16)
3750 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3751
3752 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3753 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3754 return AMDGPU::isInlinableLiteralV2I16(Val);
3755
3756 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3757 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3758 return AMDGPU::isInlinableLiteralV2F16(Val);
3759
3760 if (OperandType == AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16)
3761 return false;
3762
3763 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2BF16 ||
3764 OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
3765 return AMDGPU::isInlinableLiteralV2BF16(Val);
3766
3767 if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
3768 OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16)
3769 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3770
3771 if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 ||
3772 OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16)
3773 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3774
3775 if (OperandType == AMDGPU::OPERAND_KIMM16)
3776 return false;
3777
3778 llvm_unreachable("invalid operand type");
3779 }
3780 default:
3781 llvm_unreachable("invalid operand size");
3782 }
3783}
3784
3785unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3786 if (!isGFX10Plus())
3787 return 1;
3788
3789 switch (Opcode) {
3790 // 64-bit shift instructions can use only one scalar value input
3791 case AMDGPU::V_LSHLREV_B64_e64:
3792 case AMDGPU::V_LSHLREV_B64_gfx10:
3793 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3794 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3795 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3796 case AMDGPU::V_LSHRREV_B64_e64:
3797 case AMDGPU::V_LSHRREV_B64_gfx10:
3798 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3799 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3800 case AMDGPU::V_ASHRREV_I64_e64:
3801 case AMDGPU::V_ASHRREV_I64_gfx10:
3802 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3803 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3804 case AMDGPU::V_LSHL_B64_e64:
3805 case AMDGPU::V_LSHR_B64_e64:
3806 case AMDGPU::V_ASHR_I64_e64:
3807 return 1;
3808 default:
3809 return 2;
3810 }
3811}
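
// Illustrative effect of the limits above (assuming GFX10+):
//   v_add3_u32 v0, s1, s2, v3          // OK: two scalar values, limit is 2
//   v_lshlrev_b64 v[0:1], s1, s[2:3]   // rejected: 64-bit shifts allow only
//                                      // one scalar value on the bus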
3812
3813constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3814 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3815
3816// Get regular operand indices in the same order as specified
3817// in the instruction (but append mandatory literals to the end).
3818 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3819 bool AddMandatoryLiterals = false) {
3820
3821 int16_t ImmIdx =
3822 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3823
3824 if (isVOPD(Opcode)) {
3825 int16_t ImmXIdx =
3826 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3827
3828 return {getNamedOperandIdx(Opcode, OpName::src0X),
3829 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3830 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3831 getNamedOperandIdx(Opcode, OpName::src0Y),
3832 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3833 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3834 ImmXIdx,
3835 ImmIdx};
3836 }
3837
3838 return {getNamedOperandIdx(Opcode, OpName::src0),
3839 getNamedOperandIdx(Opcode, OpName::src1),
3840 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3841}
3842
3843bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3844 const MCOperand &MO = Inst.getOperand(OpIdx);
3845 if (MO.isImm())
3846 return !isInlineConstant(Inst, OpIdx);
3847 if (MO.isReg()) {
3848 auto Reg = MO.getReg();
3849 if (!Reg)
3850 return false;
3851 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3852 auto PReg = mc2PseudoReg(Reg);
3853 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3854 }
3855 return true;
3856}
3857
3858// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3859// Writelane is special in that it can use SGPR and M0 (which would normally
3860// count as using the constant bus twice - but in this case it is allowed since
3861// the lane selector doesn't count as a use of the constant bus). However, it is
3862// still required to abide by the 1 SGPR rule.
3863static bool checkWriteLane(const MCInst &Inst) {
3864 const unsigned Opcode = Inst.getOpcode();
3865 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3866 return false;
3867 const MCOperand &LaneSelOp = Inst.getOperand(2);
3868 if (!LaneSelOp.isReg())
3869 return false;
3870 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3871 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3872}
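
// E.g. "v_writelane_b32 v0, s1, m0": the lane select m0 is not counted
// against the constant bus by the check above, but the instruction must
// still use no more than one SGPR (here s1).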
3873
3874bool AMDGPUAsmParser::validateConstantBusLimitations(
3875 const MCInst &Inst, const OperandVector &Operands) {
3876 const unsigned Opcode = Inst.getOpcode();
3877 const MCInstrDesc &Desc = MII.get(Opcode);
3878 MCRegister LastSGPR;
3879 unsigned ConstantBusUseCount = 0;
3880 unsigned NumLiterals = 0;
3881 unsigned LiteralSize;
3882
3883 if (!(Desc.TSFlags &
3884 (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3885 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3886 !isVOPD(Opcode))
3887 return true;
3888
3889 if (checkWriteLane(Inst))
3890 return true;
3891
3892 // Check special imm operands (used by madmk, etc)
3893 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3894 ++NumLiterals;
3895 LiteralSize = 4;
3896 }
3897
3898 SmallDenseSet<MCRegister> SGPRsUsed;
3899 MCRegister SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3900 if (SGPRUsed) {
3901 SGPRsUsed.insert(SGPRUsed);
3902 ++ConstantBusUseCount;
3903 }
3904
3905 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3906
3907 unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
3908
3909 for (int OpIdx : OpIndices) {
3910 if (OpIdx == -1)
3911 continue;
3912
3913 const MCOperand &MO = Inst.getOperand(OpIdx);
3914 if (usesConstantBus(Inst, OpIdx)) {
3915 if (MO.isReg()) {
3916 LastSGPR = mc2PseudoReg(MO.getReg());
3917 // Pairs of registers with partial intersections like these:
3918 // s0, s[0:1]
3919 // flat_scratch_lo, flat_scratch
3920 // flat_scratch_lo, flat_scratch_hi
3921 // are theoretically valid but are disabled anyway.
3922 // Note that this code mimics SIInstrInfo::verifyInstruction
3923 if (SGPRsUsed.insert(LastSGPR).second) {
3924 ++ConstantBusUseCount;
3925 }
3926 } else { // Expression or a literal
3927
3928 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3929 continue; // special operand like VINTERP attr_chan
3930
3931 // An instruction may use only one literal.
3932 // This has been validated on the previous step.
3933 // See validateVOPLiteral.
3934 // This literal may be used as more than one operand.
3935 // If all these operands are of the same size,
3936 // this literal counts as one scalar value.
3937 // Otherwise it counts as 2 scalar values.
3938 // See "GFX10 Shader Programming", section 3.6.2.3.
3939
3940 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3941 if (Size < 4)
3942 Size = 4;
3943
3944 if (NumLiterals == 0) {
3945 NumLiterals = 1;
3946 LiteralSize = Size;
3947 } else if (LiteralSize != Size) {
3948 NumLiterals = 2;
3949 }
3950 }
3951 }
3952
3953 if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
3954 Error(getOperandLoc(Operands, OpIdx),
3955 "invalid operand (violates constant bus restrictions)");
3956 return false;
3957 }
3958 }
3959 return true;
3960}
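
// Illustrative application of the literal-size rule above: a single literal
// may appear in several operands, e.g. (assuming GFX10+ VOP3 literals)
//   v_fma_f32 v0, 0x42f60000, v1, 0x42f60000
// counts as one scalar value because both uses are 32-bit operands; the same
// bit pattern feeding operands of different sizes would count as two.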
3961
3962std::optional<unsigned>
3963AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
3964
3965 const unsigned Opcode = Inst.getOpcode();
3966 if (!isVOPD(Opcode))
3967 return {};
3968
3969 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3970
3971 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3972 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3973 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3974 ? Opr.getReg()
3975 : MCRegister();
3976 };
3977
3978 // On GFX1170+ if both OpX and OpY are V_MOV_B32 then OpY uses the SRC2
3979 // source cache.
3980 bool SkipSrc =
3981 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1170 ||
3982 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
3983 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
3984 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx13 ||
3985 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250 ||
3986 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx13;
3987 bool AllowSameVGPR = isGFX1250Plus();
3988
3989 if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
3990 for (auto OpName : {OpName::src0X, OpName::src0Y}) {
3991 int I = getNamedOperandIdx(Opcode, OpName);
3992 const MCOperand &Op = Inst.getOperand(I);
3993 if (!Op.isImm())
3994 continue;
3995 int64_t Imm = Op.getImm();
3996 if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
3997 !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
3998 return (unsigned)I;
3999 }
4000
4001 for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
4002 OpName::vsrc2Y, OpName::imm}) {
4003 int I = getNamedOperandIdx(Opcode, OpName);
4004 if (I == -1)
4005 continue;
4006 const MCOperand &Op = Inst.getOperand(I);
4007 if (Op.isImm())
4008 return (unsigned)I;
4009 }
4010 }
4011
4012 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4013 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
4014 getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
4015
4016 return InvalidCompOprIdx;
4017}
4018
4019bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
4020 const OperandVector &Operands) {
4021
4022 unsigned Opcode = Inst.getOpcode();
4023 bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
4024
4025 if (AsVOPD3) {
4026 for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
4027 AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
4028 if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
4029 (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
4030 Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
4031 }
4032 }
4033
4034 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
4035 if (!InvalidCompOprIdx.has_value())
4036 return true;
4037
4038 auto CompOprIdx = *InvalidCompOprIdx;
4039 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4040 auto ParsedIdx =
4041 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
4042 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
4043 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
4044
4045 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4046 if (CompOprIdx == VOPD::Component::DST) {
4047 if (AsVOPD3)
4048 Error(Loc, "dst registers must be distinct");
4049 else
4050 Error(Loc, "one dst register must be even and the other odd");
4051 } else {
4052 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4053 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
4054 " operands must use different VGPR banks");
4055 }
4056
4057 return false;
4058}
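
// Illustrative diagnostics from the check above (VOPD; src banks are assumed
// here to be derived from the VGPR number modulo 4):
//   v_dual_add_f32 v0, v1, v2 :: v_dual_mul_f32 v3, v4, v7   // OK
//   v_dual_add_f32 v0, v1, v2 :: v_dual_mul_f32 v3, v5, v7   // rejected:
//                                      // src0 operands v1 and v5 share a bank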
4059
4060// \returns true if \p Inst does not satisfy VOPD constraints, but can be
4061// potentially used as VOPD3 with the same operands.
4062bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4063 // First check if it fits VOPD
4064 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
4065 if (!InvalidCompOprIdx.has_value())
4066 return false;
4067
4068 // Then if it fits VOPD3
4069 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
4070 if (InvalidCompOprIdx.has_value()) {
4071 // If failed operand is dst it is better to show error about VOPD3
4072 // instruction as it has more capabilities and error message will be
4073 // more informative. If the dst is not legal for VOPD3, then it is not
4074 // legal for VOPD either.
4075 if (*InvalidCompOprIdx == VOPD::Component::DST)
4076 return true;
4077
4078 // Otherwise prefer VOPD as we may find ourselves in an awkward situation
4079 // with a conflict in the tied implicit src2 of fmac and no asm operand
4080 // to point to.
4081 return false;
4082 }
4083 return true;
4084}
4085
4086 // \returns true if a VOPD3 instruction can also be represented as a shorter
4087 // VOPD encoding.
4088bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
4089 const unsigned Opcode = Inst.getOpcode();
4090 const auto &II = getVOPDInstInfo(Opcode, &MII);
4091 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
4092 if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
4093 !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
4094 return false;
4095
4096 // This is an awkward exception: the VOPD3 variant of V_DUAL_CNDMASK_B32 has
4097 // an explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
4098 // be parsed as VOPD, which does not accept src2.
4099 if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4100 II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4101 return false;
4102
4103 // If any modifiers are set this cannot be VOPD.
4104 for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4105 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4106 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4107 int I = getNamedOperandIdx(Opcode, OpName);
4108 if (I == -1)
4109 continue;
4110 if (Inst.getOperand(I).getImm())
4111 return false;
4112 }
4113
4114 return !tryVOPD3(Inst);
4115}
4116
4117 // VOPD3 has more relaxed register constraints than VOPD. We prefer the
4118 // shorter VOPD form but switch to VOPD3 otherwise.
4119bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4120 const unsigned Opcode = Inst.getOpcode();
4121 if (!isGFX1250Plus() || !isVOPD(Opcode))
4122 return false;
4123
4124 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4125 return tryVOPD(Inst);
4126 return tryVOPD3(Inst);
4127}
4128
4129bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4130
4131 const unsigned Opc = Inst.getOpcode();
4132 const MCInstrDesc &Desc = MII.get(Opc);
4133
4134 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4135 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4136 assert(ClampIdx != -1);
4137 return Inst.getOperand(ClampIdx).getImm() == 0;
4138 }
4139
4140 return true;
4141}
4142
4143 constexpr uint64_t MIMGFlags =
4144 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
4145
4146bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) {
4147
4148 const unsigned Opc = Inst.getOpcode();
4149 const MCInstrDesc &Desc = MII.get(Opc);
4150
4151 if ((Desc.TSFlags & MIMGFlags) == 0)
4152 return true;
4153
4154 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
4155 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4156 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
4157
4158 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
4159 return true;
4160
4161 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
4162 return true;
4163
4164 unsigned VDataSize = getRegOperandSize(Desc, VDataIdx);
4165 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
4166 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4167 if (DMask == 0)
4168 DMask = 1;
4169
4170 bool IsPackedD16 = false;
4171 unsigned DataSize =
4172 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
4173 if (hasPackedD16()) {
4174 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4175 IsPackedD16 = D16Idx >= 0;
4176 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
4177 DataSize = (DataSize + 1) / 2;
4178 }
4179
4180 if ((VDataSize / 4) == DataSize + TFESize)
4181 return true;
4182
4183 StringRef Modifiers;
4184 if (isGFX90A())
4185 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
4186 else
4187 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
4188
4189 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
4190 return false;
4191}
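
// Worked example for the size check above:
//   image_load v[0:2], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D
// popcount(0x7) = 3 data components and no tfe, so vdata must be 3 VGPRs.
// Adding tfe:1 requires one more VGPR (v[0:3]); packed d16 halves the
// component count, rounding up.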
4192
4193bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
4194 const unsigned Opc = Inst.getOpcode();
4195 const MCInstrDesc &Desc = MII.get(Opc);
4196
4197 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
4198 return true;
4199
4200 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4201
4202 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4203 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4204 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
4205 AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
4206 ? AMDGPU::OpName::srsrc
4207 : AMDGPU::OpName::rsrc;
4208 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
4209 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4210 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
4211
4212 assert(VAddr0Idx != -1);
4213 assert(SrsrcIdx != -1);
4214 assert(SrsrcIdx > VAddr0Idx);
4215
4216 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
4217 if (BaseOpcode->BVH) {
4218 if (IsA16 == BaseOpcode->A16)
4219 return true;
4220 Error(IDLoc, "image address size does not match a16");
4221 return false;
4222 }
4223
4224 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4225 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4226 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4227 unsigned ActualAddrSize =
4228 IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(Desc, VAddr0Idx) / 4;
4229
4230 unsigned ExpectedAddrSize =
4231 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
4232
4233 if (IsNSA) {
4234 if (hasPartialNSAEncoding() &&
4235 ExpectedAddrSize >
4236 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
4237 int VAddrLastIdx = SrsrcIdx - 1;
4238 unsigned VAddrLastSize = getRegOperandSize(Desc, VAddrLastIdx) / 4;
4239
4240 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4241 }
4242 } else {
4243 if (ExpectedAddrSize > 12)
4244 ExpectedAddrSize = 16;
4245
4246 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
4247 // This provides backward compatibility for assembly created
4248 // before 160b/192b/224b types were directly supported.
4249 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4250 return true;
4251 }
4252
4253 if (ActualAddrSize == ExpectedAddrSize)
4254 return true;
4255
4256 Error(IDLoc, "image address size does not match dim and a16");
4257 return false;
4258}
4259
4260bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4261
4262 const unsigned Opc = Inst.getOpcode();
4263 const MCInstrDesc &Desc = MII.get(Opc);
4264
4265 if ((Desc.TSFlags & MIMGFlags) == 0)
4266 return true;
4267 if (!Desc.mayLoad() || !Desc.mayStore())
4268 return true; // Not atomic
4269
4270 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4271 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4272
4273 // This is an incomplete check because image_atomic_cmpswap
4274 // may only use 0x3 and 0xf while other atomic operations
4275 // may use 0x1 and 0x3. However these limitations are
4276 // verified when we check that dmask matches dst size.
4277 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4278}
4279
4280bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4281
4282 const unsigned Opc = Inst.getOpcode();
4283 const MCInstrDesc &Desc = MII.get(Opc);
4284
4285 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4286 return true;
4287
4288 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4289 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4290
4291 // GATHER4 instructions use dmask in a different fashion compared to
4292 // other MIMG instructions. The only useful DMASK values are
4293 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4294 // (red,red,red,red) etc.) The ISA document doesn't mention
4295 // this.
4296 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4297}
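
// E.g. "image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x4 ..." fetches
// the blue channel from four texels; dmask:0x1/0x2/0x4/0x8 are the only
// values the check above accepts.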
4298
4299bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4300 const OperandVector &Operands) {
4301 if (!isGFX10Plus())
4302 return true;
4303
4304 const unsigned Opc = Inst.getOpcode();
4305 const MCInstrDesc &Desc = MII.get(Opc);
4306
4307 if ((Desc.TSFlags & MIMGFlags) == 0)
4308 return true;
4309
4310 // image_bvh_intersect_ray instructions do not have dim
4311 if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH)
4312 return true;
4313
4314 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4315 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4316 if (Op.isDim())
4317 return true;
4318 }
4319 return false;
4320}
4321
4322bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4323 const unsigned Opc = Inst.getOpcode();
4324 const MCInstrDesc &Desc = MII.get(Opc);
4325
4326 if ((Desc.TSFlags & MIMGFlags) == 0)
4327 return true;
4328
4329 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4330 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4331 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4332
4333 if (!BaseOpcode->MSAA)
4334 return true;
4335
4336 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4337 assert(DimIdx != -1);
4338
4339 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4340 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4341
4342 return DimInfo->MSAA;
4343}
4344
4345static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4346{
4347 switch (Opcode) {
4348 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4349 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4350 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4351 return true;
4352 default:
4353 return false;
4354 }
4355}
4356
4357// movrels* opcodes should only allow VGPRS as src0.
4358// This is specified in .td description for vop1/vop3,
4359// but sdwa is handled differently. See isSDWAOperand.
4360bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4361 const OperandVector &Operands) {
4362
4363 const unsigned Opc = Inst.getOpcode();
4364 const MCInstrDesc &Desc = MII.get(Opc);
4365
4366 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4367 return true;
4368
4369 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4370 assert(Src0Idx != -1);
4371
4372 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4373 if (Src0.isReg()) {
4374 auto Reg = mc2PseudoReg(Src0.getReg());
4375 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4376 if (!isSGPR(Reg, TRI))
4377 return true;
4378 }
4379
4380 Error(getOperandLoc(Operands, Src0Idx), "source operand must be a VGPR");
4381 return false;
4382}
4383
4384bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4385 const OperandVector &Operands) {
4386
4387 const unsigned Opc = Inst.getOpcode();
4388
4389 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4390 return true;
4391
4392 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4393 assert(Src0Idx != -1);
4394
4395 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4396 if (!Src0.isReg())
4397 return true;
4398
4399 auto Reg = mc2PseudoReg(Src0.getReg());
4400 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4401 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4402 Error(getOperandLoc(Operands, Src0Idx),
4403 "source operand must be either a VGPR or an inline constant");
4404 return false;
4405 }
4406
4407 return true;
4408}
4409
4410bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4411 const OperandVector &Operands) {
4412 unsigned Opcode = Inst.getOpcode();
4413 const MCInstrDesc &Desc = MII.get(Opcode);
4414
4415 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4416 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4417 return true;
4418
4419 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4420 if (Src2Idx == -1)
4421 return true;
4422
4423 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4424 Error(getOperandLoc(Operands, Src2Idx),
4425 "inline constants are not allowed for this operand");
4426 return false;
4427 }
4428
4429 return true;
4430}
4431
4432bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4433 const OperandVector &Operands) {
4434 const unsigned Opc = Inst.getOpcode();
4435 const MCInstrDesc &Desc = MII.get(Opc);
4436
4437 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4438 return true;
4439
4440 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4441 if (BlgpIdx != -1) {
4442 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
4443 int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
4444
4445 unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
4446 unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();
4447
4448 // Validate the correct register size was used for the floating point
4449 // format operands
4450
4451 bool Success = true;
4452 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
4453 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4454 Error(getOperandLoc(Operands, Src0Idx),
4455 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4456 Success = false;
4457 }
4458
4459 if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
4460 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4461 Error(getOperandLoc(Operands, Src1Idx),
4462 "wrong register tuple size for blgp value " + Twine(BLGP));
4463 Success = false;
4464 }
4465
4466 return Success;
4467 }
4468 }
4469
4470 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4471 if (Src2Idx == -1)
4472 return true;
4473
4474 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4475 if (!Src2.isReg())
4476 return true;
4477
4478 MCRegister Src2Reg = Src2.getReg();
4479 MCRegister DstReg = Inst.getOperand(0).getReg();
4480 if (Src2Reg == DstReg)
4481 return true;
4482
4483 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4484 if (TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[0], HwMode))
4485 .getSizeInBits() <= 128)
4486 return true;
4487
4488 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4489 Error(getOperandLoc(Operands, Src2Idx),
4490 "source 2 operand must not partially overlap with dst");
4491 return false;
4492 }
4493
4494 return true;
4495}
4496
4497bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4498 switch (Inst.getOpcode()) {
4499 default:
4500 return true;
4501 case V_DIV_SCALE_F32_gfx6_gfx7:
4502 case V_DIV_SCALE_F32_vi:
4503 case V_DIV_SCALE_F32_gfx10:
4504 case V_DIV_SCALE_F64_gfx6_gfx7:
4505 case V_DIV_SCALE_F64_vi:
4506 case V_DIV_SCALE_F64_gfx10:
4507 break;
4508 }
4509
4510 // TODO: Check that src0 = src1 or src2.
4511
4512 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4513 AMDGPU::OpName::src1_modifiers,
4514 AMDGPU::OpName::src2_modifiers}) {
4515 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4516 .getImm() &
4517 SISrcMods::ABS) {
4518 return false;
4519 }
4520 }
4521
4522 return true;
4523}
4524
4525bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4526
4527 const unsigned Opc = Inst.getOpcode();
4528 const MCInstrDesc &Desc = MII.get(Opc);
4529
4530 if ((Desc.TSFlags & MIMGFlags) == 0)
4531 return true;
4532
4533 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4534 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4535 if (isCI() || isSI())
4536 return false;
4537 }
4538
4539 return true;
4540}
4541
4542bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4543 const unsigned Opc = Inst.getOpcode();
4544 const MCInstrDesc &Desc = MII.get(Opc);
4545
4546 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4547 return true;
4548
4549 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4550
4551 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4552}
4553
4554static bool IsRevOpcode(const unsigned Opcode)
4555{
4556 switch (Opcode) {
4557 case AMDGPU::V_SUBREV_F32_e32:
4558 case AMDGPU::V_SUBREV_F32_e64:
4559 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4560 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4561 case AMDGPU::V_SUBREV_F32_e32_vi:
4562 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4563 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4564 case AMDGPU::V_SUBREV_F32_e64_vi:
4565
4566 case AMDGPU::V_SUBREV_CO_U32_e32:
4567 case AMDGPU::V_SUBREV_CO_U32_e64:
4568 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4569 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4570
4571 case AMDGPU::V_SUBBREV_U32_e32:
4572 case AMDGPU::V_SUBBREV_U32_e64:
4573 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4574 case AMDGPU::V_SUBBREV_U32_e32_vi:
4575 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4576 case AMDGPU::V_SUBBREV_U32_e64_vi:
4577
4578 case AMDGPU::V_SUBREV_U32_e32:
4579 case AMDGPU::V_SUBREV_U32_e64:
4580 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4581 case AMDGPU::V_SUBREV_U32_e32_vi:
4582 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4583 case AMDGPU::V_SUBREV_U32_e64_vi:
4584
4585 case AMDGPU::V_SUBREV_F16_e32:
4586 case AMDGPU::V_SUBREV_F16_e64:
4587 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4588 case AMDGPU::V_SUBREV_F16_e32_vi:
4589 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4590 case AMDGPU::V_SUBREV_F16_e64_vi:
4591
4592 case AMDGPU::V_SUBREV_U16_e32:
4593 case AMDGPU::V_SUBREV_U16_e64:
4594 case AMDGPU::V_SUBREV_U16_e32_vi:
4595 case AMDGPU::V_SUBREV_U16_e64_vi:
4596
4597 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4598 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4599 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4600
4601 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4602 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4603
4604 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4605 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4606
4607 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4608 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4609
4610 case AMDGPU::V_LSHRREV_B32_e32:
4611 case AMDGPU::V_LSHRREV_B32_e64:
4612 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4613 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4614 case AMDGPU::V_LSHRREV_B32_e32_vi:
4615 case AMDGPU::V_LSHRREV_B32_e64_vi:
4616 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4617 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4618
4619 case AMDGPU::V_ASHRREV_I32_e32:
4620 case AMDGPU::V_ASHRREV_I32_e64:
4621 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4622 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4623 case AMDGPU::V_ASHRREV_I32_e32_vi:
4624 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4625 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4626 case AMDGPU::V_ASHRREV_I32_e64_vi:
4627
4628 case AMDGPU::V_LSHLREV_B32_e32:
4629 case AMDGPU::V_LSHLREV_B32_e64:
4630 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4631 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4632 case AMDGPU::V_LSHLREV_B32_e32_vi:
4633 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4634 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4635 case AMDGPU::V_LSHLREV_B32_e64_vi:
4636
4637 case AMDGPU::V_LSHLREV_B16_e32:
4638 case AMDGPU::V_LSHLREV_B16_e64:
4639 case AMDGPU::V_LSHLREV_B16_e32_vi:
4640 case AMDGPU::V_LSHLREV_B16_e64_vi:
4641 case AMDGPU::V_LSHLREV_B16_gfx10:
4642
4643 case AMDGPU::V_LSHRREV_B16_e32:
4644 case AMDGPU::V_LSHRREV_B16_e64:
4645 case AMDGPU::V_LSHRREV_B16_e32_vi:
4646 case AMDGPU::V_LSHRREV_B16_e64_vi:
4647 case AMDGPU::V_LSHRREV_B16_gfx10:
4648
4649 case AMDGPU::V_ASHRREV_I16_e32:
4650 case AMDGPU::V_ASHRREV_I16_e64:
4651 case AMDGPU::V_ASHRREV_I16_e32_vi:
4652 case AMDGPU::V_ASHRREV_I16_e64_vi:
4653 case AMDGPU::V_ASHRREV_I16_gfx10:
4654
4655 case AMDGPU::V_LSHLREV_B64_e64:
4656 case AMDGPU::V_LSHLREV_B64_gfx10:
4657 case AMDGPU::V_LSHLREV_B64_vi:
4658
4659 case AMDGPU::V_LSHRREV_B64_e64:
4660 case AMDGPU::V_LSHRREV_B64_gfx10:
4661 case AMDGPU::V_LSHRREV_B64_vi:
4662
4663 case AMDGPU::V_ASHRREV_I64_e64:
4664 case AMDGPU::V_ASHRREV_I64_gfx10:
4665 case AMDGPU::V_ASHRREV_I64_vi:
4666
4667 case AMDGPU::V_PK_LSHLREV_B16:
4668 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4669 case AMDGPU::V_PK_LSHLREV_B16_vi:
4670
4671 case AMDGPU::V_PK_LSHRREV_B16:
4672 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4673 case AMDGPU::V_PK_LSHRREV_B16_vi:
4674 case AMDGPU::V_PK_ASHRREV_I16:
4675 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4676 case AMDGPU::V_PK_ASHRREV_I16_vi:
4677 return true;
4678 default:
4679 return false;
4680 }
4681}
4682
4683bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
4684 const OperandVector &Operands) {
4685 using namespace SIInstrFlags;
4686 const unsigned Opcode = Inst.getOpcode();
4687 const MCInstrDesc &Desc = MII.get(Opcode);
4688
4689 // lds_direct register is defined so that it can be used
4690 // with 9-bit operands only. Ignore encodings which do not accept these.
4691 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4692 if ((Desc.TSFlags & Enc) == 0)
4693 return true;
4694
4695 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4696 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4697 if (SrcIdx == -1)
4698 break;
4699 const auto &Src = Inst.getOperand(SrcIdx);
4700 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4701
4702 if (isGFX90A() || isGFX11Plus()) {
4703 Error(getOperandLoc(Operands, SrcIdx),
4704 "lds_direct is not supported on this GPU");
4705 return false;
4706 }
4707
4708 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) {
4709 Error(getOperandLoc(Operands, SrcIdx),
4710 "lds_direct cannot be used with this instruction");
4711 return false;
4712 }
4713
4714 if (SrcName != OpName::src0) {
4715 Error(getOperandLoc(Operands, SrcIdx),
4716 "lds_direct may be used as src0 only");
4717 return false;
4718 }
4719 }
4720 }
4721
4722 return true;
4723}
4724
4725SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4726 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4727 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4728 if (Op.isFlatOffset())
4729 return Op.getStartLoc();
4730 }
4731 return getLoc();
4732}
4733
4734bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4735 const OperandVector &Operands) {
4736 auto Opcode = Inst.getOpcode();
4737 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4738 if (OpNum == -1)
4739 return true;
4740
4741 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4742 if ((TSFlags & SIInstrFlags::FLAT))
4743 return validateFlatOffset(Inst, Operands);
4744
4745 if ((TSFlags & SIInstrFlags::SMRD))
4746 return validateSMEMOffset(Inst, Operands);
4747
4748 const auto &Op = Inst.getOperand(OpNum);
4749 // GFX12+ buffer ops: InstOffset is signed 24-bit, but must not be negative.
4750 if (isGFX12Plus() &&
4751 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4752 const unsigned OffsetSize = 24;
4753 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
4754 Error(getFlatOffsetLoc(Operands),
4755 Twine("expected a ") + Twine(OffsetSize - 1) +
4756 "-bit unsigned offset for buffer ops");
4757 return false;
4758 }
4759 } else {
4760 const unsigned OffsetSize = 16;
4761 if (!isUIntN(OffsetSize, Op.getImm())) {
4762 Error(getFlatOffsetLoc(Operands),
4763 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4764 return false;
4765 }
4766 }
4767 return true;
4768}
4769
4770bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4771 const OperandVector &Operands) {
4772 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4773 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4774 return true;
4775
4776 auto Opcode = Inst.getOpcode();
4777 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4778 assert(OpNum != -1);
4779
4780 const auto &Op = Inst.getOperand(OpNum);
4781 if (!hasFlatOffsets() && Op.getImm() != 0) {
4782 Error(getFlatOffsetLoc(Operands),
4783 "flat offset modifier is not supported on this GPU");
4784 return false;
4785 }
4786
4787 // For pre-GFX12 FLAT instructions the offset must be positive;
4788 // MSB is ignored and forced to zero.
4789 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4790 bool AllowNegative =
4791 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4792 isGFX12Plus();
4793 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4794 Error(getFlatOffsetLoc(Operands),
4795 Twine("expected a ") +
4796 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4797 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4798 return false;
4799 }
4800
4801 return true;
4802}
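
// Illustrative cases for the check above (actual widths come from
// AMDGPU::getNumFlatOffsetBits):
//   flat_load_dword v0, v[0:1] offset:2040     // OK where offsets are legal
//   flat_load_dword v0, v[0:1] offset:-16      // rejected before GFX12: FLAT
//                                              // offsets must be non-negative
// global_* and scratch_* forms accept signed (negative) offsets.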
4803
4804SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4805 // Start with second operand because SMEM Offset cannot be dst or src0.
4806 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4807 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4808 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4809 return Op.getStartLoc();
4810 }
4811 return getLoc();
4812}
4813
4814bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4815 const OperandVector &Operands) {
4816 if (isCI() || isSI())
4817 return true;
4818
4819 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4820 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4821 return true;
4822
4823 auto Opcode = Inst.getOpcode();
4824 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4825 if (OpNum == -1)
4826 return true;
4827
4828 const auto &Op = Inst.getOperand(OpNum);
4829 if (!Op.isImm())
4830 return true;
4831
4832 uint64_t Offset = Op.getImm();
4833 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4834 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4835 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4836 return true;
4837
4838 Error(getSMEMOffsetLoc(Operands),
4839 isGFX12Plus() && IsBuffer
4840 ? "expected a 23-bit unsigned offset for buffer ops"
4841 : isGFX12Plus() ? "expected a 24-bit signed offset"
4842 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4843 : "expected a 21-bit signed offset");
4844
4845 return false;
4846}
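
// Summary of the SMEM offset ranges enforced above:
//   GFX12+ buffer ops     23-bit unsigned
//   GFX12+ otherwise      24-bit signed
//   VI, or buffer ops     20-bit unsigned
//   all remaining cases   21-bit signed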
4847
4848bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
4849 const OperandVector &Operands) {
4850 unsigned Opcode = Inst.getOpcode();
4851 const MCInstrDesc &Desc = MII.get(Opcode);
4852 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4853 return true;
4854
4855 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4856 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4857
4858 const int OpIndices[] = { Src0Idx, Src1Idx };
4859
4860 unsigned NumExprs = 0;
4861 unsigned NumLiterals = 0;
4862 int64_t LiteralValue;
4863
4864 for (int OpIdx : OpIndices) {
4865 if (OpIdx == -1) break;
4866
4867 const MCOperand &MO = Inst.getOperand(OpIdx);
4868 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4869 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4870 bool IsLit = false;
4871 std::optional<int64_t> Imm;
4872 if (MO.isImm()) {
4873 Imm = MO.getImm();
4874 } else if (MO.isExpr()) {
4875 if (isLitExpr(MO.getExpr())) {
4876 IsLit = true;
4877 Imm = getLitValue(MO.getExpr());
4878 }
4879 } else {
4880 continue;
4881 }
4882
4883 if (!Imm.has_value()) {
4884 ++NumExprs;
4885 } else if (!isInlineConstant(Inst, OpIdx)) {
4886 auto OpType = static_cast<AMDGPU::OperandType>(
4887 Desc.operands()[OpIdx].OperandType);
4888 int64_t Value = encode32BitLiteral(*Imm, OpType, IsLit);
4889 if (NumLiterals == 0 || LiteralValue != Value) {
4890 LiteralValue = Value;
4891 ++NumLiterals;
4892 }
4893 }
4894 }
4895 }
4896
4897 if (NumLiterals + NumExprs <= 1)
4898 return true;
4899
4900 Error(getOperandLoc(Operands, Src1Idx),
4901 "only one unique literal operand is allowed");
4902 return false;
4903}
4904
4905bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4906 const unsigned Opc = Inst.getOpcode();
4907 if (isPermlane16(Opc)) {
4908 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4909 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4910
4911 if (OpSel & ~3)
4912 return false;
4913 }
4914
4915 uint64_t TSFlags = MII.get(Opc).TSFlags;
4916
4917 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4918 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4919 if (OpSelIdx != -1) {
4920 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4921 return false;
4922 }
4923 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4924 if (OpSelHiIdx != -1) {
4925 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4926 return false;
4927 }
4928 }
4929
4930 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4931 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4932 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4933 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4934 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4935 if (OpSel & 3)
4936 return false;
4937 }
4938
4939 // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
4940 // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
4941 // the first SGPR and use it for both the low and high operations.
4942 if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
4943 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4944 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4945 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4946 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4947
4948 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4949 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4950 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4951 unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
4952
4953 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4954
4955 auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
4956 unsigned Mask = 1U << Index;
4957 return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
4958 };
4959
4960 if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
4961 !VerifyOneSGPR(/*Index=*/0))
4962 return false;
4963 if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
4964 !VerifyOneSGPR(/*Index=*/1))
4965 return false;
4966
4967 int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4968 if (Src2Idx != -1) {
4969 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4970 if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
4971 !VerifyOneSGPR(/*Index=*/2))
4972 return false;
4973 }
4974 }
4975
4976 return true;
4977}
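
// Illustrative rejection from the gfx12+ packed-FP32 rule above:
//   v_pk_add_f32 v[0:1], s[2:3], v[4:5] op_sel:[1,0] op_sel_hi:[0,0]
// is invalid because src0 is an SGPR pair and its op_sel bit selects the
// high half, which the hardware cannot honor for SGPR sources.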
4978
4979bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
4980 if (!hasTrue16Insts())
4981 return true;
4982 const MCRegisterInfo *MRI = getMRI();
4983 const unsigned Opc = Inst.getOpcode();
4984 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4985 if (OpSelIdx == -1)
4986 return true;
4987 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
4988 // If the value is 0 we could have a default OpSel Operand, so conservatively
4989 // allow it.
4990 if (OpSelOpValue == 0)
4991 return true;
4992 unsigned OpCount = 0;
4993 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
4994 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
4995 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
4996 if (OpIdx == -1)
4997 continue;
4998 const MCOperand &Op = Inst.getOperand(OpIdx);
4999 if (Op.isReg() &&
5000 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
5001 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
5002 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
5003 if (OpSelOpIsHi != VGPRSuffixIsHi)
5004 return false;
5005 }
5006 ++OpCount;
5007 }
5008
5009 return true;
5010}
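
// Illustrative true16 consistency checked above: every 16-bit VGPR operand's
// .l/.h suffix must agree with the corresponding op_sel bit, e.g.
//   v_add_f16 v0.h, v1.h, v2.h op_sel:[1,1,1]   // consistent
// whereas writing v1.h while its op_sel bit is 0 is rejected.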
5011
5012bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
5013 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
5014
5015 const unsigned Opc = Inst.getOpcode();
5016 uint64_t TSFlags = MII.get(Opc).TSFlags;
5017
5018 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
5019 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
5020 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
5021 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
5022 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
5023 !(TSFlags & SIInstrFlags::IsSWMMAC))
5024 return true;
5025
5026 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
5027 if (NegIdx == -1)
5028 return true;
5029
5030 unsigned Neg = Inst.getOperand(NegIdx).getImm();
5031
5032 // Some instructions have a neg_lo or neg_hi operand, but the neg modifier
5033 // is allowed on some src operands and not allowed on others.
5034 // Conveniently, such instructions don't have a src_modifiers operand for
5035 // the src operands that don't allow neg, because those also don't allow opsel.
5036
5037 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
5038 AMDGPU::OpName::src1_modifiers,
5039 AMDGPU::OpName::src2_modifiers};
5040
5041 for (unsigned i = 0; i < 3; ++i) {
5042 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
5043 if (Neg & (1 << i))
5044 return false;
5045 }
5046 }
5047
5048 return true;
5049}
5050
5051bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
5052 const OperandVector &Operands) {
5053 const unsigned Opc = Inst.getOpcode();
5054 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
5055 if (DppCtrlIdx >= 0) {
5056 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
5057
5058 if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
5059 AMDGPU::isDPALU_DPP(MII.get(Opc), MII, getSTI())) {
5060 // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
5061 // only on GFX12.
5062 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5063 Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
5064 : "DP ALU dpp only supports row_newbcast");
5065 return false;
5066 }
5067 }
5068
5069 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
5070 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5071
5072 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
5073 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5074 if (Src1Idx >= 0) {
5075 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5076 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5077 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
5078 Error(getOperandLoc(Operands, Src1Idx),
5079 "invalid operand for instruction");
5080 return false;
5081 }
5082 if (Src1.isImm()) {
5083 Error(getInstLoc(Operands),
5084 "src1 immediate operand invalid for instruction");
5085 return false;
5086 }
5087 }
5088 }
5089
5090 return true;
5091}
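
// E.g. on gfx90a a DP ALU op such as
//   v_add_f64 v[0:1], v[2:3], v[4:5] row_newbcast:1
// uses the only accepted DPP control family there, while GFX12 accepts
// row_share instead; other dpp_ctrl values trigger the errors above.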
5092
5093// Check if VCC register matches wavefront size
5094bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5095 return (Reg == AMDGPU::VCC && isWave64()) ||
5096 (Reg == AMDGPU::VCC_LO && isWave32());
5097}
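
// E.g. "v_add_co_u32 v0, vcc, v1, v2" is valid in wave64 mode only; wave32
// code must use vcc_lo for the same operand.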
5098
5099 // Only one unique literal can be used. A VOP3 literal is only allowed on GFX10+.
5100bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
5101 const OperandVector &Operands) {
5102 unsigned Opcode = Inst.getOpcode();
5103 const MCInstrDesc &Desc = MII.get(Opcode);
5104 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
5105 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
5106 !HasMandatoryLiteral && !isVOPD(Opcode))
5107 return true;
5108
5109 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
5110
5111 std::optional<unsigned> LiteralOpIdx;
5112 std::optional<uint64_t> LiteralValue;
5113
5114 for (int OpIdx : OpIndices) {
5115 if (OpIdx == -1)
5116 continue;
5117
5118 const MCOperand &MO = Inst.getOperand(OpIdx);
5119 if (!MO.isImm() && !MO.isExpr())
5120 continue;
5121 if (!isSISrcOperand(Desc, OpIdx))
5122 continue;
5123
5124 std::optional<int64_t> Imm;
5125 if (MO.isImm())
5126 Imm = MO.getImm();
5127 else if (MO.isExpr() && isLitExpr(MO.getExpr()))
5128 Imm = getLitValue(MO.getExpr());
5129
5130 bool IsAnotherLiteral = false;
5131 bool IsForcedLit64 = findMCOperand(Operands, OpIdx).isForcedLit64();
5132 if (!Imm.has_value()) {
5133 // Literal value not known, so we conservatively assume it's different.
5134 IsAnotherLiteral = true;
5135 } else if (IsForcedLit64 || !isInlineConstant(Inst, OpIdx)) {
5136 uint64_t Value = *Imm;
5137 bool IsForcedFP64 =
5138 Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
5140 HasMandatoryLiteral);
5141 bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
5142 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
5143 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
5144
5145 if (((!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
5146 !IsForcedFP64) ||
5147 (IsForcedLit64 && !HasMandatoryLiteral)) &&
5148 (!has64BitLiterals() || Desc.getSize() != 4)) {
5149 Error(getOperandLoc(Operands, OpIdx),
5150 "invalid operand for instruction");
5151 return false;
5152 }
5153
5154 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5155 Value = Hi_32(Value);
5156
5157 IsAnotherLiteral = !LiteralValue || *LiteralValue != Value;
5158 LiteralValue = Value;
5159 }
5160
5161 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5162 !getFeatureBits()[FeatureVOP3Literal]) {
5163 Error(getOperandLoc(Operands, OpIdx),
5164 "literal operands are not supported");
5165 return false;
5166 }
5167
5168 if (LiteralOpIdx && IsAnotherLiteral) {
5169 Error(getLaterLoc(getOperandLoc(Operands, OpIdx),
5170 getOperandLoc(Operands, *LiteralOpIdx)),
5171 "only one unique literal operand is allowed");
5172 return false;
5173 }
5174
5175 if (IsAnotherLiteral)
5176 LiteralOpIdx = OpIdx;
5177 }
5178
5179 return true;
5180}
5181
5182// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5183static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5184 const MCRegisterInfo *MRI) {
5185 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5186 if (OpIdx < 0)
5187 return -1;
5188
5189 const MCOperand &Op = Inst.getOperand(OpIdx);
5190 if (!Op.isReg())
5191 return -1;
5192
5193 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5194 auto Reg = Sub ? Sub : Op.getReg();
5195 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5196 return AGPR32.contains(Reg) ? 1 : 0;
5197}
5198
5199bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5200 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5201 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
5202 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5203 SIInstrFlags::DS)) == 0)
5204 return true;
5205
5206 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5207 ? AMDGPU::OpName::data0
5208 : AMDGPU::OpName::vdata;
5209
5210 const MCRegisterInfo *MRI = getMRI();
5211 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5212 int DataAreg = IsAGPROperand(Inst, DataName, MRI);
5213
5214 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
5215 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5216 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5217 return false;
5218 }
5219
5220 auto FB = getFeatureBits();
5221 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5222 if (DataAreg < 0 || DstAreg < 0)
5223 return true;
5224 return DstAreg == DataAreg;
5225 }
5226
5227 return DstAreg < 1 && DataAreg < 1;
5228}
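
// E.g. on gfx90a "global_load_dword a0, v[0:1], off" (AGPR destination) is
// accepted, and dst/data operands must agree in register file; on targets
// without gfx90a instructions the check above rejects AGPR data operands for
// FLAT/MUBUF/MTBUF/MIMG/DS entirely.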
5229
5230bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5231 auto FB = getFeatureBits();
5232 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5233 return true;
5234
5235 unsigned Opc = Inst.getOpcode();
5236 const MCRegisterInfo *MRI = getMRI();
5237 // DS_READ_B96_TR_B6 is the only DS instruction in GFX950 that allows an
5238 // unaligned VGPR. All others only allow even-aligned VGPRs.
5239 if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5240 return true;
5241
5242 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5243 switch (Opc) {
5244 default:
5245 break;
5246 case AMDGPU::DS_LOAD_TR6_B96:
5247 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5248 // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250 that allows an
5249 // unaligned VGPR. All others only allow even-aligned VGPRs.
5250 return true;
5251 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5252 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5253 // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250 that
5254 // allows an unaligned VGPR for vdst, but other operands still only allow
5255 // even-aligned VGPRs.
5256 int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
5257 if (VAddrIdx != -1) {
5258 const MCOperand &Op = Inst.getOperand(VAddrIdx);
5259 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5260 if ((Sub - AMDGPU::VGPR0) & 1)
5261 return false;
5262 }
5263 return true;
5264 }
5265 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5266 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5267 return true;
5268 }
5269 }
5270
5271 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5272 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5273 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
5274 const MCOperand &Op = Inst.getOperand(I);
5275 if (!Op.isReg())
5276 continue;
5277
5278 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5279 if (!Sub)
5280 continue;
5281
5282 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
5283 return false;
5284 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
5285 return false;
5286 }
5287
5288 return true;
5289}
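
// E.g. with FeatureRequiresAlignedVGPRs (gfx90a and later), v[0:1] and
// a[2:3] are valid 64-bit tuples, while v[1:2] fails the loop above because
// its first sub-register is odd.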
5290
5291SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5292 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5293 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5294 if (Op.isBLGP())
5295 return Op.getStartLoc();
5296 }
5297 return SMLoc();
5298}
5299
5300bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5301 const OperandVector &Operands) {
5302 unsigned Opc = Inst.getOpcode();
5303 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5304 if (BlgpIdx == -1)
5305 return true;
5306 SMLoc BLGPLoc = getBLGPLoc(Operands);
5307 if (!BLGPLoc.isValid())
5308 return true;
5309 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5310 auto FB = getFeatureBits();
5311 bool UsesNeg = false;
5312 if (FB[AMDGPU::FeatureGFX940Insts]) {
5313 switch (Opc) {
5314 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5315 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5316 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5317 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5318 UsesNeg = true;
5319 }
5320 }
5321
5322 if (IsNeg == UsesNeg)
5323 return true;
5324
5325 Error(BLGPLoc,
5326 UsesNeg ? "invalid modifier: blgp is not supported"
5327 : "invalid modifier: neg is not supported");
5328
5329 return false;
5330}
5331
5332bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5333 const OperandVector &Operands) {
5334 if (!isGFX11Plus())
5335 return true;
5336
5337 unsigned Opc = Inst.getOpcode();
5338 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5339 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5340 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5341 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5342 return true;
5343
5344 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5345 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5346 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5347 if (Reg == AMDGPU::SGPR_NULL)
5348 return true;
5349
5350 Error(getOperandLoc(Operands, Src0Idx), "src0 must be null");
5351 return false;
5352}
5353
5354bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5355 const OperandVector &Operands) {
5356 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5357 if ((TSFlags & SIInstrFlags::DS) == 0)
5358 return true;
5359 if (TSFlags & SIInstrFlags::GWS)
5360 return validateGWS(Inst, Operands);
5361 // Only validate GDS for non-GWS instructions.
5362 if (hasGDS())
5363 return true;
5364 int GDSIdx =
5365 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5366 if (GDSIdx < 0)
5367 return true;
5368 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5369 if (GDS) {
5370 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5371 Error(S, "gds modifier is not supported on this GPU");
5372 return false;
5373 }
5374 return true;
5375}
5376
5377// gfx90a has an undocumented limitation:
5378// DS_GWS opcodes must use even aligned registers.
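// For example, "ds_gws_init v1 gds" would be rejected here on gfx90a because
// v1 is odd-aligned, while the same instruction with v2 is accepted.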
5379bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5380 const OperandVector &Operands) {
5381 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5382 return true;
5383
5384 int Opc = Inst.getOpcode();
5385 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5386 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5387 return true;
5388
5389 const MCRegisterInfo *MRI = getMRI();
5390 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5391 int Data0Pos =
5392 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5393 assert(Data0Pos != -1);
5394 auto Reg = Inst.getOperand(Data0Pos).getReg();
5395 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5396 if (RegIdx & 1) {
5397 Error(getOperandLoc(Operands, Data0Pos), "vgpr must be even aligned");
5398 return false;
5399 }
5400
5401 return true;
5402}
5403
5404bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
5405 const OperandVector &Operands,
5406 SMLoc IDLoc) {
5407 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
5408 AMDGPU::OpName::cpol);
5409 if (CPolPos == -1)
5410 return true;
5411
5412 unsigned CPol = Inst.getOperand(CPolPos).getImm();
5413
5414 if (!isGFX1250Plus()) {
5415 if (CPol & CPol::SCAL) {
5416 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5417 StringRef CStr(S.getPointer());
5418 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5419 Error(S, "scale_offset is not supported on this GPU");
5420 }
5421 if (CPol & CPol::NV) {
5422 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5423 StringRef CStr(S.getPointer());
5424 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
5425 Error(S, "nv is not supported on this GPU");
5426 }
5427 }
5428
5429 if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
5430 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5431 StringRef CStr(S.getPointer());
5432 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5433 Error(S, "scale_offset is not supported for this instruction");
5434 }
5435
5436 if (isGFX12Plus())
5437 return validateTHAndScopeBits(Inst, Operands, CPol);
5438
5439 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5440 if (TSFlags & SIInstrFlags::SMRD) {
5441 if (CPol && (isSI() || isCI())) {
5442 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5443 Error(S, "cache policy is not supported for SMRD instructions");
5444 return false;
5445 }
5446 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
5447 Error(IDLoc, "invalid cache policy for SMEM instruction");
5448 return false;
5449 }
5450 }
5451
5452 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
5453 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
5454 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5455 SIInstrFlags::FLAT;
5456 if (!(TSFlags & AllowSCCModifier)) {
5457 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5458 StringRef CStr(S.getPointer());
5459 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
5460 Error(S,
5461 "scc modifier is not supported for this instruction on this GPU");
5462 return false;
5463 }
5464 }
5465
5466 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
5467 return true;
5468
5469 if (TSFlags & SIInstrFlags::IsAtomicRet) {
5470 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5471 Error(IDLoc, isGFX940() ? "instruction must use sc0"
5472 : "instruction must use glc");
5473 return false;
5474 }
5475 } else {
5476 if (CPol & CPol::GLC) {
5477 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5478 StringRef CStr(S.getPointer());
5479 S = SMLoc::getFromPointer(
5480 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
5481 Error(S, isGFX940() ? "instruction must not use sc0"
5482 : "instruction must not use glc");
5483 return false;
5484 }
5485 }
5486
5487 return true;
5488}
5489
5490bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5491 const OperandVector &Operands,
5492 const unsigned CPol) {
5493 const unsigned TH = CPol & AMDGPU::CPol::TH;
5494 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5495
5496 const unsigned Opcode = Inst.getOpcode();
5497 const MCInstrDesc &TID = MII.get(Opcode);
5498
5499 auto PrintError = [&](StringRef Msg) {
5500 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5501 Error(S, Msg);
5502 return false;
5503 };
5504
5505 if ((TH & AMDGPU::CPol::TH_ATOMIC_RETURN) &&
5506 !(TID.TSFlags & SIInstrFlags::IsAtomicRet))
5507 return PrintError("th:TH_ATOMIC_RETURN requires a destination operand");
5508
5509 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5510 (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
5511 (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN)))
5512 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5513
5514 if (TH == 0)
5515 return true;
5516
5517 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5518 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5519 (TH == AMDGPU::CPol::TH_NT_HT)))
5520 return PrintError("invalid th value for SMEM instruction");
5521
5522 if (TH == AMDGPU::CPol::TH_BYPASS) {
5523 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5524 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
5525 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5526 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5527 return PrintError("scope and th combination is not valid");
5528 }
5529
5530 unsigned THType = AMDGPU::getTemporalHintType(TID);
5531 if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
5532 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5533 return PrintError("invalid th value for atomic instructions");
5534 } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
5535 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5536 return PrintError("invalid th value for store instructions");
5537 } else {
5538 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5539 return PrintError("invalid th value for load instructions");
5540 }
5541
5542 return true;
5543}
5544
5545bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5546 const OperandVector &Operands) {
5547 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5548 if (Desc.mayStore() &&
5549 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5550 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5551 if (Loc != getInstLoc(Operands)) {
5552 Error(Loc, "TFE modifier has no meaning for store instructions");
5553 return false;
5554 }
5555 }
5556
5557 return true;
5558}
5559
5560bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
5561 const OperandVector &Operands) {
5562 unsigned Opc = Inst.getOpcode();
5563 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5564 const MCInstrDesc &Desc = MII.get(Opc);
5565
5566 auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
5567 int FmtIdx = AMDGPU::getNamedOperandIdx(Opc, FmtOp);
5568 if (FmtIdx == -1)
5569 return true;
5570 unsigned Fmt = Inst.getOperand(FmtIdx).getImm();
5571 int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
5572 unsigned RegSize =
5573 TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[SrcIdx], HwMode))
5574 .getSizeInBits();
5575
5576 if (RegSize == AMDGPU::wmmaScaleF8F6F4FormatToNumRegs(Fmt) * 32)
5577 return true;
5578
5579 Error(getOperandLoc(Operands, SrcIdx),
5580 "wrong register tuple size for " +
5581 Twine(WMMAMods::ModMatrixFmt[Fmt]));
5582 return false;
5583 };
5584
5585 return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
5586 validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
5587}
5588
5589bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc,
5590 const OperandVector &Operands) {
5591 if (!validateLdsDirect(Inst, Operands))
5592 return false;
5593 if (!validateTrue16OpSel(Inst)) {
5594 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5595 "op_sel operand conflicts with 16-bit operand suffix");
5596 return false;
5597 }
5598 if (!validateSOPLiteral(Inst, Operands))
5599 return false;
5600 if (!validateVOPLiteral(Inst, Operands)) {
5601 return false;
5602 }
5603 if (!validateConstantBusLimitations(Inst, Operands)) {
5604 return false;
5605 }
5606 if (!validateVOPD(Inst, Operands)) {
5607 return false;
5608 }
5609 if (!validateIntClampSupported(Inst)) {
5610 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5611 "integer clamping is not supported on this GPU");
5612 return false;
5613 }
5614 if (!validateOpSel(Inst)) {
5615 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5616 "invalid op_sel operand");
5617 return false;
5618 }
5619 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5620 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5621 "invalid neg_lo operand");
5622 return false;
5623 }
5624 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5625 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5626 "invalid neg_hi operand");
5627 return false;
5628 }
5629 if (!validateDPP(Inst, Operands)) {
5630 return false;
5631 }
5632 // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
5633 if (!validateMIMGD16(Inst)) {
5634 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5635 "d16 modifier is not supported on this GPU");
5636 return false;
5637 }
5638 if (!validateMIMGDim(Inst, Operands)) {
5639 Error(IDLoc, "missing dim operand");
5640 return false;
5641 }
5642 if (!validateTensorR128(Inst)) {
5643 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5644 "instruction must set modifier r128=0");
5645 return false;
5646 }
5647 if (!validateMIMGMSAA(Inst)) {
5648 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5649 "invalid dim; must be MSAA type");
5650 return false;
5651 }
5652 if (!validateMIMGDataSize(Inst, IDLoc)) {
5653 return false;
5654 }
5655 if (!validateMIMGAddrSize(Inst, IDLoc))
5656 return false;
5657 if (!validateMIMGAtomicDMask(Inst)) {
5658 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5659 "invalid atomic image dmask");
5660 return false;
5661 }
5662 if (!validateMIMGGatherDMask(Inst)) {
5663 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5664 "invalid image_gather dmask: only one bit must be set");
5665 return false;
5666 }
5667 if (!validateMovrels(Inst, Operands)) {
5668 return false;
5669 }
5670 if (!validateOffset(Inst, Operands)) {
5671 return false;
5672 }
5673 if (!validateMAIAccWrite(Inst, Operands)) {
5674 return false;
5675 }
5676 if (!validateMAISrc2(Inst, Operands)) {
5677 return false;
5678 }
5679 if (!validateMFMA(Inst, Operands)) {
5680 return false;
5681 }
5682 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5683 return false;
5684 }
5685
5686 if (!validateAGPRLdSt(Inst)) {
5687 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5688 ? "invalid register class: data and dst should be all VGPR or AGPR"
5689 : "invalid register class: agpr loads and stores not supported on this GPU"
5690 );
5691 return false;
5692 }
5693 if (!validateVGPRAlign(Inst)) {
5694 Error(IDLoc,
5695 "invalid register class: vgpr tuples must be 64 bit aligned");
5696 return false;
5697 }
5698 if (!validateDS(Inst, Operands)) {
5699 return false;
5700 }
5701
5702 if (!validateBLGP(Inst, Operands)) {
5703 return false;
5704 }
5705
5706 if (!validateDivScale(Inst)) {
5707 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5708 return false;
5709 }
5710 if (!validateWaitCnt(Inst, Operands)) {
5711 return false;
5712 }
5713 if (!validateTFE(Inst, Operands)) {
5714 return false;
5715 }
5716 if (!validateWMMA(Inst, Operands)) {
5717 return false;
5718 }
5719
5720 return true;
5721}
5722
5723static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5724 const FeatureBitset &FBS,
5725 unsigned VariantID = 0);
5726
5727static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5728 const FeatureBitset &AvailableFeatures,
5729 unsigned VariantID);
5730
5731bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5732 const FeatureBitset &FBS) {
5733 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5734}
5735
5736bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5737 const FeatureBitset &FBS,
5738 ArrayRef<unsigned> Variants) {
5739 for (auto Variant : Variants) {
5740 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5741 return true;
5742 }
5743
5744 return false;
5745}
5746
5747bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5748 SMLoc IDLoc) {
5749 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5750
5751 // Check if requested instruction variant is supported.
5752 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5753 return false;
5754
5755 // This instruction is not supported.
5756 // Clear any other pending errors because they are no longer relevant.
5757 getParser().clearPendingErrors();
5758
5759 // Requested instruction variant is not supported.
5760 // Check if any other variants are supported.
5761 StringRef VariantName = getMatchedVariantName();
5762 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5763 return Error(IDLoc,
5764 Twine(VariantName,
5765 " variant of this instruction is not supported"));
5766 }
5767
5768 // Check if this instruction may be used with a different wavesize.
5769 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5770 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5771 // FIXME: Use getAvailableFeatures, and do not manually recompute
5772 FeatureBitset FeaturesWS32 = getFeatureBits();
5773 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5774 .flip(AMDGPU::FeatureWavefrontSize32);
5775 FeatureBitset AvailableFeaturesWS32 =
5776 ComputeAvailableFeatures(FeaturesWS32);
5777
5778 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5779 return Error(IDLoc, "instruction requires wavesize=32");
5780 }
5781
5782 // Finally check if this instruction is supported on any other GPU.
5783 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5784 return Error(IDLoc, "instruction not supported on this GPU (" +
5785 getSTI().getCPU() + ")" + ": " + Mnemo);
5786 }
5787
5788 // Instruction not supported on any GPU. Probably a typo.
5789 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5790 return Error(IDLoc, "invalid instruction" + Suggestion);
5791}
5792
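// A VOPD instruction pairs two sub-instructions with a "::" separator, e.g.
// "v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v2, v3". If the operand that
// failed to match immediately follows a "::" token, the error lies in the
// VOPDY half of the pair.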
5793static bool isInvalidVOPDY(const OperandVector &Operands,
5794 uint64_t InvalidOprIdx) {
5795 assert(InvalidOprIdx < Operands.size());
5796 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5797 if (Op.isToken() && InvalidOprIdx > 1) {
5798 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5799 return PrevOp.isToken() && PrevOp.getToken() == "::";
5800 }
5801 return false;
5802}
5803
5804bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5805 OperandVector &Operands,
5806 MCStreamer &Out,
5807 uint64_t &ErrorInfo,
5808 bool MatchingInlineAsm) {
5809 MCInst Inst;
5810 Inst.setLoc(IDLoc);
5811 unsigned Result = Match_Success;
5812 for (auto Variant : getMatchedVariants()) {
5813 uint64_t EI;
5814 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5815 Variant);
5816 // We order match statuses from least to most specific and use the most
5817 // specific status as the result:
5818 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5819 if (R == Match_Success || R == Match_MissingFeature ||
5820 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5821 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5822 Result != Match_MissingFeature)) {
5823 Result = R;
5824 ErrorInfo = EI;
5825 }
5826 if (R == Match_Success)
5827 break;
5828 }
5829
5830 if (Result == Match_Success) {
5831 if (!validateInstruction(Inst, IDLoc, Operands)) {
5832 return true;
5833 }
5834 Out.emitInstruction(Inst, getSTI());
5835 return false;
5836 }
5837
5838 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5839 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5840 return true;
5841 }
5842
5843 switch (Result) {
5844 default: break;
5845 case Match_MissingFeature:
5846 // It has been verified that the specified instruction
5847 // mnemonic is valid. A match was found but it requires
5848 // features which are not supported on this GPU.
5849 return Error(IDLoc, "operands are not valid for this GPU or mode");
5850
5851 case Match_InvalidOperand: {
5852 SMLoc ErrorLoc = IDLoc;
5853 if (ErrorInfo != ~0ULL) {
5854 if (ErrorInfo >= Operands.size()) {
5855 return Error(IDLoc, "too few operands for instruction");
5856 }
5857 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5858 if (ErrorLoc == SMLoc())
5859 ErrorLoc = IDLoc;
5860
5861 if (isInvalidVOPDY(Operands, ErrorInfo))
5862 return Error(ErrorLoc, "invalid VOPDY instruction");
5863 }
5864 return Error(ErrorLoc, "invalid operand for instruction");
5865 }
5866
5867 case Match_MnemonicFail:
5868 llvm_unreachable("Invalid instructions should have been handled already");
5869 }
5870 llvm_unreachable("Implement any new match types added!");
5871}
5872
5873bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5874 int64_t Tmp = -1;
5875 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5876 return true;
5877 }
5878 if (getParser().parseAbsoluteExpression(Tmp)) {
5879 return true;
5880 }
5881 Ret = static_cast<uint32_t>(Tmp);
5882 return false;
5883}
5884
5885bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5886 if (!getSTI().getTargetTriple().isAMDGCN())
5887 return TokError("directive only supported for amdgcn architecture");
5888
5889 std::string TargetIDDirective;
5890 SMLoc TargetStart = getTok().getLoc();
5891 if (getParser().parseEscapedString(TargetIDDirective))
5892 return true;
5893
5894 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5895 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5896 return getParser().Error(TargetRange.Start,
5897 (Twine(".amdgcn_target directive's target id ") +
5898 Twine(TargetIDDirective) +
5899 Twine(" does not match the specified target id ") +
5900 Twine(getTargetStreamer().getTargetID()->toString())).str());
5901
5902 return false;
5903}
5904
5905bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5906 return Error(Range.Start, "value out of range", Range);
5907}
5908
5909bool AMDGPUAsmParser::calculateGPRBlocks(
5910 const FeatureBitset &Features, const MCExpr *VCCUsed,
5911 const MCExpr *FlatScrUsed, bool XNACKUsed,
5912 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5913 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5914 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5915 // TODO(scott.linder): These calculations are duplicated from
5916 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5917 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5918 MCContext &Ctx = getContext();
5919
5920 const MCExpr *NumSGPRs = NextFreeSGPR;
5921 int64_t EvaluatedSGPRs;
5922
5923 if (Version.Major >= 10)
5924 NumSGPRs = MCConstantExpr::create(0, Ctx);
5925 else {
5926 unsigned MaxAddressableNumSGPRs =
5927 IsaInfo::getAddressableNumSGPRs(&getSTI());
5928
5929 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5930 !Features.test(FeatureSGPRInitBug) &&
5931 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5932 return OutOfRangeError(SGPRRange);
5933
5934 const MCExpr *ExtraSGPRs =
5935 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5936 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5937
5938 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5939 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5940 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5941 return OutOfRangeError(SGPRRange);
5942
5943 if (Features.test(FeatureSGPRInitBug))
5944 NumSGPRs =
5945 MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5946 }
5947
5948 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5949 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
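// For example, assuming a VGPR encoding granule of 4: NumGPR = 37 gives
// alignTo(max(1, 37), 4) / 4 - 1 = 40 / 4 - 1 = 9 blocks.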
5950 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5951 unsigned Granule) -> const MCExpr * {
5952 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5953 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5954 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5955 const MCExpr *AlignToGPR =
5956 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5957 const MCExpr *DivGPR =
5958 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5959 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5960 return SubGPR;
5961 };
5962
5963 VGPRBlocks = GetNumGPRBlocks(
5964 NextFreeVGPR,
5965 IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
5966 SGPRBlocks =
5967 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
5968
5969 return false;
5970}
5971
5972bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5973 if (!getSTI().getTargetTriple().isAMDGCN())
5974 return TokError("directive only supported for amdgcn architecture");
5975
5976 if (!isHsaAbi(getSTI()))
5977 return TokError("directive only supported for amdhsa OS");
5978
5979 StringRef KernelName;
5980 if (getParser().parseIdentifier(KernelName))
5981 return true;
5982
5983 AMDGPU::MCKernelDescriptor KD =
5984 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5985 &getSTI(), getContext());
5986
5987 StringSet<> Seen;
5988
5989 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5990
5991 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
5992 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
5993
5994 SMRange VGPRRange;
5995 const MCExpr *NextFreeVGPR = ZeroExpr;
5996 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
5997 const MCExpr *NamedBarCnt = ZeroExpr;
5998 uint64_t SharedVGPRCount = 0;
5999 uint64_t PreloadLength = 0;
6000 uint64_t PreloadOffset = 0;
6001 SMRange SGPRRange;
6002 const MCExpr *NextFreeSGPR = ZeroExpr;
6003
6004 // Count the number of user SGPRs implied from the enabled feature bits.
6005 unsigned ImpliedUserSGPRCount = 0;
6006
6007 // Track if the asm explicitly contains the directive for the user SGPR
6008 // count.
6009 std::optional<unsigned> ExplicitUserSGPRCount;
6010 const MCExpr *ReserveVCC = OneExpr;
6011 const MCExpr *ReserveFlatScr = OneExpr;
6012 std::optional<bool> EnableWavefrontSize32;
6013
6014 while (true) {
6015 while (trySkipToken(AsmToken::EndOfStatement));
6016
6017 StringRef ID;
6018 SMRange IDRange = getTok().getLocRange();
6019 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
6020 return true;
6021
6022 if (ID == ".end_amdhsa_kernel")
6023 break;
6024
6025 if (!Seen.insert(ID).second)
6026 return TokError(".amdhsa_ directives cannot be repeated");
6027
6028 SMLoc ValStart = getLoc();
6029 const MCExpr *ExprVal;
6030 if (getParser().parseExpression(ExprVal))
6031 return true;
6032 SMLoc ValEnd = getLoc();
6033 SMRange ValRange = SMRange(ValStart, ValEnd);
6034
6035 int64_t IVal = 0;
6036 uint64_t Val = IVal;
6037 bool EvaluatableExpr;
6038 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6039 if (IVal < 0)
6040 return OutOfRangeError(ValRange);
6041 Val = IVal;
6042 }
6043
6044#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6045 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6046 return OutOfRangeError(RANGE); \
6047 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6048 getContext());
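// For instance, PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, SOME_FIELD, ExprVal,
// ValRange) expands (roughly) to a range check of Val against SOME_FIELD_WIDTH
// followed by
//   bits_set(KD.compute_pgm_rsrc2, ExprVal, SOME_FIELD_SHIFT, SOME_FIELD, ...);
// where SOME_FIELD stands for any of the *_SHIFT/_WIDTH field macros below.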
6049
6050// Some fields use the parsed value immediately, which requires the expression
6051// to be resolvable.
6052#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6053 if (!(RESOLVED)) \
6054 return Error(IDRange.Start, "directive should have resolvable expression", \
6055 IDRange);
6056
6057 if (ID == ".amdhsa_group_segment_fixed_size") {
6058 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
6059 CHAR_BIT>(Val))
6060 return OutOfRangeError(ValRange);
6061 KD.group_segment_fixed_size = ExprVal;
6062 } else if (ID == ".amdhsa_private_segment_fixed_size") {
6063 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
6064 CHAR_BIT>(Val))
6065 return OutOfRangeError(ValRange);
6066 KD.private_segment_fixed_size = ExprVal;
6067 } else if (ID == ".amdhsa_kernarg_size") {
6068 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
6069 return OutOfRangeError(ValRange);
6070 KD.kernarg_size = ExprVal;
6071 } else if (ID == ".amdhsa_user_sgpr_count") {
6072 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6073 ExplicitUserSGPRCount = Val;
6074 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
6075 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6076 if (hasArchitectedFlatScratch())
6077 return Error(IDRange.Start,
6078 "directive is not supported with architected flat scratch",
6079 IDRange);
6080 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6081 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6082 ExprVal, ValRange);
6083 if (Val)
6084 ImpliedUserSGPRCount += 4;
6085 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
6086 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6087 if (!hasKernargPreload())
6088 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6089
6090 if (Val > getMaxNumUserSGPRs())
6091 return OutOfRangeError(ValRange);
6092 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
6093 ValRange);
6094 if (Val) {
6095 ImpliedUserSGPRCount += Val;
6096 PreloadLength = Val;
6097 }
6098 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
6099 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6100 if (!hasKernargPreload())
6101 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6102
6103 if (Val >= 1024)
6104 return OutOfRangeError(ValRange);
6105 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6106 ValRange);
6107 if (Val)
6108 PreloadOffset = Val;
6109 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6110 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6111 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6112 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6113 ValRange);
6114 if (Val)
6115 ImpliedUserSGPRCount += 2;
6116 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6117 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6118 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6119 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6120 ValRange);
6121 if (Val)
6122 ImpliedUserSGPRCount += 2;
6123 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6124 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6125 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6126 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6127 ExprVal, ValRange);
6128 if (Val)
6129 ImpliedUserSGPRCount += 2;
6130 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6131 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6132 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6133 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6134 ValRange);
6135 if (Val)
6136 ImpliedUserSGPRCount += 2;
6137 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6138 if (hasArchitectedFlatScratch())
6139 return Error(IDRange.Start,
6140 "directive is not supported with architected flat scratch",
6141 IDRange);
6142 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6143 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6144 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6145 ExprVal, ValRange);
6146 if (Val)
6147 ImpliedUserSGPRCount += 2;
6148 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6149 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6150 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6151 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6152 ExprVal, ValRange);
6153 if (Val)
6154 ImpliedUserSGPRCount += 1;
6155 } else if (ID == ".amdhsa_wavefront_size32") {
6156 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6157 if (IVersion.Major < 10)
6158 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6159 EnableWavefrontSize32 = Val;
6160 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6161 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6162 ValRange);
6163 } else if (ID == ".amdhsa_uses_dynamic_stack") {
6164 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6165 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6166 ValRange);
6167 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6168 if (hasArchitectedFlatScratch())
6169 return Error(IDRange.Start,
6170 "directive is not supported with architected flat scratch",
6171 IDRange);
6172 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6173 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6174 ValRange);
6175 } else if (ID == ".amdhsa_enable_private_segment") {
6176 if (!hasArchitectedFlatScratch())
6177 return Error(
6178 IDRange.Start,
6179 "directive is not supported without architected flat scratch",
6180 IDRange);
6181 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6182 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6183 ValRange);
6184 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6185 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6186 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6187 ValRange);
6188 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6189 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6190 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6191 ValRange);
6192 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6193 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6194 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6195 ValRange);
6196 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6197 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6198 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6199 ValRange);
6200 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6201 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6202 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6203 ValRange);
6204 } else if (ID == ".amdhsa_next_free_vgpr") {
6205 VGPRRange = ValRange;
6206 NextFreeVGPR = ExprVal;
6207 } else if (ID == ".amdhsa_next_free_sgpr") {
6208 SGPRRange = ValRange;
6209 NextFreeSGPR = ExprVal;
6210 } else if (ID == ".amdhsa_accum_offset") {
6211 if (!isGFX90A())
6212 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6213 AccumOffset = ExprVal;
6214 } else if (ID == ".amdhsa_named_barrier_count") {
6215 if (!isGFX1250Plus())
6216 return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
6217 NamedBarCnt = ExprVal;
6218 } else if (ID == ".amdhsa_reserve_vcc") {
6219 if (EvaluatableExpr && !isUInt<1>(Val))
6220 return OutOfRangeError(ValRange);
6221 ReserveVCC = ExprVal;
6222 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6223 if (IVersion.Major < 7)
6224 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
6225 if (hasArchitectedFlatScratch())
6226 return Error(IDRange.Start,
6227 "directive is not supported with architected flat scratch",
6228 IDRange);
6229 if (EvaluatableExpr && !isUInt<1>(Val))
6230 return OutOfRangeError(ValRange);
6231 ReserveFlatScr = ExprVal;
6232 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6233 if (IVersion.Major < 8)
6234 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
6235 if (!isUInt<1>(Val))
6236 return OutOfRangeError(ValRange);
6237 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6238 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
6239 IDRange);
6240 } else if (ID == ".amdhsa_float_round_mode_32") {
6241 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6242 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6243 ValRange);
6244 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6245 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6246 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6247 ValRange);
6248 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6249 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6250 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6251 ValRange);
6252 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6253 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6254 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6255 ValRange);
6256 } else if (ID == ".amdhsa_dx10_clamp") {
6257 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6258 return Error(IDRange.Start, "directive unsupported on gfx1170+",
6259 IDRange);
6260 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6261 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6262 ValRange);
6263 } else if (ID == ".amdhsa_ieee_mode") {
6264 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6265 return Error(IDRange.Start, "directive unsupported on gfx1170+",
6266 IDRange);
6267 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6268 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6269 ValRange);
6270 } else if (ID == ".amdhsa_fp16_overflow") {
6271 if (IVersion.Major < 9)
6272 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
6273 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6274 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6275 ValRange);
6276 } else if (ID == ".amdhsa_tg_split") {
6277 if (!isGFX90A())
6278 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6279 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6280 ExprVal, ValRange);
6281 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6282 if (!supportsWGP(getSTI()))
6283 return Error(IDRange.Start,
6284 "directive unsupported on " + getSTI().getCPU(), IDRange);
6286 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6287 ValRange);
6288 } else if (ID == ".amdhsa_memory_ordered") {
6289 if (IVersion.Major < 10)
6290 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6291 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6292 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6293 ValRange);
6294 } else if (ID == ".amdhsa_forward_progress") {
6295 if (IVersion.Major < 10)
6296 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6297 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6298 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6299 ValRange);
6300 } else if (ID == ".amdhsa_shared_vgpr_count") {
6301 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6302 if (IVersion.Major < 10 || IVersion.Major >= 12)
6303 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
6304 IDRange);
6305 SharedVGPRCount = Val;
6306 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6307 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6308 ValRange);
6309 } else if (ID == ".amdhsa_inst_pref_size") {
6310 if (IVersion.Major < 11)
6311 return Error(IDRange.Start, "directive requires gfx11+", IDRange);
6312 if (IVersion.Major == 11) {
6313 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6314 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6315 ValRange);
6316 } else {
6317 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6318 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6319 ValRange);
6320 }
6321 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6322 PARSE_BITS_ENTRY(
6323 KD.compute_pgm_rsrc2,
6324 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6325 ExprVal, ValRange);
6326 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6327 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6328 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6329 ExprVal, ValRange);
6330 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6331 PARSE_BITS_ENTRY(
6332 KD.compute_pgm_rsrc2,
6333 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6334 ExprVal, ValRange);
6335 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6336 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6337 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6338 ExprVal, ValRange);
6339 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6340 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6341 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6342 ExprVal, ValRange);
6343 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6344 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6345 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6346 ExprVal, ValRange);
6347 } else if (ID == ".amdhsa_exception_int_div_zero") {
6348 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6349 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6350 ExprVal, ValRange);
6351 } else if (ID == ".amdhsa_round_robin_scheduling") {
6352 if (IVersion.Major < 12)
6353 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
6354 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6355 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6356 ValRange);
6357 } else {
6358 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
6359 }
6360
6361#undef PARSE_BITS_ENTRY
6362 }
6363
6364 if (!Seen.contains(".amdhsa_next_free_vgpr"))
6365 return TokError(".amdhsa_next_free_vgpr directive is required");
6366
6367 if (!Seen.contains(".amdhsa_next_free_sgpr"))
6368 return TokError(".amdhsa_next_free_sgpr directive is required");
6369
6370 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6371 if (UserSGPRCount > getMaxNumUserSGPRs())
6372 return TokError("too many user SGPRs enabled, found " +
6373 Twine(UserSGPRCount) + ", but only " +
6374 Twine(getMaxNumUserSGPRs()) + " are supported.");
6375
6376 // Consider the case where the total number of user SGPRs, including
6377 // trailing allocated preload SGPRs, is greater than the number of
6378 // explicitly referenced SGPRs.
6379 if (PreloadLength) {
6380 MCContext &Ctx = getContext();
6381 NextFreeSGPR = AMDGPUMCExpr::createMax(
6382 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6383 }
6384
6385 const MCExpr *VGPRBlocks;
6386 const MCExpr *SGPRBlocks;
6387 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6388 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6389 EnableWavefrontSize32, NextFreeVGPR,
6390 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6391 SGPRBlocks))
6392 return true;
6393
6394 int64_t EvaluatedVGPRBlocks;
6395 bool VGPRBlocksEvaluatable =
6396 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6397 if (VGPRBlocksEvaluatable &&
6398 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
6399 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6400 return OutOfRangeError(VGPRRange);
6401 }
6402 AMDGPU::MCKernelDescriptor::bits_set(
6403 KD.compute_pgm_rsrc1, VGPRBlocks,
6404 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6405 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6406
6407 int64_t EvaluatedSGPRBlocks;
6408 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6409 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
6410 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6411 return OutOfRangeError(SGPRRange);
6412 AMDGPU::MCKernelDescriptor::bits_set(
6413 KD.compute_pgm_rsrc1, SGPRBlocks,
6414 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6415 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6416
6417 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6418 return TokError("amdgpu_user_sgpr_count smaller than implied by "
6419 "enabled user SGPRs");
6420
6421 if (isGFX1250Plus()) {
6422 AMDGPU::MCKernelDescriptor::bits_set(
6423 KD.compute_pgm_rsrc2,
6424 MCConstantExpr::create(UserSGPRCount, getContext()),
6425 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6426 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
6427 } else {
6428 AMDGPU::MCKernelDescriptor::bits_set(
6429 KD.compute_pgm_rsrc2,
6430 MCConstantExpr::create(UserSGPRCount, getContext()),
6431 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6432 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
6433 }
6434
6435 int64_t IVal = 0;
6436 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6437 return TokError("Kernarg size should be resolvable");
6438 uint64_t kernarg_size = IVal;
6439 if (PreloadLength && kernarg_size &&
6440 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6441 return TokError("Kernarg preload length + offset is larger than the "
6442 "kernarg segment size");
6443
6444 if (isGFX90A()) {
6445 if (!Seen.contains(".amdhsa_accum_offset"))
6446 return TokError(".amdhsa_accum_offset directive is required");
6447 int64_t EvaluatedAccum;
6448 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6449 uint64_t UEvaluatedAccum = EvaluatedAccum;
6450 if (AccumEvaluatable &&
6451 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6452 return TokError("accum_offset should be in range [4..256] in "
6453 "increments of 4");
6454
6455 int64_t EvaluatedNumVGPR;
6456 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6457 AccumEvaluatable &&
6458 UEvaluatedAccum >
6459 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6460 return TokError("accum_offset exceeds total VGPR allocation");
6461 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6462 MCBinaryExpr::createDiv(
6463 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
6464 MCConstantExpr::create(1, getContext()), getContext());
6465 AMDGPU::MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum,
6466 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6467 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6468 getContext());
6469 }
6470
6471 if (isGFX1250Plus())
6472 AMDGPU::MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, NamedBarCnt,
6473 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6474 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6475 getContext());
6476
6477 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6478 // SharedVGPRCount < 16 is checked by PARSE_BITS_ENTRY
6479 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6480 return TokError("shared_vgpr_count directive not valid on "
6481 "wavefront size 32");
6482 }
6483
6484 if (VGPRBlocksEvaluatable &&
6485 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6486 63)) {
6487 return TokError("shared_vgpr_count*2 + "
6488 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6489 "exceed 63\n");
6490 }
6491 }
6492
6493 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6494 NextFreeVGPR, NextFreeSGPR,
6495 ReserveVCC, ReserveFlatScr);
6496 return false;
6497}
6498
6499bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6500 uint32_t Version;
6501 if (ParseAsAbsoluteExpression(Version))
6502 return true;
6503
6504 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6505 return false;
6506}
6507
6508bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6509 AMDGPUMCKernelCodeT &C) {
6510 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6511 // assembly for backwards compatibility.
6512 if (ID == "max_scratch_backing_memory_byte_size") {
6513 Parser.eatToEndOfStatement();
6514 return false;
6515 }
6516
6517 SmallString<40> ErrStr;
6518 raw_svector_ostream Err(ErrStr);
6519 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6520 return TokError(Err.str());
6521 }
6522 Lex();
6523
6524 if (ID == "enable_wavefront_size32") {
6525 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6526 if (!isGFX10Plus())
6527 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6528 if (!isWave32())
6529 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6530 } else {
6531 if (!isWave64())
6532 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6533 }
6534 }
6535
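// wavefront_size is stored as log2 of the wave size, so a value of 5 selects
// wave32 and a value of 6 selects wave64.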
6536 if (ID == "wavefront_size") {
6537 if (C.wavefront_size == 5) {
6538 if (!isGFX10Plus())
6539 return TokError("wavefront_size=5 is only allowed on GFX10+");
6540 if (!isWave32())
6541 return TokError("wavefront_size=5 requires +WavefrontSize32");
6542 } else if (C.wavefront_size == 6) {
6543 if (!isWave64())
6544 return TokError("wavefront_size=6 requires +WavefrontSize64");
6545 }
6546 }
6547
6548 return false;
6549}
6550
6551bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6552 AMDGPUMCKernelCodeT KernelCode;
6553 KernelCode.initDefault(&getSTI(), getContext());
6554
6555 while (true) {
6556 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6557 // will set the current token to EndOfStatement.
6558 while (trySkipToken(AsmToken::EndOfStatement));
6559
6560 StringRef ID;
6561 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6562 return true;
6563
6564 if (ID == ".end_amd_kernel_code_t")
6565 break;
6566
6567 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6568 return true;
6569 }
6570
6571 KernelCode.validate(&getSTI(), getContext());
6572 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6573
6574 return false;
6575}
6576
6577bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6578 StringRef KernelName;
6579 if (!parseId(KernelName, "expected symbol name"))
6580 return true;
6581
6582 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6583 ELF::STT_AMDGPU_HSA_KERNEL);
6584
6585 KernelScope.initialize(getContext());
6586 return false;
6587}
6588
6589bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6590 if (!getSTI().getTargetTriple().isAMDGCN()) {
6591 return Error(getLoc(),
6592 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6593 "architectures");
6594 }
6595
6596 auto TargetIDDirective = getLexer().getTok().getStringContents();
6597 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6598 return Error(getParser().getTok().getLoc(), "target id must match options");
6599
6600 getTargetStreamer().EmitISAVersion();
6601 Lex();
6602
6603 return false;
6604}
6605
6606bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6607 assert(isHsaAbi(getSTI()));
6608
6609 std::string HSAMetadataString;
6610 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6611 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6612 return true;
6613
6614 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6615 return Error(getLoc(), "invalid HSA metadata");
6616
6617 return false;
6618}
6619
6620/// Common code to parse out a block of text (typically YAML) between start and
6621/// end directives.
6622bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6623 const char *AssemblerDirectiveEnd,
6624 std::string &CollectString) {
6625
6626 raw_string_ostream CollectStream(CollectString);
6627
6628 getLexer().setSkipSpace(false);
6629
6630 bool FoundEnd = false;
6631 while (!isToken(AsmToken::Eof)) {
6632 while (isToken(AsmToken::Space)) {
6633 CollectStream << getTokenStr();
6634 Lex();
6635 }
6636
6637 if (trySkipId(AssemblerDirectiveEnd)) {
6638 FoundEnd = true;
6639 break;
6640 }
6641
6642 CollectStream << Parser.parseStringToEndOfStatement()
6643 << getContext().getAsmInfo()->getSeparatorString();
6644
6645 Parser.eatToEndOfStatement();
6646 }
6647
6648 getLexer().setSkipSpace(true);
6649
6650 if (isToken(AsmToken::Eof) && !FoundEnd) {
6651 return TokError(Twine("expected directive ") +
6652 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6653 }
6654
6655 return false;
6656}
6657
6658/// Parse the assembler directive for new MsgPack-format PAL metadata.
6659bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6660 std::string String;
6661 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6662 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6663 return true;
6664
6665 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6666 if (!PALMetadata->setFromString(String))
6667 return Error(getLoc(), "invalid PAL metadata");
6668 return false;
6669}
6670
6671/// Parse the assembler directive for old linear-format PAL metadata.
6672bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6673 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6674 return Error(getLoc(),
6675 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6676 "not available on non-amdpal OSes")).str());
6677 }
6678
6679 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6680 PALMetadata->setLegacy();
6681 for (;;) {
6682 uint32_t Key, Value;
6683 if (ParseAsAbsoluteExpression(Key)) {
6684 return TokError(Twine("invalid value in ") +
6685 Twine(PALMD::AssemblerDirective));
6686 }
6687 if (!trySkipToken(AsmToken::Comma)) {
6688 return TokError(Twine("expected an even number of values in ") +
6689 Twine(PALMD::AssemblerDirective));
6690 }
6691 if (ParseAsAbsoluteExpression(Value)) {
6692 return TokError(Twine("invalid value in ") +
6693 Twine(PALMD::AssemblerDirective));
6694 }
6695 PALMetadata->setRegister(Key, Value);
6696 if (!trySkipToken(AsmToken::Comma))
6697 break;
6698 }
6699 return false;
6700}
6701
6702/// ParseDirectiveAMDGPULDS
6703/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
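/// e.g. ".amdgpu_lds lds_sym, 512, 16" reserves 512 bytes of LDS for the
/// (example) symbol lds_sym with 16-byte alignment; the alignment defaults
/// to 4 when omitted.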
6704bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6705 if (getParser().checkForValidSection())
6706 return true;
6707
6708 StringRef Name;
6709 SMLoc NameLoc = getLoc();
6710 if (getParser().parseIdentifier(Name))
6711 return TokError("expected identifier in directive");
6712
6713 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6714 if (getParser().parseComma())
6715 return true;
6716
6717 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6718
6719 int64_t Size;
6720 SMLoc SizeLoc = getLoc();
6721 if (getParser().parseAbsoluteExpression(Size))
6722 return true;
6723 if (Size < 0)
6724 return Error(SizeLoc, "size must be non-negative");
6725 if (Size > LocalMemorySize)
6726 return Error(SizeLoc, "size is too large");
6727
6728 int64_t Alignment = 4;
6729 if (trySkipToken(AsmToken::Comma)) {
6730 SMLoc AlignLoc = getLoc();
6731 if (getParser().parseAbsoluteExpression(Alignment))
6732 return true;
6733 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6734 return Error(AlignLoc, "alignment must be a power of two");
6735
6736 // Alignment larger than the size of LDS is possible in theory, as long
6737 // as the linker manages to place the symbol at address 0, but we do want
6738 // to make sure the alignment fits nicely into a 32-bit integer.
6739 if (Alignment >= 1u << 31)
6740 return Error(AlignLoc, "alignment is too large");
6741 }
6742
6743 if (parseEOL())
6744 return true;
6745
6746 Symbol->redefineIfPossible();
6747 if (!Symbol->isUndefined())
6748 return Error(NameLoc, "invalid symbol redefinition");
6749
6750 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6751 return false;
6752}
6753
6754bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6755 StringRef IDVal = DirectiveID.getString();
6756
6757 if (isHsaAbi(getSTI())) {
6758 if (IDVal == ".amdhsa_kernel")
6759 return ParseDirectiveAMDHSAKernel();
6760
6761 if (IDVal == ".amdhsa_code_object_version")
6762 return ParseDirectiveAMDHSACodeObjectVersion();
6763
6764 // TODO: Restructure/combine with PAL metadata directive.
6765 if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6766 return ParseDirectiveHSAMetadata();
6767 } else {
6768 if (IDVal == ".amd_kernel_code_t")
6769 return ParseDirectiveAMDKernelCodeT();
6770
6771 if (IDVal == ".amdgpu_hsa_kernel")
6772 return ParseDirectiveAMDGPUHsaKernel();
6773
6774 if (IDVal == ".amd_amdgpu_isa")
6775 return ParseDirectiveISAVersion();
6776
6778 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6779 Twine(" directive is "
6780 "not available on non-amdhsa OSes"))
6781 .str());
6782 }
6783 }
6784
6785 if (IDVal == ".amdgcn_target")
6786 return ParseDirectiveAMDGCNTarget();
6787
6788 if (IDVal == ".amdgpu_lds")
6789 return ParseDirectiveAMDGPULDS();
6790
6791 if (IDVal == PALMD::AssemblerDirectiveBegin)
6792 return ParseDirectivePALMetadataBegin();
6793
6794 if (IDVal == PALMD::AssemblerDirective)
6795 return ParseDirectivePALMetadata();
6796
6797 return true;
6798}
6799
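// Check whether the given register exists on the current subtarget; used to
// reject references to registers (e.g. TBA/TMA, XNACK_MASK, SGPR_NULL) that
// are only present on certain GPU generations.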
6800bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6801 MCRegister Reg) {
6802 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
6803 return isGFX9Plus();
6804
6805 // GFX10+ has 2 more SGPRs 104 and 105.
6806 if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
6807 return hasSGPR104_SGPR105();
6808
6809 switch (Reg.id()) {
6810 case SRC_SHARED_BASE_LO:
6811 case SRC_SHARED_BASE:
6812 case SRC_SHARED_LIMIT_LO:
6813 case SRC_SHARED_LIMIT:
6814 case SRC_PRIVATE_BASE_LO:
6815 case SRC_PRIVATE_BASE:
6816 case SRC_PRIVATE_LIMIT_LO:
6817 case SRC_PRIVATE_LIMIT:
6818 return isGFX9Plus();
6819 case SRC_FLAT_SCRATCH_BASE_LO:
6820 case SRC_FLAT_SCRATCH_BASE_HI:
6821 return hasGloballyAddressableScratch();
6822 case SRC_POPS_EXITING_WAVE_ID:
6823 return isGFX9Plus() && !isGFX11Plus();
6824 case TBA:
6825 case TBA_LO:
6826 case TBA_HI:
6827 case TMA:
6828 case TMA_LO:
6829 case TMA_HI:
6830 return !isGFX9Plus();
6831 case XNACK_MASK:
6832 case XNACK_MASK_LO:
6833 case XNACK_MASK_HI:
6834 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6835 case SGPR_NULL:
6836 return isGFX10Plus();
6837 case SRC_EXECZ:
6838 case SRC_VCCZ:
6839 return !isGFX11Plus();
6840 default:
6841 break;
6842 }
6843
6844 if (isCI())
6845 return true;
6846
6847 if (isSI() || isGFX10Plus()) {
6848 // No flat_scr on SI.
6849 // On GFX10Plus flat scratch is not a valid register operand and can only be
6850 // accessed with s_setreg/s_getreg.
6851 switch (Reg.id()) {
6852 case FLAT_SCR:
6853 case FLAT_SCR_LO:
6854 case FLAT_SCR_HI:
6855 return false;
6856 default:
6857 return true;
6858 }
6859 }
6860
6861 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6862 // SI/CI have.
6863 if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
6864 return hasSGPR102_SGPR103();
6865
6866 return true;
6867}
6868
6869ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6870 StringRef Mnemonic,
6871 OperandMode Mode) {
6872 ParseStatus Res = parseVOPD(Operands);
6873 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6874 return Res;
6875
6876 // Try to parse with a custom parser
6877 Res = MatchOperandParserImpl(Operands, Mnemonic);
6878
6879 // If we successfully parsed the operand or if there was an error parsing,
6880 // we are done.
6881 //
6882 // If we are parsing after we reach EndOfStatement then this means we
6883 // are appending default values to the Operands list. This is only done
6884 // by the custom parser, so we shouldn't continue on to the generic parsing.
6885 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6886 return Res;
6887
6888 SMLoc RBraceLoc;
6889 SMLoc LBraceLoc = getLoc();
6890 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6891 unsigned Prefix = Operands.size();
6892
6893 for (;;) {
6894 auto Loc = getLoc();
6895 Res = parseReg(Operands);
6896 if (Res.isNoMatch())
6897 Error(Loc, "expected a register");
6898 if (!Res.isSuccess())
6899 return ParseStatus::Failure;
6900
6901 RBraceLoc = getLoc();
6902 if (trySkipToken(AsmToken::RBrac))
6903 break;
6904
6905 if (!skipToken(AsmToken::Comma,
6906 "expected a comma or a closing square bracket"))
6907 return ParseStatus::Failure;
6908 }
6909
6910 if (Operands.size() - Prefix > 1) {
6911 Operands.insert(Operands.begin() + Prefix,
6912 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6913 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6914 }
6915
6916 return ParseStatus::Success;
6917 }
6918
6919 return parseRegOrImm(Operands);
6920}
6921
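// Strip any forced-encoding suffix from the mnemonic and record it for
// instruction matching; e.g. "v_add_f32_e64_dpp" forces the 64-bit DPP
// encoding and yields the mnemonic "v_add_f32".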
6922StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6923 // Clear any forced encodings from the previous instruction.
6924 setForcedEncodingSize(0);
6925 setForcedDPP(false);
6926 setForcedSDWA(false);
6927
6928 if (Name.consume_back("_e64_dpp")) {
6929 setForcedDPP(true);
6930 setForcedEncodingSize(64);
6931 return Name;
6932 }
6933 if (Name.consume_back("_e64")) {
6934 setForcedEncodingSize(64);
6935 return Name;
6936 }
6937 if (Name.consume_back("_e32")) {
6938 setForcedEncodingSize(32);
6939 return Name;
6940 }
6941 if (Name.consume_back("_dpp")) {
6942 setForcedDPP(true);
6943 return Name;
6944 }
6945 if (Name.consume_back("_sdwa")) {
6946 setForcedSDWA(true);
6947 return Name;
6948 }
6949 return Name;
6950}
6951
6952static void applyMnemonicAliases(StringRef &Mnemonic,
6953 const FeatureBitset &Features,
6954 unsigned VariantID);
6955
6956bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
6957 StringRef Name, SMLoc NameLoc,
6958 OperandVector &Operands) {
6959 // Add the instruction mnemonic
6960 Name = parseMnemonicSuffix(Name);
6961
6962 // If the target architecture uses MnemonicAlias, call it here to parse
6963 // operands correctly.
6964 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6965
6966 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6967
6968 bool IsMIMG = Name.starts_with("image_");
6969
6970 while (!trySkipToken(AsmToken::EndOfStatement)) {
6971 OperandMode Mode = OperandMode_Default;
6972 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6973 Mode = OperandMode_NSA;
6974 ParseStatus Res = parseOperand(Operands, Name, Mode);
6975
6976 if (!Res.isSuccess()) {
6977 checkUnsupportedInstruction(Name, NameLoc);
6978 if (!Parser.hasPendingError()) {
6979 // FIXME: use real operand location rather than the current location.
6980 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6981 : "not a valid operand.";
6982 Error(getLoc(), Msg);
6983 }
6984 while (!trySkipToken(AsmToken::EndOfStatement)) {
6985 lex();
6986 }
6987 return true;
6988 }
6989
6990 // Eat the comma or space if there is one.
6991 trySkipToken(AsmToken::Comma);
6992 }
6993
6994 return false;
6995}
6996
6997//===----------------------------------------------------------------------===//
6998// Utility functions
6999//===----------------------------------------------------------------------===//
7000
7001ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
7002 OperandVector &Operands) {
7003 SMLoc S = getLoc();
7004 if (!trySkipId(Name))
7005 return ParseStatus::NoMatch;
7006
7007 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
7008 return ParseStatus::Success;
7009}
7010
7011ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
7012 int64_t &IntVal) {
7013
7014 if (!trySkipId(Prefix, AsmToken::Colon))
7015 return ParseStatus::NoMatch;
7016
7017 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
7018}
7019
7020ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
7021 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7022 std::function<bool(int64_t &)> ConvertResult) {
7023 SMLoc S = getLoc();
7024 int64_t Value = 0;
7025
7026 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
7027 if (!Res.isSuccess())
7028 return Res;
7029
7030 if (ConvertResult && !ConvertResult(Value)) {
7031 Error(S, "invalid " + StringRef(Prefix) + " value.");
7032 }
7033
7034 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
7035 return ParseStatus::Success;
7036}
7037
7038ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7039 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7040 bool (*ConvertResult)(int64_t &)) {
7041 SMLoc S = getLoc();
7042 if (!trySkipId(Prefix, AsmToken::Colon))
7043 return ParseStatus::NoMatch;
7044
7045 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
7046 return ParseStatus::Failure;
7047
7048 unsigned Val = 0;
7049 const unsigned MaxSize = 4;
7050
7051 // FIXME: How to verify the number of elements matches the number of src
7052 // operands?
7053 for (int I = 0; ; ++I) {
7054 int64_t Op;
7055 SMLoc Loc = getLoc();
7056 if (!parseExpr(Op))
7057 return ParseStatus::Failure;
7058
7059 if (Op != 0 && Op != 1)
7060 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
7061
7062 Val |= (Op << I);
7063
7064 if (trySkipToken(AsmToken::RBrac))
7065 break;
7066
7067 if (I + 1 == MaxSize)
7068 return Error(getLoc(), "expected a closing square bracket");
7069
7070 if (!skipToken(AsmToken::Comma, "expected a comma"))
7071 return ParseStatus::Failure;
7072 }
7073
7074 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
7075 return ParseStatus::Success;
7076}
7077
7078ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7079 OperandVector &Operands,
7080 AMDGPUOperand::ImmTy ImmTy,
7081 bool IgnoreNegative) {
7082 int64_t Bit;
7083 SMLoc S = getLoc();
7084
7085 if (trySkipId(Name)) {
7086 Bit = 1;
7087 } else if (trySkipId("no", Name)) {
7088 if (IgnoreNegative)
7089 return ParseStatus::Success;
7090 Bit = 0;
7091 } else {
7092 return ParseStatus::NoMatch;
7093 }
7094
7095 if (Name == "r128" && !hasMIMG_R128())
7096 return Error(S, "r128 modifier is not supported on this GPU");
7097 if (Name == "a16" && !hasA16())
7098 return Error(S, "a16 modifier is not supported on this GPU");
7099
7100 if (Bit == 0 && Name == "gds") {
7101 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7102 if (Mnemo.starts_with("ds_gws"))
7103 return Error(S, "nogds is not allowed");
7104 }
7105
7106 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7107 ImmTy = AMDGPUOperand::ImmTyR128A16;
7108
7109 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
7110 return ParseStatus::Success;
7111}
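// Illustrative named-bit operands (names taken from the checks above):
// "gds"/"nogds", "r128", "a16". The "no" prefix clears the bit; with
// IgnoreNegative the negative form parses but adds no operand.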
7112
7113unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7114 bool &Disabling) const {
7115 Disabling = Id.consume_front("no");
7116
7117 if (isGFX940() && !Mnemo.starts_with("s_")) {
7118 return StringSwitch<unsigned>(Id)
7119 .Case("nt", AMDGPU::CPol::NT)
7120 .Case("sc0", AMDGPU::CPol::SC0)
7121 .Case("sc1", AMDGPU::CPol::SC1)
7122 .Default(0);
7123 }
7124
7125 return StringSwitch<unsigned>(Id)
7126 .Case("dlc", AMDGPU::CPol::DLC)
7127 .Case("glc", AMDGPU::CPol::GLC)
7128 .Case("scc", AMDGPU::CPol::SCC)
7129 .Case("slc", AMDGPU::CPol::SLC)
7130 .Default(0);
7131}
7132
7133ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
7134 if (isGFX12Plus()) {
7135 SMLoc StringLoc = getLoc();
7136
7137 int64_t CPolVal = 0;
7138 ParseStatus ResTH = ParseStatus::NoMatch;
7139 ParseStatus ResScope = ParseStatus::NoMatch;
7140 ParseStatus ResNV = ParseStatus::NoMatch;
7141 ParseStatus ResScal = ParseStatus::NoMatch;
7142
7143 for (;;) {
7144 if (ResTH.isNoMatch()) {
7145 int64_t TH;
7146 ResTH = parseTH(Operands, TH);
7147 if (ResTH.isFailure())
7148 return ResTH;
7149 if (ResTH.isSuccess()) {
7150 CPolVal |= TH;
7151 continue;
7152 }
7153 }
7154
7155 if (ResScope.isNoMatch()) {
7156 int64_t Scope;
7157 ResScope = parseScope(Operands, Scope);
7158 if (ResScope.isFailure())
7159 return ResScope;
7160 if (ResScope.isSuccess()) {
7161 CPolVal |= Scope;
7162 continue;
7163 }
7164 }
7165
7166 // NV bit exists on GFX12+, but only has an effect starting from GFX1250.
7167 // Allow parsing on all GFX12 and fail on validation for better
7168 // diagnostics.
7169 if (ResNV.isNoMatch()) {
7170 if (trySkipId("nv")) {
7171 ResNV = ParseStatus::Success;
7172 CPolVal |= CPol::NV;
7173 continue;
7174 } else if (trySkipId("no", "nv")) {
7175 ResNV = ParseStatus::Success;
7176 continue;
7177 }
7178 }
7179
7180 if (ResScal.isNoMatch()) {
7181 if (trySkipId("scale_offset")) {
7182 ResScal = ParseStatus::Success;
7183 CPolVal |= CPol::SCAL;
7184 continue;
7185 } else if (trySkipId("no", "scale_offset")) {
7186 ResScal = ParseStatus::Success;
7187 continue;
7188 }
7189 }
7190
7191 break;
7192 }
7193
7194 if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
7195 ResScal.isNoMatch())
7196 return ParseStatus::NoMatch;
7197
7198 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
7199 AMDGPUOperand::ImmTyCPol));
7200 return ParseStatus::Success;
7201 }
7202
7203 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7204 SMLoc OpLoc = getLoc();
7205 unsigned Enabled = 0, Seen = 0;
7206 for (;;) {
7207 SMLoc S = getLoc();
7208 bool Disabling;
7209 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7210 if (!CPol)
7211 break;
7212
7213 lex();
7214
7215 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
7216 return Error(S, "dlc modifier is not supported on this GPU");
7217
7218 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
7219 return Error(S, "scc modifier is not supported on this GPU");
7220
7221 if (Seen & CPol)
7222 return Error(S, "duplicate cache policy modifier");
7223
7224 if (!Disabling)
7225 Enabled |= CPol;
7226
7227 Seen |= CPol;
7228 }
7229
7230 if (!Seen)
7231 return ParseStatus::NoMatch;
7232
7233 Operands.push_back(
7234 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7235 return ParseStatus::Success;
7236}
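// Illustrative cache-policy operands for the three paths above (mnemonics
// are assumed; modifier names come from getCPolKind/parseScope/parseTH):
//   GFX12+:  buffer_load_b32 ... th:TH_LOAD_NT scope:SCOPE_SYS scale_offset
//   GFX940:  flat_load_dword ... sc0 sc1 nt
//   others:  buffer_load_dword ... glc slc dlc   (negated as "noglc" etc.)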
7237
7238ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7239 int64_t &Scope) {
7240 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7241 CPol::SCOPE_DEV, CPol::SCOPE_SYS};
7242
7243 ParseStatus Res = parseStringOrIntWithPrefix(
7244 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7245 Scope);
7246
7247 if (Res.isSuccess())
7248 Scope = Scopes[Scope];
7249
7250 return Res;
7251}
7252
7253ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
7254 TH = AMDGPU::CPol::TH_RT; // default
7255
7256 StringRef Value;
7257 SMLoc StringLoc;
7258 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
7259 if (!Res.isSuccess())
7260 return Res;
7261
7262 if (Value == "TH_DEFAULT")
7263 TH = AMDGPU::CPol::TH_RT;
7264 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
7265 Value == "TH_LOAD_NT_WB") {
7266 return Error(StringLoc, "invalid th value");
7267 } else if (Value.consume_front("TH_ATOMIC_")) {
7268 TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
7269 } else if (Value.consume_front("TH_LOAD_")) {
7270 TH = AMDGPU::CPol::TH_TYPE_LOAD;
7271 } else if (Value.consume_front("TH_STORE_")) {
7272 TH = AMDGPU::CPol::TH_TYPE_STORE;
7273 } else {
7274 return Error(StringLoc, "invalid th value");
7275 }
7276
7277 if (Value == "BYPASS")
7278 TH |= AMDGPU::CPol::TH_REAL_BYPASS;
7279
7280 if (TH != 0) {
7281 if (TH == AMDGPU::CPol::TH_TYPE_ATOMIC)
7282 TH |= StringSwitch<int64_t>(Value)
7283 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7284 .Case("RT", AMDGPU::CPol::TH_RT)
7285 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7286 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
7287 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
7288 AMDGPU::CPol::TH_ATOMIC_RETURN)
7289 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
7290 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
7291 AMDGPU::CPol::TH_ATOMIC_NT)
7292 .Default(0xffffffff);
7293 else
7294 TH |= StringSwitch<int64_t>(Value)
7295 .Case("RT", AMDGPU::CPol::TH_RT)
7296 .Case("NT", AMDGPU::CPol::TH_NT)
7297 .Case("HT", AMDGPU::CPol::TH_HT)
7298 .Case("LU", AMDGPU::CPol::TH_LU)
7299 .Case("WB", AMDGPU::CPol::TH_WB)
7300 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
7301 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
7302 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
7303 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
7304 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
7305 .Default(0xffffffff);
7306 }
7307
7308 if (TH == 0xffffffff)
7309 return Error(StringLoc, "invalid th value");
7310
7311 return ParseStatus::Success;
7312}
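// Illustrative th: values accepted by parseTH (derived from the switches
// above): "th:TH_RT", "th:TH_LOAD_NT_HT", "th:TH_STORE_BYPASS", and atomic
// forms such as "th:TH_ATOMIC_CASCADE_NT"; "th:TH_STORE_LU", "th:TH_LOAD_WB"
// and "th:TH_LOAD_NT_WB" are rejected up front.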
7313
7314static void
7315addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands,
7316 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7317 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7318 std::optional<unsigned> InsertAt = std::nullopt) {
7319 auto i = OptionalIdx.find(ImmT);
7320 if (i != OptionalIdx.end()) {
7321 unsigned Idx = i->second;
7322 const AMDGPUOperand &Op =
7323 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7324 if (InsertAt)
7325 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
7326 else
7327 Op.addImmOperands(Inst, 1);
7328 } else {
7329 if (InsertAt.has_value())
7330 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
7331 else
7332 Inst.addOperand(MCOperand::createImm(Default));
7333 }
7334}
7335
7336ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7337 StringRef &Value,
7338 SMLoc &StringLoc) {
7339 if (!trySkipId(Prefix, AsmToken::Colon))
7340 return ParseStatus::NoMatch;
7341
7342 StringLoc = getLoc();
7343 return parseId(Value, "expected an identifier") ? ParseStatus::Success
7344 : ParseStatus::Failure;
7345}
7346
7347ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7348 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7349 int64_t &IntVal) {
7350 if (!trySkipId(Name, AsmToken::Colon))
7351 return ParseStatus::NoMatch;
7352
7353 SMLoc StringLoc = getLoc();
7354
7355 StringRef Value;
7356 if (isToken(AsmToken::Identifier)) {
7357 Value = getTokenStr();
7358 lex();
7359
7360 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7361 if (Value == Ids[IntVal])
7362 break;
7363 } else if (!parseExpr(IntVal))
7364 return ParseStatus::Failure;
7365
7366 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7367 return Error(StringLoc, "invalid " + Twine(Name) + " value");
7368
7369 return ParseStatus::Success;
7370}
7371
7372ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7373 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7374 AMDGPUOperand::ImmTy Type) {
7375 SMLoc S = getLoc();
7376 int64_t IntVal;
7377
7378 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7379 if (Res.isSuccess())
7380 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7381
7382 return Res;
7383}
7384
7385//===----------------------------------------------------------------------===//
7386// MTBUF format
7387//===----------------------------------------------------------------------===//
7388
7389bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7390 int64_t MaxVal,
7391 int64_t &Fmt) {
7392 int64_t Val;
7393 SMLoc Loc = getLoc();
7394
7395 auto Res = parseIntWithPrefix(Pref, Val);
7396 if (Res.isFailure())
7397 return false;
7398 if (Res.isNoMatch())
7399 return true;
7400
7401 if (Val < 0 || Val > MaxVal) {
7402 Error(Loc, Twine("out of range ", StringRef(Pref)));
7403 return false;
7404 }
7405
7406 Fmt = Val;
7407 return true;
7408}
7409
7410ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7411 AMDGPUOperand::ImmTy ImmTy) {
7412 const char *Pref = "index_key";
7413 int64_t ImmVal = 0;
7414 SMLoc Loc = getLoc();
7415 auto Res = parseIntWithPrefix(Pref, ImmVal);
7416 if (!Res.isSuccess())
7417 return Res;
7418
7419 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7420 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7421 (ImmVal < 0 || ImmVal > 1))
7422 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7423
7424 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7425 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7426
7427 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7428 return ParseStatus::Success;
7429}
7430
7431ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
7432 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7433}
7434
7435ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
7436 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7437}
7438
7439ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
7440 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7441}
7442
7443ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
7444 StringRef Name,
7445 AMDGPUOperand::ImmTy Type) {
7446 return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixFmt,
7447 Type);
7448}
7449
7450ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
7451 return tryParseMatrixFMT(Operands, "matrix_a_fmt",
7452 AMDGPUOperand::ImmTyMatrixAFMT);
7453}
7454
7455ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
7456 return tryParseMatrixFMT(Operands, "matrix_b_fmt",
7457 AMDGPUOperand::ImmTyMatrixBFMT);
7458}
7459
7460ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
7461 StringRef Name,
7462 AMDGPUOperand::ImmTy Type) {
7463 return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixScale,
7464 Type);
7465}
7466
7467ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
7468 return tryParseMatrixScale(Operands, "matrix_a_scale",
7469 AMDGPUOperand::ImmTyMatrixAScale);
7470}
7471
7472ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
7473 return tryParseMatrixScale(Operands, "matrix_b_scale",
7474 AMDGPUOperand::ImmTyMatrixBScale);
7475}
7476
7477ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
7478 StringRef Name,
7479 AMDGPUOperand::ImmTy Type) {
7480 return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixScaleFmt,
7481 Type);
7482}
7483
7484ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
7485 return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
7486 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7487}
7488
7489ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
7490 return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
7491 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7492}
7493
7494// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7495// values to live in a joint format operand in the MCInst encoding.
7496ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
7497 using namespace llvm::AMDGPU::MTBUFFormat;
7498
7499 int64_t Dfmt = DFMT_UNDEF;
7500 int64_t Nfmt = NFMT_UNDEF;
7501
7502 // dfmt and nfmt can appear in either order, and each is optional.
7503 for (int I = 0; I < 2; ++I) {
7504 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
7505 return ParseStatus::Failure;
7506
7507 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
7508 return ParseStatus::Failure;
7509
7510 // Skip optional comma between dfmt/nfmt
7511 // but guard against 2 commas following each other.
7512 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7513 !peekToken().is(AsmToken::Comma)) {
7514 trySkipToken(AsmToken::Comma);
7515 }
7516 }
7517
7518 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7519 return ParseStatus::NoMatch;
7520
7521 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7522 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7523
7524 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7525 return ParseStatus::Success;
7526}
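// Sketch of the legacy split-format syntax handled here (the instruction
// text is illustrative): "tbuffer_load_format_x v0, off, s[0:3], dfmt:1,
// nfmt:7, 0". Either field may be omitted, in which case it takes its
// default value.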
7527
7528ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7529 using namespace llvm::AMDGPU::MTBUFFormat;
7530
7531 int64_t Fmt = UFMT_UNDEF;
7532
7533 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7534 return ParseStatus::Failure;
7535
7536 if (Fmt == UFMT_UNDEF)
7537 return ParseStatus::NoMatch;
7538
7539 Format = Fmt;
7540 return ParseStatus::Success;
7541}
7542
7543bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7544 int64_t &Nfmt,
7545 StringRef FormatStr,
7546 SMLoc Loc) {
7547 using namespace llvm::AMDGPU::MTBUFFormat;
7548 int64_t Format;
7549
7550 Format = getDfmt(FormatStr);
7551 if (Format != DFMT_UNDEF) {
7552 Dfmt = Format;
7553 return true;
7554 }
7555
7556 Format = getNfmt(FormatStr, getSTI());
7557 if (Format != NFMT_UNDEF) {
7558 Nfmt = Format;
7559 return true;
7560 }
7561
7562 Error(Loc, "unsupported format");
7563 return false;
7564}
7565
7566ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7567 SMLoc FormatLoc,
7568 int64_t &Format) {
7569 using namespace llvm::AMDGPU::MTBUFFormat;
7570
7571 int64_t Dfmt = DFMT_UNDEF;
7572 int64_t Nfmt = NFMT_UNDEF;
7573 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7574 return ParseStatus::Failure;
7575
7576 if (trySkipToken(AsmToken::Comma)) {
7577 StringRef Str;
7578 SMLoc Loc = getLoc();
7579 if (!parseId(Str, "expected a format string") ||
7580 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7581 return ParseStatus::Failure;
7582 if (Dfmt == DFMT_UNDEF)
7583 return Error(Loc, "duplicate numeric format");
7584 if (Nfmt == NFMT_UNDEF)
7585 return Error(Loc, "duplicate data format");
7586 }
7587
7588 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7589 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7590
7591 if (isGFX10Plus()) {
7592 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
7593 if (Ufmt == UFMT_UNDEF)
7594 return Error(FormatLoc, "unsupported format");
7595 Format = Ufmt;
7596 } else {
7597 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7598 }
7599
7600 return ParseStatus::Success;
7601}
7602
7603ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7604 SMLoc Loc,
7605 int64_t &Format) {
7606 using namespace llvm::AMDGPU::MTBUFFormat;
7607
7608 auto Id = getUnifiedFormat(FormatStr, getSTI());
7609 if (Id == UFMT_UNDEF)
7610 return ParseStatus::NoMatch;
7611
7612 if (!isGFX10Plus())
7613 return Error(Loc, "unified format is not supported on this GPU");
7614
7615 Format = Id;
7616 return ParseStatus::Success;
7617}
7618
7619ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7620 using namespace llvm::AMDGPU::MTBUFFormat;
7621 SMLoc Loc = getLoc();
7622
7623 if (!parseExpr(Format))
7624 return ParseStatus::Failure;
7625 if (!isValidFormatEncoding(Format, getSTI()))
7626 return Error(Loc, "out of range format");
7627
7628 return ParseStatus::Success;
7629}
7630
7631ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7632 using namespace llvm::AMDGPU::MTBUFFormat;
7633
7634 if (!trySkipId("format", AsmToken::Colon))
7635 return ParseStatus::NoMatch;
7636
7637 if (trySkipToken(AsmToken::LBrac)) {
7638 StringRef FormatStr;
7639 SMLoc Loc = getLoc();
7640 if (!parseId(FormatStr, "expected a format string"))
7641 return ParseStatus::Failure;
7642
7643 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7644 if (Res.isNoMatch())
7645 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7646 if (!Res.isSuccess())
7647 return Res;
7648
7649 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7650 return ParseStatus::Failure;
7651
7652 return ParseStatus::Success;
7653 }
7654
7655 return parseNumericFormat(Format);
7656}
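// Illustrative format operands for the symbolic/numeric path above (the
// symbolic names are defined by the MTBUFFormat tables; spellings assumed):
//   format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]   // split, pre-GFX10
//   format:[BUF_FMT_32_FLOAT]                           // unified, GFX10+
//   format:22                                           // raw encoding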
7657
7658ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
7659 using namespace llvm::AMDGPU::MTBUFFormat;
7660
7661 int64_t Format = getDefaultFormatEncoding(getSTI());
7662 ParseStatus Res;
7663 SMLoc Loc = getLoc();
7664
7665 // Parse legacy format syntax.
7666 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
7667 if (Res.isFailure())
7668 return Res;
7669
7670 bool FormatFound = Res.isSuccess();
7671
7672 Operands.push_back(
7673 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7674
7675 if (FormatFound)
7676 trySkipToken(AsmToken::Comma);
7677
7678 if (isToken(AsmToken::EndOfStatement)) {
7679 // We are expecting an soffset operand,
7680 // but let the matcher handle the error.
7681 return ParseStatus::Success;
7682 }
7683
7684 // Parse soffset.
7685 Res = parseRegOrImm(Operands);
7686 if (!Res.isSuccess())
7687 return Res;
7688
7689 trySkipToken(AsmToken::Comma);
7690
7691 if (!FormatFound) {
7692 Res = parseSymbolicOrNumericFormat(Format);
7693 if (Res.isFailure())
7694 return Res;
7695 if (Res.isSuccess()) {
7696 auto Size = Operands.size();
7697 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7698 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7699 Op.setImm(Format);
7700 }
7701 return ParseStatus::Success;
7702 }
7703
7704 if (isId("format") && peekToken().is(AsmToken::Colon))
7705 return Error(getLoc(), "duplicate format");
7706 return ParseStatus::Success;
7707}
7708
7709ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7710 ParseStatus Res =
7711 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7712 if (Res.isNoMatch()) {
7713 Res = parseIntWithPrefix("inst_offset", Operands,
7714 AMDGPUOperand::ImmTyInstOffset);
7715 }
7716 return Res;
7717}
7718
7719ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7720 ParseStatus Res =
7721 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7722 if (Res.isNoMatch())
7723 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7724 return Res;
7725}
7726
7727ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7728 ParseStatus Res =
7729 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7730 if (Res.isNoMatch()) {
7731 Res =
7732 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7733 }
7734 return Res;
7735}
7736
7737//===----------------------------------------------------------------------===//
7738// Exp
7739//===----------------------------------------------------------------------===//
7740
7741void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7742 OptionalImmIndexMap OptionalIdx;
7743
7744 unsigned OperandIdx[4];
7745 unsigned EnMask = 0;
7746 int SrcIdx = 0;
7747
7748 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7749 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7750
7751 // Add the register arguments
7752 if (Op.isReg()) {
7753 assert(SrcIdx < 4);
7754 OperandIdx[SrcIdx] = Inst.size();
7755 Op.addRegOperands(Inst, 1);
7756 ++SrcIdx;
7757 continue;
7758 }
7759
7760 if (Op.isOff()) {
7761 assert(SrcIdx < 4);
7762 OperandIdx[SrcIdx] = Inst.size();
7763 Inst.addOperand(MCOperand::createReg(MCRegister()));
7764 ++SrcIdx;
7765 continue;
7766 }
7767
7768 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7769 Op.addImmOperands(Inst, 1);
7770 continue;
7771 }
7772
7773 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7774 continue;
7775
7776 // Handle optional arguments
7777 OptionalIdx[Op.getImmTy()] = i;
7778 }
7779
7780 assert(SrcIdx == 4);
7781
7782 bool Compr = false;
7783 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7784 Compr = true;
7785 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7786 Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
7787 Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
7788 }
7789
7790 for (auto i = 0; i < SrcIdx; ++i) {
7791 if (Inst.getOperand(OperandIdx[i]).getReg()) {
7792 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7793 }
7794 }
7795
7796 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7797 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7798
7799 Inst.addOperand(MCOperand::createImm(EnMask));
7800}
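// Enable-mask sketch for the loop above: in "exp mrt0 v0, v1, off, off"
// (illustrative syntax) sources 0 and 1 are live, giving EnMask = 0b0011;
// with "compr" every live packed register enables a channel pair instead
// (0x3 << 2*i).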
7801
7802//===----------------------------------------------------------------------===//
7803// s_waitcnt
7804//===----------------------------------------------------------------------===//
7805
7806static bool
7807encodeCnt(
7808 const AMDGPU::IsaVersion ISA,
7809 int64_t &IntVal,
7810 int64_t CntVal,
7811 bool Saturate,
7812 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7813 unsigned (*decode)(const IsaVersion &Version, unsigned))
7814{
7815 bool Failed = false;
7816
7817 IntVal = encode(ISA, IntVal, CntVal);
7818 if (CntVal != decode(ISA, IntVal)) {
7819 if (Saturate) {
7820 IntVal = encode(ISA, IntVal, -1);
7821 } else {
7822 Failed = true;
7823 }
7824 }
7825 return Failed;
7826}
7827
7828bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7829
7830 SMLoc CntLoc = getLoc();
7831 StringRef CntName = getTokenStr();
7832
7833 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7834 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7835 return false;
7836
7837 int64_t CntVal;
7838 SMLoc ValLoc = getLoc();
7839 if (!parseExpr(CntVal))
7840 return false;
7841
7842 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7843
7844 bool Failed = true;
7845 bool Sat = CntName.ends_with("_sat");
7846
7847 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7848 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7849 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7850 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7851 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7852 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7853 } else {
7854 Error(CntLoc, "invalid counter name " + CntName);
7855 return false;
7856 }
7857
7858 if (Failed) {
7859 Error(ValLoc, "too large value for " + CntName);
7860 return false;
7861 }
7862
7863 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7864 return false;
7865
7866 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7867 if (isToken(AsmToken::EndOfStatement)) {
7868 Error(getLoc(), "expected a counter name");
7869 return false;
7870 }
7871 }
7872
7873 return true;
7874}
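// Illustrative s_waitcnt operands accepted by parseCnt and the parseSWaitCnt
// wrapper below (counter names from the checks above):
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
//   s_waitcnt vmcnt(1) & lgkmcnt(2)
//   s_waitcnt lgkmcnt_sat(100)   // saturates instead of erroring
//   s_waitcnt 0                  // raw bitmask expression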
7875
7876ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7877 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7878 int64_t Waitcnt = getWaitcntBitMask(ISA);
7879 SMLoc S = getLoc();
7880
7881 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7882 while (!isToken(AsmToken::EndOfStatement)) {
7883 if (!parseCnt(Waitcnt))
7884 return ParseStatus::Failure;
7885 }
7886 } else {
7887 if (!parseExpr(Waitcnt))
7888 return ParseStatus::Failure;
7889 }
7890
7891 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7892 return ParseStatus::Success;
7893}
7894
7895bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7896 SMLoc FieldLoc = getLoc();
7897 StringRef FieldName = getTokenStr();
7898 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7899 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7900 return false;
7901
7902 SMLoc ValueLoc = getLoc();
7903 StringRef ValueName = getTokenStr();
7904 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7905 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7906 return false;
7907
7908 unsigned Shift;
7909 if (FieldName == "instid0") {
7910 Shift = 0;
7911 } else if (FieldName == "instskip") {
7912 Shift = 4;
7913 } else if (FieldName == "instid1") {
7914 Shift = 7;
7915 } else {
7916 Error(FieldLoc, "invalid field name " + FieldName);
7917 return false;
7918 }
7919
7920 int Value;
7921 if (Shift == 4) {
7922 // Parse values for instskip.
7923 Value = StringSwitch<int>(ValueName)
7924 .Case("SAME", 0)
7925 .Case("NEXT", 1)
7926 .Case("SKIP_1", 2)
7927 .Case("SKIP_2", 3)
7928 .Case("SKIP_3", 4)
7929 .Case("SKIP_4", 5)
7930 .Default(-1);
7931 } else {
7932 // Parse values for instid0 and instid1.
7933 Value = StringSwitch<int>(ValueName)
7934 .Case("NO_DEP", 0)
7935 .Case("VALU_DEP_1", 1)
7936 .Case("VALU_DEP_2", 2)
7937 .Case("VALU_DEP_3", 3)
7938 .Case("VALU_DEP_4", 4)
7939 .Case("TRANS32_DEP_1", 5)
7940 .Case("TRANS32_DEP_2", 6)
7941 .Case("TRANS32_DEP_3", 7)
7942 .Case("FMA_ACCUM_CYCLE_1", 8)
7943 .Case("SALU_CYCLE_1", 9)
7944 .Case("SALU_CYCLE_2", 10)
7945 .Case("SALU_CYCLE_3", 11)
7946 .Default(-1);
7947 }
7948 if (Value < 0) {
7949 Error(ValueLoc, "invalid value name " + ValueName);
7950 return false;
7951 }
7952
7953 Delay |= Value << Shift;
7954 return true;
7955}
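// Illustrative s_delay_alu operand built from the fields above:
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
// Each field is shifted into the immediate (bits 0, 4 and 7 respectively).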
7956
7957ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7958 int64_t Delay = 0;
7959 SMLoc S = getLoc();
7960
7961 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7962 do {
7963 if (!parseDelay(Delay))
7964 return ParseStatus::Failure;
7965 } while (trySkipToken(AsmToken::Pipe));
7966 } else {
7967 if (!parseExpr(Delay))
7968 return ParseStatus::Failure;
7969 }
7970
7971 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7972 return ParseStatus::Success;
7973}
7974
7975bool
7976AMDGPUOperand::isSWaitCnt() const {
7977 return isImm();
7978}
7979
7980bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7981
7982//===----------------------------------------------------------------------===//
7983// DepCtr
7984//===----------------------------------------------------------------------===//
7985
7986void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7987 StringRef DepCtrName) {
7988 switch (ErrorId) {
7989 case OPR_ID_UNKNOWN:
7990 Error(Loc, Twine("invalid counter name ", DepCtrName));
7991 return;
7992 case OPR_ID_UNSUPPORTED:
7993 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7994 return;
7995 case OPR_ID_DUPLICATE:
7996 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7997 return;
7998 case OPR_VAL_INVALID:
7999 Error(Loc, Twine("invalid value for ", DepCtrName));
8000 return;
8001 default:
8002 assert(false);
8003 }
8004}
8005
8006bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
8007
8008 using namespace llvm::AMDGPU::DepCtr;
8009
8010 SMLoc DepCtrLoc = getLoc();
8011 StringRef DepCtrName = getTokenStr();
8012
8013 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
8014 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
8015 return false;
8016
8017 int64_t ExprVal;
8018 if (!parseExpr(ExprVal))
8019 return false;
8020
8021 unsigned PrevOprMask = UsedOprMask;
8022 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
8023
8024 if (CntVal < 0) {
8025 depCtrError(DepCtrLoc, CntVal, DepCtrName);
8026 return false;
8027 }
8028
8029 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8030 return false;
8031
8032 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
8033 if (isToken(AsmToken::EndOfStatement)) {
8034 Error(getLoc(), "expected a counter name");
8035 return false;
8036 }
8037 }
8038
8039 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
8040 DepCtr = (DepCtr & ~CntValMask) | CntVal;
8041 return true;
8042}
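// Illustrative s_waitcnt_depctr operands (the counter names live in the
// DepCtr tables elsewhere; these spellings are assumptions):
//   s_waitcnt_depctr depctr_va_vdst(0)
//   s_waitcnt_depctr depctr_sa_sdst(0) & depctr_vm_vsrc(1)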
8043
8044ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
8045 using namespace llvm::AMDGPU::DepCtr;
8046
8047 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
8048 SMLoc Loc = getLoc();
8049
8050 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8051 unsigned UsedOprMask = 0;
8052 while (!isToken(AsmToken::EndOfStatement)) {
8053 if (!parseDepCtr(DepCtr, UsedOprMask))
8054 return ParseStatus::Failure;
8055 }
8056 } else {
8057 if (!parseExpr(DepCtr))
8058 return ParseStatus::Failure;
8059 }
8060
8061 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
8062 return ParseStatus::Success;
8063}
8064
8065bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8066
8067//===----------------------------------------------------------------------===//
8068// hwreg
8069//===----------------------------------------------------------------------===//
8070
8071ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8072 OperandInfoTy &Offset,
8073 OperandInfoTy &Width) {
8074 using namespace llvm::AMDGPU::Hwreg;
8075
8076 if (!trySkipId("hwreg", AsmToken::LParen))
8077 return ParseStatus::NoMatch;
8078
8079 // The register may be specified by name or using a numeric code
8080 HwReg.Loc = getLoc();
8081 if (isToken(AsmToken::Identifier) &&
8082 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8083 HwReg.IsSymbolic = true;
8084 lex(); // skip register name
8085 } else if (!parseExpr(HwReg.Val, "a register name")) {
8086 return ParseStatus::Failure;
8087 }
8088
8089 if (trySkipToken(AsmToken::RParen))
8090 return ParseStatus::Success;
8091
8092 // parse optional params
8093 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
8094 return ParseStatus::Failure;
8095
8096 Offset.Loc = getLoc();
8097 if (!parseExpr(Offset.Val))
8098 return ParseStatus::Failure;
8099
8100 if (!skipToken(AsmToken::Comma, "expected a comma"))
8101 return ParseStatus::Failure;
8102
8103 Width.Loc = getLoc();
8104 if (!parseExpr(Width.Val) ||
8105 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8106 return ParseStatus::Failure;
8107
8108 return ParseStatus::Success;
8109}
8110
8111ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8112 using namespace llvm::AMDGPU::Hwreg;
8113
8114 int64_t ImmVal = 0;
8115 SMLoc Loc = getLoc();
8116
8117 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8118 HwregId::Default);
8119 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8120 HwregOffset::Default);
8121 struct : StructuredOpField {
8122 using StructuredOpField::StructuredOpField;
8123 bool validate(AMDGPUAsmParser &Parser) const override {
8124 if (!isUIntN(Width, Val - 1))
8125 return Error(Parser, "only values from 1 to 32 are legal");
8126 return true;
8127 }
8128 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8129 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
8130
8131 if (Res.isNoMatch())
8132 Res = parseHwregFunc(HwReg, Offset, Width);
8133
8134 if (Res.isSuccess()) {
8135 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8136 return ParseStatus::Failure;
8137 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8138 }
8139
8140 if (Res.isNoMatch() &&
8141 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8142 Res = ParseStatus::Success;
8143
8144 if (!Res.isSuccess())
8145 return ParseStatus::Failure;
8146
8147 if (!isUInt<16>(ImmVal))
8148 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8149 Operands.push_back(
8150 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8151 return ParseStatus::Success;
8152}
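// Illustrative hwreg operands covered above (HW_REG_MODE is an assumed
// symbolic name from the Hwreg tables):
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)
//   s_getreg_b32 s0, hwreg(1)                     // offset/size use defaults
//   s_getreg_b32 s0, {id: 1, offset: 0, size: 32}
//   s_getreg_b32 s0, 0x1801                       // any 16-bit immediate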
8153
8154bool AMDGPUOperand::isHwreg() const {
8155 return isImmTy(ImmTyHwreg);
8156}
8157
8158//===----------------------------------------------------------------------===//
8159// sendmsg
8160//===----------------------------------------------------------------------===//
8161
8162bool
8163AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8164 OperandInfoTy &Op,
8165 OperandInfoTy &Stream) {
8166 using namespace llvm::AMDGPU::SendMsg;
8167
8168 Msg.Loc = getLoc();
8169 if (isToken(AsmToken::Identifier) &&
8170 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8171 Msg.IsSymbolic = true;
8172 lex(); // skip message name
8173 } else if (!parseExpr(Msg.Val, "a message name")) {
8174 return false;
8175 }
8176
8177 if (trySkipToken(AsmToken::Comma)) {
8178 Op.IsDefined = true;
8179 Op.Loc = getLoc();
8180 if (isToken(AsmToken::Identifier) &&
8181 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8182 OPR_ID_UNKNOWN) {
8183 lex(); // skip operation name
8184 } else if (!parseExpr(Op.Val, "an operation name")) {
8185 return false;
8186 }
8187
8188 if (trySkipToken(AsmToken::Comma)) {
8189 Stream.IsDefined = true;
8190 Stream.Loc = getLoc();
8191 if (!parseExpr(Stream.Val))
8192 return false;
8193 }
8194 }
8195
8196 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
8197}
8198
8199bool
8200AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
8201 const OperandInfoTy &Op,
8202 const OperandInfoTy &Stream) {
8203 using namespace llvm::AMDGPU::SendMsg;
8204
8205 // Validation strictness depends on whether the message is specified
8206 // in symbolic or in numeric form. In the latter case
8207 // we only check that the value can be encoded.
8208 bool Strict = Msg.IsSymbolic;
8209
8210 if (Strict) {
8211 if (Msg.Val == OPR_ID_UNSUPPORTED) {
8212 Error(Msg.Loc, "specified message id is not supported on this GPU");
8213 return false;
8214 }
8215 } else {
8216 if (!isValidMsgId(Msg.Val, getSTI())) {
8217 Error(Msg.Loc, "invalid message id");
8218 return false;
8219 }
8220 }
8221 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
8222 if (Op.IsDefined) {
8223 Error(Op.Loc, "message does not support operations");
8224 } else {
8225 Error(Msg.Loc, "missing message operation");
8226 }
8227 return false;
8228 }
8229 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
8230 if (Op.Val == OPR_ID_UNSUPPORTED)
8231 Error(Op.Loc, "specified operation id is not supported on this GPU");
8232 else
8233 Error(Op.Loc, "invalid operation id");
8234 return false;
8235 }
8236 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
8237 Stream.IsDefined) {
8238 Error(Stream.Loc, "message operation does not support streams");
8239 return false;
8240 }
8241 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
8242 Error(Stream.Loc, "invalid message stream id");
8243 return false;
8244 }
8245 return true;
8246}
8247
8248ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
8249 using namespace llvm::AMDGPU::SendMsg;
8250
8251 int64_t ImmVal = 0;
8252 SMLoc Loc = getLoc();
8253
8254 if (trySkipId("sendmsg", AsmToken::LParen)) {
8255 OperandInfoTy Msg(OPR_ID_UNKNOWN);
8256 OperandInfoTy Op(OP_NONE_);
8257 OperandInfoTy Stream(STREAM_ID_NONE_);
8258 if (parseSendMsgBody(Msg, Op, Stream) &&
8259 validateSendMsg(Msg, Op, Stream)) {
8260 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
8261 } else {
8262 return ParseStatus::Failure;
8263 }
8264 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
8265 if (ImmVal < 0 || !isUInt<16>(ImmVal))
8266 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8267 } else {
8268 return ParseStatus::Failure;
8269 }
8270
8271 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8272 return ParseStatus::Success;
8273}
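// Illustrative sendmsg operands (MSG_*/GS_OP_* are assumed spellings from
// the SendMsg tables):
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg 0x22               // any 16-bit immediate is also accepted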
8274
8275bool AMDGPUOperand::isSendMsg() const {
8276 return isImmTy(ImmTySendMsg);
8277}
8278
8279ParseStatus AMDGPUAsmParser::parseWaitEvent(OperandVector &Operands) {
8280 using namespace llvm::AMDGPU::WaitEvent;
8281
8282 SMLoc Loc = getLoc();
8283 int64_t ImmVal = 0;
8284
8285 StructuredOpField DontWaitExportReady("dont_wait_export_ready", "bit value",
8286 1, 0);
8287 StructuredOpField ExportReady("export_ready", "bit value", 1, 0);
8288
8289 StructuredOpField *TargetBitfield =
8290 isGFX11() ? &DontWaitExportReady : &ExportReady;
8291
8292 ParseStatus Res = parseStructuredOpFields({TargetBitfield});
8293 if (Res.isNoMatch() && parseExpr(ImmVal, "structured immediate"))
8294 Res = ParseStatus::Success;
8295 else if (Res.isSuccess()) {
8296 if (!validateStructuredOpFields({TargetBitfield}))
8297 return ParseStatus::Failure;
8298 ImmVal = TargetBitfield->Val;
8299 }
8300
8301 if (!Res.isSuccess())
8302 return ParseStatus::Failure;
8303
8304 if (!isUInt<16>(ImmVal))
8305 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8306
8307 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc,
8308 AMDGPUOperand::ImmTyWaitEvent));
8309 return ParseStatus::Success;
8310}
8311
8312bool AMDGPUOperand::isWaitEvent() const { return isImmTy(ImmTyWaitEvent); }
8313
8314//===----------------------------------------------------------------------===//
8315// v_interp
8316//===----------------------------------------------------------------------===//
8317
8318ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8319 StringRef Str;
8320 SMLoc S = getLoc();
8321
8322 if (!parseId(Str))
8323 return ParseStatus::NoMatch;
8324
8325 int Slot = StringSwitch<int>(Str)
8326 .Case("p10", 0)
8327 .Case("p20", 1)
8328 .Case("p0", 2)
8329 .Default(-1);
8330
8331 if (Slot == -1)
8332 return Error(S, "invalid interpolation slot");
8333
8334 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8335 AMDGPUOperand::ImmTyInterpSlot));
8336 return ParseStatus::Success;
8337}
8338
8339ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8340 StringRef Str;
8341 SMLoc S = getLoc();
8342
8343 if (!parseId(Str))
8344 return ParseStatus::NoMatch;
8345
8346 if (!Str.starts_with("attr"))
8347 return Error(S, "invalid interpolation attribute");
8348
8349 StringRef Chan = Str.take_back(2);
8350 int AttrChan = StringSwitch<int>(Chan)
8351 .Case(".x", 0)
8352 .Case(".y", 1)
8353 .Case(".z", 2)
8354 .Case(".w", 3)
8355 .Default(-1);
8356 if (AttrChan == -1)
8357 return Error(S, "invalid or missing interpolation attribute channel");
8358
8359 Str = Str.drop_back(2).drop_front(4);
8360
8361 uint8_t Attr;
8362 if (Str.getAsInteger(10, Attr))
8363 return Error(S, "invalid or missing interpolation attribute number");
8364
8365 if (Attr > 32)
8366 return Error(S, "out of bounds interpolation attribute number");
8367
8368 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
8369
8370 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
8371 AMDGPUOperand::ImmTyInterpAttr));
8372 Operands.push_back(AMDGPUOperand::CreateImm(
8373 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8374 return ParseStatus::Success;
8375}
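// Illustrative interpolation operands: slots "p10", "p20" and "p0" map to
// 0/1/2 above, and an attribute such as "attr12.w" splits into attribute 12
// plus channel 3, e.g. (assumed mnemonic) "v_interp_p1_f32 v0, v1, attr12.x".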
8376
8377//===----------------------------------------------------------------------===//
8378// exp
8379//===----------------------------------------------------------------------===//
8380
8381ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8382 using namespace llvm::AMDGPU::Exp;
8383
8384 StringRef Str;
8385 SMLoc S = getLoc();
8386
8387 if (!parseId(Str))
8388 return ParseStatus::NoMatch;
8389
8390 unsigned Id = getTgtId(Str);
8391 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
8392 return Error(S, (Id == ET_INVALID)
8393 ? "invalid exp target"
8394 : "exp target is not supported on this GPU");
8395
8396 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
8397 AMDGPUOperand::ImmTyExpTgt));
8398 return ParseStatus::Success;
8399}
8400
8401//===----------------------------------------------------------------------===//
8402// parser helpers
8403//===----------------------------------------------------------------------===//
8404
8405bool
8406AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8407 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8408}
8409
8410bool
8411AMDGPUAsmParser::isId(const StringRef Id) const {
8412 return isId(getToken(), Id);
8413}
8414
8415bool
8416AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8417 return getTokenKind() == Kind;
8418}
8419
8420StringRef AMDGPUAsmParser::getId() const {
8421 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8422}
8423
8424bool
8425AMDGPUAsmParser::trySkipId(const StringRef Id) {
8426 if (isId(Id)) {
8427 lex();
8428 return true;
8429 }
8430 return false;
8431}
8432
8433bool
8434AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8435 if (isToken(AsmToken::Identifier)) {
8436 StringRef Tok = getTokenStr();
8437 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8438 lex();
8439 return true;
8440 }
8441 }
8442 return false;
8443}
8444
8445bool
8446AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8447 if (isId(Id) && peekToken().is(Kind)) {
8448 lex();
8449 lex();
8450 return true;
8451 }
8452 return false;
8453}
8454
8455bool
8456AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8457 if (isToken(Kind)) {
8458 lex();
8459 return true;
8460 }
8461 return false;
8462}
8463
8464bool
8465AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8466 const StringRef ErrMsg) {
8467 if (!trySkipToken(Kind)) {
8468 Error(getLoc(), ErrMsg);
8469 return false;
8470 }
8471 return true;
8472}
8473
8474bool
8475AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8476 SMLoc S = getLoc();
8477
8478 const MCExpr *Expr;
8479 if (Parser.parseExpression(Expr))
8480 return false;
8481
8482 if (Expr->evaluateAsAbsolute(Imm))
8483 return true;
8484
8485 if (Expected.empty()) {
8486 Error(S, "expected absolute expression");
8487 } else {
8488 Error(S, Twine("expected ", Expected) +
8489 Twine(" or an absolute expression"));
8490 }
8491 return false;
8492}
8493
8494bool
8495AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8496 SMLoc S = getLoc();
8497
8498 const MCExpr *Expr;
8499 if (Parser.parseExpression(Expr))
8500 return false;
8501
8502 int64_t IntVal;
8503 if (Expr->evaluateAsAbsolute(IntVal)) {
8504 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8505 } else {
8506 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8507 }
8508 return true;
8509}
8510
8511bool
8512AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8513 if (isToken(AsmToken::String)) {
8514 Val = getToken().getStringContents();
8515 lex();
8516 return true;
8517 }
8518 Error(getLoc(), ErrMsg);
8519 return false;
8520}
8521
8522bool
8523AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8524 if (isToken(AsmToken::Identifier)) {
8525 Val = getTokenStr();
8526 lex();
8527 return true;
8528 }
8529 if (!ErrMsg.empty())
8530 Error(getLoc(), ErrMsg);
8531 return false;
8532}
8533
8534AsmToken
8535AMDGPUAsmParser::getToken() const {
8536 return Parser.getTok();
8537}
8538
8539AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8540 return isToken(AsmToken::EndOfStatement)
8541 ? getToken()
8542 : getLexer().peekTok(ShouldSkipSpace);
8543}
8544
8545void
8546AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8547 auto TokCount = getLexer().peekTokens(Tokens);
8548
8549 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8550 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8551}
8552
8553AsmToken::TokenKind
8554AMDGPUAsmParser::getTokenKind() const {
8555 return getLexer().getKind();
8556}
8557
8558SMLoc
8559AMDGPUAsmParser::getLoc() const {
8560 return getToken().getLoc();
8561}
8562
8563StringRef
8564AMDGPUAsmParser::getTokenStr() const {
8565 return getToken().getString();
8566}
8567
8568void
8569AMDGPUAsmParser::lex() {
8570 Parser.Lex();
8571}
8572
8573const AMDGPUOperand &
8574AMDGPUAsmParser::findMCOperand(const OperandVector &Operands,
8575 int MCOpIdx) const {
8576 for (const auto &Op : Operands) {
8577 const AMDGPUOperand &TargetOp = static_cast<AMDGPUOperand &>(*Op);
8578 if (TargetOp.getMCOpIdx() == MCOpIdx)
8579 return TargetOp;
8580 }
8581 llvm_unreachable("no such MC operand!");
8582}
8583
8584SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8585 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8586}
8587
8588// Returns one of the given locations that comes later in the source.
8589SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8590 return a.getPointer() < b.getPointer() ? b : a;
8591}
8592
8593SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
8594 int MCOpIdx) const {
8595 return findMCOperand(Operands, MCOpIdx).getStartLoc();
8596}
8597
8598SMLoc
8599AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8600 const OperandVector &Operands) const {
8601 for (unsigned i = Operands.size() - 1; i > 0; --i) {
8602 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8603 if (Test(Op))
8604 return Op.getStartLoc();
8605 }
8606 return getInstLoc(Operands);
8607}
8608
8609SMLoc
8610AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8611 const OperandVector &Operands) const {
8612 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8613 return getOperandLoc(Test, Operands);
8614}
8615
8616ParseStatus
8617AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
8618 if (!trySkipToken(AsmToken::LCurly))
8619 return ParseStatus::NoMatch;
8620
8621 bool First = true;
8622 while (!trySkipToken(AsmToken::RCurly)) {
8623 if (!First &&
8624 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
8625 return ParseStatus::Failure;
8626
8627 StringRef Id = getTokenStr();
8628 SMLoc IdLoc = getLoc();
8629 if (!skipToken(AsmToken::Identifier, "field name expected") ||
8630 !skipToken(AsmToken::Colon, "colon expected"))
8631 return ParseStatus::Failure;
8632
8633 const auto *I =
8634 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
8635 if (I == Fields.end())
8636 return Error(IdLoc, "unknown field");
8637 if ((*I)->IsDefined)
8638 return Error(IdLoc, "duplicate field");
8639
8640 // TODO: Support symbolic values.
8641 (*I)->Loc = getLoc();
8642 if (!parseExpr((*I)->Val))
8643 return ParseStatus::Failure;
8644 (*I)->IsDefined = true;
8645
8646 First = false;
8647 }
8648 return ParseStatus::Success;
8649}
8650
8651bool AMDGPUAsmParser::validateStructuredOpFields(
8652 ArrayRef<const StructuredOpField *> Fields) {
8653 return all_of(Fields, [this](const StructuredOpField *F) {
8654 return F->validate(*this);
8655 });
8656}
8657
8658//===----------------------------------------------------------------------===//
8659// swizzle
8660//===----------------------------------------------------------------------===//
8661
8662LLVM_READNONE
8663static unsigned
8664encodeBitmaskPerm(const unsigned AndMask,
8665 const unsigned OrMask,
8666 const unsigned XorMask) {
8667 using namespace llvm::AMDGPU::Swizzle;
8668
8669 return BITMASK_PERM_ENC |
8670 (AndMask << BITMASK_AND_SHIFT) |
8671 (OrMask << BITMASK_OR_SHIFT) |
8672 (XorMask << BITMASK_XOR_SHIFT);
8673}
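// Worked example (bitmask swizzle semantics as defined by the ds_swizzle
// bitmask mode: a lane sources from lane ((id & AndMask) | OrMask) ^
// XorMask): encodeBitmaskPerm(0x1C, 0x01, 0x00) makes every lane in each
// group of four read lane 1 of its group.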
8674
8675bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8676 const unsigned MaxVal,
8677 const Twine &ErrMsg, SMLoc &Loc) {
8678 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8679 return false;
8680 }
8681 Loc = getLoc();
8682 if (!parseExpr(Op)) {
8683 return false;
8684 }
8685 if (Op < MinVal || Op > MaxVal) {
8686 Error(Loc, ErrMsg);
8687 return false;
8688 }
8689
8690 return true;
8691}
8692
8693bool
8694AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8695 const unsigned MinVal,
8696 const unsigned MaxVal,
8697 const StringRef ErrMsg) {
8698 SMLoc Loc;
8699 for (unsigned i = 0; i < OpNum; ++i) {
8700 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8701 return false;
8702 }
8703
8704 return true;
8705}
8706
8707bool
8708AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8709 using namespace llvm::AMDGPU::Swizzle;
8710
8711 int64_t Lane[LANE_NUM];
8712 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8713 "expected a 2-bit lane id")) {
8714 Imm = QUAD_PERM_ENC;
8715 for (unsigned I = 0; I < LANE_NUM; ++I) {
8716 Imm |= Lane[I] << (LANE_SHIFT * I);
8717 }
8718 return true;
8719 }
8720 return false;
8721}
8722
8723bool
8724AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8725 using namespace llvm::AMDGPU::Swizzle;
8726
8727 SMLoc Loc;
8728 int64_t GroupSize;
8729 int64_t LaneIdx;
8730
8731 if (!parseSwizzleOperand(GroupSize,
8732 2, 32,
8733 "group size must be in the interval [2,32]",
8734 Loc)) {
8735 return false;
8736 }
8737 if (!isPowerOf2_64(GroupSize)) {
8738 Error(Loc, "group size must be a power of two");
8739 return false;
8740 }
8741 if (parseSwizzleOperand(LaneIdx,
8742 0, GroupSize - 1,
8743 "lane id must be in the interval [0,group size - 1]",
8744 Loc)) {
8745 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8746 return true;
8747 }
8748 return false;
8749}
8750
8751bool
8752AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8753 using namespace llvm::AMDGPU::Swizzle;
8754
8755 SMLoc Loc;
8756 int64_t GroupSize;
8757
8758 if (!parseSwizzleOperand(GroupSize,
8759 2, 32,
8760 "group size must be in the interval [2,32]",
8761 Loc)) {
8762 return false;
8763 }
8764 if (!isPowerOf2_64(GroupSize)) {
8765 Error(Loc, "group size must be a power of two");
8766 return false;
8767 }
8768
8769 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8770 return true;
8771}
8772
8773bool
8774AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8775 using namespace llvm::AMDGPU::Swizzle;
8776
8777 SMLoc Loc;
8778 int64_t GroupSize;
8779
8780 if (!parseSwizzleOperand(GroupSize,
8781 1, 16,
8782 "group size must be in the interval [1,16]",
8783 Loc)) {
8784 return false;
8785 }
8786 if (!isPowerOf2_64(GroupSize)) {
8787 Error(Loc, "group size must be a power of two");
8788 return false;
8789 }
8790
8791 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8792 return true;
8793}
8794
8795bool
8796AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8797 using namespace llvm::AMDGPU::Swizzle;
8798
8799 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8800 return false;
8801 }
8802
8803 StringRef Ctl;
8804 SMLoc StrLoc = getLoc();
8805 if (!parseString(Ctl)) {
8806 return false;
8807 }
8808 if (Ctl.size() != BITMASK_WIDTH) {
8809 Error(StrLoc, "expected a 5-character mask");
8810 return false;
8811 }
8812
8813 unsigned AndMask = 0;
8814 unsigned OrMask = 0;
8815 unsigned XorMask = 0;
8816
8817 for (size_t i = 0; i < Ctl.size(); ++i) {
8818 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8819 switch(Ctl[i]) {
8820 default:
8821 Error(StrLoc, "invalid mask");
8822 return false;
8823 case '0':
8824 break;
8825 case '1':
8826 OrMask |= Mask;
8827 break;
8828 case 'p':
8829 AndMask |= Mask;
8830 break;
8831 case 'i':
8832 AndMask |= Mask;
8833 XorMask |= Mask;
8834 break;
8835 }
8836 }
8837
8838 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8839 return true;
8840}
8841
8842bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
8843 using namespace llvm::AMDGPU::Swizzle;
8844
8845 if (!AMDGPU::isGFX9Plus(getSTI())) {
8846 Error(getLoc(), "FFT mode swizzle not supported on this GPU");
8847 return false;
8848 }
8849
8850 int64_t Swizzle;
8851 SMLoc Loc;
8852 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8853 "FFT swizzle must be in the interval [0," +
8854 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
8855 Loc))
8856 return false;
8857
8858 Imm = FFT_MODE_ENC | Swizzle;
8859 return true;
8860}
8861
8862bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
8863 using namespace llvm::AMDGPU::Swizzle;
8864
8865 if (!AMDGPU::isGFX9Plus(getSTI())) {
8866 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
8867 return false;
8868 }
8869
8870 SMLoc Loc;
8871 int64_t Direction;
8872
8873 if (!parseSwizzleOperand(Direction, 0, 1,
8874 "direction must be 0 (left) or 1 (right)", Loc))
8875 return false;
8876
8877 int64_t RotateSize;
8878 if (!parseSwizzleOperand(
8879 RotateSize, 0, ROTATE_MAX_SIZE,
8880 "number of threads to rotate must be in the interval [0," +
8881 Twine(ROTATE_MAX_SIZE) + Twine(']'),
8882 Loc))
8883 return false;
8884
8885 Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) |
8886 (RotateSize << ROTATE_SIZE_SHIFT);
8887 return true;
8888}
8889
8890bool
8891AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8892
8893 SMLoc OffsetLoc = getLoc();
8894
8895 if (!parseExpr(Imm, "a swizzle macro")) {
8896 return false;
8897 }
8898 if (!isUInt<16>(Imm)) {
8899 Error(OffsetLoc, "expected a 16-bit offset");
8900 return false;
8901 }
8902 return true;
8903}
8904
8905bool
8906AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8907 using namespace llvm::AMDGPU::Swizzle;
8908
8909 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
8910
8911 SMLoc ModeLoc = getLoc();
8912 bool Ok = false;
8913
8914 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8915 Ok = parseSwizzleQuadPerm(Imm);
8916 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8917 Ok = parseSwizzleBitmaskPerm(Imm);
8918 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8919 Ok = parseSwizzleBroadcast(Imm);
8920 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8921 Ok = parseSwizzleSwap(Imm);
8922 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8923 Ok = parseSwizzleReverse(Imm);
8924 } else if (trySkipId(IdSymbolic[ID_FFT])) {
8925 Ok = parseSwizzleFFT(Imm);
8926 } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
8927 Ok = parseSwizzleRotate(Imm);
8928 } else {
8929 Error(ModeLoc, "expected a swizzle mode");
8930 }
8931
8932 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
8933 }
8934
8935 return false;
8936}
8937
8938ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8939 SMLoc S = getLoc();
8940 int64_t Imm = 0;
8941
8942 if (trySkipId("offset")) {
8943
8944 bool Ok = false;
8945 if (skipToken(AsmToken::Colon, "expected a colon")) {
8946 if (trySkipId("swizzle")) {
8947 Ok = parseSwizzleMacro(Imm);
8948 } else {
8949 Ok = parseSwizzleOffset(Imm);
8950 }
8951 }
8952
8953 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8954
8955 return ParseStatus::Success;
8956 }
8957 return ParseStatus::NoMatch;
8958}
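// Illustrative swizzle operands (macro names from IdSymbolic, dispatched in
// parseSwizzleMacro above; the mnemonic is assumed):
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pip")
//   ds_swizzle_b32 v0, v1 offset:swizzle(BROADCAST, 8, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 16)
//   ds_swizzle_b32 v0, v1 offset:0x8000        // raw 16-bit offset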
8959
8960bool
8961AMDGPUOperand::isSwizzle() const {
8962 return isImmTy(ImmTySwizzle);
8963}
8964
8965//===----------------------------------------------------------------------===//
8966// VGPR Index Mode
8967//===----------------------------------------------------------------------===//
8968
8969int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8970
8971 using namespace llvm::AMDGPU::VGPRIndexMode;
8972
8973 if (trySkipToken(AsmToken::RParen)) {
8974 return OFF;
8975 }
8976
8977 int64_t Imm = 0;
8978
8979 while (true) {
8980 unsigned Mode = 0;
8981 SMLoc S = getLoc();
8982
8983 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8984 if (trySkipId(IdSymbolic[ModeId])) {
8985 Mode = 1 << ModeId;
8986 break;
8987 }
8988 }
8989
8990 if (Mode == 0) {
8991 Error(S, (Imm == 0)?
8992 "expected a VGPR index mode or a closing parenthesis" :
8993 "expected a VGPR index mode");
8994 return UNDEF;
8995 }
8996
8997 if (Imm & Mode) {
8998 Error(S, "duplicate VGPR index mode");
8999 return UNDEF;
9000 }
9001 Imm |= Mode;
9002
9003 if (trySkipToken(AsmToken::RParen))
9004 break;
9005 if (!skipToken(AsmToken::Comma,
9006 "expected a comma or a closing parenthesis"))
9007 return UNDEF;
9008 }
9009
9010 return Imm;
9011}
9012
9013ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
9014
9015 using namespace llvm::AMDGPU::VGPRIndexMode;
9016
9017 int64_t Imm = 0;
9018 SMLoc S = getLoc();
9019
9020 if (trySkipId("gpr_idx", AsmToken::LParen)) {
9021 Imm = parseGPRIdxMacro();
9022 if (Imm == UNDEF)
9023 return ParseStatus::Failure;
9024 } else {
9025 if (getParser().parseAbsoluteExpression(Imm))
9026 return ParseStatus::Failure;
9027 if (Imm < 0 || !isUInt<4>(Imm))
9028 return Error(S, "invalid immediate: only 4-bit values are legal");
9029 }
9030
9031 Operands.push_back(
9032 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
9033 return ParseStatus::Success;
9034}
9035
9036bool AMDGPUOperand::isGPRIdxMode() const {
9037 return isImmTy(ImmTyGprIdxMode);
9038}
9039
9040//===----------------------------------------------------------------------===//
9041// sopp branch targets
9042//===----------------------------------------------------------------------===//
9043
9044ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
9045
9046 // Make sure we are not parsing something
9047 // that looks like a label or an expression but is not.
9048 // This will improve error messages.
9049 if (isRegister() || isModifier())
9050 return ParseStatus::NoMatch;
9051
9052 if (!parseExpr(Operands))
9053 return ParseStatus::Failure;
9054
9055 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
9056 assert(Opr.isImm() || Opr.isExpr());
9057 SMLoc Loc = Opr.getStartLoc();
9058
9059 // Currently we do not support arbitrary expressions as branch targets.
9060 // Only labels and absolute expressions are accepted.
9061 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
9062 Error(Loc, "expected an absolute expression or a label");
9063 } else if (Opr.isImm() && !Opr.isS16Imm()) {
9064 Error(Loc, "expected a 16-bit signed jump offset");
9065 }
9066
9067 return ParseStatus::Success;
9068}
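// Accepted branch-target forms, for illustration:
//   s_branch loop_end   // a label, resolved later
//   s_branch 0x3        // an absolute expression; must fit in 16 signed bits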
9069
9070//===----------------------------------------------------------------------===//
9071// Boolean holding registers
9072//===----------------------------------------------------------------------===//
9073
9074ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
9075 return parseReg(Operands);
9076}
9077
9078//===----------------------------------------------------------------------===//
9079// mubuf
9080//===----------------------------------------------------------------------===//
9081
9082void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9083 const OperandVector &Operands,
9084 bool IsAtomic) {
9085 OptionalImmIndexMap OptionalIdx;
9086 unsigned FirstOperandIdx = 1;
9087 bool IsAtomicReturn = false;
9088
9089 if (IsAtomic) {
9090    IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
9091                     SIInstrFlags::IsAtomicRet;
9092  }
9093
9094 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
9095 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9096
9097 // Add the register arguments
9098 if (Op.isReg()) {
9099 Op.addRegOperands(Inst, 1);
9100 // Insert a tied src for atomic return dst.
9101 // This cannot be postponed as subsequent calls to
9102 // addImmOperands rely on correct number of MC operands.
9103 if (IsAtomicReturn && i == FirstOperandIdx)
9104 Op.addRegOperands(Inst, 1);
9105 continue;
9106 }
9107
9108 // Handle the case where soffset is an immediate
9109 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9110 Op.addImmOperands(Inst, 1);
9111 continue;
9112 }
9113
9114 // Handle tokens like 'offen' which are sometimes hard-coded into the
9115 // asm string. There are no MCInst operands for these.
9116 if (Op.isToken()) {
9117 continue;
9118 }
9119 assert(Op.isImm());
9120
9121 // Handle optional arguments
9122 OptionalIdx[Op.getImmTy()] = i;
9123 }
9124
9125 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
9126 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
9127 // Parse a dummy operand as a placeholder for the SWZ operand. This enforces
9128 // agreement between MCInstrDesc.getNumOperands and MCInst.getNumOperands.
9129  Inst.addOperand(MCOperand::createImm(0));
9130}
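// A typical MUBUF instruction handled by this converter (illustrative):
//   buffer_load_dword v5, v0, s[8:11], s3 offen offset:4095
// Here 'offen' is a token with no MCInst operand, while offset and cpol are
// optional immediates recorded in OptionalIdx and appended above.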
9131
9132//===----------------------------------------------------------------------===//
9133// smrd
9134//===----------------------------------------------------------------------===//
9135
9136bool AMDGPUOperand::isSMRDOffset8() const {
9137 return isImmLiteral() && isUInt<8>(getImm());
9138}
9139
9140bool AMDGPUOperand::isSMEMOffset() const {
9141 // Offset range is checked later by validator.
9142 return isImmLiteral();
9143}
9144
9145bool AMDGPUOperand::isSMRDLiteralOffset() const {
9146 // 32-bit literals are only supported on CI and we only want to use them
9147 // when the offset is > 8-bits.
9148 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
9149}
9150
9151//===----------------------------------------------------------------------===//
9152// vop3
9153//===----------------------------------------------------------------------===//
9154
9155static bool ConvertOmodMul(int64_t &Mul) {
9156 if (Mul != 1 && Mul != 2 && Mul != 4)
9157 return false;
9158
9159 Mul >>= 1;
9160 return true;
9161}
9162
9163static bool ConvertOmodDiv(int64_t &Div) {
9164 if (Div == 1) {
9165 Div = 0;
9166 return true;
9167 }
9168
9169 if (Div == 2) {
9170 Div = 3;
9171 return true;
9172 }
9173
9174 return false;
9175}
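// Taken together, these map the omod syntax onto the 2-bit OMOD field:
// mul:1 -> 0 (identity), mul:2 -> 1, mul:4 -> 2, div:2 -> 3; e.g.
// (illustrative): v_add_f32 v0, v1, v2 mul:2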
9176
9177// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9178// This is intentional and ensures compatibility with sp3.
9179// See bug 35397 for details.
9180bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9181 if (BoundCtrl == 0 || BoundCtrl == 1) {
9182 if (!isGFX11Plus())
9183 BoundCtrl = 1;
9184 return true;
9185 }
9186 return false;
9187}
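// Thus, on pre-gfx11 targets both of these assemble to the same encoding
// (illustrative):
//   v_mov_b32_dpp v0, v1 quad_perm:[0,1,2,3] bound_ctrl:0
//   v_mov_b32_dpp v0, v1 quad_perm:[0,1,2,3] bound_ctrl:1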
9188
9189void AMDGPUAsmParser::onBeginOfFile() {
9190 if (!getParser().getStreamer().getTargetStreamer() ||
9191 getSTI().getTargetTriple().getArch() == Triple::r600)
9192 return;
9193
9194 if (!getTargetStreamer().getTargetID())
9195 getTargetStreamer().initializeTargetID(getSTI(),
9196 getSTI().getFeatureString());
9197
9198 if (isHsaAbi(getSTI()))
9199 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9200}
9201
9202/// Parse AMDGPU specific expressions.
9203///
9204/// expr ::= or(expr, ...) |
9205/// max(expr, ...)
9206///
9207bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9208 using AGVK = AMDGPUMCExpr::VariantKind;
9209
9210 if (isToken(AsmToken::Identifier)) {
9211 StringRef TokenId = getTokenStr();
9212 AGVK VK = StringSwitch<AGVK>(TokenId)
9213 .Case("max", AGVK::AGVK_Max)
9214 .Case("or", AGVK::AGVK_Or)
9215 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
9216 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9217 .Case("alignto", AGVK::AGVK_AlignTo)
9218 .Case("occupancy", AGVK::AGVK_Occupancy)
9219 .Default(AGVK::AGVK_None);
9220
9221 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
9222      SmallVector<const MCExpr *, 4> Exprs;
9223      uint64_t CommaCount = 0;
9224 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
9225 lex(); // Eat '('
9226 while (true) {
9227 if (trySkipToken(AsmToken::RParen)) {
9228 if (Exprs.empty()) {
9229 Error(getToken().getLoc(),
9230 "empty " + Twine(TokenId) + " expression");
9231 return true;
9232 }
9233 if (CommaCount + 1 != Exprs.size()) {
9234 Error(getToken().getLoc(),
9235 "mismatch of commas in " + Twine(TokenId) + " expression");
9236 return true;
9237 }
9238 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
9239 return false;
9240 }
9241 const MCExpr *Expr;
9242 if (getParser().parseExpression(Expr, EndLoc))
9243 return true;
9244 Exprs.push_back(Expr);
9245 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
9246 if (LastTokenWasComma)
9247 CommaCount++;
9248 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
9249 Error(getToken().getLoc(),
9250 "unexpected token in " + Twine(TokenId) + " expression");
9251 return true;
9252 }
9253 }
9254 }
9255 }
9256 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
9257}
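// These folded expressions can appear anywhere the MC layer parses an
// expression, e.g. in a directive (symbol names here are hypothetical):
//   .set total_vgprs, max(kernel_a.vgprs, kernel_b.vgprs)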
9258
9259ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9260 StringRef Name = getTokenStr();
9261 if (Name == "mul") {
9262 return parseIntWithPrefix("mul", Operands,
9263 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
9264 }
9265
9266 if (Name == "div") {
9267 return parseIntWithPrefix("div", Operands,
9268 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
9269 }
9270
9271 return ParseStatus::NoMatch;
9272}
9273
9274// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
9275// the number of src operands present, then copies that bit into src0_modifiers.
9276static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
9277 int Opc = Inst.getOpcode();
9278 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9279 if (OpSelIdx == -1)
9280 return;
9281
9282 int SrcNum;
9283 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9284 AMDGPU::OpName::src2};
9285 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
9286 ++SrcNum)
9287 ;
9288 assert(SrcNum > 0);
9289
9290 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9291
9292 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
9293 if (DstIdx == -1)
9294 return;
9295
9296 const MCOperand &DstOp = Inst.getOperand(DstIdx);
9297 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
9298 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9299 if (DstOp.isReg() &&
9300 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
9301 if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
9302 ModVal |= SISrcMods::DST_OP_SEL;
9303 } else {
9304 if ((OpSel & (1 << SrcNum)) != 0)
9305 ModVal |= SISrcMods::DST_OP_SEL;
9306 }
9307 Inst.getOperand(ModIdx).setImm(ModVal);
9308}
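// For example, with src0 and src1 present (SrcNum == 2) the dst bit is
// op_sel[2], so an op_sel:[0,0,1] modifier on such an instruction results
// in DST_OP_SEL being set in src0_modifiers here.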
9309
9310void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9311 const OperandVector &Operands) {
9312 cvtVOP3P(Inst, Operands);
9313 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9314}
9315
9316void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
9317 OptionalImmIndexMap &OptionalIdx) {
9318 cvtVOP3P(Inst, Operands, OptionalIdx);
9319 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9320}
9321
9322static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9323 return
9324      // 1. This operand holds input modifiers,
9325      Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9326      // 2. this is not the last operand,
9327      && Desc.NumOperands > (OpNum + 1)
9328      // 3. the next operand has a register class,
9329      && Desc.operands()[OpNum + 1].RegClass != -1
9330      // 4. and the next register is not tied to any other operand.
9331      && Desc.getOperandConstraint(OpNum + 1,
9332                                   MCOI::TIED_TO) == -1;
9333}
9334
9335void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
9336 unsigned Opc = Inst.getOpcode();
9337 constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9338 AMDGPU::OpName::src2};
9339 constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9340 AMDGPU::OpName::src1_modifiers,
9341 AMDGPU::OpName::src2_modifiers};
9342 for (int J = 0; J < 3; ++J) {
9343 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9344 if (OpIdx == -1)
9345 // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but
9346 // no src1. So continue instead of break.
9347 continue;
9348
9349 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9350 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9351
9352 if ((OpSel & (1 << J)) != 0)
9353 ModVal |= SISrcMods::OP_SEL_0;
9354 // op_sel[3] is encoded in src0_modifiers.
9355 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
9356 ModVal |= SISrcMods::DST_OP_SEL;
9357
9358 Inst.getOperand(ModIdx).setImm(ModVal);
9359 }
9360}
9361
9362void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9363{
9364 OptionalImmIndexMap OptionalIdx;
9365 unsigned Opc = Inst.getOpcode();
9366
9367 unsigned I = 1;
9368 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9369 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9370 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9371 }
9372
9373 for (unsigned E = Operands.size(); I != E; ++I) {
9374 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9375    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9376      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9377 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
9378 Op.isInterpAttrChan()) {
9379 Inst.addOperand(MCOperand::createImm(Op.getImm()));
9380 } else if (Op.isImmModifier()) {
9381 OptionalIdx[Op.getImmTy()] = I;
9382 } else {
9383 llvm_unreachable("unhandled operand type");
9384 }
9385 }
9386
9387 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
9388 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9389 AMDGPUOperand::ImmTyHigh);
9390
9391 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9392 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9393 AMDGPUOperand::ImmTyClamp);
9394
9395 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9396 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9397 AMDGPUOperand::ImmTyOModSI);
9398
9399 // Some v_interp instructions use op_sel[3] for dst.
9400 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9401 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9402 AMDGPUOperand::ImmTyOpSel);
9403 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9404 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9405
9406 cvtOpSelHelper(Inst, OpSel);
9407 }
9408}
9409
9410void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9411{
9412 OptionalImmIndexMap OptionalIdx;
9413 unsigned Opc = Inst.getOpcode();
9414
9415 unsigned I = 1;
9416 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9417 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9418 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9419 }
9420
9421 for (unsigned E = Operands.size(); I != E; ++I) {
9422 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9423    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9424      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9425 } else if (Op.isImmModifier()) {
9426 OptionalIdx[Op.getImmTy()] = I;
9427 } else {
9428 llvm_unreachable("unhandled operand type");
9429 }
9430 }
9431
9432 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
9433
9434 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9435 if (OpSelIdx != -1)
9436 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9437
9438 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
9439
9440 if (OpSelIdx == -1)
9441 return;
9442
9443 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9444 cvtOpSelHelper(Inst, OpSel);
9445}
9446
9447void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9448 const OperandVector &Operands) {
9449 OptionalImmIndexMap OptionalIdx;
9450 unsigned Opc = Inst.getOpcode();
9451 unsigned I = 1;
9452 int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
9453
9454 const MCInstrDesc &Desc = MII.get(Opc);
9455
9456 for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
9457 static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
9458
9459 for (unsigned E = Operands.size(); I != E; ++I) {
9460 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
9461 int NumOperands = Inst.getNumOperands();
9462    // The order of operands in the MCInst differs from the parsed operand
9463    // order. Add dummy cbsz and blgp operands at the corresponding MCInst
9464    // operand indices so the scale values that follow are placed correctly.
9465    if (NumOperands == CbszOpIdx) {
9466      Inst.addOperand(MCOperand::createImm(0));
9467      Inst.addOperand(MCOperand::createImm(0));
9468    }
9469 if (isRegOrImmWithInputMods(Desc, NumOperands)) {
9470 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9471 } else if (Op.isImmModifier()) {
9472 OptionalIdx[Op.getImmTy()] = I;
9473 } else {
9474 Op.addRegOrImmOperands(Inst, 1);
9475 }
9476 }
9477
9478 // Insert CBSZ and BLGP operands for F8F6F4 variants
9479 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9480 if (CbszIdx != OptionalIdx.end()) {
9481 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
9482 Inst.getOperand(CbszOpIdx).setImm(CbszVal);
9483 }
9484
9485 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
9486 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9487 if (BlgpIdx != OptionalIdx.end()) {
9488 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
9489 Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
9490 }
9491
9492  // Add dummy src_modifiers
9493  Inst.addOperand(MCOperand::createImm(0));
9494  Inst.addOperand(MCOperand::createImm(0));
9495
9496 // Handle op_sel fields
9497
9498 unsigned OpSel = 0;
9499 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9500 if (OpselIdx != OptionalIdx.end()) {
9501 OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
9502 .getImm();
9503 }
9504
9505 unsigned OpSelHi = 0;
9506 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9507 if (OpselHiIdx != OptionalIdx.end()) {
9508 OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
9509 .getImm();
9510 }
9511 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9512 AMDGPU::OpName::src1_modifiers};
9513
9514 for (unsigned J = 0; J < 2; ++J) {
9515 unsigned ModVal = 0;
9516 if (OpSel & (1 << J))
9517 ModVal |= SISrcMods::OP_SEL_0;
9518 if (OpSelHi & (1 << J))
9519 ModVal |= SISrcMods::OP_SEL_1;
9520
9521 const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9522 Inst.getOperand(ModIdx).setImm(ModVal);
9523 }
9524}
9525
9526void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9527 OptionalImmIndexMap &OptionalIdx) {
9528 unsigned Opc = Inst.getOpcode();
9529
9530 unsigned I = 1;
9531 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9532 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9533 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9534 }
9535
9536 for (unsigned E = Operands.size(); I != E; ++I) {
9537 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9538    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9539      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9540 } else if (Op.isImmModifier()) {
9541 OptionalIdx[Op.getImmTy()] = I;
9542 } else {
9543 Op.addRegOrImmOperands(Inst, 1);
9544 }
9545 }
9546
9547 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
9548 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9549 AMDGPUOperand::ImmTyScaleSel);
9550
9551 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9552 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9553 AMDGPUOperand::ImmTyClamp);
9554
9555 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
9556 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
9557 Inst.addOperand(Inst.getOperand(0));
9558 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9559 AMDGPUOperand::ImmTyByteSel);
9560 }
9561
9562 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9563 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9564 AMDGPUOperand::ImmTyOModSI);
9565
9566  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9567  // they have a src2 register operand that is tied to the dst operand.
9568  // We don't allow modifiers for this operand in the assembler, so
9569  // src2_modifiers should be 0.
9570 if (isMAC(Opc)) {
9571 auto *it = Inst.begin();
9572 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
9573 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
9574 ++it;
9575 // Copy the operand to ensure it's not invalidated when Inst grows.
9576 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
9577 }
9578}
9579
9580void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9581 OptionalImmIndexMap OptionalIdx;
9582 cvtVOP3(Inst, Operands, OptionalIdx);
9583}
9584
9585void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
9586 OptionalImmIndexMap &OptIdx) {
9587 const int Opc = Inst.getOpcode();
9588 const MCInstrDesc &Desc = MII.get(Opc);
9589
9590 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
9591
9592 if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9593 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9594 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9595 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9596 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx11 ||
9597 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx11 ||
9598 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9599 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
9600 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
9601 Inst.addOperand(Inst.getOperand(0));
9602 }
9603
9604 // Append vdst_in only if a previous converter (cvtVOP3DPP for DPP variants,
9605 // cvtVOP3 for byte_sel variants) hasn't already placed it. Use the position
9606 // of the named operand to detect that, the same way cvtVOP3DPP does
9607 // internally.
9608 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
9609 if (VdstInIdx != -1 && VdstInIdx == static_cast<int>(Inst.getNumOperands()))
9610 Inst.addOperand(Inst.getOperand(0));
9611
9612 int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
9613 if (BitOp3Idx != -1) {
9614 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9615 }
9616
9617 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
9618 // instruction, and then figure out where to actually put the modifiers
9619
9620 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9621 if (OpSelIdx != -1) {
9622 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
9623 }
9624
9625 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
9626 if (OpSelHiIdx != -1) {
9627 int DefaultVal = IsPacked ? -1 : 0;
9628 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
9629 DefaultVal);
9630 }
9631
9632 int MatrixAFMTIdx =
9633 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
9634 if (MatrixAFMTIdx != -1) {
9635 addOptionalImmOperand(Inst, Operands, OptIdx,
9636 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9637 }
9638
9639 int MatrixBFMTIdx =
9640 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
9641 if (MatrixBFMTIdx != -1) {
9642 addOptionalImmOperand(Inst, Operands, OptIdx,
9643 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9644 }
9645
9646 int MatrixAScaleIdx =
9647 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
9648 if (MatrixAScaleIdx != -1) {
9649 addOptionalImmOperand(Inst, Operands, OptIdx,
9650 AMDGPUOperand::ImmTyMatrixAScale, 0);
9651 }
9652
9653 int MatrixBScaleIdx =
9654 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
9655 if (MatrixBScaleIdx != -1) {
9656 addOptionalImmOperand(Inst, Operands, OptIdx,
9657 AMDGPUOperand::ImmTyMatrixBScale, 0);
9658 }
9659
9660 int MatrixAScaleFmtIdx =
9661 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9662 if (MatrixAScaleFmtIdx != -1) {
9663 addOptionalImmOperand(Inst, Operands, OptIdx,
9664 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9665 }
9666
9667 int MatrixBScaleFmtIdx =
9668 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9669 if (MatrixBScaleFmtIdx != -1) {
9670 addOptionalImmOperand(Inst, Operands, OptIdx,
9671 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9672 }
9673
9674 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
9675 addOptionalImmOperand(Inst, Operands, OptIdx,
9676 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9677
9678 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
9679 addOptionalImmOperand(Inst, Operands, OptIdx,
9680 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9681
9682 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
9683 if (NegLoIdx != -1)
9684 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
9685
9686 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
9687 if (NegHiIdx != -1)
9688 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
9689
9690 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9691 AMDGPU::OpName::src2};
9692 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9693 AMDGPU::OpName::src1_modifiers,
9694 AMDGPU::OpName::src2_modifiers};
9695
9696 unsigned OpSel = 0;
9697 unsigned OpSelHi = 0;
9698 unsigned NegLo = 0;
9699 unsigned NegHi = 0;
9700
9701 if (OpSelIdx != -1)
9702 OpSel = Inst.getOperand(OpSelIdx).getImm();
9703
9704 if (OpSelHiIdx != -1)
9705 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
9706
9707 if (NegLoIdx != -1)
9708 NegLo = Inst.getOperand(NegLoIdx).getImm();
9709
9710 if (NegHiIdx != -1)
9711 NegHi = Inst.getOperand(NegHiIdx).getImm();
9712
9713 for (int J = 0; J < 3; ++J) {
9714 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9715 if (OpIdx == -1)
9716 break;
9717
9718 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9719
9720 if (ModIdx == -1)
9721 continue;
9722
9723 uint32_t ModVal = 0;
9724
9725 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
9726 if (SrcOp.isReg() && getMRI()
9727 ->getRegClass(AMDGPU::VGPR_16RegClassID)
9728 .contains(SrcOp.getReg())) {
9729 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
9730 if (VGPRSuffixIsHi)
9731 ModVal |= SISrcMods::OP_SEL_0;
9732 } else {
9733 if ((OpSel & (1 << J)) != 0)
9734 ModVal |= SISrcMods::OP_SEL_0;
9735 }
9736
9737 if ((OpSelHi & (1 << J)) != 0)
9738 ModVal |= SISrcMods::OP_SEL_1;
9739
9740 if ((NegLo & (1 << J)) != 0)
9741 ModVal |= SISrcMods::NEG;
9742
9743 if ((NegHi & (1 << J)) != 0)
9744 ModVal |= SISrcMods::NEG_HI;
9745
9746 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
9747 }
9748}
9749
9750void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9751 OptionalImmIndexMap OptIdx;
9752 cvtVOP3(Inst, Operands, OptIdx);
9753 cvtVOP3P(Inst, Operands, OptIdx);
9754}
9755
9756 static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
9757                                   unsigned i, unsigned Opc,
9758 AMDGPU::OpName OpName) {
9759 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
9760 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9761 else
9762 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
9763}
9764
9765void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
9766 unsigned Opc = Inst.getOpcode();
9767
9768 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9769 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
9770 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
9771 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
9772 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
9773
9774 OptionalImmIndexMap OptIdx;
9775 for (unsigned i = 5; i < Operands.size(); ++i) {
9776 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9777 OptIdx[Op.getImmTy()] = i;
9778 }
9779
9780 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
9781 addOptionalImmOperand(Inst, Operands, OptIdx,
9782 AMDGPUOperand::ImmTyIndexKey8bit);
9783
9784 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
9785 addOptionalImmOperand(Inst, Operands, OptIdx,
9786 AMDGPUOperand::ImmTyIndexKey16bit);
9787
9788 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
9789 addOptionalImmOperand(Inst, Operands, OptIdx,
9790 AMDGPUOperand::ImmTyIndexKey32bit);
9791
9792 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9793 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
9794
9795 cvtVOP3P(Inst, Operands, OptIdx);
9796}
9797
9798//===----------------------------------------------------------------------===//
9799// VOPD
9800//===----------------------------------------------------------------------===//
9801
9802ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9803 if (!hasVOPD(getSTI()))
9804 return ParseStatus::NoMatch;
9805
9806 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
9807 SMLoc S = getLoc();
9808 lex();
9809 lex();
9810 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
9811 SMLoc OpYLoc = getLoc();
9812 StringRef OpYName;
9813 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
9814 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
9815 return ParseStatus::Success;
9816 }
9817 return Error(OpYLoc, "expected a VOPDY instruction after ::");
9818 }
9819 return ParseStatus::NoMatch;
9820}
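// VOPD pairs two instructions with '::', e.g. (gfx11+, illustrative):
//   v_dual_mul_f32 v0, v1, v2 :: v_dual_add_f32 v3, v4, v5
// Only the '::' token and the VOPDY mnemonic are consumed here; the combined
// instruction is matched later.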
9821
9822// Create VOPD MCInst operands using parsed assembler operands.
9823void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9824 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9825
9826 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
9827 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9828    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9829      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9830 return;
9831 }
9832 if (Op.isReg()) {
9833 Op.addRegOperands(Inst, 1);
9834 return;
9835 }
9836 if (Op.isImm()) {
9837 Op.addImmOperands(Inst, 1);
9838 return;
9839 }
9840 llvm_unreachable("Unhandled operand type in cvtVOPD");
9841 };
9842
9843 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
9844
9845 // MCInst operands are ordered as follows:
9846 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
9847
9848 for (auto CompIdx : VOPD::COMPONENTS) {
9849 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9850 }
9851
9852 for (auto CompIdx : VOPD::COMPONENTS) {
9853 const auto &CInfo = InstInfo[CompIdx];
9854 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9855 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9856 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
9857 if (CInfo.hasSrc2Acc())
9858 addOp(CInfo.getIndexOfDstInParsedOperands());
9859 }
9860
9861 int BitOp3Idx =
9862 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
9863 if (BitOp3Idx != -1) {
9864 OptionalImmIndexMap OptIdx;
9865 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
9866 if (Op.isImm())
9867 OptIdx[Op.getImmTy()] = Operands.size() - 1;
9868
9869 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9870 }
9871}
9872
9873//===----------------------------------------------------------------------===//
9874// dpp
9875//===----------------------------------------------------------------------===//
9876
9877bool AMDGPUOperand::isDPP8() const {
9878 return isImmTy(ImmTyDPP8);
9879}
9880
9881bool AMDGPUOperand::isDPPCtrl() const {
9882 using namespace AMDGPU::DPP;
9883
9884 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
9885 if (result) {
9886 int64_t Imm = getImm();
9887 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9888 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9889 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9890 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9891 (Imm == DppCtrl::WAVE_SHL1) ||
9892 (Imm == DppCtrl::WAVE_ROL1) ||
9893 (Imm == DppCtrl::WAVE_SHR1) ||
9894 (Imm == DppCtrl::WAVE_ROR1) ||
9895 (Imm == DppCtrl::ROW_MIRROR) ||
9896 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9897 (Imm == DppCtrl::BCAST15) ||
9898 (Imm == DppCtrl::BCAST31) ||
9899 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9900 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9901 }
9902 return false;
9903}
9904
9905//===----------------------------------------------------------------------===//
9906// mAI
9907//===----------------------------------------------------------------------===//
9908
9909bool AMDGPUOperand::isBLGP() const {
9910 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
9911}
9912
9913bool AMDGPUOperand::isS16Imm() const {
9914 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
9915}
9916
9917bool AMDGPUOperand::isU16Imm() const {
9918 return isImmLiteral() && isUInt<16>(getImm());
9919}
9920
9921//===----------------------------------------------------------------------===//
9922// dim
9923//===----------------------------------------------------------------------===//
9924
9925bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
9926 // We want to allow "dim:1D" etc.,
9927 // but the initial 1 is tokenized as an integer.
9928 std::string Token;
9929 if (isToken(AsmToken::Integer)) {
9930 SMLoc Loc = getToken().getEndLoc();
9931 Token = std::string(getTokenStr());
9932 lex();
9933 if (getLoc() != Loc)
9934 return false;
9935 }
9936
9937 StringRef Suffix;
9938 if (!parseId(Suffix))
9939 return false;
9940 Token += Suffix;
9941
9942 StringRef DimId = Token;
9943 DimId.consume_front("SQ_RSRC_IMG_");
9944
9945 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
9946 if (!DimInfo)
9947 return false;
9948
9949 Encoding = DimInfo->Encoding;
9950 return true;
9951}
9952
9953ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
9954 if (!isGFX10Plus())
9955 return ParseStatus::NoMatch;
9956
9957 SMLoc S = getLoc();
9958
9959 if (!trySkipId("dim", AsmToken::Colon))
9960 return ParseStatus::NoMatch;
9961
9962 unsigned Encoding;
9963 SMLoc Loc = getLoc();
9964 if (!parseDimId(Encoding))
9965 return Error(Loc, "invalid dim value");
9966
9967 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9968 AMDGPUOperand::ImmTyDim));
9969 return ParseStatus::Success;
9970}
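// Both spellings resolve to the same dim encoding, e.g. (gfx10+,
// illustrative):
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:1D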
9971
9972//===----------------------------------------------------------------------===//
9973// dpp
9974//===----------------------------------------------------------------------===//
9975
9976ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9977 SMLoc S = getLoc();
9978
9979 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9980 return ParseStatus::NoMatch;
9981
9982 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9983
9984 int64_t Sels[8];
9985
9986 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9987 return ParseStatus::Failure;
9988
9989 for (size_t i = 0; i < 8; ++i) {
9990 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9991 return ParseStatus::Failure;
9992
9993 SMLoc Loc = getLoc();
9994 if (getParser().parseAbsoluteExpression(Sels[i]))
9995 return ParseStatus::Failure;
9996 if (0 > Sels[i] || 7 < Sels[i])
9997 return Error(Loc, "expected a 3-bit value");
9998 }
9999
10000 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10001 return ParseStatus::Failure;
10002
10003 unsigned DPP8 = 0;
10004 for (size_t i = 0; i < 8; ++i)
10005 DPP8 |= (Sels[i] << (i * 3));
10006
10007 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
10008 return ParseStatus::Success;
10009}
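// Worked example of the packing above: dpp8:[7,6,5,4,3,2,1,0] reverses the
// lanes within each group of eight, placing selector i at bit i*3:
static_assert((7 | (6 << 3) | (5 << 6) | (4 << 9) | (3 << 12) | (2 << 15) |
               (1 << 18) | (0 << 21)) == 0x53977,
              "dpp8 selectors pack 3 bits per lane");
// Illustrative use: v_mov_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1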
10010
10011bool
10012AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
10013 const OperandVector &Operands) {
10014 if (Ctrl == "row_newbcast")
10015 return isGFX90A();
10016
10017 if (Ctrl == "row_share" ||
10018 Ctrl == "row_xmask")
10019 return isGFX10Plus();
10020
10021 if (Ctrl == "wave_shl" ||
10022 Ctrl == "wave_shr" ||
10023 Ctrl == "wave_rol" ||
10024 Ctrl == "wave_ror" ||
10025 Ctrl == "row_bcast")
10026 return isVI() || isGFX9();
10027
10028 return Ctrl == "row_mirror" ||
10029 Ctrl == "row_half_mirror" ||
10030 Ctrl == "quad_perm" ||
10031 Ctrl == "row_shl" ||
10032 Ctrl == "row_shr" ||
10033 Ctrl == "row_ror";
10034}
10035
10036int64_t
10037AMDGPUAsmParser::parseDPPCtrlPerm() {
10038 // quad_perm:[%d,%d,%d,%d]
10039
10040 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
10041 return -1;
10042
10043 int64_t Val = 0;
10044 for (int i = 0; i < 4; ++i) {
10045 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
10046 return -1;
10047
10048 int64_t Temp;
10049 SMLoc Loc = getLoc();
10050 if (getParser().parseAbsoluteExpression(Temp))
10051 return -1;
10052 if (Temp < 0 || Temp > 3) {
10053 Error(Loc, "expected a 2-bit value");
10054 return -1;
10055 }
10056
10057 Val += (Temp << i * 2);
10058 }
10059
10060 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10061 return -1;
10062
10063 return Val;
10064}
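// Worked example of the packing above: quad_perm:[3,2,1,0] reverses each
// quad, placing selector i at bit i*2:
static_assert((3 | (2 << 2) | (1 << 4) | (0 << 6)) == 0x1B,
              "quad_perm selectors pack 2 bits per lane");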
10065
10066int64_t
10067AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10068 using namespace AMDGPU::DPP;
10069
10070 // sel:%d
10071
10072 int64_t Val;
10073 SMLoc Loc = getLoc();
10074
10075 if (getParser().parseAbsoluteExpression(Val))
10076 return -1;
10077
10078 struct DppCtrlCheck {
10079 int64_t Ctrl;
10080 int Lo;
10081 int Hi;
10082 };
10083
10084 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
10085 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10086 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10087 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10088 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10089 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10090 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10091 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10092 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10093 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10094 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10095 .Default({-1, 0, 0});
10096
10097 bool Valid;
10098 if (Check.Ctrl == -1) {
10099 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
10100 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
10101 } else {
10102 Valid = Check.Lo <= Val && Val <= Check.Hi;
10103 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
10104 }
10105
10106 if (!Valid) {
10107 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
10108 return -1;
10109 }
10110
10111 return Val;
10112}
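// For example, row_shl:7 encodes as ROW_SHL0 | 7 (the Lo != Hi case), while
// row_bcast accepts only 15 or 31, mapping to the distinct BCAST15/BCAST31
// controls.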
10113
10114ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10115 using namespace AMDGPU::DPP;
10116
10117 if (!isToken(AsmToken::Identifier) ||
10118 !isSupportedDPPCtrl(getTokenStr(), Operands))
10119 return ParseStatus::NoMatch;
10120
10121 SMLoc S = getLoc();
10122 int64_t Val = -1;
10123 StringRef Ctrl;
10124
10125 parseId(Ctrl);
10126
10127 if (Ctrl == "row_mirror") {
10128 Val = DppCtrl::ROW_MIRROR;
10129 } else if (Ctrl == "row_half_mirror") {
10130 Val = DppCtrl::ROW_HALF_MIRROR;
10131 } else {
10132 if (skipToken(AsmToken::Colon, "expected a colon")) {
10133 if (Ctrl == "quad_perm") {
10134 Val = parseDPPCtrlPerm();
10135 } else {
10136 Val = parseDPPCtrlSel(Ctrl);
10137 }
10138 }
10139 }
10140
10141 if (Val == -1)
10142 return ParseStatus::Failure;
10143
10144 Operands.push_back(
10145 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10146 return ParseStatus::Success;
10147}
10148
10149void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
10150 bool IsDPP8) {
10151 OptionalImmIndexMap OptionalIdx;
10152 unsigned Opc = Inst.getOpcode();
10153 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10154
10155  // MAC instructions are special because they have an 'old' operand which
10156  // is not tied to dst (but assumed to be). They also have a dummy, unused
10157  // src2_modifiers operand.
10158 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
10159 int Src2ModIdx =
10160 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
10161 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10162 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
10163
10164 unsigned I = 1;
10165 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10166 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10167 }
10168
10169 int Fi = 0;
10170 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
10171 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10172 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10173 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10174 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
10175
10176 for (unsigned E = Operands.size(); I != E; ++I) {
10177
10178 if (IsMAC) {
10179 int NumOperands = Inst.getNumOperands();
10180 if (OldIdx == NumOperands) {
10181 // Handle old operand
10182 constexpr int DST_IDX = 0;
10183 Inst.addOperand(Inst.getOperand(DST_IDX));
10184 } else if (Src2ModIdx == NumOperands) {
10185 // Add unused dummy src2_modifiers
10186        Inst.addOperand(MCOperand::createImm(0));
10187      }
10188 }
10189
10190 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
10191 Inst.addOperand(Inst.getOperand(0));
10192 }
10193
10194 if (IsVOP3CvtSrDpp) {
10195 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
10196        Inst.addOperand(MCOperand::createImm(0));
10197        Inst.addOperand(MCOperand::createReg(MCRegister()));
10198 }
10199 }
10200
10201 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10202                                            MCOI::TIED_TO);
10203    if (TiedTo != -1) {
10204 assert((unsigned)TiedTo < Inst.getNumOperands());
10205 // handle tied old or src2 for MAC instructions
10206 Inst.addOperand(Inst.getOperand(TiedTo));
10207 }
10208 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10209 // Add the register arguments
10210 if (IsDPP8 && Op.isDppFI()) {
10211 Fi = Op.getImm();
10212 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10213 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10214 } else if (Op.isReg()) {
10215 Op.addRegOperands(Inst, 1);
10216 } else if (Op.isImm() &&
10217 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
10218 Op.addImmOperands(Inst, 1);
10219 } else if (Op.isImm()) {
10220 OptionalIdx[Op.getImmTy()] = I;
10221 } else {
10222 llvm_unreachable("unhandled operand type");
10223 }
10224 }
10225
10226 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
10227 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10228 AMDGPUOperand::ImmTyClamp);
10229
10230 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
10231 if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
10232 Inst.addOperand(Inst.getOperand(0));
10233 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10234 AMDGPUOperand::ImmTyByteSel);
10235 }
10236
10237 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10238 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
10239
10240 if (Desc.TSFlags & SIInstrFlags::VOP3P)
10241 cvtVOP3P(Inst, Operands, OptionalIdx);
10242 else if (Desc.TSFlags & SIInstrFlags::VOP3)
10243 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10244 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
10245 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
10246 }
10247
10248 if (IsDPP8) {
10249 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
10250 using namespace llvm::AMDGPU::DPP;
10251 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10252 } else {
10253 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
10254 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10255 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10256 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10257
10258 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
10259 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10260 AMDGPUOperand::ImmTyDppFI);
10261 }
10262}
10263
10264void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
10265 OptionalImmIndexMap OptionalIdx;
10266
10267 unsigned I = 1;
10268 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10269 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10270 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10271 }
10272
10273 int Fi = 0;
10274 for (unsigned E = Operands.size(); I != E; ++I) {
10275 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10276                                            MCOI::TIED_TO);
10277    if (TiedTo != -1) {
10278 assert((unsigned)TiedTo < Inst.getNumOperands());
10279 // handle tied old or src2 for MAC instructions
10280 Inst.addOperand(Inst.getOperand(TiedTo));
10281 }
10282 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10283 // Add the register arguments
10284 if (Op.isReg() && validateVccOperand(Op.getReg())) {
10285 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
10286 // Skip it.
10287 continue;
10288 }
10289
10290 if (IsDPP8) {
10291 if (Op.isDPP8()) {
10292 Op.addImmOperands(Inst, 1);
10293 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10294 Op.addRegWithFPInputModsOperands(Inst, 2);
10295 } else if (Op.isDppFI()) {
10296 Fi = Op.getImm();
10297 } else if (Op.isReg()) {
10298 Op.addRegOperands(Inst, 1);
10299 } else {
10300 llvm_unreachable("Invalid operand type");
10301 }
10302 } else {
10303      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10304        Op.addRegWithFPInputModsOperands(Inst, 2);
10305 } else if (Op.isReg()) {
10306 Op.addRegOperands(Inst, 1);
10307 } else if (Op.isDPPCtrl()) {
10308 Op.addImmOperands(Inst, 1);
10309 } else if (Op.isImm()) {
10310 // Handle optional arguments
10311 OptionalIdx[Op.getImmTy()] = I;
10312 } else {
10313 llvm_unreachable("Invalid operand type");
10314 }
10315 }
10316 }
10317
10318 if (IsDPP8) {
10319 using namespace llvm::AMDGPU::DPP;
10320 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10321 } else {
10322 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10323 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10324 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10325 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
10326 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10327 AMDGPUOperand::ImmTyDppFI);
10328 }
10329 }
10330}
10331
10332//===----------------------------------------------------------------------===//
10333// sdwa
10334//===----------------------------------------------------------------------===//
10335
10336ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
10337 StringRef Prefix,
10338 AMDGPUOperand::ImmTy Type) {
10339 return parseStringOrIntWithPrefix(
10340 Operands, Prefix,
10341 {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
10342 Type);
10343}
10344
10345ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
10346 return parseStringOrIntWithPrefix(
10347 Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
10348 AMDGPUOperand::ImmTySDWADstUnused);
10349}
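// Illustrative SDWA selector syntax accepted by the two parsers above:
//   v_mov_b32_sdwa v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1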
10350
10351void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
10352 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
10353}
10354
10355void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
10356 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
10357}
10358
10359void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
10360 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
10361}
10362
10363void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
10364 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
10365}
10366
10367void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
10368 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
10369}
10370
10371void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
10372 uint64_t BasicInstType,
10373 bool SkipDstVcc,
10374 bool SkipSrcVcc) {
10375 using namespace llvm::AMDGPU::SDWA;
10376
10377 OptionalImmIndexMap OptionalIdx;
10378 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10379 bool SkippedVcc = false;
10380
10381 unsigned I = 1;
10382 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10383 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10384 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10385 }
10386
10387 for (unsigned E = Operands.size(); I != E; ++I) {
10388 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10389 if (SkipVcc && !SkippedVcc && Op.isReg() &&
10390 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
10391 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
10392 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
10393 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
10394 // Skip VCC only if we didn't skip it on previous iteration.
10395 // Note that src0 and src1 occupy 2 slots each because of modifiers.
10396 if (BasicInstType == SIInstrFlags::VOP2 &&
10397 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
10398 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
10399 SkippedVcc = true;
10400 continue;
10401 }
10402 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
10403 SkippedVcc = true;
10404 continue;
10405 }
10406 }
10407    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10408      Op.addRegOrImmWithInputModsOperands(Inst, 2);
10409 } else if (Op.isImm()) {
10410 // Handle optional arguments
10411 OptionalIdx[Op.getImmTy()] = I;
10412 } else {
10413 llvm_unreachable("Invalid operand type");
10414 }
10415 SkippedVcc = false;
10416 }
10417
10418 const unsigned Opc = Inst.getOpcode();
10419 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10420 Opc != AMDGPU::V_NOP_sdwa_vi) {
10421    // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
10422 switch (BasicInstType) {
10423 case SIInstrFlags::VOP1:
10424 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10425 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10426 AMDGPUOperand::ImmTyClamp, 0);
10427
10428 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10429 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10430 AMDGPUOperand::ImmTyOModSI, 0);
10431
10432 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
10433 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10434 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10435
10436 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
10437 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10438 AMDGPUOperand::ImmTySDWADstUnused,
10439 DstUnused::UNUSED_PRESERVE);
10440
10441 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10442 break;
10443
10444 case SIInstrFlags::VOP2:
10445 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10446 AMDGPUOperand::ImmTyClamp, 0);
10447
10448 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
10449 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
10450
10451 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10452 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10453 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10454 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10455 break;
10456
10457 case SIInstrFlags::VOPC:
10458 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
10459 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10460 AMDGPUOperand::ImmTyClamp, 0);
10461 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10462 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10463 break;
10464
10465 default:
10466 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
10467 }
10468 }
10469
10470  // Special case v_mac_{f16, f32}: they have a src2 register operand that
10471  // is tied to the dst operand.
10472 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10473 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10474 auto *it = Inst.begin();
10475 std::advance(
10476 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
10477 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
10478 }
10479}
10480
10481/// Force static initialization.
10482extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
10483 LLVMInitializeAMDGPUAsmParser() {
10484   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
10485   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
10486 }
10487
10488#define GET_MATCHER_IMPLEMENTATION
10489#define GET_MNEMONIC_SPELL_CHECKER
10490#define GET_MNEMONIC_CHECKER
10491#include "AMDGPUGenAsmMatcher.inc"
10492
10493ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10494 unsigned MCK) {
10495 switch (MCK) {
10496 case MCK_addr64:
10497 return parseTokenOp("addr64", Operands);
10498 case MCK_done:
10499 return parseNamedBit("done", Operands, AMDGPUOperand::ImmTyDone, true);
10500 case MCK_idxen:
10501 return parseTokenOp("idxen", Operands);
10502 case MCK_lds:
10503 return parseNamedBit("lds", Operands, AMDGPUOperand::ImmTyLDS,
10504 /*IgnoreNegative=*/true);
10505 case MCK_offen:
10506 return parseTokenOp("offen", Operands);
10507 case MCK_off:
10508 return parseTokenOp("off", Operands);
10509 case MCK_row_95_en:
10510 return parseNamedBit("row_en", Operands, AMDGPUOperand::ImmTyRowEn, true);
10511 case MCK_gds:
10512 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10513 case MCK_tfe:
10514 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10515 }
10516 return tryCustomParseOperand(Operands, MCK);
10517}
10518
10519// This function should be defined after auto-generated include so that we have
10520// MatchClassKind enum defined
10521unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10522 unsigned Kind) {
10523 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10524  // But MatchInstructionImpl() expects to see a token and fails to validate
10525  // the operand. This method handles the case where we were given an
10526  // immediate operand but expected the corresponding token.
10527 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10528 switch (Kind) {
10529 case MCK_addr64:
10530 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10531 case MCK_gds:
10532 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10533 case MCK_lds:
10534 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10535 case MCK_idxen:
10536 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10537 case MCK_offen:
10538 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10539 case MCK_tfe:
10540 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10541 case MCK_done:
10542 return Operand.isDone() ? Match_Success : Match_InvalidOperand;
10543 case MCK_row_95_en:
10544 return Operand.isRowEn() ? Match_Success : Match_InvalidOperand;
10545 case MCK_SSrc_b32:
10546 // When operands have expression values, they will return true for isToken,
10547 // because it is not possible to distinguish between a token and an
10548 // expression at parse time. MatchInstructionImpl() will always try to
10549 // match an operand as a token, when isToken returns true, and when the
10550 // name of the expression is not a valid token, the match will fail,
10551 // so we need to handle it here.
10552 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10553 case MCK_SSrc_f32:
10554 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10555 case MCK_SOPPBrTarget:
10556 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10557 case MCK_VReg32OrOff:
10558 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10559 case MCK_InterpSlot:
10560 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10561 case MCK_InterpAttr:
10562 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10563 case MCK_InterpAttrChan:
10564 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10565 case MCK_SReg_64:
10566 case MCK_SReg_64_XEXEC:
10567 // Null is defined as a 32-bit register but
10568 // it should also be enabled with 64-bit operands or larger.
10569 // The following code enables it for SReg_64 and larger operands
10570 // used as source and destination. Remaining source
10571 // operands are handled in isInlinableImm.
10572 case MCK_SReg_96:
10573 case MCK_SReg_128:
10574 case MCK_SReg_256:
10575 case MCK_SReg_512:
10576 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10577 default:
10578 return Match_InvalidOperand;
10579 }
10580}
10581
10582//===----------------------------------------------------------------------===//
10583// endpgm
10584//===----------------------------------------------------------------------===//
10585
10586ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10587 SMLoc S = getLoc();
10588 int64_t Imm = 0;
10589
10590 if (!parseExpr(Imm)) {
10591 // The operand is optional, if not present default to 0
10592 Imm = 0;
10593 }
10594
10595 if (!isUInt<16>(Imm))
10596 return Error(S, "expected a 16-bit value");
10597
10598 Operands.push_back(
10599 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10600 return ParseStatus::Success;
10601}
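// The immediate is optional, so (illustrative, on targets with this operand)
// both 's_endpgm' and 's_endpgm 0x1' are accepted, the former defaulting to 0.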
10602
10603bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10604
10605//===----------------------------------------------------------------------===//
10606// Split Barrier
10607//===----------------------------------------------------------------------===//
10608
10609bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
Decode an InlineInfo in Data at the specified offset.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
Loop::LoopBounds::Direction Direction
Definition LoopInfo.cpp:253
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(PassOpts->AAPipeline)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
This file implements the SmallBitVector class.
StringSet - A set-like wrapper for the StringMap.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
BinaryOperator * Mul
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & BFloat()
Definition APFloat.h:295
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5899
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:130
size_t size() const
Get the array size.
Definition ArrayRef.h:141
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition MCAsmMacro.h:103
bool is(TokenKind K) const
Definition MCAsmMacro.h:75
Register getReg() const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:353
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
SMLoc getLoc() const
Definition MCInst.h:208
void setLoc(SMLoc loc)
Definition MCInst.h:207
unsigned getOpcode() const
Definition MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition MCInst.h:232
void addOperand(const MCOperand Op)
Definition MCInst.h:215
iterator begin()
Definition MCInst.h:227
size_t size() const
Definition MCInst.h:226
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Definition MCInstrInfo.h:80
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
void setImm(int64_t Val)
Definition MCInst.h:89
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
bool isImm() const
Definition MCInst.h:66
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
const MCExpr * getExpr() const
Definition MCInst.h:118
bool isExpr() const
Definition MCInst.h:69
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
bool regsOverlap(MCRegister RegA, MCRegister RegB) const
Returns true if the two registers are equal or alias each other.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr bool isValid() const
Definition MCRegister.h:84
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
void setRedefinable(bool Value)
Mark this symbol as redefinable.
Definition MCSymbol.h:210
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCTargetAsmParser - Generic interface to target specific assembly parsers.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Definition Register.h:100
Represents a location in source code.
Definition SMLoc.h:22
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:35
constexpr const char * getPointer() const
Definition SMLoc.h:33
constexpr bool isValid() const
Definition SMLoc.h:28
SMLoc Start
Definition SMLoc.h:49
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition StringRef.h:685
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:138
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:270
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:655
bool contains(StringRef key) const
Check if the set contains the given key.
Definition StringSet.h:60
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
static constexpr CustomOperand Operands[]
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
constexpr const char *const ModMatrixFmt[]
constexpr const char *const ModMatrixScaleFmt[]
constexpr const char *const ModMatrixScale[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlineValue(MCRegister Reg)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY bool isLitExpr(const MCExpr *Expr)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
LLVM_READONLY int64_t getLitValue(const MCExpr *Expr)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:234
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:204
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:211
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:227
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:213
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:203
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:209
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:212
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:221
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:219
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:214
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:228
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:239
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:240
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:215
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:206
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:224
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:220
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:226
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:216
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:241
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:223
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:205
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:231
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
bool isGFX1250Plus(const MCSubtargetInfo &STI)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1431
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ OPERAND_IMMEDIATE
Definition MCInstrDesc.h:61
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
@ Valid
The data is already valid.
Context & getContext() const
Definition BasicBlock.h:99
bool isNull(StringRef S)
Definition YAMLTraits.h:571
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
Definition Error.h:1129
@ Offset
Definition DWP.cpp:557
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
StringMapEntry< Value * > ValueName
Definition Value.h:56
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition Alignment.h:206
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
void PrintError(const Twine &Msg)
Definition Error.cpp:104
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
Definition InstrProf.h:299
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:362
Op::Description Desc
Target & getTheR600Target()
The target for R600 GPUs.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:32
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
#define N
RegisterKind Kind
StringLiteral Name
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
Instruction set architecture version.
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...