1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
38#include "llvm/MC/MCSymbol.h"
46#include <optional>
47
48using namespace llvm;
49using namespace llvm::AMDGPU;
50using namespace llvm::amdhsa;
51
52namespace {
53
54class AMDGPUAsmParser;
55
56enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
57
58//===----------------------------------------------------------------------===//
59// Operand
60//===----------------------------------------------------------------------===//
61
62class AMDGPUOperand : public MCParsedAsmOperand {
63 enum KindTy {
64 Token,
65 Immediate,
66 Register,
67 Expression
68 } Kind;
69
70 SMLoc StartLoc, EndLoc;
71 const AMDGPUAsmParser *AsmParser;
72
73public:
74 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
75 : Kind(Kind_), AsmParser(AsmParser_) {}
76
77 using Ptr = std::unique_ptr<AMDGPUOperand>;
78
79 struct Modifiers {
80 bool Abs = false;
81 bool Neg = false;
82 bool Sext = false;
83 LitModifier Lit = LitModifier::None;
84
85 bool hasFPModifiers() const { return Abs || Neg; }
86 bool hasIntModifiers() const { return Sext; }
87 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
88
89 int64_t getFPModifiersOperand() const {
90 int64_t Operand = 0;
91 Operand |= Abs ? SISrcMods::ABS : 0u;
92 Operand |= Neg ? SISrcMods::NEG : 0u;
93 return Operand;
94 }
95
96 int64_t getIntModifiersOperand() const {
97 int64_t Operand = 0;
98 Operand |= Sext ? SISrcMods::SEXT : 0u;
99 return Operand;
100 }
101
102 int64_t getModifiersOperand() const {
103 assert(!(hasFPModifiers() && hasIntModifiers())
104 && "fp and int modifiers should not be used simultaneously");
105 if (hasFPModifiers())
106 return getFPModifiersOperand();
107 if (hasIntModifiers())
108 return getIntModifiersOperand();
109 return 0;
110 }
111
112 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
113 };
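// Editorial note, not part of the original source: a minimal illustration of
// how the flags above map to the src_modifiers encoding (assuming the
// SISrcMods values from SIDefines.h). For an operand written as "-|v0|", Abs
// and Neg are both set, so getModifiersOperand() yields
// SISrcMods::ABS | SISrcMods::NEG; for "sext(v0)" only Sext is set and the
// result is SISrcMods::SEXT. The assert above guarantees that FP (abs/neg)
// and integer (sext) modifiers are never combined on a single operand.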
114
115 enum ImmTy {
116 ImmTyNone,
117 ImmTyGDS,
118 ImmTyLDS,
119 ImmTyOffen,
120 ImmTyIdxen,
121 ImmTyAddr64,
122 ImmTyOffset,
123 ImmTyInstOffset,
124 ImmTyOffset0,
125 ImmTyOffset1,
126 ImmTySMEMOffsetMod,
127 ImmTyCPol,
128 ImmTyTFE,
129 ImmTyD16,
130 ImmTyClamp,
131 ImmTyOModSI,
132 ImmTySDWADstSel,
133 ImmTySDWASrc0Sel,
134 ImmTySDWASrc1Sel,
135 ImmTySDWADstUnused,
136 ImmTyDMask,
137 ImmTyDim,
138 ImmTyUNorm,
139 ImmTyDA,
140 ImmTyR128A16,
141 ImmTyA16,
142 ImmTyLWE,
143 ImmTyExpTgt,
144 ImmTyExpCompr,
145 ImmTyExpVM,
146 ImmTyFORMAT,
147 ImmTyHwreg,
148 ImmTyOff,
149 ImmTySendMsg,
150 ImmTyInterpSlot,
151 ImmTyInterpAttr,
152 ImmTyInterpAttrChan,
153 ImmTyOpSel,
154 ImmTyOpSelHi,
155 ImmTyNegLo,
156 ImmTyNegHi,
157 ImmTyIndexKey8bit,
158 ImmTyIndexKey16bit,
159 ImmTyIndexKey32bit,
160 ImmTyDPP8,
161 ImmTyDppCtrl,
162 ImmTyDppRowMask,
163 ImmTyDppBankMask,
164 ImmTyDppBoundCtrl,
165 ImmTyDppFI,
166 ImmTySwizzle,
167 ImmTyGprIdxMode,
168 ImmTyHigh,
169 ImmTyBLGP,
170 ImmTyCBSZ,
171 ImmTyABID,
172 ImmTyEndpgm,
173 ImmTyWaitVDST,
174 ImmTyWaitEXP,
175 ImmTyWaitVAVDst,
176 ImmTyWaitVMVSrc,
177 ImmTyBitOp3,
178 ImmTyMatrixAFMT,
179 ImmTyMatrixBFMT,
180 ImmTyMatrixAScale,
181 ImmTyMatrixBScale,
182 ImmTyMatrixAScaleFmt,
183 ImmTyMatrixBScaleFmt,
184 ImmTyMatrixAReuse,
185 ImmTyMatrixBReuse,
186 ImmTyScaleSel,
187 ImmTyByteSel,
188 };
189
190private:
191 struct TokOp {
192 const char *Data;
193 unsigned Length;
194 };
195
196 struct ImmOp {
197 int64_t Val;
198 ImmTy Type;
199 bool IsFPImm;
200 Modifiers Mods;
201 };
202
203 struct RegOp {
204 MCRegister RegNo;
205 Modifiers Mods;
206 };
207
208 union {
209 TokOp Tok;
210 ImmOp Imm;
211 RegOp Reg;
212 const MCExpr *Expr;
213 };
214
215 // The index of the associated MCInst operand.
216 mutable int MCOpIdx = -1;
217
218public:
219 bool isToken() const override { return Kind == Token; }
220
221 bool isSymbolRefExpr() const {
222 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
223 }
224
225 bool isImm() const override {
226 return Kind == Immediate;
227 }
228
229 bool isInlinableImm(MVT type) const;
230 bool isLiteralImm(MVT type) const;
231
232 bool isRegKind() const {
233 return Kind == Register;
234 }
235
236 bool isReg() const override {
237 return isRegKind() && !hasModifiers();
238 }
239
240 bool isRegOrInline(unsigned RCID, MVT type) const {
241 return isRegClass(RCID) || isInlinableImm(type);
242 }
243
244 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
245 return isRegOrInline(RCID, type) || isLiteralImm(type);
246 }
247
248 bool isRegOrImmWithInt16InputMods() const {
249 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
250 }
251
252 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
253 return isRegOrImmWithInputMods(
254 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
255 }
256
257 bool isRegOrImmWithInt32InputMods() const {
258 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
259 }
260
261 bool isRegOrInlineImmWithInt16InputMods() const {
262 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
263 }
264
265 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
266 return isRegOrInline(
267 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
268 }
269
270 bool isRegOrInlineImmWithInt32InputMods() const {
271 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
272 }
273
274 bool isRegOrImmWithInt64InputMods() const {
275 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
276 }
277
278 bool isRegOrImmWithFP16InputMods() const {
279 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
280 }
281
282 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
283 return isRegOrImmWithInputMods(
284 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
285 }
286
287 bool isRegOrImmWithFP32InputMods() const {
288 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
289 }
290
291 bool isRegOrImmWithFP64InputMods() const {
292 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
293 }
294
295 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
296 return isRegOrInline(
297 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
298 }
299
300 bool isRegOrInlineImmWithFP32InputMods() const {
301 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
302 }
303
304 bool isRegOrInlineImmWithFP64InputMods() const {
305 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
306 }
307
308 bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
309
310 bool isVRegWithFP32InputMods() const {
311 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
312 }
313
314 bool isVRegWithFP64InputMods() const {
315 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
316 }
317
318 bool isPackedFP16InputMods() const {
319 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
320 }
321
322 bool isPackedVGPRFP32InputMods() const {
323 return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
324 }
325
326 bool isVReg() const {
327 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
328 isRegClass(AMDGPU::VReg_64RegClassID) ||
329 isRegClass(AMDGPU::VReg_96RegClassID) ||
330 isRegClass(AMDGPU::VReg_128RegClassID) ||
331 isRegClass(AMDGPU::VReg_160RegClassID) ||
332 isRegClass(AMDGPU::VReg_192RegClassID) ||
333 isRegClass(AMDGPU::VReg_256RegClassID) ||
334 isRegClass(AMDGPU::VReg_512RegClassID) ||
335 isRegClass(AMDGPU::VReg_1024RegClassID);
336 }
337
338 bool isVReg32() const {
339 return isRegClass(AMDGPU::VGPR_32RegClassID);
340 }
341
342 bool isVReg32OrOff() const {
343 return isOff() || isVReg32();
344 }
345
346 bool isNull() const {
347 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
348 }
349
350 bool isAV_LdSt_32_Align2_RegOp() const {
351 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
352 isRegClass(AMDGPU::AGPR_32RegClassID);
353 }
354
355 bool isVRegWithInputMods() const;
356 template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
357 template <bool IsFake16> bool isT16VRegWithInputMods() const;
358
359 bool isSDWAOperand(MVT type) const;
360 bool isSDWAFP16Operand() const;
361 bool isSDWAFP32Operand() const;
362 bool isSDWAInt16Operand() const;
363 bool isSDWAInt32Operand() const;
364
365 bool isImmTy(ImmTy ImmT) const {
366 return isImm() && Imm.Type == ImmT;
367 }
368
369 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
370
371 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
372
373 bool isImmModifier() const {
374 return isImm() && Imm.Type != ImmTyNone;
375 }
376
377 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
378 bool isDim() const { return isImmTy(ImmTyDim); }
379 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
380 bool isOff() const { return isImmTy(ImmTyOff); }
381 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
382 bool isOffen() const { return isImmTy(ImmTyOffen); }
383 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
384 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
385 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
386 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
387 bool isGDS() const { return isImmTy(ImmTyGDS); }
388 bool isLDS() const { return isImmTy(ImmTyLDS); }
389 bool isCPol() const { return isImmTy(ImmTyCPol); }
390 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
391 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
392 bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
393 bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
394 bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
395 bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
396 bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
397 bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
398 bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
399 bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
400 bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
401 bool isTFE() const { return isImmTy(ImmTyTFE); }
402 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
403 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
404 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
405 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
406 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
407 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
408 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
409 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
410 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
411 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
412 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
413 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
414 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
415 bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
416
417 bool isRegOrImm() const {
418 return isReg() || isImm();
419 }
420
421 bool isRegClass(unsigned RCID) const;
422
423 bool isInlineValue() const;
424
425 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
426 return isRegOrInline(RCID, type) && !hasModifiers();
427 }
428
429 bool isSCSrcB16() const {
430 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
431 }
432
433 bool isSCSrcV2B16() const {
434 return isSCSrcB16();
435 }
436
437 bool isSCSrc_b32() const {
438 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
439 }
440
441 bool isSCSrc_b64() const {
442 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
443 }
444
445 bool isBoolReg() const;
446
447 bool isSCSrcF16() const {
448 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
449 }
450
451 bool isSCSrcV2F16() const {
452 return isSCSrcF16();
453 }
454
455 bool isSCSrcF32() const {
456 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
457 }
458
459 bool isSCSrcF64() const {
460 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
461 }
462
463 bool isSSrc_b32() const {
464 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
465 }
466
467 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
468
469 bool isSSrcV2B16() const {
470 llvm_unreachable("cannot happen");
471 return isSSrc_b16();
472 }
473
474 bool isSSrc_b64() const {
475 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
476 // See isVSrc64().
477 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
478 (((const MCTargetAsmParser *)AsmParser)
479 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
480 isExpr());
481 }
482
483 bool isSSrc_f32() const {
484 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
485 }
486
487 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
488
489 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
490
491 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
492
493 bool isSSrcV2F16() const {
494 llvm_unreachable("cannot happen");
495 return isSSrc_f16();
496 }
497
498 bool isSSrcV2FP32() const {
499 llvm_unreachable("cannot happen");
500 return isSSrc_f32();
501 }
502
503 bool isSCSrcV2FP32() const {
504 llvm_unreachable("cannot happen");
505 return isSCSrcF32();
506 }
507
508 bool isSSrcV2INT32() const {
509 llvm_unreachable("cannot happen");
510 return isSSrc_b32();
511 }
512
513 bool isSCSrcV2INT32() const {
514 llvm_unreachable("cannot happen");
515 return isSCSrc_b32();
516 }
517
518 bool isSSrcOrLds_b32() const {
519 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
520 isLiteralImm(MVT::i32) || isExpr();
521 }
522
523 bool isVCSrc_b32() const {
524 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
525 }
526
527 bool isVCSrc_b32_Lo256() const {
528 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
529 }
530
531 bool isVCSrc_b64_Lo256() const {
532 return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
533 }
534
535 bool isVCSrc_b64() const {
536 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
537 }
538
539 bool isVCSrcT_b16() const {
540 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
541 }
542
543 bool isVCSrcTB16_Lo128() const {
544 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
545 }
546
547 bool isVCSrcFake16B16_Lo128() const {
548 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
549 }
550
551 bool isVCSrc_b16() const {
552 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
553 }
554
555 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
556
557 bool isVCSrc_f32() const {
558 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
559 }
560
561 bool isVCSrc_f64() const {
562 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
563 }
564
565 bool isVCSrcTBF16() const {
566 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
567 }
568
569 bool isVCSrcT_f16() const {
570 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
571 }
572
573 bool isVCSrcT_bf16() const {
574 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
575 }
576
577 bool isVCSrcTBF16_Lo128() const {
578 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
579 }
580
581 bool isVCSrcTF16_Lo128() const {
582 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
583 }
584
585 bool isVCSrcFake16BF16_Lo128() const {
586 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
587 }
588
589 bool isVCSrcFake16F16_Lo128() const {
590 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
591 }
592
593 bool isVCSrc_bf16() const {
594 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
595 }
596
597 bool isVCSrc_f16() const {
598 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
599 }
600
601 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
602
603 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
604
605 bool isVSrc_b32() const {
606 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
607 }
608
609 bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
610
611 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
612
613 bool isVSrcT_b16_Lo128() const {
614 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
615 }
616
617 bool isVSrcFake16_b16_Lo128() const {
618 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
619 }
620
621 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
622
623 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
624
625 bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
626
627 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
628
629 bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
630
631 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
632
633 bool isVSrc_f32() const {
634 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
635 }
636
637 bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
638
639 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
640
641 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
642
643 bool isVSrcT_bf16_Lo128() const {
644 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
645 }
646
647 bool isVSrcT_f16_Lo128() const {
648 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
649 }
650
651 bool isVSrcFake16_bf16_Lo128() const {
652 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
653 }
654
655 bool isVSrcFake16_f16_Lo128() const {
656 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
657 }
658
659 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
660
661 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
662
663 bool isVSrc_v2bf16() const {
664 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
665 }
666
667 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
668
669 bool isVSrc_v2f16_splat() const { return isVSrc_v2f16(); }
670
671 bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
672
673 bool isVISrcB32() const {
674 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
675 }
676
677 bool isVISrcB16() const {
678 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
679 }
680
681 bool isVISrcV2B16() const {
682 return isVISrcB16();
683 }
684
685 bool isVISrcF32() const {
686 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
687 }
688
689 bool isVISrcF16() const {
690 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
691 }
692
693 bool isVISrcV2F16() const {
694 return isVISrcF16() || isVISrcB32();
695 }
696
697 bool isVISrc_64_bf16() const {
698 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
699 }
700
701 bool isVISrc_64_f16() const {
702 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
703 }
704
705 bool isVISrc_64_b32() const {
706 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
707 }
708
709 bool isVISrc_64B64() const {
710 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
711 }
712
713 bool isVISrc_64_f64() const {
714 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
715 }
716
717 bool isVISrc_64V2FP32() const {
718 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
719 }
720
721 bool isVISrc_64V2INT32() const {
722 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
723 }
724
725 bool isVISrc_256_b32() const {
726 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
727 }
728
729 bool isVISrc_256_f32() const {
730 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
731 }
732
733 bool isVISrc_256B64() const {
734 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
735 }
736
737 bool isVISrc_256_f64() const {
738 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
739 }
740
741 bool isVISrc_512_f64() const {
742 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
743 }
744
745 bool isVISrc_128B16() const {
746 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
747 }
748
749 bool isVISrc_128V2B16() const {
750 return isVISrc_128B16();
751 }
752
753 bool isVISrc_128_b32() const {
754 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
755 }
756
757 bool isVISrc_128_f32() const {
758 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
759 }
760
761 bool isVISrc_256V2FP32() const {
762 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
763 }
764
765 bool isVISrc_256V2INT32() const {
766 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
767 }
768
769 bool isVISrc_512_b32() const {
770 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
771 }
772
773 bool isVISrc_512B16() const {
774 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
775 }
776
777 bool isVISrc_512V2B16() const {
778 return isVISrc_512B16();
779 }
780
781 bool isVISrc_512_f32() const {
782 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
783 }
784
785 bool isVISrc_512F16() const {
786 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
787 }
788
789 bool isVISrc_512V2F16() const {
790 return isVISrc_512F16() || isVISrc_512_b32();
791 }
792
793 bool isVISrc_1024_b32() const {
794 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
795 }
796
797 bool isVISrc_1024B16() const {
798 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
799 }
800
801 bool isVISrc_1024V2B16() const {
802 return isVISrc_1024B16();
803 }
804
805 bool isVISrc_1024_f32() const {
806 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
807 }
808
809 bool isVISrc_1024F16() const {
810 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
811 }
812
813 bool isVISrc_1024V2F16() const {
814 return isVISrc_1024F16() || isVISrc_1024_b32();
815 }
816
817 bool isAISrcB32() const {
818 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
819 }
820
821 bool isAISrcB16() const {
822 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
823 }
824
825 bool isAISrcV2B16() const {
826 return isAISrcB16();
827 }
828
829 bool isAISrcF32() const {
830 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
831 }
832
833 bool isAISrcF16() const {
834 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
835 }
836
837 bool isAISrcV2F16() const {
838 return isAISrcF16() || isAISrcB32();
839 }
840
841 bool isAISrc_64B64() const {
842 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
843 }
844
845 bool isAISrc_64_f64() const {
846 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
847 }
848
849 bool isAISrc_128_b32() const {
850 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
851 }
852
853 bool isAISrc_128B16() const {
854 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
855 }
856
857 bool isAISrc_128V2B16() const {
858 return isAISrc_128B16();
859 }
860
861 bool isAISrc_128_f32() const {
862 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
863 }
864
865 bool isAISrc_128F16() const {
866 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
867 }
868
869 bool isAISrc_128V2F16() const {
870 return isAISrc_128F16() || isAISrc_128_b32();
871 }
872
873 bool isVISrc_128_bf16() const {
874 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
875 }
876
877 bool isVISrc_128_f16() const {
878 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
879 }
880
881 bool isVISrc_128V2F16() const {
882 return isVISrc_128_f16() || isVISrc_128_b32();
883 }
884
885 bool isAISrc_256B64() const {
886 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
887 }
888
889 bool isAISrc_256_f64() const {
890 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
891 }
892
893 bool isAISrc_512_b32() const {
894 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
895 }
896
897 bool isAISrc_512B16() const {
898 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
899 }
900
901 bool isAISrc_512V2B16() const {
902 return isAISrc_512B16();
903 }
904
905 bool isAISrc_512_f32() const {
906 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
907 }
908
909 bool isAISrc_512F16() const {
910 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
911 }
912
913 bool isAISrc_512V2F16() const {
914 return isAISrc_512F16() || isAISrc_512_b32();
915 }
916
917 bool isAISrc_1024_b32() const {
918 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
919 }
920
921 bool isAISrc_1024B16() const {
922 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
923 }
924
925 bool isAISrc_1024V2B16() const {
926 return isAISrc_1024B16();
927 }
928
929 bool isAISrc_1024_f32() const {
930 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
931 }
932
933 bool isAISrc_1024F16() const {
934 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
935 }
936
937 bool isAISrc_1024V2F16() const {
938 return isAISrc_1024F16() || isAISrc_1024_b32();
939 }
940
941 bool isKImmFP32() const {
942 return isLiteralImm(MVT::f32);
943 }
944
945 bool isKImmFP16() const {
946 return isLiteralImm(MVT::f16);
947 }
948
949 bool isKImmFP64() const { return isLiteralImm(MVT::f64); }
950
951 bool isMem() const override {
952 return false;
953 }
954
955 bool isExpr() const {
956 return Kind == Expression;
957 }
958
959 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
960
961 bool isSWaitCnt() const;
962 bool isDepCtr() const;
963 bool isSDelayALU() const;
964 bool isHwreg() const;
965 bool isSendMsg() const;
966 bool isSplitBarrier() const;
967 bool isSwizzle() const;
968 bool isSMRDOffset8() const;
969 bool isSMEMOffset() const;
970 bool isSMRDLiteralOffset() const;
971 bool isDPP8() const;
972 bool isDPPCtrl() const;
973 bool isBLGP() const;
974 bool isGPRIdxMode() const;
975 bool isS16Imm() const;
976 bool isU16Imm() const;
977 bool isEndpgm() const;
978
979 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
980 return [this, P]() { return P(*this); };
981 }
982
983 StringRef getToken() const {
984 assert(isToken());
985 return StringRef(Tok.Data, Tok.Length);
986 }
987
988 int64_t getImm() const {
989 assert(isImm());
990 return Imm.Val;
991 }
992
993 void setImm(int64_t Val) {
994 assert(isImm());
995 Imm.Val = Val;
996 }
997
998 ImmTy getImmTy() const {
999 assert(isImm());
1000 return Imm.Type;
1001 }
1002
1003 MCRegister getReg() const override {
1004 assert(isRegKind());
1005 return Reg.RegNo;
1006 }
1007
1008 SMLoc getStartLoc() const override {
1009 return StartLoc;
1010 }
1011
1012 SMLoc getEndLoc() const override {
1013 return EndLoc;
1014 }
1015
1016 SMRange getLocRange() const {
1017 return SMRange(StartLoc, EndLoc);
1018 }
1019
1020 int getMCOpIdx() const { return MCOpIdx; }
1021
1022 Modifiers getModifiers() const {
1023 assert(isRegKind() || isImmTy(ImmTyNone));
1024 return isRegKind() ? Reg.Mods : Imm.Mods;
1025 }
1026
1027 void setModifiers(Modifiers Mods) {
1028 assert(isRegKind() || isImmTy(ImmTyNone));
1029 if (isRegKind())
1030 Reg.Mods = Mods;
1031 else
1032 Imm.Mods = Mods;
1033 }
1034
1035 bool hasModifiers() const {
1036 return getModifiers().hasModifiers();
1037 }
1038
1039 bool hasFPModifiers() const {
1040 return getModifiers().hasFPModifiers();
1041 }
1042
1043 bool hasIntModifiers() const {
1044 return getModifiers().hasIntModifiers();
1045 }
1046
1047 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1048
1049 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1050
1051 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1052
1053 void addRegOperands(MCInst &Inst, unsigned N) const;
1054
1055 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1056 if (isRegKind())
1057 addRegOperands(Inst, N);
1058 else
1059 addImmOperands(Inst, N);
1060 }
1061
1062 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1063 Modifiers Mods = getModifiers();
1064 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1065 if (isRegKind()) {
1066 addRegOperands(Inst, N);
1067 } else {
1068 addImmOperands(Inst, N, false);
1069 }
1070 }
1071
1072 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1073 assert(!hasIntModifiers());
1074 addRegOrImmWithInputModsOperands(Inst, N);
1075 }
1076
1077 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1078 assert(!hasFPModifiers());
1079 addRegOrImmWithInputModsOperands(Inst, N);
1080 }
1081
1082 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1083 Modifiers Mods = getModifiers();
1084 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1085 assert(isRegKind());
1086 addRegOperands(Inst, N);
1087 }
1088
1089 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1090 assert(!hasIntModifiers());
1091 addRegWithInputModsOperands(Inst, N);
1092 }
1093
1094 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1095 assert(!hasFPModifiers());
1096 addRegWithInputModsOperands(Inst, N);
1097 }
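// Editorial note, not part of the original source: each
// add*WithInputModsOperands helper above first appends the collected modifier
// bits as an immediate MCOperand and only then the register or immediate
// value itself, mirroring the src*_modifiers / src* operand pairs used by the
// VOP3, SDWA and DPP instruction definitions.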
1098
1099 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1100 // clang-format off
1101 switch (Type) {
1102 case ImmTyNone: OS << "None"; break;
1103 case ImmTyGDS: OS << "GDS"; break;
1104 case ImmTyLDS: OS << "LDS"; break;
1105 case ImmTyOffen: OS << "Offen"; break;
1106 case ImmTyIdxen: OS << "Idxen"; break;
1107 case ImmTyAddr64: OS << "Addr64"; break;
1108 case ImmTyOffset: OS << "Offset"; break;
1109 case ImmTyInstOffset: OS << "InstOffset"; break;
1110 case ImmTyOffset0: OS << "Offset0"; break;
1111 case ImmTyOffset1: OS << "Offset1"; break;
1112 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1113 case ImmTyCPol: OS << "CPol"; break;
1114 case ImmTyIndexKey8bit: OS << "index_key"; break;
1115 case ImmTyIndexKey16bit: OS << "index_key"; break;
1116 case ImmTyIndexKey32bit: OS << "index_key"; break;
1117 case ImmTyTFE: OS << "TFE"; break;
1118 case ImmTyD16: OS << "D16"; break;
1119 case ImmTyFORMAT: OS << "FORMAT"; break;
1120 case ImmTyClamp: OS << "Clamp"; break;
1121 case ImmTyOModSI: OS << "OModSI"; break;
1122 case ImmTyDPP8: OS << "DPP8"; break;
1123 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1124 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1125 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1126 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1127 case ImmTyDppFI: OS << "DppFI"; break;
1128 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1129 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1130 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1131 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1132 case ImmTyDMask: OS << "DMask"; break;
1133 case ImmTyDim: OS << "Dim"; break;
1134 case ImmTyUNorm: OS << "UNorm"; break;
1135 case ImmTyDA: OS << "DA"; break;
1136 case ImmTyR128A16: OS << "R128A16"; break;
1137 case ImmTyA16: OS << "A16"; break;
1138 case ImmTyLWE: OS << "LWE"; break;
1139 case ImmTyOff: OS << "Off"; break;
1140 case ImmTyExpTgt: OS << "ExpTgt"; break;
1141 case ImmTyExpCompr: OS << "ExpCompr"; break;
1142 case ImmTyExpVM: OS << "ExpVM"; break;
1143 case ImmTyHwreg: OS << "Hwreg"; break;
1144 case ImmTySendMsg: OS << "SendMsg"; break;
1145 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1146 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1147 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1148 case ImmTyOpSel: OS << "OpSel"; break;
1149 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1150 case ImmTyNegLo: OS << "NegLo"; break;
1151 case ImmTyNegHi: OS << "NegHi"; break;
1152 case ImmTySwizzle: OS << "Swizzle"; break;
1153 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1154 case ImmTyHigh: OS << "High"; break;
1155 case ImmTyBLGP: OS << "BLGP"; break;
1156 case ImmTyCBSZ: OS << "CBSZ"; break;
1157 case ImmTyABID: OS << "ABID"; break;
1158 case ImmTyEndpgm: OS << "Endpgm"; break;
1159 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1160 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1161 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1162 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1163 case ImmTyBitOp3: OS << "BitOp3"; break;
1164 case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
1165 case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
1166 case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
1167 case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
1168 case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
1169 case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
1170 case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
1171 case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
1172 case ImmTyScaleSel: OS << "ScaleSel"; break;
1173 case ImmTyByteSel: OS << "ByteSel"; break;
1174 }
1175 // clang-format on
1176 }
1177
1178 void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
1179 switch (Kind) {
1180 case Register:
1181 OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
1182 << " mods: " << Reg.Mods << '>';
1183 break;
1184 case Immediate:
1185 OS << '<' << getImm();
1186 if (getImmTy() != ImmTyNone) {
1187 OS << " type: "; printImmTy(OS, getImmTy());
1188 }
1189 OS << " mods: " << Imm.Mods << '>';
1190 break;
1191 case Token:
1192 OS << '\'' << getToken() << '\'';
1193 break;
1194 case Expression:
1195 OS << "<expr ";
1196 MAI.printExpr(OS, *Expr);
1197 OS << '>';
1198 break;
1199 }
1200 }
1201
1202 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1203 int64_t Val, SMLoc Loc,
1204 ImmTy Type = ImmTyNone,
1205 bool IsFPImm = false) {
1206 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1207 Op->Imm.Val = Val;
1208 Op->Imm.IsFPImm = IsFPImm;
1209 Op->Imm.Type = Type;
1210 Op->Imm.Mods = Modifiers();
1211 Op->StartLoc = Loc;
1212 Op->EndLoc = Loc;
1213 return Op;
1214 }
1215
1216 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1217 StringRef Str, SMLoc Loc,
1218 bool HasExplicitEncodingSize = true) {
1219 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1220 Res->Tok.Data = Str.data();
1221 Res->Tok.Length = Str.size();
1222 Res->StartLoc = Loc;
1223 Res->EndLoc = Loc;
1224 return Res;
1225 }
1226
1227 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1228 MCRegister Reg, SMLoc S, SMLoc E) {
1229 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1230 Op->Reg.RegNo = Reg;
1231 Op->Reg.Mods = Modifiers();
1232 Op->StartLoc = S;
1233 Op->EndLoc = E;
1234 return Op;
1235 }
1236
1237 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1238 const class MCExpr *Expr, SMLoc S) {
1239 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1240 Op->Expr = Expr;
1241 Op->StartLoc = S;
1242 Op->EndLoc = S;
1243 return Op;
1244 }
1245};
1246
1247raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1248 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1249 return OS;
1250}
1251
1252//===----------------------------------------------------------------------===//
1253// AsmParser
1254//===----------------------------------------------------------------------===//
1255
1256// TODO: define GET_SUBTARGET_FEATURE_NAME
1257#define GET_REGISTER_MATCHER
1258#include "AMDGPUGenAsmMatcher.inc"
1259#undef GET_REGISTER_MATCHER
1260#undef GET_SUBTARGET_FEATURE_NAME
1261
1262// Holds info related to the current kernel, e.g. the count of SGPRs used.
1263// A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
1264// next .amdgpu_hsa_kernel directive or at EOF.
1265class KernelScopeInfo {
1266 int SgprIndexUnusedMin = -1;
1267 int VgprIndexUnusedMin = -1;
1268 int AgprIndexUnusedMin = -1;
1269 MCContext *Ctx = nullptr;
1270 MCSubtargetInfo const *MSTI = nullptr;
1271
1272 void usesSgprAt(int i) {
1273 if (i >= SgprIndexUnusedMin) {
1274 SgprIndexUnusedMin = ++i;
1275 if (Ctx) {
1276 MCSymbol* const Sym =
1277 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1278 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1279 }
1280 }
1281 }
1282
1283 void usesVgprAt(int i) {
1284 if (i >= VgprIndexUnusedMin) {
1285 VgprIndexUnusedMin = ++i;
1286 if (Ctx) {
1287 MCSymbol* const Sym =
1288 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1289 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1290 VgprIndexUnusedMin);
1291 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1292 }
1293 }
1294 }
1295
1296 void usesAgprAt(int i) {
1297 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1298 if (!hasMAIInsts(*MSTI))
1299 return;
1300
1301 if (i >= AgprIndexUnusedMin) {
1302 AgprIndexUnusedMin = ++i;
1303 if (Ctx) {
1304 MCSymbol* const Sym =
1305 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1306 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1307
1308 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1309 MCSymbol* const vSym =
1310 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1311 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1312 VgprIndexUnusedMin);
1313 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1314 }
1315 }
1316 }
1317
1318public:
1319 KernelScopeInfo() = default;
1320
1321 void initialize(MCContext &Context) {
1322 Ctx = &Context;
1323 MSTI = Ctx->getSubtargetInfo();
1324
1325 usesSgprAt(SgprIndexUnusedMin = -1);
1326 usesVgprAt(VgprIndexUnusedMin = -1);
1327 if (hasMAIInsts(*MSTI)) {
1328 usesAgprAt(AgprIndexUnusedMin = -1);
1329 }
1330 }
1331
1332 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1333 unsigned RegWidth) {
1334 switch (RegKind) {
1335 case IS_SGPR:
1336 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1337 break;
1338 case IS_AGPR:
1339 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1340 break;
1341 case IS_VGPR:
1342 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1343 break;
1344 default:
1345 break;
1346 }
1347 }
1348};
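// Editorial sketch, not part of the original source: an illustrative walk
// through the tracking above. A call such as
//   usesRegister(IS_VGPR, /*DwordRegIndex=*/4, /*RegWidth=*/64)
// covers v[4:5], so it invokes usesVgprAt(4 + divideCeil(64, 32) - 1), i.e.
// usesVgprAt(5); that raises VgprIndexUnusedMin to 6 and refreshes the
// .kernel.vgpr_count symbol via getTotalNumVGPRs().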
1349
1350class AMDGPUAsmParser : public MCTargetAsmParser {
1351 MCAsmParser &Parser;
1352
1353 unsigned ForcedEncodingSize = 0;
1354 bool ForcedDPP = false;
1355 bool ForcedSDWA = false;
1356 KernelScopeInfo KernelScope;
1357 const unsigned HwMode;
1358
1359 /// @name Auto-generated Match Functions
1360 /// {
1361
1362#define GET_ASSEMBLER_HEADER
1363#include "AMDGPUGenAsmMatcher.inc"
1364
1365 /// }
1366
1367 /// Get the size of a register operand, in bytes.
1368 unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
1369 assert(OpNo < Desc.NumOperands);
1370 int16_t RCID = MII.getOpRegClassID(Desc.operands()[OpNo], HwMode);
1371 return getRegBitWidth(RCID) / 8;
1372 }
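// Editorial example, not part of the original source: for an operand whose
// register class resolves to VReg_64, getRegBitWidth() returns 64, so
// getRegOperandSize() reports 8 bytes.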
1373
1374private:
1375 void createConstantSymbol(StringRef Id, int64_t Val);
1376
1377 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1378 bool OutOfRangeError(SMRange Range);
1379 /// Calculate VGPR/SGPR blocks required for given target, reserved
1380 /// registers, and user-specified NextFreeXGPR values.
1381 ///
1382 /// \param Features [in] Target features, used for bug corrections.
1383 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1384 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1385 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1386 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1387 /// descriptor field, if valid.
1388 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1389 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1390 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1391 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1392 /// \param VGPRBlocks [out] Result VGPR block count.
1393 /// \param SGPRBlocks [out] Result SGPR block count.
1394 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1395 const MCExpr *FlatScrUsed, bool XNACKUsed,
1396 std::optional<bool> EnableWavefrontSize32,
1397 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1398 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1399 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
1400 bool ParseDirectiveAMDGCNTarget();
1401 bool ParseDirectiveAMDHSACodeObjectVersion();
1402 bool ParseDirectiveAMDHSAKernel();
1403 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1404 bool ParseDirectiveAMDKernelCodeT();
1405 // TODO: Possibly make subtargetHasRegister const.
1406 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1407 bool ParseDirectiveAMDGPUHsaKernel();
1408
1409 bool ParseDirectiveISAVersion();
1410 bool ParseDirectiveHSAMetadata();
1411 bool ParseDirectivePALMetadataBegin();
1412 bool ParseDirectivePALMetadata();
1413 bool ParseDirectiveAMDGPULDS();
1414
1415 /// Common code to parse out a block of text (typically YAML) between start and
1416 /// end directives.
1417 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1418 const char *AssemblerDirectiveEnd,
1419 std::string &CollectString);
1420
1421 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1422 RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
1423 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1424 unsigned &RegNum, unsigned &RegWidth,
1425 bool RestoreOnFailure = false);
1426 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1427 unsigned &RegNum, unsigned &RegWidth,
1428 SmallVectorImpl<AsmToken> &Tokens);
1429 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1430 unsigned &RegWidth,
1431 SmallVectorImpl<AsmToken> &Tokens);
1432 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1433 unsigned &RegWidth,
1434 SmallVectorImpl<AsmToken> &Tokens);
1435 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1436 unsigned &RegWidth,
1437 SmallVectorImpl<AsmToken> &Tokens);
1438 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1439 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1440 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1441
1442 bool isRegister();
1443 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1444 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1445 void initializeGprCountSymbol(RegisterKind RegKind);
1446 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1447 unsigned RegWidth);
1448 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1449 bool IsAtomic);
1450
1451public:
1452 enum OperandMode {
1453 OperandMode_Default,
1454 OperandMode_NSA,
1455 };
1456
1457 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1458
1459 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1460 const MCInstrInfo &MII, const MCTargetOptions &Options)
1461 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser),
1462 HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
1464
1465 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1466
1467 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1468 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1469 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1470 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1471 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1472 } else {
1473 createConstantSymbol(".option.machine_version_major", ISA.Major);
1474 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1475 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1476 }
1477 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1478 initializeGprCountSymbol(IS_VGPR);
1479 initializeGprCountSymbol(IS_SGPR);
1480 } else
1481 KernelScope.initialize(getContext());
1482
1483 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1484 createConstantSymbol(Symbol, Code);
1485
1486 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1487 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1488 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1489 }
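// Editorial example (assumes an HSA-ABI gfx90a target, ISA version 9.0.10):
// the constructor above would define .amdgcn.gfx_generation_number = 9,
// .amdgcn.gfx_generation_minor = 0 and .amdgcn.gfx_generation_stepping = 10,
// initialize the VGPR/SGPR count symbols, and create the UC_VERSION_*
// constants unconditionally. This note is not part of the original source.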
1490
1491 bool hasMIMG_R128() const {
1492 return AMDGPU::hasMIMG_R128(getSTI());
1493 }
1494
1495 bool hasPackedD16() const {
1496 return AMDGPU::hasPackedD16(getSTI());
1497 }
1498
1499 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1500
1501 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1502
1503 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1504
1505 bool isSI() const {
1506 return AMDGPU::isSI(getSTI());
1507 }
1508
1509 bool isCI() const {
1510 return AMDGPU::isCI(getSTI());
1511 }
1512
1513 bool isVI() const {
1514 return AMDGPU::isVI(getSTI());
1515 }
1516
1517 bool isGFX9() const {
1518 return AMDGPU::isGFX9(getSTI());
1519 }
1520
1521 // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1522 bool isGFX90A() const {
1523 return AMDGPU::isGFX90A(getSTI());
1524 }
1525
1526 bool isGFX940() const {
1527 return AMDGPU::isGFX940(getSTI());
1528 }
1529
1530 bool isGFX9Plus() const {
1531 return AMDGPU::isGFX9Plus(getSTI());
1532 }
1533
1534 bool isGFX10() const {
1535 return AMDGPU::isGFX10(getSTI());
1536 }
1537
1538 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1539
1540 bool isGFX11() const {
1541 return AMDGPU::isGFX11(getSTI());
1542 }
1543
1544 bool isGFX11Plus() const {
1545 return AMDGPU::isGFX11Plus(getSTI());
1546 }
1547
1548 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1549
1550 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1551
1552 bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
1553
1554 bool isGFX1250Plus() const { return AMDGPU::isGFX1250Plus(getSTI()); }
1555
1556 bool isGFX13() const { return AMDGPU::isGFX13(getSTI()); }
1557
1558 bool isGFX13Plus() const { return AMDGPU::isGFX13Plus(getSTI()); }
1559
1560 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1561
1562 bool isGFX10_BEncoding() const {
1563 return AMDGPU::isGFX10_BEncoding(getSTI());
1564 }
1565
1566 bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }
1567
1568 bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }
1569
1570 bool hasInv2PiInlineImm() const {
1571 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1572 }
1573
1574 bool has64BitLiterals() const {
1575 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1576 }
1577
1578 bool hasFlatOffsets() const {
1579 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1580 }
1581
1582 bool hasTrue16Insts() const {
1583 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1584 }
1585
1586 bool hasArchitectedFlatScratch() const {
1587 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1588 }
1589
1590 bool hasSGPR102_SGPR103() const {
1591 return !isVI() && !isGFX9();
1592 }
1593
1594 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1595
1596 bool hasIntClamp() const {
1597 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1598 }
1599
1600 bool hasPartialNSAEncoding() const {
1601 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1602 }
1603
1604 bool hasGloballyAddressableScratch() const {
1605 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
1606 }
1607
1608 unsigned getNSAMaxSize(bool HasSampler = false) const {
1609 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1610 }
1611
1612 unsigned getMaxNumUserSGPRs() const {
1613 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1614 }
1615
1616 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1617
1618 AMDGPUTargetStreamer &getTargetStreamer() {
1619 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1620 return static_cast<AMDGPUTargetStreamer &>(TS);
1621 }
1622
1623 MCContext &getContext() const {
1624 // We need this const_cast because for some reason getContext() is not const
1625 // in MCAsmParser.
1626 return const_cast<AMDGPUAsmParser *>(this)->MCTargetAsmParser::getContext();
1627 }
1628
1629 const MCRegisterInfo *getMRI() const {
1630 return getContext().getRegisterInfo();
1631 }
1632
1633 const MCInstrInfo *getMII() const {
1634 return &MII;
1635 }
1636
1637 // FIXME: This should not be used. Instead, use queries derived from
1638 // getAvailableFeatures().
1639 const FeatureBitset &getFeatureBits() const {
1640 return getSTI().getFeatureBits();
1641 }
1642
1643 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1644 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1645 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1646
1647 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1648 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1649 bool isForcedDPP() const { return ForcedDPP; }
1650 bool isForcedSDWA() const { return ForcedSDWA; }
1651 ArrayRef<unsigned> getMatchedVariants() const;
1652 StringRef getMatchedVariantName() const;
1653
1654 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1655 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1656 bool RestoreOnFailure);
1657 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1658 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1659 SMLoc &EndLoc) override;
1660 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1661 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1662 unsigned Kind) override;
1663 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1664 OperandVector &Operands, MCStreamer &Out,
1665 uint64_t &ErrorInfo,
1666 bool MatchingInlineAsm) override;
1667 bool ParseDirective(AsmToken DirectiveID) override;
1668 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1669 OperandMode Mode = OperandMode_Default);
1670 StringRef parseMnemonicSuffix(StringRef Name);
1671 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1672 SMLoc NameLoc, OperandVector &Operands) override;
1673 //bool ProcessInstruction(MCInst &Inst);
1674
1675 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1676
1677 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1678
1679 ParseStatus
1680 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1681 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1682 std::function<bool(int64_t &)> ConvertResult = nullptr);
1683
1684 ParseStatus parseOperandArrayWithPrefix(
1685 const char *Prefix, OperandVector &Operands,
1686 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1687 bool (*ConvertResult)(int64_t &) = nullptr);
1688
1689 ParseStatus
1690 parseNamedBit(StringRef Name, OperandVector &Operands,
1691 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1692 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1693 ParseStatus parseCPol(OperandVector &Operands);
1694 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1695 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1696 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1697 SMLoc &StringLoc);
1698 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1699 StringRef Name,
1700 ArrayRef<const char *> Ids,
1701 int64_t &IntVal);
1702 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1703 StringRef Name,
1704 ArrayRef<const char *> Ids,
1705 AMDGPUOperand::ImmTy Type);
1706
1707 bool isModifier();
1708 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1709 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1710 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1711 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1712 bool parseSP3NegModifier();
1713 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1714 LitModifier Lit = LitModifier::None);
1715 ParseStatus parseReg(OperandVector &Operands);
1716 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1717 LitModifier Lit = LitModifier::None);
1718 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1719 bool AllowImm = true);
1720 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1721 bool AllowImm = true);
1722 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1723 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1724 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1725 ParseStatus tryParseIndexKey(OperandVector &Operands,
1726 AMDGPUOperand::ImmTy ImmTy);
1727 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1728 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1729 ParseStatus parseIndexKey32bit(OperandVector &Operands);
1730 ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
1731 AMDGPUOperand::ImmTy Type);
1732 ParseStatus parseMatrixAFMT(OperandVector &Operands);
1733 ParseStatus parseMatrixBFMT(OperandVector &Operands);
1734 ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
1735 AMDGPUOperand::ImmTy Type);
1736 ParseStatus parseMatrixAScale(OperandVector &Operands);
1737 ParseStatus parseMatrixBScale(OperandVector &Operands);
1738 ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
1739 AMDGPUOperand::ImmTy Type);
1740 ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
1741 ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
1742
1743 ParseStatus parseDfmtNfmt(int64_t &Format);
1744 ParseStatus parseUfmt(int64_t &Format);
1745 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1746 int64_t &Format);
1747 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1748 int64_t &Format);
1749 ParseStatus parseFORMAT(OperandVector &Operands);
1750 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1751 ParseStatus parseNumericFormat(int64_t &Format);
1752 ParseStatus parseFlatOffset(OperandVector &Operands);
1753 ParseStatus parseR128A16(OperandVector &Operands);
1754 ParseStatus parseBLGP(OperandVector &Operands);
1755 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1756 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1757
1758 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1759
1760 bool parseCnt(int64_t &IntVal);
1761 ParseStatus parseSWaitCnt(OperandVector &Operands);
1762
1763 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1764 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1765 ParseStatus parseDepCtr(OperandVector &Operands);
1766
1767 bool parseDelay(int64_t &Delay);
1768 ParseStatus parseSDelayALU(OperandVector &Operands);
1769
1770 ParseStatus parseHwreg(OperandVector &Operands);
1771
1772private:
1773 struct OperandInfoTy {
1774 SMLoc Loc;
1775 int64_t Val;
1776 bool IsSymbolic = false;
1777 bool IsDefined = false;
1778
1779 OperandInfoTy(int64_t Val) : Val(Val) {}
1780 };
1781
1782 struct StructuredOpField : OperandInfoTy {
1783 StringLiteral Id;
1784 StringLiteral Desc;
1785 unsigned Width;
1786 bool IsDefined = false;
1787
1788 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1789 int64_t Default)
1790 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1791 virtual ~StructuredOpField() = default;
1792
1793 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1794 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1795 return false;
1796 }
1797
1798 virtual bool validate(AMDGPUAsmParser &Parser) const {
1799 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1800 return Error(Parser, "not supported on this GPU");
1801 if (!isUIntN(Width, Val))
1802 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1803 return true;
1804 }
1805 };
1806
1807 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1808 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1809
1810 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1811 bool validateSendMsg(const OperandInfoTy &Msg,
1812 const OperandInfoTy &Op,
1813 const OperandInfoTy &Stream);
1814
1815 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1816 OperandInfoTy &Width);
1817
1818 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1819
1820 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1821 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1822 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1823
1824 SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
1825 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1826 const OperandVector &Operands) const;
1827 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
1828 const OperandVector &Operands) const;
1829 SMLoc getInstLoc(const OperandVector &Operands) const;
1830
1831 bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
1832 const OperandVector &Operands);
1833 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1834 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1835 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1836 bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1837 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1838 std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1839 bool AsVOPD3);
1840 bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1841 bool tryVOPD(const MCInst &Inst);
1842 bool tryVOPD3(const MCInst &Inst);
1843 bool tryAnotherVOPDEncoding(const MCInst &Inst);
1844
1845 bool validateIntClampSupported(const MCInst &Inst);
1846 bool validateMIMGAtomicDMask(const MCInst &Inst);
1847 bool validateMIMGGatherDMask(const MCInst &Inst);
1848 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1849 bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
1850 bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
1851 bool validateMIMGD16(const MCInst &Inst);
1852 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1853 bool validateTensorR128(const MCInst &Inst);
1854 bool validateMIMGMSAA(const MCInst &Inst);
1855 bool validateOpSel(const MCInst &Inst);
1856 bool validateTrue16OpSel(const MCInst &Inst);
1857 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1858 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1859 bool validateVccOperand(MCRegister Reg) const;
1860 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1861 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1862 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1863 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1864 bool validateAGPRLdSt(const MCInst &Inst) const;
1865 bool validateVGPRAlign(const MCInst &Inst) const;
1866 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1867 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1868 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1869 bool validateDivScale(const MCInst &Inst);
1870 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1871 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1872 SMLoc IDLoc);
1873 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1874 const unsigned CPol);
1875 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1876 bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
1877 bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1878 unsigned getConstantBusLimit(unsigned Opcode) const;
1879 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1880 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1881 MCRegister findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1882
1883 bool isSupportedMnemo(StringRef Mnemo,
1884 const FeatureBitset &FBS);
1885 bool isSupportedMnemo(StringRef Mnemo,
1886 const FeatureBitset &FBS,
1887 ArrayRef<unsigned> Variants);
1888 bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
1889
1890 bool isId(const StringRef Id) const;
1891 bool isId(const AsmToken &Token, const StringRef Id) const;
1892 bool isToken(const AsmToken::TokenKind Kind) const;
1893 StringRef getId() const;
1894 bool trySkipId(const StringRef Id);
1895 bool trySkipId(const StringRef Pref, const StringRef Id);
1896 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1897 bool trySkipToken(const AsmToken::TokenKind Kind);
1898 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1899 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1900 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1901
1902 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1903 AsmToken::TokenKind getTokenKind() const;
1904 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1905 bool parseExpr(OperandVector &Operands);
1906 StringRef getTokenStr() const;
1907 AsmToken peekToken(bool ShouldSkipSpace = true);
1908 AsmToken getToken() const;
1909 SMLoc getLoc() const;
1910 void lex();
1911
1912public:
1913 void onBeginOfFile() override;
1914 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1915
1916 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1917
1918 ParseStatus parseExpTgt(OperandVector &Operands);
1919 ParseStatus parseSendMsg(OperandVector &Operands);
1920 ParseStatus parseInterpSlot(OperandVector &Operands);
1921 ParseStatus parseInterpAttr(OperandVector &Operands);
1922 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1923 ParseStatus parseBoolReg(OperandVector &Operands);
1924
1925 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1926 const unsigned MaxVal, const Twine &ErrMsg,
1927 SMLoc &Loc);
1928 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1929 const unsigned MinVal,
1930 const unsigned MaxVal,
1931 const StringRef ErrMsg);
1932 ParseStatus parseSwizzle(OperandVector &Operands);
1933 bool parseSwizzleOffset(int64_t &Imm);
1934 bool parseSwizzleMacro(int64_t &Imm);
1935 bool parseSwizzleQuadPerm(int64_t &Imm);
1936 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1937 bool parseSwizzleBroadcast(int64_t &Imm);
1938 bool parseSwizzleSwap(int64_t &Imm);
1939 bool parseSwizzleReverse(int64_t &Imm);
1940 bool parseSwizzleFFT(int64_t &Imm);
1941 bool parseSwizzleRotate(int64_t &Imm);
1942
1943 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1944 int64_t parseGPRIdxMacro();
1945
1946 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1947 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1948
1949 ParseStatus parseOModSI(OperandVector &Operands);
1950
1951 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1952 OptionalImmIndexMap &OptionalIdx);
1953 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1954 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1955 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1956 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1957 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1958
1959 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1960 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1961 OptionalImmIndexMap &OptionalIdx);
1962 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1963 OptionalImmIndexMap &OptionalIdx);
1964
1965 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1966 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1967 void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);
1968
1969 bool parseDimId(unsigned &Encoding);
1970 ParseStatus parseDim(OperandVector &Operands);
1971 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1972 ParseStatus parseDPP8(OperandVector &Operands);
1973 ParseStatus parseDPPCtrl(OperandVector &Operands);
1974 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1975 int64_t parseDPPCtrlSel(StringRef Ctrl);
1976 int64_t parseDPPCtrlPerm();
1977 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1978 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1979 cvtDPP(Inst, Operands, true);
1980 }
1981 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1982 bool IsDPP8 = false);
1983 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1984 cvtVOP3DPP(Inst, Operands, true);
1985 }
1986
1987 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1988 AMDGPUOperand::ImmTy Type);
1989 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1990 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1991 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1992 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1993 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1994 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1995 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1996 uint64_t BasicInstType,
1997 bool SkipDstVcc = false,
1998 bool SkipSrcVcc = false);
1999
2000 ParseStatus parseEndpgm(OperandVector &Operands);
2001
2002 ParseStatus parseVOPD(OperandVector &Operands);
2003};
2004
2005} // end anonymous namespace
2006
2007// May be called with integer type with equivalent bitwidth.
2008static const fltSemantics *getFltSemantics(unsigned Size) {
2009 switch (Size) {
2010 case 4:
2011 return &APFloat::IEEEsingle();
2012 case 8:
2013 return &APFloat::IEEEdouble();
2014 case 2:
2015 return &APFloat::IEEEhalf();
2016 default:
2017 llvm_unreachable("unsupported fp type");
2018 }
2019}
2020
2021 static const fltSemantics *getFltSemantics(MVT VT) {
2022 return getFltSemantics(VT.getSizeInBits() / 8);
2023}
2024
2025 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
2026 switch (OperandType) {
2027 // When a floating-point immediate is used as an operand of type i16, the 32-bit
2028 // representation of the constant, truncated to the 16 LSBs, should be used.
2043 return &APFloat::IEEEsingle();
2050 return &APFloat::IEEEdouble();
2058 return &APFloat::IEEEhalf();
2063 return &APFloat::BFloat();
2064 default:
2065 llvm_unreachable("unsupported fp type");
2066 }
2067}
2068
2069//===----------------------------------------------------------------------===//
2070// Operand
2071//===----------------------------------------------------------------------===//
2072
2073static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2074 bool Lost;
2075
2076 // Convert literal to single precision
2077 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
2078 APFloat::rmNearestTiesToEven,
2079 &Lost);
2080 // We allow precision loss but not overflow or underflow
2081 if (Status != APFloat::opOK &&
2082 Lost &&
2083 ((Status & APFloat::opOverflow) != 0 ||
2084 (Status & APFloat::opUnderflow) != 0)) {
2085 return false;
2086 }
2087
2088 return true;
2089}
2090
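// A value is safely truncatable to Size bits if it is representable either as
// an unsigned or as a signed Size-bit integer.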
2091static bool isSafeTruncation(int64_t Val, unsigned Size) {
2092 return isUIntN(Size, Val) || isIntN(Size, Val);
2093}
2094
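// Dispatches the inline-constant check for 16-bit element types. Note that i16
// reuses the 32-bit check because FP constants used as i16 operands are encoded
// via their 32-bit representation.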
2095static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2096 if (VT.getScalarType() == MVT::i16)
2097 return isInlinableLiteral32(Val, HasInv2Pi);
2098
2099 if (VT.getScalarType() == MVT::f16)
2100 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2101
2102 assert(VT.getScalarType() == MVT::bf16);
2103
2104 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2105}
2106
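// Returns true if this parsed immediate can be encoded as a hardware inline
// constant for an operand of the given type; FP literal tokens are checked
// after conversion to the operand's floating-point format.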
2107bool AMDGPUOperand::isInlinableImm(MVT type) const {
2108
2109 // This is a hack to enable named inline values like
2110 // shared_base with both 32-bit and 64-bit operands.
2111 // Note that these values are defined as
2112 // 32-bit operands only.
2113 if (isInlineValue()) {
2114 return true;
2115 }
2116
2117 if (!isImmTy(ImmTyNone)) {
2118 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2119 return false;
2120 }
2121
2122 if (getModifiers().Lit != LitModifier::None)
2123 return false;
2124
2125 // TODO: We should avoid using host float here. It would be better to
2126 // check the float bit values which is what a few other places do.
2127 // We've had bot failures before due to weird NaN support on mips hosts.
2128
2129 APInt Literal(64, Imm.Val);
2130
2131 if (Imm.IsFPImm) { // We got fp literal token
2132 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2133 return AMDGPU::isInlinableLiteral64(Imm.Val,
2134 AsmParser->hasInv2PiInlineImm());
2135 }
2136
2137 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2138 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2139 return false;
2140
2141 if (type.getScalarSizeInBits() == 16) {
2142 bool Lost = false;
2143 switch (type.getScalarType().SimpleTy) {
2144 default:
2145 llvm_unreachable("unknown 16-bit type");
2146 case MVT::bf16:
2147 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2148 &Lost);
2149 break;
2150 case MVT::f16:
2151 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2152 &Lost);
2153 break;
2154 case MVT::i16:
2155 FPLiteral.convert(APFloatBase::IEEEsingle(),
2156 APFloat::rmNearestTiesToEven, &Lost);
2157 break;
2158 }
2159 // We need to use the 32-bit representation here because when a floating-point
2160 // inline constant is used as an i16 operand, its 32-bit representation
2161 // will be used. We need the 32-bit value to check whether it is an FP
2162 // inline constant.
2163 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2164 return isInlineableLiteralOp16(ImmVal, type,
2165 AsmParser->hasInv2PiInlineImm());
2166 }
2167
2168 // Check if single precision literal is inlinable
2169 return AMDGPU::isInlinableLiteral32(
2170 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2171 AsmParser->hasInv2PiInlineImm());
2172 }
2173
2174 // We got int literal token.
2175 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2176 return AMDGPU::isInlinableLiteral64(Imm.Val,
2177 AsmParser->hasInv2PiInlineImm());
2178 }
2179
2180 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2181 return false;
2182 }
2183
2184 if (type.getScalarSizeInBits() == 16) {
2185 return isInlineableLiteralOp16(
2186 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2187 type, AsmParser->hasInv2PiInlineImm());
2188 }
2189
2190 return AMDGPU::isInlinableLiteral32(
2191 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2192 AsmParser->hasInv2PiInlineImm());
2193}
2194
2195bool AMDGPUOperand::isLiteralImm(MVT type) const {
2196 // Check that this immediate can be added as literal
2197 if (!isImmTy(ImmTyNone)) {
2198 return false;
2199 }
2200
2201 bool Allow64Bit =
2202 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2203
2204 if (!Imm.IsFPImm) {
2205 // We got int literal token.
2206
2207 if (type == MVT::f64 && hasFPModifiers()) {
2208 // FP modifiers cannot be applied to integer literals while preserving the
2209 // same semantics for VOP1/2/C and VOP3, because of integer truncation.
2210 // To avoid ambiguity, disallow these cases.
2211 return false;
2212 }
2213
2214 unsigned Size = type.getSizeInBits();
2215 if (Size == 64) {
2216 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2217 return true;
2218 Size = 32;
2219 }
2220
2221 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2222 // types.
2223 return isSafeTruncation(Imm.Val, Size);
2224 }
2225
2226 // We got fp literal token
2227 if (type == MVT::f64) { // Expected 64-bit fp operand
2228 // The low 32 bits of the literal will be set to zero, but such literals are accepted.
2229 return true;
2230 }
2231
2232 if (type == MVT::i64) { // Expected 64-bit int operand
2233 // We don't allow fp literals in 64-bit integer instructions. It is
2234 // unclear how we should encode them.
2235 return false;
2236 }
2237
2238 // We allow fp literals with f16x2 operands assuming that the specified
2239 // literal goes into the lower half and the upper half is zero. We also
2240 // require that the literal may be losslessly converted to f16.
2241 //
2242 // For i16x2 operands, we assume that the specified literal is encoded as a
2243 // single-precision float. This is pretty odd, but it matches SP3 and what
2244 // happens in hardware.
2245 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2246 : (type == MVT::v2i16) ? MVT::f32
2247 : (type == MVT::v2f32) ? MVT::f32
2248 : type;
2249
2250 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2251 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2252}
2253
2254bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2255 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2256}
2257
2258bool AMDGPUOperand::isVRegWithInputMods() const {
2259 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2260 // GFX90A allows DPP on 64-bit operands.
2261 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2262 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2263}
2264
2265template <bool IsFake16>
2266bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2267 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2268 : AMDGPU::VGPR_16_Lo128RegClassID);
2269}
2270
2271template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2272 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2273 : AMDGPU::VGPR_16RegClassID);
2274}
2275
2276bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2277 if (AsmParser->isVI())
2278 return isVReg32();
2279 if (AsmParser->isGFX9Plus())
2280 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2281 return false;
2282}
2283
2284bool AMDGPUOperand::isSDWAFP16Operand() const {
2285 return isSDWAOperand(MVT::f16);
2286}
2287
2288bool AMDGPUOperand::isSDWAFP32Operand() const {
2289 return isSDWAOperand(MVT::f32);
2290}
2291
2292bool AMDGPUOperand::isSDWAInt16Operand() const {
2293 return isSDWAOperand(MVT::i16);
2294}
2295
2296bool AMDGPUOperand::isSDWAInt32Operand() const {
2297 return isSDWAOperand(MVT::i32);
2298}
2299
2300bool AMDGPUOperand::isBoolReg() const {
2301 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2302 (AsmParser->isWave32() && isSCSrc_b32()));
2303}
2304
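// Applies the source FP modifiers directly to the literal's bit pattern:
// 'abs' clears the sign bit and 'neg' toggles it, for 16-, 32-, or 64-bit
// operands.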
2305uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2306{
2307 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2308 assert(Size == 2 || Size == 4 || Size == 8);
2309
2310 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2311
2312 if (Imm.Mods.Abs) {
2313 Val &= ~FpSignMask;
2314 }
2315 if (Imm.Mods.Neg) {
2316 Val ^= FpSignMask;
2317 }
2318
2319 return Val;
2320}
2321
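// Appends this operand to Inst as an immediate: expressions are forwarded
// unchanged, SI source operands go through the literal/inline-constant logic,
// and everything else is added as a plain immediate.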
2322void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2323 MCOpIdx = Inst.getNumOperands();
2324
2325 if (isExpr()) {
2326 Inst.addOperand(MCOperand::createExpr(Expr));
2327 return;
2328 }
2329
2330 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2331 Inst.getNumOperands())) {
2332 addLiteralImmOperand(Inst, Imm.Val,
2333 ApplyModifiers &
2334 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2335 } else {
2336 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2337 Inst.addOperand(MCOperand::createImm(Imm.Val));
2338 }
2339}
2340
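// Decides how a literal value is materialized for an SI source operand:
// inline constants are emitted directly, otherwise the value is converted to
// the operand's format and emitted as a 32- or 64-bit literal, honoring any
// explicit lit()/lit64() modifier.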
2341void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2342 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2343 auto OpNum = Inst.getNumOperands();
2344 // Check that this operand accepts literals
2345 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2346
2347 if (ApplyModifiers) {
2348 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2349 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2350 Val = applyInputFPModifiers(Val, Size);
2351 }
2352
2353 APInt Literal(64, Val);
2354 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2355
2356 bool CanUse64BitLiterals =
2357 AsmParser->has64BitLiterals() &&
2358 !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
2359 LitModifier Lit = getModifiers().Lit;
2360 MCContext &Ctx = AsmParser->getContext();
2361
2362 if (Imm.IsFPImm) { // We got fp literal token
2363 switch (OpTy) {
2364 case AMDGPU::OPERAND_REG_IMM_INT64:
2365 case AMDGPU::OPERAND_REG_IMM_FP64:
2366 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2367 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2368 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: {
2369 if (Lit == LitModifier::None &&
2370 AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2371 AsmParser->hasInv2PiInlineImm())) {
2372 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2373 return;
2374 }
2375
2376 // Non-inlineable
2377 if (AMDGPU::isSISrcFPOperand(InstDesc,
2378 OpNum)) { // Expected 64-bit fp operand
2379 bool HasMandatoryLiteral =
2380 AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
2381 // For fp operands we check if low 32 bits are zeros
2382 if (Literal.getLoBits(32) != 0 &&
2383 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2384 !HasMandatoryLiteral) {
2385 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
2386 Inst.getLoc(),
2387 "Can't encode literal as exact 64-bit floating-point operand. "
2388 "Low 32-bits will be set to zero");
2389 Val &= 0xffffffff00000000u;
2390 }
2391
2392 if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 ||
2393 OpTy == AMDGPU::OPERAND_REG_INLINE_C_FP64 ||
2394 OpTy == AMDGPU::OPERAND_REG_INLINE_AC_FP64)) {
2395 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2396 (isInt<32>(Val) || isUInt<32>(Val))) {
2397 // The floating-point operand will be verbalized as an
2398 // integer one. If that integer happens to fit 32 bits, on
2399 // re-assembling it will be interpreted as the high half of
2400 // the actual value, so we have to wrap it into lit64().
2401 Lit = LitModifier::Lit64;
2402 } else if (Lit == LitModifier::Lit) {
2403 // For FP64 operands lit() specifies the high half of the value.
2404 Val = Hi_32(Val);
2405 }
2406 }
2407 break;
2408 }
2409
2410 // We don't allow fp literals in 64-bit integer instructions. It is
2411 // unclear how we should encode them. This case should be checked earlier
2412 // in predicate methods (isLiteralImm())
2413 llvm_unreachable("fp literal in 64-bit integer instruction.");
2414
2416 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2417 (isInt<32>(Val) || isUInt<32>(Val)))
2418 Lit = LitModifier::Lit64;
2419 break;
2420
2425 if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
2426 Literal == 0x3fc45f306725feed) {
2427 // This is 1/(2*pi), which would be truncated to bf16 with a loss of
2428 // precision. The constant represents the idiomatic fp32 value of
2429 // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16 bits
2430 // cleared. Prevent the rounding below.
2431 Inst.addOperand(MCOperand::createImm(0x3e22));
2432 return;
2433 }
2434 [[fallthrough]];
2435
2457 bool lost;
2458 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2459 // Convert literal to single precision
2460 FPLiteral.convert(*getOpFltSemantics(OpTy),
2461 APFloat::rmNearestTiesToEven, &lost);
2462 // We allow precision lost but not overflow or underflow. This should be
2463 // checked earlier in isLiteralImm()
2464
2465 Val = FPLiteral.bitcastToAPInt().getZExtValue();
2466 break;
2467 }
2468 default:
2469 llvm_unreachable("invalid operand size");
2470 }
2471
2472 if (Lit != LitModifier::None) {
2473 Inst.addOperand(
2474 MCOperand::createExpr(AMDGPUMCExpr::createLit(Lit, Val, Ctx)));
2475 } else {
2476 Inst.addOperand(MCOperand::createImm(Val));
2477 }
2478 return;
2479 }
2480
2481 // We got int literal token.
2482 // Only sign extend inline immediates.
2483 switch (OpTy) {
2498 break;
2499
2500 case AMDGPU::OPERAND_REG_IMM_INT64:
2501 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2502 if (Lit == LitModifier::None &&
2503 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2504 Inst.addOperand(MCOperand::createImm(Val));
2505 return;
2506 }
2507
2508 // When the 32 MSBs are not zero (which effectively means the value cannot be
2509 // safely truncated to uint32_t), and either the target doesn't support 64-bit
2510 // literals or the lit modifier is explicitly used, the value is truncated to
2511 // its 32 LSBs.
2512 if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit)
2513 Val = Lo_32(Val);
2514 break;
2515
2516 case AMDGPU::OPERAND_REG_IMM_FP64:
2517 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2518 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2519 if (Lit == LitModifier::None &&
2520 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2521 Inst.addOperand(MCOperand::createImm(Val));
2522 return;
2523 }
2524
2525 // If the target doesn't support 64-bit literals, we need to use the
2526 // constant as the high 32 bits of a double-precision floating-point value.
2527 if (!AsmParser->has64BitLiterals()) {
2528 Val = static_cast<uint64_t>(Val) << 32;
2529 } else {
2530 // Even when the target does support 64-bit literals, there are two cases
2531 // where we still want to use src_literal encoding:
2532 // 1) explicitly forced by using lit modifier;
2533 // 2) the value is a valid 32-bit representation (signed or unsigned),
2534 // meanwhile not forced by lit64 modifier.
2535 if (Lit == LitModifier::Lit ||
2536 (Lit != LitModifier::Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
2537 Val = static_cast<uint64_t>(Val) << 32;
2538 }
2539
2540 // For FP64 operands lit() specifies the high half of the value.
2541 if (Lit == LitModifier::Lit)
2542 Val = Hi_32(Val);
2543 break;
2544
2556 break;
2557
2559 if ((isInt<32>(Val) || isUInt<32>(Val)) && Lit != LitModifier::Lit64)
2560 Val <<= 32;
2561 break;
2562
2563 default:
2564 llvm_unreachable("invalid operand type");
2565 }
2566
2567 if (Lit != LitModifier::None) {
2568 Inst.addOperand(
2569 MCOperand::createExpr(AMDGPUMCExpr::createLit(Lit, Val, Ctx)));
2570 } else {
2571 Inst.addOperand(MCOperand::createImm(Val));
2572 }
2573}
2574
2575void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2576 MCOpIdx = Inst.getNumOperands();
2577 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2578}
2579
2580bool AMDGPUOperand::isInlineValue() const {
2581 return isRegKind() && ::isInlineValue(getReg());
2582}
2583
2584//===----------------------------------------------------------------------===//
2585// AsmParser
2586//===----------------------------------------------------------------------===//
2587
2588void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2589 // TODO: make those pre-defined variables read-only.
2590 // Currently there is no suitable machinery in core llvm-mc for this.
2591 // MCSymbol::isRedefinable is intended for another purpose, and
2592 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
2593 MCContext &Ctx = getContext();
2594 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2595 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2596 }
2597
2598static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2599 if (Is == IS_VGPR) {
2600 switch (RegWidth) {
2601 default: return -1;
2602 case 32:
2603 return AMDGPU::VGPR_32RegClassID;
2604 case 64:
2605 return AMDGPU::VReg_64RegClassID;
2606 case 96:
2607 return AMDGPU::VReg_96RegClassID;
2608 case 128:
2609 return AMDGPU::VReg_128RegClassID;
2610 case 160:
2611 return AMDGPU::VReg_160RegClassID;
2612 case 192:
2613 return AMDGPU::VReg_192RegClassID;
2614 case 224:
2615 return AMDGPU::VReg_224RegClassID;
2616 case 256:
2617 return AMDGPU::VReg_256RegClassID;
2618 case 288:
2619 return AMDGPU::VReg_288RegClassID;
2620 case 320:
2621 return AMDGPU::VReg_320RegClassID;
2622 case 352:
2623 return AMDGPU::VReg_352RegClassID;
2624 case 384:
2625 return AMDGPU::VReg_384RegClassID;
2626 case 512:
2627 return AMDGPU::VReg_512RegClassID;
2628 case 1024:
2629 return AMDGPU::VReg_1024RegClassID;
2630 }
2631 } else if (Is == IS_TTMP) {
2632 switch (RegWidth) {
2633 default: return -1;
2634 case 32:
2635 return AMDGPU::TTMP_32RegClassID;
2636 case 64:
2637 return AMDGPU::TTMP_64RegClassID;
2638 case 128:
2639 return AMDGPU::TTMP_128RegClassID;
2640 case 256:
2641 return AMDGPU::TTMP_256RegClassID;
2642 case 512:
2643 return AMDGPU::TTMP_512RegClassID;
2644 }
2645 } else if (Is == IS_SGPR) {
2646 switch (RegWidth) {
2647 default: return -1;
2648 case 32:
2649 return AMDGPU::SGPR_32RegClassID;
2650 case 64:
2651 return AMDGPU::SGPR_64RegClassID;
2652 case 96:
2653 return AMDGPU::SGPR_96RegClassID;
2654 case 128:
2655 return AMDGPU::SGPR_128RegClassID;
2656 case 160:
2657 return AMDGPU::SGPR_160RegClassID;
2658 case 192:
2659 return AMDGPU::SGPR_192RegClassID;
2660 case 224:
2661 return AMDGPU::SGPR_224RegClassID;
2662 case 256:
2663 return AMDGPU::SGPR_256RegClassID;
2664 case 288:
2665 return AMDGPU::SGPR_288RegClassID;
2666 case 320:
2667 return AMDGPU::SGPR_320RegClassID;
2668 case 352:
2669 return AMDGPU::SGPR_352RegClassID;
2670 case 384:
2671 return AMDGPU::SGPR_384RegClassID;
2672 case 512:
2673 return AMDGPU::SGPR_512RegClassID;
2674 }
2675 } else if (Is == IS_AGPR) {
2676 switch (RegWidth) {
2677 default: return -1;
2678 case 32:
2679 return AMDGPU::AGPR_32RegClassID;
2680 case 64:
2681 return AMDGPU::AReg_64RegClassID;
2682 case 96:
2683 return AMDGPU::AReg_96RegClassID;
2684 case 128:
2685 return AMDGPU::AReg_128RegClassID;
2686 case 160:
2687 return AMDGPU::AReg_160RegClassID;
2688 case 192:
2689 return AMDGPU::AReg_192RegClassID;
2690 case 224:
2691 return AMDGPU::AReg_224RegClassID;
2692 case 256:
2693 return AMDGPU::AReg_256RegClassID;
2694 case 288:
2695 return AMDGPU::AReg_288RegClassID;
2696 case 320:
2697 return AMDGPU::AReg_320RegClassID;
2698 case 352:
2699 return AMDGPU::AReg_352RegClassID;
2700 case 384:
2701 return AMDGPU::AReg_384RegClassID;
2702 case 512:
2703 return AMDGPU::AReg_512RegClassID;
2704 case 1024:
2705 return AMDGPU::AReg_1024RegClassID;
2706 }
2707 }
2708 return -1;
2709}
2710
2711 static MCRegister getSpecialRegForName(StringRef RegName) {
2712 return StringSwitch<MCRegister>(RegName)
2713 .Case("exec", AMDGPU::EXEC)
2714 .Case("vcc", AMDGPU::VCC)
2715 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2716 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2717 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2718 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2719 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2720 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2721 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2722 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2723 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2724 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2725 .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2726 .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2727 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2728 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2729 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2730 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2731 .Case("m0", AMDGPU::M0)
2732 .Case("vccz", AMDGPU::SRC_VCCZ)
2733 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2734 .Case("execz", AMDGPU::SRC_EXECZ)
2735 .Case("src_execz", AMDGPU::SRC_EXECZ)
2736 .Case("scc", AMDGPU::SRC_SCC)
2737 .Case("src_scc", AMDGPU::SRC_SCC)
2738 .Case("tba", AMDGPU::TBA)
2739 .Case("tma", AMDGPU::TMA)
2740 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2741 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2742 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2743 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2744 .Case("vcc_lo", AMDGPU::VCC_LO)
2745 .Case("vcc_hi", AMDGPU::VCC_HI)
2746 .Case("exec_lo", AMDGPU::EXEC_LO)
2747 .Case("exec_hi", AMDGPU::EXEC_HI)
2748 .Case("tma_lo", AMDGPU::TMA_LO)
2749 .Case("tma_hi", AMDGPU::TMA_HI)
2750 .Case("tba_lo", AMDGPU::TBA_LO)
2751 .Case("tba_hi", AMDGPU::TBA_HI)
2752 .Case("pc", AMDGPU::PC_REG)
2753 .Case("null", AMDGPU::SGPR_NULL)
2754 .Default(AMDGPU::NoRegister);
2755}
2756
2757bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2758 SMLoc &EndLoc, bool RestoreOnFailure) {
2759 auto R = parseRegister();
2760 if (!R) return true;
2761 assert(R->isReg());
2762 RegNo = R->getReg();
2763 StartLoc = R->getStartLoc();
2764 EndLoc = R->getEndLoc();
2765 return false;
2766}
2767
2768bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2769 SMLoc &EndLoc) {
2770 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2771}
2772
2773ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2774 SMLoc &EndLoc) {
2775 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2776 bool PendingErrors = getParser().hasPendingError();
2777 getParser().clearPendingErrors();
2778 if (PendingErrors)
2779 return ParseStatus::Failure;
2780 if (Result)
2781 return ParseStatus::NoMatch;
2782 return ParseStatus::Success;
2783}
2784
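// Folds the next parsed register into a register list: special lo/hi pairs
// (e.g. exec_lo followed by exec_hi) are merged into their 64-bit counterpart,
// while regular registers must have consecutive indices and simply extend the
// list width by 32 bits.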
2785bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2786 RegisterKind RegKind,
2787 MCRegister Reg1, SMLoc Loc) {
2788 switch (RegKind) {
2789 case IS_SPECIAL:
2790 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2791 Reg = AMDGPU::EXEC;
2792 RegWidth = 64;
2793 return true;
2794 }
2795 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2796 Reg = AMDGPU::FLAT_SCR;
2797 RegWidth = 64;
2798 return true;
2799 }
2800 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2801 Reg = AMDGPU::XNACK_MASK;
2802 RegWidth = 64;
2803 return true;
2804 }
2805 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2806 Reg = AMDGPU::VCC;
2807 RegWidth = 64;
2808 return true;
2809 }
2810 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2811 Reg = AMDGPU::TBA;
2812 RegWidth = 64;
2813 return true;
2814 }
2815 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2816 Reg = AMDGPU::TMA;
2817 RegWidth = 64;
2818 return true;
2819 }
2820 Error(Loc, "register does not fit in the list");
2821 return false;
2822 case IS_VGPR:
2823 case IS_SGPR:
2824 case IS_AGPR:
2825 case IS_TTMP:
2826 if (Reg1 != Reg + RegWidth / 32) {
2827 Error(Loc, "registers in a list must have consecutive indices");
2828 return false;
2829 }
2830 RegWidth += 32;
2831 return true;
2832 default:
2833 llvm_unreachable("unexpected register kind");
2834 }
2835}
2836
2837struct RegInfo {
2838 StringLiteral Name;
2839 RegisterKind Kind;
2840};
2841
2842static constexpr RegInfo RegularRegisters[] = {
2843 {{"v"}, IS_VGPR},
2844 {{"s"}, IS_SGPR},
2845 {{"ttmp"}, IS_TTMP},
2846 {{"acc"}, IS_AGPR},
2847 {{"a"}, IS_AGPR},
2848};
2849
2850static bool isRegularReg(RegisterKind Kind) {
2851 return Kind == IS_VGPR ||
2852 Kind == IS_SGPR ||
2853 Kind == IS_TTMP ||
2854 Kind == IS_AGPR;
2855}
2856
2857 static const RegInfo *getRegularRegInfo(StringRef Str) {
2858 for (const RegInfo &Reg : RegularRegisters)
2859 if (Str.starts_with(Reg.Name))
2860 return &Reg;
2861 return nullptr;
2862}
2863
2864static bool getRegNum(StringRef Str, unsigned& Num) {
2865 return !Str.getAsInteger(10, Num);
2866}
2867
2868bool
2869AMDGPUAsmParser::isRegister(const AsmToken &Token,
2870 const AsmToken &NextToken) const {
2871
2872 // A list of consecutive registers: [s0,s1,s2,s3]
2873 if (Token.is(AsmToken::LBrac))
2874 return true;
2875
2876 if (!Token.is(AsmToken::Identifier))
2877 return false;
2878
2879 // A single register like s0 or a range of registers like s[0:1]
2880
2881 StringRef Str = Token.getString();
2882 const RegInfo *Reg = getRegularRegInfo(Str);
2883 if (Reg) {
2884 StringRef RegName = Reg->Name;
2885 StringRef RegSuffix = Str.substr(RegName.size());
2886 if (!RegSuffix.empty()) {
2887 RegSuffix.consume_back(".l");
2888 RegSuffix.consume_back(".h");
2889 unsigned Num;
2890 // A single register with an index: rXX
2891 if (getRegNum(RegSuffix, Num))
2892 return true;
2893 } else {
2894 // A range of registers: r[XX:YY].
2895 if (NextToken.is(AsmToken::LBrac))
2896 return true;
2897 }
2898 }
2899
2900 return getSpecialRegForName(Str).isValid();
2901}
2902
2903bool
2904AMDGPUAsmParser::isRegister()
2905{
2906 return isRegister(getToken(), peekToken());
2907}
2908
2909MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2910 unsigned SubReg, unsigned RegWidth,
2911 SMLoc Loc) {
2912 assert(isRegularReg(RegKind));
2913
2914 unsigned AlignSize = 1;
2915 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2916 // SGPR and TTMP registers must be aligned.
2917 // Max required alignment is 4 dwords.
2918 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2919 }
2920
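// For example, with this alignment rule s[2:5] (RegWidth == 128, AlignSize == 4)
// is rejected below as misaligned, while s[4:7] is accepted.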
2921 if (RegNum % AlignSize != 0) {
2922 Error(Loc, "invalid register alignment");
2923 return MCRegister();
2924 }
2925
2926 unsigned RegIdx = RegNum / AlignSize;
2927 int RCID = getRegClass(RegKind, RegWidth);
2928 if (RCID == -1) {
2929 Error(Loc, "invalid or unsupported register size");
2930 return MCRegister();
2931 }
2932
2933 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2934 const MCRegisterClass RC = TRI->getRegClass(RCID);
2935 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
2936 Error(Loc, "register index is out of range");
2937 return AMDGPU::NoRegister;
2938 }
2939
2940 if (RegKind == IS_VGPR && !isGFX1250Plus() && RegIdx + RegWidth / 32 > 256) {
2941 Error(Loc, "register index is out of range");
2942 return MCRegister();
2943 }
2944
2945 MCRegister Reg = RC.getRegister(RegIdx);
2946
2947 if (SubReg) {
2948 Reg = TRI->getSubReg(Reg, SubReg);
2949
2950 // Currently all regular registers have their .l and .h subregisters, so
2951 // we should never need to generate an error here.
2952 assert(Reg && "Invalid subregister!");
2953 }
2954
2955 return Reg;
2956}
2957
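// Parses a register index range of the form "[Lo]" or "[Lo:Hi]". When the range
// denotes a single register, an optional trailing ".l" or ".h" selects its low
// or high 16-bit half.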
2958bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
2959 unsigned &SubReg) {
2960 int64_t RegLo, RegHi;
2961 if (!skipToken(AsmToken::LBrac, "missing register index"))
2962 return false;
2963
2964 SMLoc FirstIdxLoc = getLoc();
2965 SMLoc SecondIdxLoc;
2966
2967 if (!parseExpr(RegLo))
2968 return false;
2969
2970 if (trySkipToken(AsmToken::Colon)) {
2971 SecondIdxLoc = getLoc();
2972 if (!parseExpr(RegHi))
2973 return false;
2974 } else {
2975 RegHi = RegLo;
2976 }
2977
2978 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2979 return false;
2980
2981 if (!isUInt<32>(RegLo)) {
2982 Error(FirstIdxLoc, "invalid register index");
2983 return false;
2984 }
2985
2986 if (!isUInt<32>(RegHi)) {
2987 Error(SecondIdxLoc, "invalid register index");
2988 return false;
2989 }
2990
2991 if (RegLo > RegHi) {
2992 Error(FirstIdxLoc, "first register index should not exceed second index");
2993 return false;
2994 }
2995
2996 if (RegHi == RegLo) {
2997 StringRef RegSuffix = getTokenStr();
2998 if (RegSuffix == ".l") {
2999 SubReg = AMDGPU::lo16;
3000 lex();
3001 } else if (RegSuffix == ".h") {
3002 SubReg = AMDGPU::hi16;
3003 lex();
3004 }
3005 }
3006
3007 Num = static_cast<unsigned>(RegLo);
3008 RegWidth = 32 * ((RegHi - RegLo) + 1);
3009
3010 return true;
3011}
3012
3013MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3014 unsigned &RegNum,
3015 unsigned &RegWidth,
3016 SmallVectorImpl<AsmToken> &Tokens) {
3017 assert(isToken(AsmToken::Identifier));
3018 MCRegister Reg = getSpecialRegForName(getTokenStr());
3019 if (Reg) {
3020 RegNum = 0;
3021 RegWidth = 32;
3022 RegKind = IS_SPECIAL;
3023 Tokens.push_back(getToken());
3024 lex(); // skip register name
3025 }
3026 return Reg;
3027}
3028
3029MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3030 unsigned &RegNum,
3031 unsigned &RegWidth,
3032 SmallVectorImpl<AsmToken> &Tokens) {
3033 assert(isToken(AsmToken::Identifier));
3034 StringRef RegName = getTokenStr();
3035 auto Loc = getLoc();
3036
3037 const RegInfo *RI = getRegularRegInfo(RegName);
3038 if (!RI) {
3039 Error(Loc, "invalid register name");
3040 return MCRegister();
3041 }
3042
3043 Tokens.push_back(getToken());
3044 lex(); // skip register name
3045
3046 RegKind = RI->Kind;
3047 StringRef RegSuffix = RegName.substr(RI->Name.size());
3048 unsigned SubReg = NoSubRegister;
3049 if (!RegSuffix.empty()) {
3050 if (RegSuffix.consume_back(".l"))
3051 SubReg = AMDGPU::lo16;
3052 else if (RegSuffix.consume_back(".h"))
3053 SubReg = AMDGPU::hi16;
3054
3055 // Single 32-bit register: vXX.
3056 if (!getRegNum(RegSuffix, RegNum)) {
3057 Error(Loc, "invalid register index");
3058 return MCRegister();
3059 }
3060 RegWidth = 32;
3061 } else {
3062 // Range of registers: v[XX:YY]. ":YY" is optional.
3063 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3064 return MCRegister();
3065 }
3066
3067 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3068}
3069
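// Parses a bracketed register list such as [s0,s1,s2,s3]. Every element must be
// a single 32-bit register of the same kind with consecutive indices; the result
// is the equivalent contiguous register (here s[0:3]).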
3070MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3071 unsigned &RegNum, unsigned &RegWidth,
3072 SmallVectorImpl<AsmToken> &Tokens) {
3073 MCRegister Reg;
3074 auto ListLoc = getLoc();
3075
3076 if (!skipToken(AsmToken::LBrac,
3077 "expected a register or a list of registers")) {
3078 return MCRegister();
3079 }
3080
3081 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3082
3083 auto Loc = getLoc();
3084 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3085 return MCRegister();
3086 if (RegWidth != 32) {
3087 Error(Loc, "expected a single 32-bit register");
3088 return MCRegister();
3089 }
3090
3091 for (; trySkipToken(AsmToken::Comma); ) {
3092 RegisterKind NextRegKind;
3093 MCRegister NextReg;
3094 unsigned NextRegNum, NextRegWidth;
3095 Loc = getLoc();
3096
3097 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3098 NextRegNum, NextRegWidth,
3099 Tokens)) {
3100 return MCRegister();
3101 }
3102 if (NextRegWidth != 32) {
3103 Error(Loc, "expected a single 32-bit register");
3104 return MCRegister();
3105 }
3106 if (NextRegKind != RegKind) {
3107 Error(Loc, "registers in a list must be of the same kind");
3108 return MCRegister();
3109 }
3110 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3111 return MCRegister();
3112 }
3113
3114 if (!skipToken(AsmToken::RBrac,
3115 "expected a comma or a closing square bracket")) {
3116 return MCRegister();
3117 }
3118
3119 if (isRegularReg(RegKind))
3120 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3121
3122 return Reg;
3123}
3124
3125bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3126 MCRegister &Reg, unsigned &RegNum,
3127 unsigned &RegWidth,
3128 SmallVectorImpl<AsmToken> &Tokens) {
3129 auto Loc = getLoc();
3130 Reg = MCRegister();
3131
3132 if (isToken(AsmToken::Identifier)) {
3133 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3134 if (!Reg)
3135 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3136 } else {
3137 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3138 }
3139
3140 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3141 if (!Reg) {
3142 assert(Parser.hasPendingError());
3143 return false;
3144 }
3145
3146 if (!subtargetHasRegister(*TRI, Reg)) {
3147 if (Reg == AMDGPU::SGPR_NULL) {
3148 Error(Loc, "'null' operand is not supported on this GPU");
3149 } else {
3151 " register not available on this GPU");
3152 }
3153 return false;
3154 }
3155
3156 return true;
3157}
3158
3159bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3160 MCRegister &Reg, unsigned &RegNum,
3161 unsigned &RegWidth,
3162 bool RestoreOnFailure /*=false*/) {
3163 Reg = MCRegister();
3164
3165 SmallVector<AsmToken, 1> Tokens;
3166 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3167 if (RestoreOnFailure) {
3168 while (!Tokens.empty()) {
3169 getLexer().UnLex(Tokens.pop_back_val());
3170 }
3171 }
3172 return true;
3173 }
3174 return false;
3175}
3176
3177std::optional<StringRef>
3178AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3179 switch (RegKind) {
3180 case IS_VGPR:
3181 return StringRef(".amdgcn.next_free_vgpr");
3182 case IS_SGPR:
3183 return StringRef(".amdgcn.next_free_sgpr");
3184 default:
3185 return std::nullopt;
3186 }
3187}
3188
3189void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3190 auto SymbolName = getGprCountSymbolName(RegKind);
3191 assert(SymbolName && "initializing invalid register kind");
3192 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3193 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3194 Sym->setRedefinable(true);
3195}
3196
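// Raises the .amdgcn.next_free_{v,s}gpr symbol to one past the highest register
// index referenced so far, so later directives can derive kernel register counts.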
3197bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3198 unsigned DwordRegIndex,
3199 unsigned RegWidth) {
3200 // Symbols are only defined for GCN targets
3201 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3202 return true;
3203
3204 auto SymbolName = getGprCountSymbolName(RegKind);
3205 if (!SymbolName)
3206 return true;
3207 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3208
3209 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3210 int64_t OldCount;
3211
3212 if (!Sym->isVariable())
3213 return !Error(getLoc(),
3214 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3215 if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
3216 return !Error(
3217 getLoc(),
3218 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3219
3220 if (OldCount <= NewMax)
3221 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3222
3223 return true;
3224}
3225
3226std::unique_ptr<AMDGPUOperand>
3227AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3228 const auto &Tok = getToken();
3229 SMLoc StartLoc = Tok.getLoc();
3230 SMLoc EndLoc = Tok.getEndLoc();
3231 RegisterKind RegKind;
3232 MCRegister Reg;
3233 unsigned RegNum, RegWidth;
3234
3235 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3236 return nullptr;
3237 }
3238 if (isHsaAbi(getSTI())) {
3239 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3240 return nullptr;
3241 } else
3242 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3243 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3244}
3245
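// Parses an immediate operand. The value may be wrapped in lit(...) or
// lit64(...) to force a particular literal encoding, and may be a floating-point
// literal (with an optional leading '-') or an integer expression.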
3246ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3247 bool HasSP3AbsModifier, LitModifier Lit) {
3248 // TODO: add syntactic sugar for 1/(2*PI)
3249
3250 if (isRegister() || isModifier())
3251 return ParseStatus::NoMatch;
3252
3253 if (Lit == LitModifier::None) {
3254 if (trySkipId("lit"))
3255 Lit = LitModifier::Lit;
3256 else if (trySkipId("lit64"))
3257 Lit = LitModifier::Lit64;
3258
3259 if (Lit != LitModifier::None) {
3260 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3261 return ParseStatus::Failure;
3262 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
3263 if (S.isSuccess() &&
3264 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3265 return ParseStatus::Failure;
3266 return S;
3267 }
3268 }
3269
3270 const auto& Tok = getToken();
3271 const auto& NextTok = peekToken();
3272 bool IsReal = Tok.is(AsmToken::Real);
3273 SMLoc S = getLoc();
3274 bool Negate = false;
3275
3276 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3277 lex();
3278 IsReal = true;
3279 Negate = true;
3280 }
3281
3282 AMDGPUOperand::Modifiers Mods;
3283 Mods.Lit = Lit;
3284
3285 if (IsReal) {
3286 // Floating-point expressions are not supported.
3287 // Can only allow floating-point literals with an
3288 // optional sign.
3289
3290 StringRef Num = getTokenStr();
3291 lex();
3292
3293 APFloat RealVal(APFloat::IEEEdouble());
3294 auto roundMode = APFloat::rmNearestTiesToEven;
3295 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3296 return ParseStatus::Failure;
3297 if (Negate)
3298 RealVal.changeSign();
3299
3300 Operands.push_back(
3301 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3302 AMDGPUOperand::ImmTyNone, true));
3303 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3304 Op.setModifiers(Mods);
3305
3306 return ParseStatus::Success;
3307
3308 } else {
3309 int64_t IntVal;
3310 const MCExpr *Expr;
3311 SMLoc S = getLoc();
3312
3313 if (HasSP3AbsModifier) {
3314 // This is a workaround for handling expressions
3315 // as arguments of SP3 'abs' modifier, for example:
3316 // |1.0|
3317 // |-1|
3318 // |1+x|
3319 // This syntax is not compatible with syntax of standard
3320 // MC expressions (due to the trailing '|').
3321 SMLoc EndLoc;
3322 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3323 return ParseStatus::Failure;
3324 } else {
3325 if (Parser.parseExpression(Expr))
3326 return ParseStatus::Failure;
3327 }
3328
3329 if (Expr->evaluateAsAbsolute(IntVal)) {
3330 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3331 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3332 Op.setModifiers(Mods);
3333 } else {
3334 if (Lit != LitModifier::None)
3335 return ParseStatus::NoMatch;
3336 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3337 }
3338
3339 return ParseStatus::Success;
3340 }
3341
3342 return ParseStatus::NoMatch;
3343}
3344
3345ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3346 if (!isRegister())
3347 return ParseStatus::NoMatch;
3348
3349 if (auto R = parseRegister()) {
3350 assert(R->isReg());
3351 Operands.push_back(std::move(R));
3352 return ParseStatus::Success;
3353 }
3354 return ParseStatus::Failure;
3355}
3356
3357ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3358 bool HasSP3AbsMod, LitModifier Lit) {
3359 ParseStatus Res = parseReg(Operands);
3360 if (!Res.isNoMatch())
3361 return Res;
3362 if (isModifier())
3363 return ParseStatus::NoMatch;
3364 return parseImm(Operands, HasSP3AbsMod, Lit);
3365}
3366
3367bool
3368AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3369 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3370 const auto &str = Token.getString();
3371 return str == "abs" || str == "neg" || str == "sext";
3372 }
3373 return false;
3374}
3375
3376bool
3377AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3378 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3379}
3380
3381bool
3382AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3383 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3384}
3385
3386bool
3387AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3388 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3389}
3390
3391// Check if this is an operand modifier or an opcode modifier
3392// which may look like an expression but it is not. We should
3393// avoid parsing these modifiers as expressions. Currently
3394// recognized sequences are:
3395// |...|
3396// abs(...)
3397// neg(...)
3398// sext(...)
3399// -reg
3400// -|...|
3401// -abs(...)
3402// name:...
3403//
3404bool
3405AMDGPUAsmParser::isModifier() {
3406
3407 AsmToken Tok = getToken();
3408 AsmToken NextToken[2];
3409 peekTokens(NextToken);
3410
3411 return isOperandModifier(Tok, NextToken[0]) ||
3412 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3413 isOpcodeModifierWithVal(Tok, NextToken[0]);
3414}
3415
3416// Check if the current token is an SP3 'neg' modifier.
3417// Currently this modifier is allowed in the following context:
3418//
3419// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3420// 2. Before an 'abs' modifier: -abs(...)
3421// 3. Before an SP3 'abs' modifier: -|...|
3422//
3423// In all other cases "-" is handled as a part
3424// of an expression that follows the sign.
3425//
3426// Note: When "-" is followed by an integer literal,
3427// this is interpreted as integer negation rather
3428// than a floating-point NEG modifier applied to N.
3429 // Besides being counter-intuitive, such use of a floating-point
3430// NEG modifier would have resulted in different meaning
3431// of integer literals used with VOP1/2/C and VOP3,
3432// for example:
3433// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3434// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3435// Negative fp literals with preceding "-" are
3436// handled likewise for uniformity
3437//
3438bool
3439AMDGPUAsmParser::parseSP3NegModifier() {
3440
3441 AsmToken NextToken[2];
3442 peekTokens(NextToken);
3443
3444 if (isToken(AsmToken::Minus) &&
3445 (isRegister(NextToken[0], NextToken[1]) ||
3446 NextToken[0].is(AsmToken::Pipe) ||
3447 isId(NextToken[0], "abs"))) {
3448 lex();
3449 return true;
3450 }
3451
3452 return false;
3453}
3454
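// Parses a source operand with optional floating-point input modifiers, in both
// named and SP3 forms, e.g. "neg(abs(v1))", "-v0", "-|v0|", or a lit()/lit64()
// wrapped immediate.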
3455ParseStatus
3456AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3457 bool AllowImm) {
3458 bool Neg, SP3Neg;
3459 bool Abs, SP3Abs;
3460 SMLoc Loc;
3461
3462 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3463 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3464 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3465
3466 SP3Neg = parseSP3NegModifier();
3467
3468 Loc = getLoc();
3469 Neg = trySkipId("neg");
3470 if (Neg && SP3Neg)
3471 return Error(Loc, "expected register or immediate");
3472 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3473 return ParseStatus::Failure;
3474
3475 Abs = trySkipId("abs");
3476 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3477 return ParseStatus::Failure;
3478
3479 LitModifier Lit = LitModifier::None;
3480 if (trySkipId("lit")) {
3481 Lit = LitModifier::Lit;
3482 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3483 return ParseStatus::Failure;
3484 } else if (trySkipId("lit64")) {
3485 Lit = LitModifier::Lit64;
3486 if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
3487 return ParseStatus::Failure;
3488 if (!has64BitLiterals())
3489 return Error(Loc, "lit64 is not supported on this GPU");
3490 }
3491
3492 Loc = getLoc();
3493 SP3Abs = trySkipToken(AsmToken::Pipe);
3494 if (Abs && SP3Abs)
3495 return Error(Loc, "expected register or immediate");
3496
3497 ParseStatus Res;
3498 if (AllowImm) {
3499 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3500 } else {
3501 Res = parseReg(Operands);
3502 }
3503 if (!Res.isSuccess())
3504 return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
3505 ? ParseStatus::Failure
3506 : Res;
3507
3508 if (Lit != LitModifier::None && !Operands.back()->isImm())
3509 Error(Loc, "expected immediate with lit modifier");
3510
3511 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3512 return ParseStatus::Failure;
3513 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3514 return ParseStatus::Failure;
3515 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3516 return ParseStatus::Failure;
3517 if (Lit != LitModifier::None &&
3518 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3519 return ParseStatus::Failure;
3520
3521 AMDGPUOperand::Modifiers Mods;
3522 Mods.Abs = Abs || SP3Abs;
3523 Mods.Neg = Neg || SP3Neg;
3524 Mods.Lit = Lit;
3525
3526 if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
3527 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3528 if (Op.isExpr())
3529 return Error(Op.getStartLoc(), "expected an absolute expression");
3530 Op.setModifiers(Mods);
3531 }
3532 return ParseStatus::Success;
3533}
3534
3535ParseStatus
3536AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3537 bool AllowImm) {
3538 bool Sext = trySkipId("sext");
3539 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3540 return ParseStatus::Failure;
3541
3542 ParseStatus Res;
3543 if (AllowImm) {
3544 Res = parseRegOrImm(Operands);
3545 } else {
3546 Res = parseReg(Operands);
3547 }
3548 if (!Res.isSuccess())
3549 return Sext ? ParseStatus::Failure : Res;
3550
3551 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3552 return ParseStatus::Failure;
3553
3554 AMDGPUOperand::Modifiers Mods;
3555 Mods.Sext = Sext;
3556
3557 if (Mods.hasIntModifiers()) {
3558 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3559 if (Op.isExpr())
3560 return Error(Op.getStartLoc(), "expected an absolute expression");
3561 Op.setModifiers(Mods);
3562 }
3563
3564 return ParseStatus::Success;
3565}
3566
3567ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3568 return parseRegOrImmWithFPInputMods(Operands, false);
3569}
3570
3571ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3572 return parseRegOrImmWithIntInputMods(Operands, false);
3573}
3574
3575ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3576 auto Loc = getLoc();
3577 if (trySkipId("off")) {
3578 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3579 AMDGPUOperand::ImmTyOff, false));
3580 return ParseStatus::Success;
3581 }
3582
3583 if (!isRegister())
3584 return ParseStatus::NoMatch;
3585
3586 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3587 if (Reg) {
3588 Operands.push_back(std::move(Reg));
3589 return ParseStatus::Success;
3590 }
3591
3592 return ParseStatus::Failure;
3593}
3594
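// Rejects an otherwise matched instruction when it conflicts with a forced
// encoding suffix (_e32/_e64/_dpp/_sdwa), uses an unsupported SDWA dst_sel for
// v_mac, or should be retried with the alternative VOPD/VOPD3 encoding.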
3595unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3596 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3597
3598 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3599 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3600 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3601 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3602 return Match_InvalidOperand;
3603
3604 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3605 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3606 // v_mac_f32/16 allow only dst_sel == DWORD;
3607 auto OpNum =
3608 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3609 const auto &Op = Inst.getOperand(OpNum);
3610 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3611 return Match_InvalidOperand;
3612 }
3613 }
3614
3615 // Asm can first try to match VOPD or VOPD3. By failing early here with
3616 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3617 // Checking later during validateInstruction does not give a chance to retry
3618 // parsing as a different encoding.
3619 if (tryAnotherVOPDEncoding(Inst))
3620 return Match_InvalidOperand;
3621
3622 return Match_Success;
3623}
3624
3634
3635// What asm variants we should check
3636ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3637 if (isForcedDPP() && isForcedVOP3()) {
3638 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3639 return ArrayRef(Variants);
3640 }
3641 if (getForcedEncodingSize() == 32) {
3642 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3643 return ArrayRef(Variants);
3644 }
3645
3646 if (isForcedVOP3()) {
3647 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3648 return ArrayRef(Variants);
3649 }
3650
3651 if (isForcedSDWA()) {
3652 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3653 AMDGPUAsmVariants::SDWA9};
3654 return ArrayRef(Variants);
3655 }
3656
3657 if (isForcedDPP()) {
3658 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3659 return ArrayRef(Variants);
3660 }
3661
3662 return getAllVariants();
3663}
3664
3665StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3666 if (isForcedDPP() && isForcedVOP3())
3667 return "e64_dpp";
3668
3669 if (getForcedEncodingSize() == 32)
3670 return "e32";
3671
3672 if (isForcedVOP3())
3673 return "e64";
3674
3675 if (isForcedSDWA())
3676 return "sdwa";
3677
3678 if (isForcedDPP())
3679 return "dpp";
3680
3681 return "";
3682}
3683
3684MCRegister
3685AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3686 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3687 for (MCPhysReg Reg : Desc.implicit_uses()) {
3688 switch (Reg) {
3689 case AMDGPU::FLAT_SCR:
3690 case AMDGPU::VCC:
3691 case AMDGPU::VCC_LO:
3692 case AMDGPU::VCC_HI:
3693 case AMDGPU::M0:
3694 return Reg;
3695 default:
3696 break;
3697 }
3698 }
3699 return MCRegister();
3700}
3701
3702// NB: This code is correct only when used to check constant
3703// bus limitations because GFX7 supports no f16 inline constants.
3704// Note that there are no cases where a GFX7 opcode violates
3705// constant bus limitations due to the use of an f16 constant.
3706bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3707 unsigned OpIdx) const {
3708 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3709
3712 return false;
3713 }
3714
3715 const MCOperand &MO = Inst.getOperand(OpIdx);
3716
3717 int64_t Val = MO.isImm() ? MO.getImm() : getLitValue(MO.getExpr());
3718 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3719
3720 switch (OpSize) { // expected operand size
3721 case 8:
3722 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3723 case 4:
3724 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3725 case 2: {
3726 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3729 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3730
3734
3738
3741
3745
3748 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3749
3752 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3753
3755 return false;
3756
3757 llvm_unreachable("invalid operand type");
3758 }
3759 default:
3760 llvm_unreachable("invalid operand size");
3761 }
3762}
3763
3764unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3765 if (!isGFX10Plus())
3766 return 1;
3767
3768 switch (Opcode) {
3769 // 64-bit shift instructions can use only one scalar value input
3770 case AMDGPU::V_LSHLREV_B64_e64:
3771 case AMDGPU::V_LSHLREV_B64_gfx10:
3772 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3773 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3774 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3775 case AMDGPU::V_LSHRREV_B64_e64:
3776 case AMDGPU::V_LSHRREV_B64_gfx10:
3777 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3778 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3779 case AMDGPU::V_ASHRREV_I64_e64:
3780 case AMDGPU::V_ASHRREV_I64_gfx10:
3781 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3782 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3783 case AMDGPU::V_LSHL_B64_e64:
3784 case AMDGPU::V_LSHR_B64_e64:
3785 case AMDGPU::V_ASHR_I64_e64:
3786 return 1;
3787 default:
3788 return 2;
3789 }
3790}
3791
3792constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3793 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3794
3795// Get regular operand indices in the same order as specified
3796// in the instruction (but append mandatory literals to the end).
3797 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3798 bool AddMandatoryLiterals = false) {
3799
3800 int16_t ImmIdx =
3801 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3802
3803 if (isVOPD(Opcode)) {
3804 int16_t ImmXIdx =
3805 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3806
3807 return {getNamedOperandIdx(Opcode, OpName::src0X),
3808 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3809 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3810 getNamedOperandIdx(Opcode, OpName::src0Y),
3811 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3812 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3813 ImmXIdx,
3814 ImmIdx};
3815 }
3816
3817 return {getNamedOperandIdx(Opcode, OpName::src0),
3818 getNamedOperandIdx(Opcode, OpName::src1),
3819 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3820}
3821
3822bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3823 const MCOperand &MO = Inst.getOperand(OpIdx);
3824 if (MO.isImm())
3825 return !isInlineConstant(Inst, OpIdx);
3826 if (MO.isReg()) {
3827 auto Reg = MO.getReg();
3828 if (!Reg)
3829 return false;
3830 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3831 auto PReg = mc2PseudoReg(Reg);
3832 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3833 }
3834 return true;
3835}
3836
3837// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3838// Writelane is special in that it can use SGPR and M0 (which would normally
3839// count as using the constant bus twice - but in this case it is allowed since
3840// the lane selector doesn't count as a use of the constant bus). However, it is
3841// still required to abide by the 1 SGPR rule.
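// Illustrative example (assumed GFX9-style syntax): in
//   v_writelane_b32 v1, s2, m0
// both s2 and m0 are read, but only s2 counts against the constant bus, and
// the instruction may still use no more than one SGPR source.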
3842static bool checkWriteLane(const MCInst &Inst) {
3843 const unsigned Opcode = Inst.getOpcode();
3844 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3845 return false;
3846 const MCOperand &LaneSelOp = Inst.getOperand(2);
3847 if (!LaneSelOp.isReg())
3848 return false;
3849 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3850 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3851}
3852
3853bool AMDGPUAsmParser::validateConstantBusLimitations(
3854 const MCInst &Inst, const OperandVector &Operands) {
3855 const unsigned Opcode = Inst.getOpcode();
3856 const MCInstrDesc &Desc = MII.get(Opcode);
3857 MCRegister LastSGPR;
3858 unsigned ConstantBusUseCount = 0;
3859 unsigned NumLiterals = 0;
3860 unsigned LiteralSize;
3861
3862 if (!(Desc.TSFlags &
3865 !isVOPD(Opcode))
3866 return true;
3867
3868 if (checkWriteLane(Inst))
3869 return true;
3870
3871 // Check special imm operands (used by madmk, etc)
3872 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3873 ++NumLiterals;
3874 LiteralSize = 4;
3875 }
3876
3877 SmallDenseSet<MCRegister> SGPRsUsed;
3878 MCRegister SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3879 if (SGPRUsed) {
3880 SGPRsUsed.insert(SGPRUsed);
3881 ++ConstantBusUseCount;
3882 }
3883
3884 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3885
3886 unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
3887
3888 for (int OpIdx : OpIndices) {
3889 if (OpIdx == -1)
3890 continue;
3891
3892 const MCOperand &MO = Inst.getOperand(OpIdx);
3893 if (usesConstantBus(Inst, OpIdx)) {
3894 if (MO.isReg()) {
3895 LastSGPR = mc2PseudoReg(MO.getReg());
3896 // Pairs of registers with a partial intersection like these:
3897 // s0, s[0:1]
3898 // flat_scratch_lo, flat_scratch
3899 // flat_scratch_lo, flat_scratch_hi
3900 // are theoretically valid but they are disabled anyway.
3901 // Note that this code mimics SIInstrInfo::verifyInstruction
3902 if (SGPRsUsed.insert(LastSGPR).second) {
3903 ++ConstantBusUseCount;
3904 }
3905 } else { // Expression or a literal
3906
3907 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3908 continue; // special operand like VINTERP attr_chan
3909
3910 // An instruction may use only one literal.
3911 // This has been validated in a previous step.
3912 // See validateVOPLiteral.
3913 // This literal may be used by more than one operand.
3914 // If all these operands are of the same size,
3915 // the literal counts as one scalar value.
3916 // Otherwise it counts as 2 scalar values.
3917 // See "GFX10 Shader Programming", section 3.6.2.3.
3918
3919 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3920 if (Size < 4)
3921 Size = 4;
3922
3923 if (NumLiterals == 0) {
3924 NumLiterals = 1;
3925 LiteralSize = Size;
3926 } else if (LiteralSize != Size) {
3927 NumLiterals = 2;
3928 }
3929 }
3930 }
3931
3932 if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
3933 Error(getOperandLoc(Operands, OpIdx),
3934 "invalid operand (violates constant bus restrictions)");
3935 return false;
3936 }
3937 }
3938 return true;
3939}
3940
3941std::optional<unsigned>
3942AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
3943
3944 const unsigned Opcode = Inst.getOpcode();
3945 if (!isVOPD(Opcode))
3946 return {};
3947
3948 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3949
3950 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3951 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3952 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3953 ? Opr.getReg()
3954 : MCRegister();
3955 };
3956
3957 // On GFX12+, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2
3958 // source-cache.
3959 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
3960 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
3961 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
3962 bool AllowSameVGPR = isGFX1250Plus();
3963
3964 if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
3965 for (auto OpName : {OpName::src0X, OpName::src0Y}) {
3966 int I = getNamedOperandIdx(Opcode, OpName);
3967 const MCOperand &Op = Inst.getOperand(I);
3968 if (!Op.isImm())
3969 continue;
3970 int64_t Imm = Op.getImm();
3971 if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
3972 !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
3973 return (unsigned)I;
3974 }
3975
3976 for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
3977 OpName::vsrc2Y, OpName::imm}) {
3978 int I = getNamedOperandIdx(Opcode, OpName);
3979 if (I == -1)
3980 continue;
3981 const MCOperand &Op = Inst.getOperand(I);
3982 if (Op.isImm())
3983 return (unsigned)I;
3984 }
3985 }
3986
3987 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3988 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
3989 getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
3990
3991 return InvalidCompOprIdx;
3992}
3993
3994bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
3995 const OperandVector &Operands) {
3996
3997 unsigned Opcode = Inst.getOpcode();
3998 bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
3999
4000 if (AsVOPD3) {
4001 for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
4002 AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
4003 if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
4004 (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
4005 Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
4006 }
4007 }
4008
4009 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
4010 if (!InvalidCompOprIdx.has_value())
4011 return true;
4012
4013 auto CompOprIdx = *InvalidCompOprIdx;
4014 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4015 auto ParsedIdx =
4016 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
4017 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
4018 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
4019
4020 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4021 if (CompOprIdx == VOPD::Component::DST) {
4022 if (AsVOPD3)
4023 Error(Loc, "dst registers must be distinct");
4024 else
4025 Error(Loc, "one dst register must be even and the other odd");
4026 } else {
4027 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4028 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
4029 " operands must use different VGPR banks");
4030 }
4031
4032 return false;
4033}
4034
4035 // \returns true if \p Inst does not satisfy VOPD constraints, but can
4036 // potentially be used as VOPD3 with the same operands.
4037bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4038 // First check if it fits VOPD
4039 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
4040 if (!InvalidCompOprIdx.has_value())
4041 return false;
4042
4043 // Then if it fits VOPD3
4044 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
4045 if (InvalidCompOprIdx.has_value()) {
4046 // If the failing operand is the dst, it is better to report the error
4047 // against the VOPD3 instruction, as it has more capabilities and the
4048 // error message will be more informative. If the dst is not legal for
4049 // VOPD3, then it is not legal for VOPD either.
4050 if (*InvalidCompOprIdx == VOPD::Component::DST)
4051 return true;
4052
4053 // Otherwise prefer VOPD as we may find ourselves in an awkward situation
4054 // with a conflict in the tied implicit src2 of fmac and no asm operand
4055 // to point to.
4056 return false;
4057 }
4058 return true;
4059}
4060
4061 // \returns true if a VOPD3 instruction can also be represented as a shorter
4062// VOPD encoding.
4063bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
4064 const unsigned Opcode = Inst.getOpcode();
4065 const auto &II = getVOPDInstInfo(Opcode, &MII);
4066 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
4067 if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
4068 !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
4069 return false;
4070
4071 // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
4072 // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
4073 // be parsed as VOPD which does not accept src2.
4074 if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4075 II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4076 return false;
4077
4078 // If any modifiers are set this cannot be VOPD.
4079 for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4080 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4081 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4082 int I = getNamedOperandIdx(Opcode, OpName);
4083 if (I == -1)
4084 continue;
4085 if (Inst.getOperand(I).getImm())
4086 return false;
4087 }
4088
4089 return !tryVOPD3(Inst);
4090}
4091
4092 // VOPD3 has more relaxed register constraints than VOPD. We prefer the
4093 // shorter VOPD form but switch to VOPD3 otherwise.
4094bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4095 const unsigned Opcode = Inst.getOpcode();
4096 if (!isGFX1250Plus() || !isVOPD(Opcode))
4097 return false;
4098
4099 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4100 return tryVOPD(Inst);
4101 return tryVOPD3(Inst);
4102}
4103
4104bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4105
4106 const unsigned Opc = Inst.getOpcode();
4107 const MCInstrDesc &Desc = MII.get(Opc);
4108
4109 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4110 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4111 assert(ClampIdx != -1);
4112 return Inst.getOperand(ClampIdx).getImm() == 0;
4113 }
4114
4115 return true;
4116}
4117
4120
4121bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) {
4122
4123 const unsigned Opc = Inst.getOpcode();
4124 const MCInstrDesc &Desc = MII.get(Opc);
4125
4126 if ((Desc.TSFlags & MIMGFlags) == 0)
4127 return true;
4128
4129 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
4130 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4131 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
4132
4133 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
4134 return true;
4135
4136 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
4137 return true;
4138
4139 unsigned VDataSize = getRegOperandSize(Desc, VDataIdx);
4140 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
4141 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4142 if (DMask == 0)
4143 DMask = 1;
4144
4145 bool IsPackedD16 = false;
4146 unsigned DataSize =
4147 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
4148 if (hasPackedD16()) {
4149 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4150 IsPackedD16 = D16Idx >= 0;
4151 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
4152 DataSize = (DataSize + 1) / 2;
4153 }
4154
4155 if ((VDataSize / 4) == DataSize + TFESize)
4156 return true;
4157
4158 StringRef Modifiers;
4159 if (isGFX90A())
4160 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
4161 else
4162 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
4163
4164 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
4165 return false;
4166}
4167
4168bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
4169 const unsigned Opc = Inst.getOpcode();
4170 const MCInstrDesc &Desc = MII.get(Opc);
4171
4172 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
4173 return true;
4174
4175 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4176
4177 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4178 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4179 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
4180 AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
4181 ? AMDGPU::OpName::srsrc
4182 : AMDGPU::OpName::rsrc;
4183 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
4184 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4185 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
4186
4187 assert(VAddr0Idx != -1);
4188 assert(SrsrcIdx != -1);
4189 assert(SrsrcIdx > VAddr0Idx);
4190
4191 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
4192 if (BaseOpcode->BVH) {
4193 if (IsA16 == BaseOpcode->A16)
4194 return true;
4195 Error(IDLoc, "image address size does not match a16");
4196 return false;
4197 }
4198
4199 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4200 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4201 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4202 unsigned ActualAddrSize =
4203 IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(Desc, VAddr0Idx) / 4;
4204
4205 unsigned ExpectedAddrSize =
4206 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
4207
4208 if (IsNSA) {
4209 if (hasPartialNSAEncoding() &&
4210 ExpectedAddrSize >
4212 int VAddrLastIdx = SrsrcIdx - 1;
4213 unsigned VAddrLastSize = getRegOperandSize(Desc, VAddrLastIdx) / 4;
4214
4215 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4216 }
4217 } else {
4218 if (ExpectedAddrSize > 12)
4219 ExpectedAddrSize = 16;
4220
4221 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
4222 // This provides backward compatibility for assembly created
4223 // before 160b/192b/224b types were directly supported.
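// For example (illustrative): an opcode that needs 7 address VGPRs may still
// be written with a v[0:7] tuple, since older assembly had no 224-bit
// register type to express exactly 7 registers.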
4224 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4225 return true;
4226 }
4227
4228 if (ActualAddrSize == ExpectedAddrSize)
4229 return true;
4230
4231 Error(IDLoc, "image address size does not match dim and a16");
4232 return false;
4233}
4234
4235bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4236
4237 const unsigned Opc = Inst.getOpcode();
4238 const MCInstrDesc &Desc = MII.get(Opc);
4239
4240 if ((Desc.TSFlags & MIMGFlags) == 0)
4241 return true;
4242 if (!Desc.mayLoad() || !Desc.mayStore())
4243 return true; // Not atomic
4244
4245 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4246 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4247
4248 // This is an incomplete check because image_atomic_cmpswap
4249 // may only use 0x3 and 0xf while other atomic operations
4250 // may use 0x1 and 0x3. However these limitations are
4251 // verified when we check that dmask matches dst size.
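// For example (illustrative): a 32-bit image_atomic_cmpswap carries both the
// compare and swap values, hence dmask 0x3, while a 32-bit image_atomic_add
// carries a single value, hence dmask 0x1.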
4252 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4253}
4254
4255bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4256
4257 const unsigned Opc = Inst.getOpcode();
4258 const MCInstrDesc &Desc = MII.get(Opc);
4259
4260 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4261 return true;
4262
4263 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4264 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4265
4266 // GATHER4 instructions use dmask in a different fashion compared to
4267 // other MIMG instructions. The only useful DMASK values are
4268 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4269 // (red,red,red,red) etc.) The ISA document doesn't mention
4270 // this.
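// For example (illustrative): dmask:0x4 selects the blue channel, so the
// result contains the blue value from each of the four sampled texels.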
4271 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4272}
4273
4274bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4275 const OperandVector &Operands) {
4276 if (!isGFX10Plus())
4277 return true;
4278
4279 const unsigned Opc = Inst.getOpcode();
4280 const MCInstrDesc &Desc = MII.get(Opc);
4281
4282 if ((Desc.TSFlags & MIMGFlags) == 0)
4283 return true;
4284
4285 // image_bvh_intersect_ray instructions do not have dim
4287 return true;
4288
4289 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4290 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4291 if (Op.isDim())
4292 return true;
4293 }
4294 return false;
4295}
4296
4297bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4298 const unsigned Opc = Inst.getOpcode();
4299 const MCInstrDesc &Desc = MII.get(Opc);
4300
4301 if ((Desc.TSFlags & MIMGFlags) == 0)
4302 return true;
4303
4304 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4305 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4306 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4307
4308 if (!BaseOpcode->MSAA)
4309 return true;
4310
4311 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4312 assert(DimIdx != -1);
4313
4314 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4315 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4316
4317 return DimInfo->MSAA;
4318}
4319
4320static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4321{
4322 switch (Opcode) {
4323 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4324 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4325 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4326 return true;
4327 default:
4328 return false;
4329 }
4330}
4331
4332// movrels* opcodes should only allow VGPRS as src0.
4333// This is specified in .td description for vop1/vop3,
4334// but sdwa is handled differently. See isSDWAOperand.
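// Illustrative example (assumed GFX10 SDWA syntax): src0 of
//   v_movrels_b32_sdwa v0, v1
// must be a VGPR; an SGPR such as s0 in that position is rejected below.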
4335bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4336 const OperandVector &Operands) {
4337
4338 const unsigned Opc = Inst.getOpcode();
4339 const MCInstrDesc &Desc = MII.get(Opc);
4340
4341 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4342 return true;
4343
4344 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4345 assert(Src0Idx != -1);
4346
4347 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4348 if (Src0.isReg()) {
4349 auto Reg = mc2PseudoReg(Src0.getReg());
4350 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4351 if (!isSGPR(Reg, TRI))
4352 return true;
4353 }
4354
4355 Error(getOperandLoc(Operands, Src0Idx), "source operand must be a VGPR");
4356 return false;
4357}
4358
4359bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4360 const OperandVector &Operands) {
4361
4362 const unsigned Opc = Inst.getOpcode();
4363
4364 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4365 return true;
4366
4367 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4368 assert(Src0Idx != -1);
4369
4370 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4371 if (!Src0.isReg())
4372 return true;
4373
4374 auto Reg = mc2PseudoReg(Src0.getReg());
4375 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4376 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4377 Error(getOperandLoc(Operands, Src0Idx),
4378 "source operand must be either a VGPR or an inline constant");
4379 return false;
4380 }
4381
4382 return true;
4383}
4384
4385bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4386 const OperandVector &Operands) {
4387 unsigned Opcode = Inst.getOpcode();
4388 const MCInstrDesc &Desc = MII.get(Opcode);
4389
4390 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4391 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4392 return true;
4393
4394 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4395 if (Src2Idx == -1)
4396 return true;
4397
4398 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4399 Error(getOperandLoc(Operands, Src2Idx),
4400 "inline constants are not allowed for this operand");
4401 return false;
4402 }
4403
4404 return true;
4405}
4406
4407bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4408 const OperandVector &Operands) {
4409 const unsigned Opc = Inst.getOpcode();
4410 const MCInstrDesc &Desc = MII.get(Opc);
4411
4412 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4413 return true;
4414
4415 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4416 if (BlgpIdx != -1) {
4417 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
4418 int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
4419
4420 unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
4421 unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();
4422
4423 // Validate the correct register size was used for the floating point
4424 // format operands
4425
4426 bool Success = true;
4427 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
4428 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4429 Error(getOperandLoc(Operands, Src0Idx),
4430 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4431 Success = false;
4432 }
4433
4434 if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
4435 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4436 Error(getOperandLoc(Operands, Src1Idx),
4437 "wrong register tuple size for blgp value " + Twine(BLGP));
4438 Success = false;
4439 }
4440
4441 return Success;
4442 }
4443 }
4444
4445 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4446 if (Src2Idx == -1)
4447 return true;
4448
4449 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4450 if (!Src2.isReg())
4451 return true;
4452
4453 MCRegister Src2Reg = Src2.getReg();
4454 MCRegister DstReg = Inst.getOperand(0).getReg();
4455 if (Src2Reg == DstReg)
4456 return true;
4457
4458 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4459 if (TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[0], HwMode))
4460 .getSizeInBits() <= 128)
4461 return true;
4462
4463 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4464 Error(getOperandLoc(Operands, Src2Idx),
4465 "source 2 operand must not partially overlap with dst");
4466 return false;
4467 }
4468
4469 return true;
4470}
4471
4472bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4473 switch (Inst.getOpcode()) {
4474 default:
4475 return true;
4476 case V_DIV_SCALE_F32_gfx6_gfx7:
4477 case V_DIV_SCALE_F32_vi:
4478 case V_DIV_SCALE_F32_gfx10:
4479 case V_DIV_SCALE_F64_gfx6_gfx7:
4480 case V_DIV_SCALE_F64_vi:
4481 case V_DIV_SCALE_F64_gfx10:
4482 break;
4483 }
4484
4485 // TODO: Check that src0 = src1 or src2.
4486
4487 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4488 AMDGPU::OpName::src2_modifiers,
4489 AMDGPU::OpName::src2_modifiers}) {
4490 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4491 .getImm() &
4492 SISrcMods::ABS) {
4493 return false;
4494 }
4495 }
4496
4497 return true;
4498}
4499
4500bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4501
4502 const unsigned Opc = Inst.getOpcode();
4503 const MCInstrDesc &Desc = MII.get(Opc);
4504
4505 if ((Desc.TSFlags & MIMGFlags) == 0)
4506 return true;
4507
4508 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4509 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4510 if (isCI() || isSI())
4511 return false;
4512 }
4513
4514 return true;
4515}
4516
4517bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4518 const unsigned Opc = Inst.getOpcode();
4519 const MCInstrDesc &Desc = MII.get(Opc);
4520
4521 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4522 return true;
4523
4524 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4525
4526 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4527}
4528
4529static bool IsRevOpcode(const unsigned Opcode)
4530{
4531 switch (Opcode) {
4532 case AMDGPU::V_SUBREV_F32_e32:
4533 case AMDGPU::V_SUBREV_F32_e64:
4534 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4535 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4536 case AMDGPU::V_SUBREV_F32_e32_vi:
4537 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4538 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4539 case AMDGPU::V_SUBREV_F32_e64_vi:
4540
4541 case AMDGPU::V_SUBREV_CO_U32_e32:
4542 case AMDGPU::V_SUBREV_CO_U32_e64:
4543 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4544 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4545
4546 case AMDGPU::V_SUBBREV_U32_e32:
4547 case AMDGPU::V_SUBBREV_U32_e64:
4548 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4549 case AMDGPU::V_SUBBREV_U32_e32_vi:
4550 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4551 case AMDGPU::V_SUBBREV_U32_e64_vi:
4552
4553 case AMDGPU::V_SUBREV_U32_e32:
4554 case AMDGPU::V_SUBREV_U32_e64:
4555 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4556 case AMDGPU::V_SUBREV_U32_e32_vi:
4557 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4558 case AMDGPU::V_SUBREV_U32_e64_vi:
4559
4560 case AMDGPU::V_SUBREV_F16_e32:
4561 case AMDGPU::V_SUBREV_F16_e64:
4562 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4563 case AMDGPU::V_SUBREV_F16_e32_vi:
4564 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4565 case AMDGPU::V_SUBREV_F16_e64_vi:
4566
4567 case AMDGPU::V_SUBREV_U16_e32:
4568 case AMDGPU::V_SUBREV_U16_e64:
4569 case AMDGPU::V_SUBREV_U16_e32_vi:
4570 case AMDGPU::V_SUBREV_U16_e64_vi:
4571
4572 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4573 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4574 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4575
4576 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4577 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4578
4579 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4580 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4581
4582 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4583 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4584
4585 case AMDGPU::V_LSHRREV_B32_e32:
4586 case AMDGPU::V_LSHRREV_B32_e64:
4587 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4588 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4589 case AMDGPU::V_LSHRREV_B32_e32_vi:
4590 case AMDGPU::V_LSHRREV_B32_e64_vi:
4591 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4592 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4593
4594 case AMDGPU::V_ASHRREV_I32_e32:
4595 case AMDGPU::V_ASHRREV_I32_e64:
4596 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4597 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4598 case AMDGPU::V_ASHRREV_I32_e32_vi:
4599 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4600 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4601 case AMDGPU::V_ASHRREV_I32_e64_vi:
4602
4603 case AMDGPU::V_LSHLREV_B32_e32:
4604 case AMDGPU::V_LSHLREV_B32_e64:
4605 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4606 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4607 case AMDGPU::V_LSHLREV_B32_e32_vi:
4608 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4609 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4610 case AMDGPU::V_LSHLREV_B32_e64_vi:
4611
4612 case AMDGPU::V_LSHLREV_B16_e32:
4613 case AMDGPU::V_LSHLREV_B16_e64:
4614 case AMDGPU::V_LSHLREV_B16_e32_vi:
4615 case AMDGPU::V_LSHLREV_B16_e64_vi:
4616 case AMDGPU::V_LSHLREV_B16_gfx10:
4617
4618 case AMDGPU::V_LSHRREV_B16_e32:
4619 case AMDGPU::V_LSHRREV_B16_e64:
4620 case AMDGPU::V_LSHRREV_B16_e32_vi:
4621 case AMDGPU::V_LSHRREV_B16_e64_vi:
4622 case AMDGPU::V_LSHRREV_B16_gfx10:
4623
4624 case AMDGPU::V_ASHRREV_I16_e32:
4625 case AMDGPU::V_ASHRREV_I16_e64:
4626 case AMDGPU::V_ASHRREV_I16_e32_vi:
4627 case AMDGPU::V_ASHRREV_I16_e64_vi:
4628 case AMDGPU::V_ASHRREV_I16_gfx10:
4629
4630 case AMDGPU::V_LSHLREV_B64_e64:
4631 case AMDGPU::V_LSHLREV_B64_gfx10:
4632 case AMDGPU::V_LSHLREV_B64_vi:
4633
4634 case AMDGPU::V_LSHRREV_B64_e64:
4635 case AMDGPU::V_LSHRREV_B64_gfx10:
4636 case AMDGPU::V_LSHRREV_B64_vi:
4637
4638 case AMDGPU::V_ASHRREV_I64_e64:
4639 case AMDGPU::V_ASHRREV_I64_gfx10:
4640 case AMDGPU::V_ASHRREV_I64_vi:
4641
4642 case AMDGPU::V_PK_LSHLREV_B16:
4643 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4644 case AMDGPU::V_PK_LSHLREV_B16_vi:
4645
4646 case AMDGPU::V_PK_LSHRREV_B16:
4647 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4648 case AMDGPU::V_PK_LSHRREV_B16_vi:
4649 case AMDGPU::V_PK_ASHRREV_I16:
4650 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4651 case AMDGPU::V_PK_ASHRREV_I16_vi:
4652 return true;
4653 default:
4654 return false;
4655 }
4656}
4657
4658bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
4659 const OperandVector &Operands) {
4660 using namespace SIInstrFlags;
4661 const unsigned Opcode = Inst.getOpcode();
4662 const MCInstrDesc &Desc = MII.get(Opcode);
4663
4664 // lds_direct register is defined so that it can be used
4665 // with 9-bit operands only. Ignore encodings which do not accept these.
4666 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4667 if ((Desc.TSFlags & Enc) == 0)
4668 return true;
4669
4670 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4671 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4672 if (SrcIdx == -1)
4673 break;
4674 const auto &Src = Inst.getOperand(SrcIdx);
4675 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4676
4677 if (isGFX90A() || isGFX11Plus()) {
4678 Error(getOperandLoc(Operands, SrcIdx),
4679 "lds_direct is not supported on this GPU");
4680 return false;
4681 }
4682
4683 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) {
4684 Error(getOperandLoc(Operands, SrcIdx),
4685 "lds_direct cannot be used with this instruction");
4686 return false;
4687 }
4688
4689 if (SrcName != OpName::src0) {
4690 Error(getOperandLoc(Operands, SrcIdx),
4691 "lds_direct may be used as src0 only");
4692 return false;
4693 }
4694 }
4695 }
4696
4697 return true;
4698}
4699
4700SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4701 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4702 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4703 if (Op.isFlatOffset())
4704 return Op.getStartLoc();
4705 }
4706 return getLoc();
4707}
4708
4709bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4710 const OperandVector &Operands) {
4711 auto Opcode = Inst.getOpcode();
4712 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4713 if (OpNum == -1)
4714 return true;
4715
4716 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4717 if ((TSFlags & SIInstrFlags::FLAT))
4718 return validateFlatOffset(Inst, Operands);
4719
4720 if ((TSFlags & SIInstrFlags::SMRD))
4721 return validateSMEMOffset(Inst, Operands);
4722
4723 const auto &Op = Inst.getOperand(OpNum);
4724 // GFX12+ buffer ops: InstOffset is a signed 24-bit field, but must not be negative.
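// For example (illustrative): offset:-1 is rejected for a GFX12 buffer
// instruction even though it fits in 24 bits, while offset:8388607 (the
// largest 23-bit unsigned value) is accepted.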
4725 if (isGFX12Plus() &&
4726 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4727 const unsigned OffsetSize = 24;
4728 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
4729 Error(getFlatOffsetLoc(Operands),
4730 Twine("expected a ") + Twine(OffsetSize - 1) +
4731 "-bit unsigned offset for buffer ops");
4732 return false;
4733 }
4734 } else {
4735 const unsigned OffsetSize = 16;
4736 if (!isUIntN(OffsetSize, Op.getImm())) {
4737 Error(getFlatOffsetLoc(Operands),
4738 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4739 return false;
4740 }
4741 }
4742 return true;
4743}
4744
4745bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4746 const OperandVector &Operands) {
4747 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4748 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4749 return true;
4750
4751 auto Opcode = Inst.getOpcode();
4752 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4753 assert(OpNum != -1);
4754
4755 const auto &Op = Inst.getOperand(OpNum);
4756 if (!hasFlatOffsets() && Op.getImm() != 0) {
4757 Error(getFlatOffsetLoc(Operands),
4758 "flat offset modifier is not supported on this GPU");
4759 return false;
4760 }
4761
4762 // For pre-GFX12 FLAT instructions the offset must be positive;
4763 // MSB is ignored and forced to zero.
4764 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4765 bool AllowNegative =
4767 isGFX12Plus();
4768 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4769 Error(getFlatOffsetLoc(Operands),
4770 Twine("expected a ") +
4771 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4772 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4773 return false;
4774 }
4775
4776 return true;
4777}
4778
4779SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4780 // Start with second operand because SMEM Offset cannot be dst or src0.
4781 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4782 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4783 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4784 return Op.getStartLoc();
4785 }
4786 return getLoc();
4787}
4788
4789bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4790 const OperandVector &Operands) {
4791 if (isCI() || isSI())
4792 return true;
4793
4794 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4795 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4796 return true;
4797
4798 auto Opcode = Inst.getOpcode();
4799 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4800 if (OpNum == -1)
4801 return true;
4802
4803 const auto &Op = Inst.getOperand(OpNum);
4804 if (!Op.isImm())
4805 return true;
4806
4807 uint64_t Offset = Op.getImm();
4808 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4811 return true;
4812
4813 Error(getSMEMOffsetLoc(Operands),
4814 isGFX12Plus() && IsBuffer
4815 ? "expected a 23-bit unsigned offset for buffer ops"
4816 : isGFX12Plus() ? "expected a 24-bit signed offset"
4817 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4818 : "expected a 21-bit signed offset");
4819
4820 return false;
4821}
4822
4823bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
4824 const OperandVector &Operands) {
4825 unsigned Opcode = Inst.getOpcode();
4826 const MCInstrDesc &Desc = MII.get(Opcode);
4827 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4828 return true;
4829
4830 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4831 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4832
4833 const int OpIndices[] = { Src0Idx, Src1Idx };
4834
4835 unsigned NumExprs = 0;
4836 unsigned NumLiterals = 0;
4837 int64_t LiteralValue;
4838
4839 for (int OpIdx : OpIndices) {
4840 if (OpIdx == -1) break;
4841
4842 const MCOperand &MO = Inst.getOperand(OpIdx);
4843 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4844 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4845 bool IsLit = false;
4846 std::optional<int64_t> Imm;
4847 if (MO.isImm()) {
4848 Imm = MO.getImm();
4849 } else if (MO.isExpr()) {
4850 if (isLitExpr(MO.getExpr())) {
4851 IsLit = true;
4852 Imm = getLitValue(MO.getExpr());
4853 }
4854 } else {
4855 continue;
4856 }
4857
4858 if (!Imm.has_value()) {
4859 ++NumExprs;
4860 } else if (!isInlineConstant(Inst, OpIdx)) {
4861 auto OpType = static_cast<AMDGPU::OperandType>(
4862 Desc.operands()[OpIdx].OperandType);
4863 int64_t Value = encode32BitLiteral(*Imm, OpType, IsLit);
4864 if (NumLiterals == 0 || LiteralValue != Value) {
4865 LiteralValue = Value;
4866 ++NumLiterals;
4867 }
4868 }
4869 }
4870 }
4871
4872 if (NumLiterals + NumExprs <= 1)
4873 return true;
4874
4875 Error(getOperandLoc(Operands, Src1Idx),
4876 "only one unique literal operand is allowed");
4877 return false;
4878}
4879
4880bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4881 const unsigned Opc = Inst.getOpcode();
4882 if (isPermlane16(Opc)) {
4883 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4884 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4885
4886 if (OpSel & ~3)
4887 return false;
4888 }
4889
4890 uint64_t TSFlags = MII.get(Opc).TSFlags;
4891
4892 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4893 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4894 if (OpSelIdx != -1) {
4895 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4896 return false;
4897 }
4898 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4899 if (OpSelHiIdx != -1) {
4900 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4901 return false;
4902 }
4903 }
4904
4905 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4906 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4907 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4908 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4909 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4910 if (OpSel & 3)
4911 return false;
4912 }
4913
4914 // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
4915 // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
4916 // the first SGPR and use it for both the low and high operations.
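// Illustrative example (assumed gfx12 syntax): in
//   v_pk_add_f32 v[0:1], s[2:3], v[2:3]
// the hardware reads only s2 for both halves of src0, so op_sel and
// op_sel_hi must select the low half of that operand.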
4917 if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
4918 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4919 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4920 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4921 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4922
4923 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4924 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4925 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4926 unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
4927
4928 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4929
4930 auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
4931 unsigned Mask = 1U << Index;
4932 return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
4933 };
4934
4935 if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
4936 !VerifyOneSGPR(/*Index=*/0))
4937 return false;
4938 if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
4939 !VerifyOneSGPR(/*Index=*/1))
4940 return false;
4941
4942 int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4943 if (Src2Idx != -1) {
4944 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4945 if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
4946 !VerifyOneSGPR(/*Index=*/2))
4947 return false;
4948 }
4949 }
4950
4951 return true;
4952}
4953
4954bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
4955 if (!hasTrue16Insts())
4956 return true;
4957 const MCRegisterInfo *MRI = getMRI();
4958 const unsigned Opc = Inst.getOpcode();
4959 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4960 if (OpSelIdx == -1)
4961 return true;
4962 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
4963 // If the value is 0 we could have a default OpSel Operand, so conservatively
4964 // allow it.
4965 if (OpSelOpValue == 0)
4966 return true;
4967 unsigned OpCount = 0;
4968 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
4969 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
4970 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
4971 if (OpIdx == -1)
4972 continue;
4973 const MCOperand &Op = Inst.getOperand(OpIdx);
4974 if (Op.isReg() &&
4975 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
4976 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
4977 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
4978 if (OpSelOpIsHi != VGPRSuffixIsHi)
4979 return false;
4980 }
4981 ++OpCount;
4982 }
4983
4984 return true;
4985}
4986
4987bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
4988 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4989
4990 const unsigned Opc = Inst.getOpcode();
4991 uint64_t TSFlags = MII.get(Opc).TSFlags;
4992
4993 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4994 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4995 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4996 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
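// Illustrative example (assumed syntax): for a v_dot4 fp8/bf8 variant,
// neg_lo:[0,0,1] (negating src2) is acceptable, while neg_lo:[1,0,0] is
// rejected because src0 has no src_modifiers operand to carry the neg bit.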
4997 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4998 !(TSFlags & SIInstrFlags::IsSWMMAC))
4999 return true;
5000
5001 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
5002 if (NegIdx == -1)
5003 return true;
5004
5005 unsigned Neg = Inst.getOperand(NegIdx).getImm();
5006
5007 // Some instructions have a neg_lo or neg_hi operand, but the neg modifier is
5008 // allowed only on some src operands and not on others.
5009 // It is convenient that such instructions don't have src_modifiers operand
5010 // for src operands that don't allow neg because they also don't allow opsel.
5011
5012 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
5013 AMDGPU::OpName::src1_modifiers,
5014 AMDGPU::OpName::src2_modifiers};
5015
5016 for (unsigned i = 0; i < 3; ++i) {
5017 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
5018 if (Neg & (1 << i))
5019 return false;
5020 }
5021 }
5022
5023 return true;
5024}
5025
5026bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
5027 const OperandVector &Operands) {
5028 const unsigned Opc = Inst.getOpcode();
5029 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
5030 if (DppCtrlIdx >= 0) {
5031 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
5032
5033 if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
5034 AMDGPU::isDPALU_DPP(MII.get(Opc), MII, getSTI())) {
5035 // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
5036 // only on GFX12.
5037 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5038 Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
5039 : "DP ALU dpp only supports row_newbcast");
5040 return false;
5041 }
5042 }
5043
5044 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
5045 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5046
5047 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
5048 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5049 if (Src1Idx >= 0) {
5050 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5051 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5052 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
5053 Error(getOperandLoc(Operands, Src1Idx),
5054 "invalid operand for instruction");
5055 return false;
5056 }
5057 if (Src1.isImm()) {
5058 Error(getInstLoc(Operands),
5059 "src1 immediate operand invalid for instruction");
5060 return false;
5061 }
5062 }
5063 }
5064
5065 return true;
5066}
5067
5068// Check if VCC register matches wavefront size
5069bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5070 return (Reg == AMDGPU::VCC && isWave64()) ||
5071 (Reg == AMDGPU::VCC_LO && isWave32());
5072}
5073
5074 // Only one unique literal can be used. A VOP3 literal is only allowed on GFX10+.
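// For example (illustrative): v_fma_f32 v0, 0x1234, v1, 0x1234 reuses a
// single literal and can be accepted where VOP3 literals are supported,
// whereas two different literal values in one instruction are rejected.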
5075bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
5076 const OperandVector &Operands) {
5077 unsigned Opcode = Inst.getOpcode();
5078 const MCInstrDesc &Desc = MII.get(Opcode);
5079 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
5080 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
5081 !HasMandatoryLiteral && !isVOPD(Opcode))
5082 return true;
5083
5084 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
5085
5086 std::optional<unsigned> LiteralOpIdx;
5087 std::optional<uint64_t> LiteralValue;
5088
5089 for (int OpIdx : OpIndices) {
5090 if (OpIdx == -1)
5091 continue;
5092
5093 const MCOperand &MO = Inst.getOperand(OpIdx);
5094 if (!MO.isImm() && !MO.isExpr())
5095 continue;
5096 if (!isSISrcOperand(Desc, OpIdx))
5097 continue;
5098
5099 std::optional<int64_t> Imm;
5100 if (MO.isImm())
5101 Imm = MO.getImm();
5102 else if (MO.isExpr() && isLitExpr(MO.getExpr()))
5103 Imm = getLitValue(MO.getExpr());
5104
5105 bool IsAnotherLiteral = false;
5106 if (!Imm.has_value()) {
5107 // Literal value not known, so we conservatively assume it's different.
5108 IsAnotherLiteral = true;
5109 } else if (!isInlineConstant(Inst, OpIdx)) {
5110 uint64_t Value = *Imm;
5111 bool IsForcedFP64 =
5112 Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
5114 HasMandatoryLiteral);
5115 bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
5116 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
5117 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
5118
5119 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
5120 !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
5121 Error(getOperandLoc(Operands, OpIdx),
5122 "invalid operand for instruction");
5123 return false;
5124 }
5125
5126 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5127 Value = Hi_32(Value);
5128
5129 IsAnotherLiteral = !LiteralValue || *LiteralValue != Value;
5130 LiteralValue = Value;
5131 }
5132
5133 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5134 !getFeatureBits()[FeatureVOP3Literal]) {
5135 Error(getOperandLoc(Operands, OpIdx),
5136 "literal operands are not supported");
5137 return false;
5138 }
5139
5140 if (LiteralOpIdx && IsAnotherLiteral) {
5141 Error(getLaterLoc(getOperandLoc(Operands, OpIdx),
5142 getOperandLoc(Operands, *LiteralOpIdx)),
5143 "only one unique literal operand is allowed");
5144 return false;
5145 }
5146
5147 if (IsAnotherLiteral)
5148 LiteralOpIdx = OpIdx;
5149 }
5150
5151 return true;
5152}
5153
5154// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5155static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5156 const MCRegisterInfo *MRI) {
5157 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5158 if (OpIdx < 0)
5159 return -1;
5160
5161 const MCOperand &Op = Inst.getOperand(OpIdx);
5162 if (!Op.isReg())
5163 return -1;
5164
5165 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5166 auto Reg = Sub ? Sub : Op.getReg();
5167 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5168 return AGPR32.contains(Reg) ? 1 : 0;
5169}
5170
5171bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5172 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5173 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
5175 SIInstrFlags::DS)) == 0)
5176 return true;
5177
5178 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5179 ? AMDGPU::OpName::data0
5180 : AMDGPU::OpName::vdata;
5181
5182 const MCRegisterInfo *MRI = getMRI();
5183 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5184 int DataAreg = IsAGPROperand(Inst, DataName, MRI);
5185
5186 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
5187 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5188 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5189 return false;
5190 }
5191
5192 auto FB = getFeatureBits();
5193 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5194 if (DataAreg < 0 || DstAreg < 0)
5195 return true;
5196 return DstAreg == DataAreg;
5197 }
5198
5199 return DstAreg < 1 && DataAreg < 1;
5200}
5201
5202bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5203 auto FB = getFeatureBits();
5204 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5205 return true;
5206
5207 unsigned Opc = Inst.getOpcode();
5208 const MCRegisterInfo *MRI = getMRI();
5209 // DS_READ_B96_TR_B6 is the only DS instruction in GFX950 that allows an
5210 // unaligned VGPR. All others only allow even-aligned VGPRs.
5211 if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5212 return true;
5213
5214 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5215 switch (Opc) {
5216 default:
5217 break;
5218 case AMDGPU::DS_LOAD_TR6_B96:
5219 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5220 // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250 that
5221 // allows an unaligned VGPR. All others only allow even-aligned VGPRs.
5222 return true;
5223 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5224 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5225 // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250 that
5226 // allows an unaligned VGPR for vdst, but other operands still only allow
5227 // even-aligned VGPRs.
5228 int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
5229 if (VAddrIdx != -1) {
5230 const MCOperand &Op = Inst.getOperand(VAddrIdx);
5231 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5232 if ((Sub - AMDGPU::VGPR0) & 1)
5233 return false;
5234 }
5235 return true;
5236 }
5237 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5238 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5239 return true;
5240 }
5241 }
5242
5243 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5244 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5245 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
5246 const MCOperand &Op = Inst.getOperand(I);
5247 if (!Op.isReg())
5248 continue;
5249
5250 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5251 if (!Sub)
5252 continue;
5253
5254 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
5255 return false;
5256 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
5257 return false;
5258 }
5259
5260 return true;
5261}
5262
5263SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5264 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5265 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5266 if (Op.isBLGP())
5267 return Op.getStartLoc();
5268 }
5269 return SMLoc();
5270}
5271
5272bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5273 const OperandVector &Operands) {
5274 unsigned Opc = Inst.getOpcode();
5275 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5276 if (BlgpIdx == -1)
5277 return true;
5278 SMLoc BLGPLoc = getBLGPLoc(Operands);
5279 if (!BLGPLoc.isValid())
5280 return true;
5281 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5282 auto FB = getFeatureBits();
5283 bool UsesNeg = false;
5284 if (FB[AMDGPU::FeatureGFX940Insts]) {
5285 switch (Opc) {
5286 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5287 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5288 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5289 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5290 UsesNeg = true;
5291 }
5292 }
5293
5294 if (IsNeg == UsesNeg)
5295 return true;
5296
5297 Error(BLGPLoc,
5298 UsesNeg ? "invalid modifier: blgp is not supported"
5299 : "invalid modifier: neg is not supported");
5300
5301 return false;
5302}
5303
5304bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5305 const OperandVector &Operands) {
5306 if (!isGFX11Plus())
5307 return true;
5308
5309 unsigned Opc = Inst.getOpcode();
5310 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5311 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5312 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5313 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5314 return true;
5315
5316 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5317 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5318 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5319 if (Reg == AMDGPU::SGPR_NULL)
5320 return true;
5321
5322 Error(getOperandLoc(Operands, Src0Idx), "src0 must be null");
5323 return false;
5324}
5325
5326bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5327 const OperandVector &Operands) {
5328 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5329 if ((TSFlags & SIInstrFlags::DS) == 0)
5330 return true;
5331 if (TSFlags & SIInstrFlags::GWS)
5332 return validateGWS(Inst, Operands);
5333 // Only validate GDS for non-GWS instructions.
5334 if (hasGDS())
5335 return true;
5336 int GDSIdx =
5337 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5338 if (GDSIdx < 0)
5339 return true;
5340 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5341 if (GDS) {
5342 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5343 Error(S, "gds modifier is not supported on this GPU");
5344 return false;
5345 }
5346 return true;
5347}
5348
5349// gfx90a has an undocumented limitation:
5350// DS_GWS opcodes must use even aligned registers.
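// For example, the check below accepts an even data0 register such as v2 and
// rejects an odd one such as v3 with "vgpr must be even aligned".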
5351bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5352 const OperandVector &Operands) {
5353 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5354 return true;
5355
5356 int Opc = Inst.getOpcode();
5357 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5358 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5359 return true;
5360
5361 const MCRegisterInfo *MRI = getMRI();
5362 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5363 int Data0Pos =
5364 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5365 assert(Data0Pos != -1);
5366 auto Reg = Inst.getOperand(Data0Pos).getReg();
5367 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5368 if (RegIdx & 1) {
5369 Error(getOperandLoc(Operands, Data0Pos), "vgpr must be even aligned");
5370 return false;
5371 }
5372
5373 return true;
5374}
5375
5376bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
5377 const OperandVector &Operands,
5378 SMLoc IDLoc) {
5379 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
5380 AMDGPU::OpName::cpol);
5381 if (CPolPos == -1)
5382 return true;
5383
5384 unsigned CPol = Inst.getOperand(CPolPos).getImm();
5385
5386 if (!isGFX1250Plus()) {
5387 if (CPol & CPol::SCAL) {
5388 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5389 StringRef CStr(S.getPointer());
5390 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5391 Error(S, "scale_offset is not supported on this GPU");
5392 }
5393 if (CPol & CPol::NV) {
5394 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5395 StringRef CStr(S.getPointer());
5396 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
5397 Error(S, "nv is not supported on this GPU");
5398 }
5399 }
5400
5401 if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
5402 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5403 StringRef CStr(S.getPointer());
5404 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5405 Error(S, "scale_offset is not supported for this instruction");
5406 }
5407
5408 if (isGFX12Plus())
5409 return validateTHAndScopeBits(Inst, Operands, CPol);
5410
5411 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5412 if (TSFlags & SIInstrFlags::SMRD) {
5413 if (CPol && (isSI() || isCI())) {
5414 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5415 Error(S, "cache policy is not supported for SMRD instructions");
5416 return false;
5417 }
5418 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
5419 Error(IDLoc, "invalid cache policy for SMEM instruction");
5420 return false;
5421 }
5422 }
5423
5424 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
5425 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
5426 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5427 SIInstrFlags::FLAT;
5428 if (!(TSFlags & AllowSCCModifier)) {
5429 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5430 StringRef CStr(S.getPointer());
5431 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
5432 Error(S,
5433 "scc modifier is not supported for this instruction on this GPU");
5434 return false;
5435 }
5436 }
5437
5438 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
5439 return true;
5440
5441 if (TSFlags & SIInstrFlags::IsAtomicRet) {
5442 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5443 Error(IDLoc, isGFX940() ? "instruction must use sc0"
5444 : "instruction must use glc");
5445 return false;
5446 }
5447 } else {
5448 if (CPol & CPol::GLC) {
5449 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5450 StringRef CStr(S.getPointer());
5451 S = SMLoc::getFromPointer(
5452 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
5453 Error(S, isGFX940() ? "instruction must not use sc0"
5454 : "instruction must not use glc");
5455 return false;
5456 }
5457 }
5458
5459 return true;
5460}
5461
5462bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5463 const OperandVector &Operands,
5464 const unsigned CPol) {
5465 const unsigned TH = CPol & AMDGPU::CPol::TH;
5466 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5467
5468 const unsigned Opcode = Inst.getOpcode();
5469 const MCInstrDesc &TID = MII.get(Opcode);
5470
5471 auto PrintError = [&](StringRef Msg) {
5472 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5473 Error(S, Msg);
5474 return false;
5475 };
5476
5477 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5478 (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
5479 !(TH & AMDGPU::CPol::TH_ATOMIC_RETURN))
5480 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5481
5482 if (TH == 0)
5483 return true;
5484
5485 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5486 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5487 (TH == AMDGPU::CPol::TH_NT_HT)))
5488 return PrintError("invalid th value for SMEM instruction");
5489
5490 if (TH == AMDGPU::CPol::TH_BYPASS) {
5491 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5492 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
5493 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5494 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5495 return PrintError("scope and th combination is not valid");
5496 }
5497
5498 unsigned THType = AMDGPU::getTemporalHintType(TID);
5499 if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
5500 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5501 return PrintError("invalid th value for atomic instructions");
5502 } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
5503 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5504 return PrintError("invalid th value for store instructions");
5505 } else {
5506 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5507 return PrintError("invalid th value for load instructions");
5508 }
5509
5510 return true;
5511}
5512
5513bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5514 const OperandVector &Operands) {
5515 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5516 if (Desc.mayStore() &&
5517 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5518 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5519 if (Loc != getInstLoc(Operands)) {
5520 Error(Loc, "TFE modifier has no meaning for store instructions");
5521 return false;
5522 }
5523 }
5524
5525 return true;
5526}
5527
5528bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
5529 const OperandVector &Operands) {
5530 unsigned Opc = Inst.getOpcode();
5531 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5532 const MCInstrDesc &Desc = MII.get(Opc);
5533
5534 auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
5535 int FmtIdx = AMDGPU::getNamedOperandIdx(Opc, FmtOp);
5536 if (FmtIdx == -1)
5537 return true;
5538 unsigned Fmt = Inst.getOperand(FmtIdx).getImm();
5539 int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
5540 unsigned RegSize =
5541 TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[SrcIdx], HwMode))
5542 .getSizeInBits();
5543
5544 if (RegSize == AMDGPU::wmmaScaleF8F6F4FormatToNumRegs(Fmt) * 32)
5545 return true;
5546
5547 static const char *FmtNames[] = {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
5548 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
5549 "MATRIX_FMT_FP4"};
5550
5551 Error(getOperandLoc(Operands, SrcIdx),
5552 "wrong register tuple size for " + Twine(FmtNames[Fmt]));
5553 return false;
5554 };
5555
5556 return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
5557 validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
5558}
5559
5560bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc,
5561 const OperandVector &Operands) {
5562 if (!validateLdsDirect(Inst, Operands))
5563 return false;
5564 if (!validateTrue16OpSel(Inst)) {
5565 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5566 "op_sel operand conflicts with 16-bit operand suffix");
5567 return false;
5568 }
5569 if (!validateSOPLiteral(Inst, Operands))
5570 return false;
5571 if (!validateVOPLiteral(Inst, Operands)) {
5572 return false;
5573 }
5574 if (!validateConstantBusLimitations(Inst, Operands)) {
5575 return false;
5576 }
5577 if (!validateVOPD(Inst, Operands)) {
5578 return false;
5579 }
5580 if (!validateIntClampSupported(Inst)) {
5581 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5582 "integer clamping is not supported on this GPU");
5583 return false;
5584 }
5585 if (!validateOpSel(Inst)) {
5586 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5587 "invalid op_sel operand");
5588 return false;
5589 }
5590 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5591 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5592 "invalid neg_lo operand");
5593 return false;
5594 }
5595 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5596 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5597 "invalid neg_hi operand");
5598 return false;
5599 }
5600 if (!validateDPP(Inst, Operands)) {
5601 return false;
5602 }
5603 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5604 if (!validateMIMGD16(Inst)) {
5605 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5606 "d16 modifier is not supported on this GPU");
5607 return false;
5608 }
5609 if (!validateMIMGDim(Inst, Operands)) {
5610 Error(IDLoc, "missing dim operand");
5611 return false;
5612 }
5613 if (!validateTensorR128(Inst)) {
5614 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5615 "instruction must set modifier r128=0");
5616 return false;
5617 }
5618 if (!validateMIMGMSAA(Inst)) {
5619 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5620 "invalid dim; must be MSAA type");
5621 return false;
5622 }
5623 if (!validateMIMGDataSize(Inst, IDLoc)) {
5624 return false;
5625 }
5626 if (!validateMIMGAddrSize(Inst, IDLoc))
5627 return false;
5628 if (!validateMIMGAtomicDMask(Inst)) {
5629 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5630 "invalid atomic image dmask");
5631 return false;
5632 }
5633 if (!validateMIMGGatherDMask(Inst)) {
5634 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5635 "invalid image_gather dmask: only one bit must be set");
5636 return false;
5637 }
5638 if (!validateMovrels(Inst, Operands)) {
5639 return false;
5640 }
5641 if (!validateOffset(Inst, Operands)) {
5642 return false;
5643 }
5644 if (!validateMAIAccWrite(Inst, Operands)) {
5645 return false;
5646 }
5647 if (!validateMAISrc2(Inst, Operands)) {
5648 return false;
5649 }
5650 if (!validateMFMA(Inst, Operands)) {
5651 return false;
5652 }
5653 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5654 return false;
5655 }
5656
5657 if (!validateAGPRLdSt(Inst)) {
5658 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5659 ? "invalid register class: data and dst should be all VGPR or AGPR"
5660 : "invalid register class: agpr loads and stores not supported on this GPU"
5661 );
5662 return false;
5663 }
5664 if (!validateVGPRAlign(Inst)) {
5665 Error(IDLoc,
5666 "invalid register class: vgpr tuples must be 64 bit aligned");
5667 return false;
5668 }
5669 if (!validateDS(Inst, Operands)) {
5670 return false;
5671 }
5672
5673 if (!validateBLGP(Inst, Operands)) {
5674 return false;
5675 }
5676
5677 if (!validateDivScale(Inst)) {
5678 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5679 return false;
5680 }
5681 if (!validateWaitCnt(Inst, Operands)) {
5682 return false;
5683 }
5684 if (!validateTFE(Inst, Operands)) {
5685 return false;
5686 }
5687 if (!validateWMMA(Inst, Operands)) {
5688 return false;
5689 }
5690
5691 return true;
5692}
5693
5694 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5695 const FeatureBitset &FBS,
5696 unsigned VariantID = 0);
5697
5698static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5699 const FeatureBitset &AvailableFeatures,
5700 unsigned VariantID);
5701
5702bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5703 const FeatureBitset &FBS) {
5704 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5705}
5706
5707bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5708 const FeatureBitset &FBS,
5709 ArrayRef<unsigned> Variants) {
5710 for (auto Variant : Variants) {
5711 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5712 return true;
5713 }
5714
5715 return false;
5716}
5717
5718bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5719 SMLoc IDLoc) {
5720 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5721
5722 // Check if requested instruction variant is supported.
5723 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5724 return false;
5725
5726 // This instruction is not supported.
5727 // Clear any other pending errors because they are no longer relevant.
5728 getParser().clearPendingErrors();
5729
5730 // Requested instruction variant is not supported.
5731 // Check if any other variants are supported.
5732 StringRef VariantName = getMatchedVariantName();
5733 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5734 return Error(IDLoc,
5735 Twine(VariantName,
5736 " variant of this instruction is not supported"));
5737 }
5738
5739 // Check if this instruction may be used with a different wavesize.
5740 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5741 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5742 // FIXME: Use getAvailableFeatures, and do not manually recompute
5743 FeatureBitset FeaturesWS32 = getFeatureBits();
5744 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5745 .flip(AMDGPU::FeatureWavefrontSize32);
5746 FeatureBitset AvailableFeaturesWS32 =
5747 ComputeAvailableFeatures(FeaturesWS32);
5748
5749 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5750 return Error(IDLoc, "instruction requires wavesize=32");
5751 }
5752
5753 // Finally check if this instruction is supported on any other GPU.
5754 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5755 return Error(IDLoc, "instruction not supported on this GPU");
5756 }
5757
5758 // Instruction not supported on any GPU. Probably a typo.
5759 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5760 return Error(IDLoc, "invalid instruction" + Suggestion);
5761}
5762
5763static bool isInvalidVOPDY(const OperandVector &Operands,
5764 uint64_t InvalidOprIdx) {
5765 assert(InvalidOprIdx < Operands.size());
5766 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5767 if (Op.isToken() && InvalidOprIdx > 1) {
5768 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5769 return PrevOp.isToken() && PrevOp.getToken() == "::";
5770 }
5771 return false;
5772}
5773
5774bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5775 OperandVector &Operands,
5776 MCStreamer &Out,
5777 uint64_t &ErrorInfo,
5778 bool MatchingInlineAsm) {
5779 MCInst Inst;
5780 Inst.setLoc(IDLoc);
5781 unsigned Result = Match_Success;
5782 for (auto Variant : getMatchedVariants()) {
5783 uint64_t EI;
5784 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5785 Variant);
5786 // We order match statuses from least to most specific, and use the most
5787 // specific status as the result:
5788 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5789 if (R == Match_Success || R == Match_MissingFeature ||
5790 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5791 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5792 Result != Match_MissingFeature)) {
5793 Result = R;
5794 ErrorInfo = EI;
5795 }
5796 if (R == Match_Success)
5797 break;
5798 }
5799
5800 if (Result == Match_Success) {
5801 if (!validateInstruction(Inst, IDLoc, Operands)) {
5802 return true;
5803 }
5804 Out.emitInstruction(Inst, getSTI());
5805 return false;
5806 }
5807
5808 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5809 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5810 return true;
5811 }
5812
5813 switch (Result) {
5814 default: break;
5815 case Match_MissingFeature:
5816 // It has been verified that the specified instruction
5817 // mnemonic is valid. A match was found but it requires
5818 // features which are not supported on this GPU.
5819 return Error(IDLoc, "operands are not valid for this GPU or mode");
5820
5821 case Match_InvalidOperand: {
5822 SMLoc ErrorLoc = IDLoc;
5823 if (ErrorInfo != ~0ULL) {
5824 if (ErrorInfo >= Operands.size()) {
5825 return Error(IDLoc, "too few operands for instruction");
5826 }
5827 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5828 if (ErrorLoc == SMLoc())
5829 ErrorLoc = IDLoc;
5830
5831 if (isInvalidVOPDY(Operands, ErrorInfo))
5832 return Error(ErrorLoc, "invalid VOPDY instruction");
5833 }
5834 return Error(ErrorLoc, "invalid operand for instruction");
5835 }
5836
5837 case Match_MnemonicFail:
5838 llvm_unreachable("Invalid instructions should have been handled already");
5839 }
5840 llvm_unreachable("Implement any new match types added!");
5841}
5842
5843bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5844 int64_t Tmp = -1;
5845 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5846 return true;
5847 }
5848 if (getParser().parseAbsoluteExpression(Tmp)) {
5849 return true;
5850 }
5851 Ret = static_cast<uint32_t>(Tmp);
5852 return false;
5853}
5854
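// Parses, for example, .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+" and
// checks that the quoted target id matches the target id currently in effect.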
5855bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5856 if (!getSTI().getTargetTriple().isAMDGCN())
5857 return TokError("directive only supported for amdgcn architecture");
5858
5859 std::string TargetIDDirective;
5860 SMLoc TargetStart = getTok().getLoc();
5861 if (getParser().parseEscapedString(TargetIDDirective))
5862 return true;
5863
5864 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5865 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5866 return getParser().Error(TargetRange.Start,
5867 (Twine(".amdgcn_target directive's target id ") +
5868 Twine(TargetIDDirective) +
5869 Twine(" does not match the specified target id ") +
5870 Twine(getTargetStreamer().getTargetID()->toString())).str());
5871
5872 return false;
5873}
5874
5875bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5876 return Error(Range.Start, "value out of range", Range);
5877}
5878
5879bool AMDGPUAsmParser::calculateGPRBlocks(
5880 const FeatureBitset &Features, const MCExpr *VCCUsed,
5881 const MCExpr *FlatScrUsed, bool XNACKUsed,
5882 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5883 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5884 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5885 // TODO(scott.linder): These calculations are duplicated from
5886 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5887 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5888 MCContext &Ctx = getContext();
5889
5890 const MCExpr *NumSGPRs = NextFreeSGPR;
5891 int64_t EvaluatedSGPRs;
5892
5893 if (Version.Major >= 10)
5894 NumSGPRs = MCConstantExpr::create(0, Ctx);
5895 else {
5896 unsigned MaxAddressableNumSGPRs =
5897 IsaInfo::getAddressableNumSGPRs(&getSTI());
5898
5899 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5900 !Features.test(FeatureSGPRInitBug) &&
5901 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5902 return OutOfRangeError(SGPRRange);
5903
5904 const MCExpr *ExtraSGPRs =
5905 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5906 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5907
5908 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5909 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5910 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5911 return OutOfRangeError(SGPRRange);
5912
5913 if (Features.test(FeatureSGPRInitBug))
5914 NumSGPRs =
5915 MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5916 }
5917
5918 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5919 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
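// For example, NumGPR = 41 with an encoding granule of 8 gives
// alignTo(41, 8) / 8 - 1 = 48 / 8 - 1 = 5 encoded blocks.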
5920 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5921 unsigned Granule) -> const MCExpr * {
5922 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5923 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5924 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5925 const MCExpr *AlignToGPR =
5926 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5927 const MCExpr *DivGPR =
5928 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5929 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5930 return SubGPR;
5931 };
5932
5933 VGPRBlocks = GetNumGPRBlocks(
5934 NextFreeVGPR,
5935 IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
5936 SGPRBlocks =
5937 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
5938
5939 return false;
5940}
5941
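// Parses a block of the following form (a minimal sketch; only the two
// register-count directives are mandatory, and .amdhsa_accum_offset is also
// required on gfx90a):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel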
5942bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5943 if (!getSTI().getTargetTriple().isAMDGCN())
5944 return TokError("directive only supported for amdgcn architecture");
5945
5946 if (!isHsaAbi(getSTI()))
5947 return TokError("directive only supported for amdhsa OS");
5948
5949 StringRef KernelName;
5950 if (getParser().parseIdentifier(KernelName))
5951 return true;
5952
5953 AMDGPU::MCKernelDescriptor KD =
5954 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5955 &getSTI(), getContext());
5956
5957 StringSet<> Seen;
5958
5959 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5960
5961 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
5962 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
5963
5964 SMRange VGPRRange;
5965 const MCExpr *NextFreeVGPR = ZeroExpr;
5966 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
5967 const MCExpr *NamedBarCnt = ZeroExpr;
5968 uint64_t SharedVGPRCount = 0;
5969 uint64_t PreloadLength = 0;
5970 uint64_t PreloadOffset = 0;
5971 SMRange SGPRRange;
5972 const MCExpr *NextFreeSGPR = ZeroExpr;
5973
5974 // Count the number of user SGPRs implied from the enabled feature bits.
5975 unsigned ImpliedUserSGPRCount = 0;
5976
5977 // Track if the asm explicitly contains the directive for the user SGPR
5978 // count.
5979 std::optional<unsigned> ExplicitUserSGPRCount;
5980 const MCExpr *ReserveVCC = OneExpr;
5981 const MCExpr *ReserveFlatScr = OneExpr;
5982 std::optional<bool> EnableWavefrontSize32;
5983
5984 while (true) {
5985 while (trySkipToken(AsmToken::EndOfStatement));
5986
5987 StringRef ID;
5988 SMRange IDRange = getTok().getLocRange();
5989 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5990 return true;
5991
5992 if (ID == ".end_amdhsa_kernel")
5993 break;
5994
5995 if (!Seen.insert(ID).second)
5996 return TokError(".amdhsa_ directives cannot be repeated");
5997
5998 SMLoc ValStart = getLoc();
5999 const MCExpr *ExprVal;
6000 if (getParser().parseExpression(ExprVal))
6001 return true;
6002 SMLoc ValEnd = getLoc();
6003 SMRange ValRange = SMRange(ValStart, ValEnd);
6004
6005 int64_t IVal = 0;
6006 uint64_t Val = IVal;
6007 bool EvaluatableExpr;
6008 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6009 if (IVal < 0)
6010 return OutOfRangeError(ValRange);
6011 Val = IVal;
6012 }
6013
6014#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6015 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6016 return OutOfRangeError(RANGE); \
6017 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6018 getContext());
6019
6020// Some fields use the parsed value immediately which requires the expression to
6021// be solvable.
6022#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6023 if (!(RESOLVED)) \
6024 return Error(IDRange.Start, "directive should have resolvable expression", \
6025 IDRange);
6026
6027 if (ID == ".amdhsa_group_segment_fixed_size") {
6028 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
6029 CHAR_BIT>(Val))
6030 return OutOfRangeError(ValRange);
6031 KD.group_segment_fixed_size = ExprVal;
6032 } else if (ID == ".amdhsa_private_segment_fixed_size") {
6033 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
6034 CHAR_BIT>(Val))
6035 return OutOfRangeError(ValRange);
6036 KD.private_segment_fixed_size = ExprVal;
6037 } else if (ID == ".amdhsa_kernarg_size") {
6038 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
6039 return OutOfRangeError(ValRange);
6040 KD.kernarg_size = ExprVal;
6041 } else if (ID == ".amdhsa_user_sgpr_count") {
6042 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6043 ExplicitUserSGPRCount = Val;
6044 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
6045 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6046 if (hasArchitectedFlatScratch())
6047 return Error(IDRange.Start,
6048 "directive is not supported with architected flat scratch",
6049 IDRange);
6050 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6051 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6052 ExprVal, ValRange);
6053 if (Val)
6054 ImpliedUserSGPRCount += 4;
6055 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
6056 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6057 if (!hasKernargPreload())
6058 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6059
6060 if (Val > getMaxNumUserSGPRs())
6061 return OutOfRangeError(ValRange);
6062 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
6063 ValRange);
6064 if (Val) {
6065 ImpliedUserSGPRCount += Val;
6066 PreloadLength = Val;
6067 }
6068 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
6069 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6070 if (!hasKernargPreload())
6071 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6072
6073 if (Val >= 1024)
6074 return OutOfRangeError(ValRange);
6075 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6076 ValRange);
6077 if (Val)
6078 PreloadOffset = Val;
6079 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6080 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6081 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6082 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6083 ValRange);
6084 if (Val)
6085 ImpliedUserSGPRCount += 2;
6086 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6087 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6088 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6089 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6090 ValRange);
6091 if (Val)
6092 ImpliedUserSGPRCount += 2;
6093 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6094 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6095 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6096 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6097 ExprVal, ValRange);
6098 if (Val)
6099 ImpliedUserSGPRCount += 2;
6100 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6101 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6102 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6103 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6104 ValRange);
6105 if (Val)
6106 ImpliedUserSGPRCount += 2;
6107 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6108 if (hasArchitectedFlatScratch())
6109 return Error(IDRange.Start,
6110 "directive is not supported with architected flat scratch",
6111 IDRange);
6112 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6113 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6114 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6115 ExprVal, ValRange);
6116 if (Val)
6117 ImpliedUserSGPRCount += 2;
6118 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6119 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6120 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6121 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6122 ExprVal, ValRange);
6123 if (Val)
6124 ImpliedUserSGPRCount += 1;
6125 } else if (ID == ".amdhsa_wavefront_size32") {
6126 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6127 if (IVersion.Major < 10)
6128 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6129 EnableWavefrontSize32 = Val;
6130 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6131 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6132 ValRange);
6133 } else if (ID == ".amdhsa_uses_dynamic_stack") {
6134 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6135 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6136 ValRange);
6137 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6138 if (hasArchitectedFlatScratch())
6139 return Error(IDRange.Start,
6140 "directive is not supported with architected flat scratch",
6141 IDRange);
6142 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6143 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6144 ValRange);
6145 } else if (ID == ".amdhsa_enable_private_segment") {
6146 if (!hasArchitectedFlatScratch())
6147 return Error(
6148 IDRange.Start,
6149 "directive is not supported without architected flat scratch",
6150 IDRange);
6151 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6152 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6153 ValRange);
6154 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6155 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6156 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6157 ValRange);
6158 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6159 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6160 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6161 ValRange);
6162 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6163 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6164 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6165 ValRange);
6166 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6167 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6168 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6169 ValRange);
6170 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6171 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6172 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6173 ValRange);
6174 } else if (ID == ".amdhsa_next_free_vgpr") {
6175 VGPRRange = ValRange;
6176 NextFreeVGPR = ExprVal;
6177 } else if (ID == ".amdhsa_next_free_sgpr") {
6178 SGPRRange = ValRange;
6179 NextFreeSGPR = ExprVal;
6180 } else if (ID == ".amdhsa_accum_offset") {
6181 if (!isGFX90A())
6182 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6183 AccumOffset = ExprVal;
6184 } else if (ID == ".amdhsa_named_barrier_count") {
6185 if (!isGFX1250Plus())
6186 return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
6187 NamedBarCnt = ExprVal;
6188 } else if (ID == ".amdhsa_reserve_vcc") {
6189 if (EvaluatableExpr && !isUInt<1>(Val))
6190 return OutOfRangeError(ValRange);
6191 ReserveVCC = ExprVal;
6192 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6193 if (IVersion.Major < 7)
6194 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
6195 if (hasArchitectedFlatScratch())
6196 return Error(IDRange.Start,
6197 "directive is not supported with architected flat scratch",
6198 IDRange);
6199 if (EvaluatableExpr && !isUInt<1>(Val))
6200 return OutOfRangeError(ValRange);
6201 ReserveFlatScr = ExprVal;
6202 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6203 if (IVersion.Major < 8)
6204 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
6205 if (!isUInt<1>(Val))
6206 return OutOfRangeError(ValRange);
6207 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6208 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
6209 IDRange);
6210 } else if (ID == ".amdhsa_float_round_mode_32") {
6211 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6212 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6213 ValRange);
6214 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6215 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6216 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6217 ValRange);
6218 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6219 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6220 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6221 ValRange);
6222 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6223 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6224 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6225 ValRange);
6226 } else if (ID == ".amdhsa_dx10_clamp") {
6227 if (IVersion.Major >= 12)
6228 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6229 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6230 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6231 ValRange);
6232 } else if (ID == ".amdhsa_ieee_mode") {
6233 if (IVersion.Major >= 12)
6234 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6235 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6236 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6237 ValRange);
6238 } else if (ID == ".amdhsa_fp16_overflow") {
6239 if (IVersion.Major < 9)
6240 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
6241 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6242 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6243 ValRange);
6244 } else if (ID == ".amdhsa_tg_split") {
6245 if (!isGFX90A())
6246 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6247 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6248 ExprVal, ValRange);
6249 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6250 if (!supportsWGP(getSTI()))
6251 return Error(IDRange.Start,
6252 "directive unsupported on " + getSTI().getCPU(), IDRange);
6253 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6254 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6255 ValRange);
6256 } else if (ID == ".amdhsa_memory_ordered") {
6257 if (IVersion.Major < 10)
6258 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6259 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6260 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6261 ValRange);
6262 } else if (ID == ".amdhsa_forward_progress") {
6263 if (IVersion.Major < 10)
6264 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6265 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6266 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6267 ValRange);
6268 } else if (ID == ".amdhsa_shared_vgpr_count") {
6269 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6270 if (IVersion.Major < 10 || IVersion.Major >= 12)
6271 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
6272 IDRange);
6273 SharedVGPRCount = Val;
6274 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6275 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6276 ValRange);
6277 } else if (ID == ".amdhsa_inst_pref_size") {
6278 if (IVersion.Major < 11)
6279 return Error(IDRange.Start, "directive requires gfx11+", IDRange);
6280 if (IVersion.Major == 11) {
6281 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6282 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6283 ValRange);
6284 } else {
6285 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6286 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6287 ValRange);
6288 }
6289 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6290 PARSE_BITS_ENTRY(
6291 KD.compute_pgm_rsrc2,
6292 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6293 ExprVal, ValRange);
6294 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6295 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6296 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6297 ExprVal, ValRange);
6298 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6299 PARSE_BITS_ENTRY(
6300 KD.compute_pgm_rsrc2,
6301 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6302 ExprVal, ValRange);
6303 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6304 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6305 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6306 ExprVal, ValRange);
6307 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6308 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6309 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6310 ExprVal, ValRange);
6311 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6312 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6313 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6314 ExprVal, ValRange);
6315 } else if (ID == ".amdhsa_exception_int_div_zero") {
6316 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6317 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6318 ExprVal, ValRange);
6319 } else if (ID == ".amdhsa_round_robin_scheduling") {
6320 if (IVersion.Major < 12)
6321 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
6322 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6323 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6324 ValRange);
6325 } else {
6326 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
6327 }
6328
6329#undef PARSE_BITS_ENTRY
6330 }
6331
6332 if (!Seen.contains(".amdhsa_next_free_vgpr"))
6333 return TokError(".amdhsa_next_free_vgpr directive is required");
6334
6335 if (!Seen.contains(".amdhsa_next_free_sgpr"))
6336 return TokError(".amdhsa_next_free_sgpr directive is required");
6337
6338 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6339
6340 // Consider the case where the total number of user SGPRs, including the
6341 // trailing allocated preload SGPRs, is greater than the number of explicitly
6342 // referenced SGPRs.
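// For example, two enabled user SGPRs plus .amdhsa_user_sgpr_kernarg_preload_length 4
// imply UserSGPRCount = 6, which may exceed the declared .amdhsa_next_free_sgpr.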
6343 if (PreloadLength) {
6344 MCContext &Ctx = getContext();
6345 NextFreeSGPR = AMDGPUMCExpr::createMax(
6346 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6347 }
6348
6349 const MCExpr *VGPRBlocks;
6350 const MCExpr *SGPRBlocks;
6351 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6352 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6353 EnableWavefrontSize32, NextFreeVGPR,
6354 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6355 SGPRBlocks))
6356 return true;
6357
6358 int64_t EvaluatedVGPRBlocks;
6359 bool VGPRBlocksEvaluatable =
6360 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6361 if (VGPRBlocksEvaluatable &&
6362 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
6363 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6364 return OutOfRangeError(VGPRRange);
6365 }
6366 AMDGPU::MCKernelDescriptor::bits_set(
6367 KD.compute_pgm_rsrc1, VGPRBlocks,
6368 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6369 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6370
6371 int64_t EvaluatedSGPRBlocks;
6372 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6373 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
6374 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6375 return OutOfRangeError(SGPRRange);
6376 AMDGPU::MCKernelDescriptor::bits_set(
6377 KD.compute_pgm_rsrc1, SGPRBlocks,
6378 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6379 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6380
6381 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6382 return TokError("amdgpu_user_sgpr_count smaller than implied by "
6383 "enabled user SGPRs");
6384
6385 if (isGFX1250Plus()) {
6386 if (!isUInt<COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
6387 return TokError("too many user SGPRs enabled");
6388 AMDGPU::MCKernelDescriptor::bits_set(
6389 KD.compute_pgm_rsrc2,
6390 MCConstantExpr::create(UserSGPRCount, getContext()),
6391 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6392 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
6393 } else {
6394 if (!isUInt<COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_WIDTH>(
6395 UserSGPRCount))
6396 return TokError("too many user SGPRs enabled");
6397 AMDGPU::MCKernelDescriptor::bits_set(
6398 KD.compute_pgm_rsrc2,
6399 MCConstantExpr::create(UserSGPRCount, getContext()),
6400 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6401 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
6402 }
6403
6404 int64_t IVal = 0;
6405 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6406 return TokError("Kernarg size should be resolvable");
6407 uint64_t kernarg_size = IVal;
6408 if (PreloadLength && kernarg_size &&
6409 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6410 return TokError("Kernarg preload length + offset is larger than the "
6411 "kernarg segment size");
6412
6413 if (isGFX90A()) {
6414 if (!Seen.contains(".amdhsa_accum_offset"))
6415 return TokError(".amdhsa_accum_offset directive is required");
6416 int64_t EvaluatedAccum;
6417 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6418 uint64_t UEvaluatedAccum = EvaluatedAccum;
6419 if (AccumEvaluatable &&
6420 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6421 return TokError("accum_offset should be in range [4..256] in "
6422 "increments of 4");
6423
6424 int64_t EvaluatedNumVGPR;
6425 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6426 AccumEvaluatable &&
6427 UEvaluatedAccum >
6428 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6429 return TokError("accum_offset exceeds total VGPR allocation");
6430 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6431 MCBinaryExpr::createDiv(
6432 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
6433 MCConstantExpr::create(1, getContext()), getContext());
6434 AMDGPU::MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum,
6435 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6436 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6437 getContext());
6438 }
6439
6440 if (isGFX1250Plus())
6441 AMDGPU::MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, NamedBarCnt,
6442 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6443 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6444 getContext());
6445
6446 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6447 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
6448 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6449 return TokError("shared_vgpr_count directive not valid on "
6450 "wavefront size 32");
6451 }
6452
6453 if (VGPRBlocksEvaluatable &&
6454 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6455 63)) {
6456 return TokError("shared_vgpr_count*2 + "
6457 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6458 "exceed 63\n");
6459 }
6460 }
6461
6462 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6463 NextFreeVGPR, NextFreeSGPR,
6464 ReserveVCC, ReserveFlatScr);
6465 return false;
6466}
6467
6468bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6469 uint32_t Version;
6470 if (ParseAsAbsoluteExpression(Version))
6471 return true;
6472
6473 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6474 return false;
6475}
6476
6477bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6478 AMDGPUMCKernelCodeT &C) {
6479 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6480 // assembly for backwards compatibility.
6481 if (ID == "max_scratch_backing_memory_byte_size") {
6482 Parser.eatToEndOfStatement();
6483 return false;
6484 }
6485
6486 SmallString<40> ErrStr;
6487 raw_svector_ostream Err(ErrStr);
6488 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6489 return TokError(Err.str());
6490 }
6491 Lex();
6492
6493 if (ID == "enable_wavefront_size32") {
6494 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6495 if (!isGFX10Plus())
6496 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6497 if (!isWave32())
6498 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6499 } else {
6500 if (!isWave64())
6501 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6502 }
6503 }
6504
6505 if (ID == "wavefront_size") {
6506 if (C.wavefront_size == 5) {
6507 if (!isGFX10Plus())
6508 return TokError("wavefront_size=5 is only allowed on GFX10+");
6509 if (!isWave32())
6510 return TokError("wavefront_size=5 requires +WavefrontSize32");
6511 } else if (C.wavefront_size == 6) {
6512 if (!isWave64())
6513 return TokError("wavefront_size=6 requires +WavefrontSize64");
6514 }
6515 }
6516
6517 return false;
6518}
6519
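// Parses a legacy block of key = value pairs, for example:
//   .amd_kernel_code_t
//     wavefront_size = 6
//   .end_amd_kernel_code_t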
6520bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6521 AMDGPUMCKernelCodeT KernelCode;
6522 KernelCode.initDefault(&getSTI(), getContext());
6523
6524 while (true) {
6525 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6526 // will set the current token to EndOfStatement.
6527 while(trySkipToken(AsmToken::EndOfStatement));
6528
6529 StringRef ID;
6530 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6531 return true;
6532
6533 if (ID == ".end_amd_kernel_code_t")
6534 break;
6535
6536 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6537 return true;
6538 }
6539
6540 KernelCode.validate(&getSTI(), getContext());
6541 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6542
6543 return false;
6544}
6545
6546bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6547 StringRef KernelName;
6548 if (!parseId(KernelName, "expected symbol name"))
6549 return true;
6550
6551 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6552 ELF::STT_AMDGPU_HSA_KERNEL);
6553
6554 KernelScope.initialize(getContext());
6555 return false;
6556}
6557
6558bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6559 if (!getSTI().getTargetTriple().isAMDGCN()) {
6560 return Error(getLoc(),
6561 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6562 "architectures");
6563 }
6564
6565 auto TargetIDDirective = getLexer().getTok().getStringContents();
6566 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6567 return Error(getParser().getTok().getLoc(), "target id must match options");
6568
6569 getTargetStreamer().EmitISAVersion();
6570 Lex();
6571
6572 return false;
6573}
6574
6575bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6576 assert(isHsaAbi(getSTI()));
6577
6578 std::string HSAMetadataString;
6579 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6580 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6581 return true;
6582
6583 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6584 return Error(getLoc(), "invalid HSA metadata");
6585
6586 return false;
6587}
6588
6589/// Common code to parse out a block of text (typically YAML) between start and
6590/// end directives.
6591bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6592 const char *AssemblerDirectiveEnd,
6593 std::string &CollectString) {
6594
6595 raw_string_ostream CollectStream(CollectString);
6596
6597 getLexer().setSkipSpace(false);
6598
6599 bool FoundEnd = false;
6600 while (!isToken(AsmToken::Eof)) {
6601 while (isToken(AsmToken::Space)) {
6602 CollectStream << getTokenStr();
6603 Lex();
6604 }
6605
6606 if (trySkipId(AssemblerDirectiveEnd)) {
6607 FoundEnd = true;
6608 break;
6609 }
6610
6611 CollectStream << Parser.parseStringToEndOfStatement()
6612 << getContext().getAsmInfo()->getSeparatorString();
6613
6614 Parser.eatToEndOfStatement();
6615 }
6616
6617 getLexer().setSkipSpace(true);
6618
6619 if (isToken(AsmToken::Eof) && !FoundEnd) {
6620 return TokError(Twine("expected directive ") +
6621 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6622 }
6623
6624 return false;
6625}
6626
6627/// Parse the assembler directive for new MsgPack-format PAL metadata.
6628bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6629 std::string String;
6630 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6631 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6632 return true;
6633
6634 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6635 if (!PALMetadata->setFromString(String))
6636 return Error(getLoc(), "invalid PAL metadata");
6637 return false;
6638}
6639
6640/// Parse the assembler directive for old linear-format PAL metadata.
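/// The legacy form is a comma-separated list of register/value pairs, e.g.
/// .amdgpu_pal_metadata <reg>, <value>, <reg>, <value>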
6641bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6642 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6643 return Error(getLoc(),
6644 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6645 "not available on non-amdpal OSes")).str());
6646 }
6647
6648 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6649 PALMetadata->setLegacy();
6650 for (;;) {
6651 uint32_t Key, Value;
6652 if (ParseAsAbsoluteExpression(Key)) {
6653 return TokError(Twine("invalid value in ") +
6654 Twine(PALMD::AssemblerDirective));
6655 }
6656 if (!trySkipToken(AsmToken::Comma)) {
6657 return TokError(Twine("expected an even number of values in ") +
6658 Twine(PALMD::AssemblerDirective));
6659 }
6660 if (ParseAsAbsoluteExpression(Value)) {
6661 return TokError(Twine("invalid value in ") +
6662 Twine(PALMD::AssemblerDirective));
6663 }
6664 PALMetadata->setRegister(Key, Value);
6665 if (!trySkipToken(AsmToken::Comma))
6666 break;
6667 }
6668 return false;
6669}
6670
6671/// ParseDirectiveAMDGPULDS
6672/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
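/// e.g. ".amdgpu_lds my_lds, 512, 16" declares a 512-byte LDS symbol aligned to
/// 16 bytes; if the alignment is omitted it defaults to 4.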
6673bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6674 if (getParser().checkForValidSection())
6675 return true;
6676
6677 StringRef Name;
6678 SMLoc NameLoc = getLoc();
6679 if (getParser().parseIdentifier(Name))
6680 return TokError("expected identifier in directive");
6681
6682 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6683 if (getParser().parseComma())
6684 return true;
6685
6686 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6687
6688 int64_t Size;
6689 SMLoc SizeLoc = getLoc();
6690 if (getParser().parseAbsoluteExpression(Size))
6691 return true;
6692 if (Size < 0)
6693 return Error(SizeLoc, "size must be non-negative");
6694 if (Size > LocalMemorySize)
6695 return Error(SizeLoc, "size is too large");
6696
6697 int64_t Alignment = 4;
6698 if (trySkipToken(AsmToken::Comma)) {
6699 SMLoc AlignLoc = getLoc();
6700 if (getParser().parseAbsoluteExpression(Alignment))
6701 return true;
6702 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6703 return Error(AlignLoc, "alignment must be a power of two");
6704
6705 // Alignment larger than the size of LDS is possible in theory, as long
6706 // as the linker manages to place the symbol at address 0, but we do want
6707 // to make sure the alignment fits nicely into a 32-bit integer.
6708 if (Alignment >= 1u << 31)
6709 return Error(AlignLoc, "alignment is too large");
6710 }
6711
6712 if (parseEOL())
6713 return true;
6714
6715 Symbol->redefineIfPossible();
6716 if (!Symbol->isUndefined())
6717 return Error(NameLoc, "invalid symbol redefinition");
6718
6719 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6720 return false;
6721}
6722
6723bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6724 StringRef IDVal = DirectiveID.getString();
6725
6726 if (isHsaAbi(getSTI())) {
6727 if (IDVal == ".amdhsa_kernel")
6728 return ParseDirectiveAMDHSAKernel();
6729
6730 if (IDVal == ".amdhsa_code_object_version")
6731 return ParseDirectiveAMDHSACodeObjectVersion();
6732
6733 // TODO: Restructure/combine with PAL metadata directive.
6734 if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6735 return ParseDirectiveHSAMetadata();
6736 } else {
6737 if (IDVal == ".amd_kernel_code_t")
6738 return ParseDirectiveAMDKernelCodeT();
6739
6740 if (IDVal == ".amdgpu_hsa_kernel")
6741 return ParseDirectiveAMDGPUHsaKernel();
6742
6743 if (IDVal == ".amd_amdgpu_isa")
6744 return ParseDirectiveISAVersion();
6745
6746 if (IDVal == HSAMD::AssemblerDirectiveBegin) {
6747 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6748 Twine(" directive is "
6749 "not available on non-amdhsa OSes"))
6750 .str());
6751 }
6752 }
6753
6754 if (IDVal == ".amdgcn_target")
6755 return ParseDirectiveAMDGCNTarget();
6756
6757 if (IDVal == ".amdgpu_lds")
6758 return ParseDirectiveAMDGPULDS();
6759
6760 if (IDVal == PALMD::AssemblerDirectiveBegin)
6761 return ParseDirectivePALMetadataBegin();
6762
6763 if (IDVal == PALMD::AssemblerDirective)
6764 return ParseDirectivePALMetadata();
6765
6766 return true;
6767}
6768
6769bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6770 MCRegister Reg) {
6771 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
6772 return isGFX9Plus();
6773
6774 // GFX10+ has 2 more SGPRs 104 and 105.
6775 if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
6776 return hasSGPR104_SGPR105();
6777
6778 switch (Reg.id()) {
6779 case SRC_SHARED_BASE_LO:
6780 case SRC_SHARED_BASE:
6781 case SRC_SHARED_LIMIT_LO:
6782 case SRC_SHARED_LIMIT:
6783 case SRC_PRIVATE_BASE_LO:
6784 case SRC_PRIVATE_BASE:
6785 case SRC_PRIVATE_LIMIT_LO:
6786 case SRC_PRIVATE_LIMIT:
6787 return isGFX9Plus();
6788 case SRC_FLAT_SCRATCH_BASE_LO:
6789 case SRC_FLAT_SCRATCH_BASE_HI:
6790 return hasGloballyAddressableScratch();
6791 case SRC_POPS_EXITING_WAVE_ID:
6792 return isGFX9Plus() && !isGFX11Plus();
6793 case TBA:
6794 case TBA_LO:
6795 case TBA_HI:
6796 case TMA:
6797 case TMA_LO:
6798 case TMA_HI:
6799 return !isGFX9Plus();
6800 case XNACK_MASK:
6801 case XNACK_MASK_LO:
6802 case XNACK_MASK_HI:
6803 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6804 case SGPR_NULL:
6805 return isGFX10Plus();
6806 case SRC_EXECZ:
6807 case SRC_VCCZ:
6808 return !isGFX11Plus();
6809 default:
6810 break;
6811 }
6812
6813 if (isCI())
6814 return true;
6815
6816 if (isSI() || isGFX10Plus()) {
6817 // No flat_scr on SI.
6818 // On GFX10Plus flat scratch is not a valid register operand and can only be
6819 // accessed with s_setreg/s_getreg.
6820 switch (Reg.id()) {
6821 case FLAT_SCR:
6822 case FLAT_SCR_LO:
6823 case FLAT_SCR_HI:
6824 return false;
6825 default:
6826 return true;
6827 }
6828 }
6829
6830 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6831 // SI/CI have.
6832 if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
6833 return hasSGPR102_SGPR103();
6834
6835 return true;
6836}
6837
6838ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6839 StringRef Mnemonic,
6840 OperandMode Mode) {
6841 ParseStatus Res = parseVOPD(Operands);
6842 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6843 return Res;
6844
6845 // Try to parse with a custom parser
6846 Res = MatchOperandParserImpl(Operands, Mnemonic);
6847
6848 // If we successfully parsed the operand, or if there was an error parsing,
6849 // we are done.
6850 //
6851 // If we are parsing after we reach EndOfStatement then this means we
6852 // are appending default values to the Operands list. This is only done
6853 // by custom parser, so we shouldn't continue on to the generic parsing.
6854 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6855 return Res;
6856
6857 SMLoc RBraceLoc;
6858 SMLoc LBraceLoc = getLoc();
6859 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6860 unsigned Prefix = Operands.size();
6861
6862 for (;;) {
6863 auto Loc = getLoc();
6864 Res = parseReg(Operands);
6865 if (Res.isNoMatch())
6866 Error(Loc, "expected a register");
6867 if (!Res.isSuccess())
6868 return ParseStatus::Failure;
6869
6870 RBraceLoc = getLoc();
6871 if (trySkipToken(AsmToken::RBrac))
6872 break;
6873
6874 if (!skipToken(AsmToken::Comma,
6875 "expected a comma or a closing square bracket"))
6876 return ParseStatus::Failure;
6877 }
6878
6879 if (Operands.size() - Prefix > 1) {
6880 Operands.insert(Operands.begin() + Prefix,
6881 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6882 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6883 }
6884
6885 return ParseStatus::Success;
6886 }
6887
6888 return parseRegOrImm(Operands);
6889}
6890
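// Strips a trailing encoding suffix from the mnemonic and records it as a
// forced encoding; for example "_e32"/"_e64" force the 32/64-bit encodings and
// "_dpp"/"_sdwa" force the DPP/SDWA variants.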
6891StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6892 // Clear any forced encodings from the previous instruction.
6893 setForcedEncodingSize(0);
6894 setForcedDPP(false);
6895 setForcedSDWA(false);
6896
6897 if (Name.consume_back("_e64_dpp")) {
6898 setForcedDPP(true);
6899 setForcedEncodingSize(64);
6900 return Name;
6901 }
6902 if (Name.consume_back("_e64")) {
6903 setForcedEncodingSize(64);
6904 return Name;
6905 }
6906 if (Name.consume_back("_e32")) {
6907 setForcedEncodingSize(32);
6908 return Name;
6909 }
6910 if (Name.consume_back("_dpp")) {
6911 setForcedDPP(true);
6912 return Name;
6913 }
6914 if (Name.consume_back("_sdwa")) {
6915 setForcedSDWA(true);
6916 return Name;
6917 }
6918 return Name;
6919}
6920
6921static void applyMnemonicAliases(StringRef &Mnemonic,
6922 const FeatureBitset &Features,
6923 unsigned VariantID);
6924
6925bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
6926 StringRef Name, SMLoc NameLoc,
6927 OperandVector &Operands) {
6928 // Add the instruction mnemonic
6929 Name = parseMnemonicSuffix(Name);
6930
6931 // If the target architecture uses MnemonicAlias, call it here to parse
6932 // operands correctly.
6933 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6934
6935 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6936
6937 bool IsMIMG = Name.starts_with("image_");
6938
6939 while (!trySkipToken(AsmToken::EndOfStatement)) {
6940 OperandMode Mode = OperandMode_Default;
6941 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6942 Mode = OperandMode_NSA;
6943 ParseStatus Res = parseOperand(Operands, Name, Mode);
6944
6945 if (!Res.isSuccess()) {
6946 checkUnsupportedInstruction(Name, NameLoc);
6947 if (!Parser.hasPendingError()) {
6948 // FIXME: use real operand location rather than the current location.
6949 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6950 : "not a valid operand.";
6951 Error(getLoc(), Msg);
6952 }
6953 while (!trySkipToken(AsmToken::EndOfStatement)) {
6954 lex();
6955 }
6956 return true;
6957 }
6958
6959 // Eat the comma or space if there is one.
6960 trySkipToken(AsmToken::Comma);
6961 }
6962
6963 return false;
6964}
6965
6966//===----------------------------------------------------------------------===//
6967// Utility functions
6968//===----------------------------------------------------------------------===//
6969
6970ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6971 OperandVector &Operands) {
6972 SMLoc S = getLoc();
6973 if (!trySkipId(Name))
6974 return ParseStatus::NoMatch;
6975
6976 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6977 return ParseStatus::Success;
6978}
6979
6980ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6981 int64_t &IntVal) {
6982
6983 if (!trySkipId(Prefix, AsmToken::Colon))
6984 return ParseStatus::NoMatch;
6985
6987}
6988
6989ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6990 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6991 std::function<bool(int64_t &)> ConvertResult) {
6992 SMLoc S = getLoc();
6993 int64_t Value = 0;
6994
6995 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6996 if (!Res.isSuccess())
6997 return Res;
6998
6999 if (ConvertResult && !ConvertResult(Value)) {
7000 Error(S, "invalid " + StringRef(Prefix) + " value.");
7001 }
7002
7003 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
7004 return ParseStatus::Success;
7005}
7006
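// Parses a prefixed bit array such as op_sel:[0,1,0,1]; each element must be
// 0 or 1, and at most four elements are accepted.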
7007ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7008 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7009 bool (*ConvertResult)(int64_t &)) {
7010 SMLoc S = getLoc();
7011 if (!trySkipId(Prefix, AsmToken::Colon))
7012 return ParseStatus::NoMatch;
7013
7014 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
7015 return ParseStatus::Failure;
7016
7017 unsigned Val = 0;
7018 const unsigned MaxSize = 4;
7019
7020 // FIXME: How to verify the number of elements matches the number of src
7021 // operands?
7022 for (int I = 0; ; ++I) {
7023 int64_t Op;
7024 SMLoc Loc = getLoc();
7025 if (!parseExpr(Op))
7026 return ParseStatus::Failure;
7027
7028 if (Op != 0 && Op != 1)
7029 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
7030
7031 Val |= (Op << I);
7032
7033 if (trySkipToken(AsmToken::RBrac))
7034 break;
7035
7036 if (I + 1 == MaxSize)
7037 return Error(getLoc(), "expected a closing square bracket");
7038
7039 if (!skipToken(AsmToken::Comma, "expected a comma"))
7040 return ParseStatus::Failure;
7041 }
7042
7043 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
7044 return ParseStatus::Success;
7045}
7046
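// Parses a boolean modifier given either by name or with a "no" prefix,
// for example "gds" vs "nogds" or "tfe" vs "notfe".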
7047ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7048 OperandVector &Operands,
7049 AMDGPUOperand::ImmTy ImmTy) {
7050 int64_t Bit;
7051 SMLoc S = getLoc();
7052
7053 if (trySkipId(Name)) {
7054 Bit = 1;
7055 } else if (trySkipId("no", Name)) {
7056 Bit = 0;
7057 } else {
7058 return ParseStatus::NoMatch;
7059 }
7060
7061 if (Name == "r128" && !hasMIMG_R128())
7062 return Error(S, "r128 modifier is not supported on this GPU");
7063 if (Name == "a16" && !hasA16())
7064 return Error(S, "a16 modifier is not supported on this GPU");
7065
7066 if (Bit == 0 && Name == "gds") {
7067 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7068 if (Mnemo.starts_with("ds_gws"))
7069 return Error(S, "nogds is not allowed");
7070 }
7071
7072 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7073 ImmTy = AMDGPUOperand::ImmTyR128A16;
7074
7075 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
7076 return ParseStatus::Success;
7077}
7078
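// Maps a cache policy token such as "glc", "slc", "dlc" or "scc" (or its
// "no"-prefixed form) to the corresponding CPol bit; GFX940 non-scalar
// instructions use "nt", "sc0" and "sc1" instead.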
7079unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7080 bool &Disabling) const {
7081 Disabling = Id.consume_front("no");
7082
7083 if (isGFX940() && !Mnemo.starts_with("s_")) {
7084 return StringSwitch<unsigned>(Id)
7085 .Case("nt", AMDGPU::CPol::NT)
7086 .Case("sc0", AMDGPU::CPol::SC0)
7087 .Case("sc1", AMDGPU::CPol::SC1)
7088 .Default(0);
7089 }
7090
7091 return StringSwitch<unsigned>(Id)
7092 .Case("dlc", AMDGPU::CPol::DLC)
7093 .Case("glc", AMDGPU::CPol::GLC)
7094 .Case("scc", AMDGPU::CPol::SCC)
7095 .Case("slc", AMDGPU::CPol::SLC)
7096 .Default(0);
7097}
7098
7099ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
7100 if (isGFX12Plus()) {
7101 SMLoc StringLoc = getLoc();
7102
7103 int64_t CPolVal = 0;
7104 ParseStatus ResTH = ParseStatus::NoMatch;
7105 ParseStatus ResScope = ParseStatus::NoMatch;
7106 ParseStatus ResNV = ParseStatus::NoMatch;
7107 ParseStatus ResScal = ParseStatus::NoMatch;
7108
7109 for (;;) {
7110 if (ResTH.isNoMatch()) {
7111 int64_t TH;
7112 ResTH = parseTH(Operands, TH);
7113 if (ResTH.isFailure())
7114 return ResTH;
7115 if (ResTH.isSuccess()) {
7116 CPolVal |= TH;
7117 continue;
7118 }
7119 }
7120
7121 if (ResScope.isNoMatch()) {
7122 int64_t Scope;
7123 ResScope = parseScope(Operands, Scope);
7124 if (ResScope.isFailure())
7125 return ResScope;
7126 if (ResScope.isSuccess()) {
7127 CPolVal |= Scope;
7128 continue;
7129 }
7130 }
7131
7132 // The NV bit exists on GFX12+, but it only has an effect starting with
7133 // GFX1250. Allow parsing on all GFX12 and fail at validation for better
7134 // diagnostics.
7135 if (ResNV.isNoMatch()) {
7136 if (trySkipId("nv")) {
7137 ResNV = ParseStatus::Success;
7138 CPolVal |= CPol::NV;
7139 continue;
7140 } else if (trySkipId("no", "nv")) {
7141 ResNV = ParseStatus::Success;
7142 continue;
7143 }
7144 }
7145
7146 if (ResScal.isNoMatch()) {
7147 if (trySkipId("scale_offset")) {
7148 ResScal = ParseStatus::Success;
7149 CPolVal |= CPol::SCAL;
7150 continue;
7151 } else if (trySkipId("no", "scale_offset")) {
7152 ResScal = ParseStatus::Success;
7153 continue;
7154 }
7155 }
7156
7157 break;
7158 }
7159
7160 if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
7161 ResScal.isNoMatch())
7162 return ParseStatus::NoMatch;
7163
7164 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
7165 AMDGPUOperand::ImmTyCPol));
7166 return ParseStatus::Success;
7167 }
7168
7169 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7170 SMLoc OpLoc = getLoc();
7171 unsigned Enabled = 0, Seen = 0;
7172 for (;;) {
7173 SMLoc S = getLoc();
7174 bool Disabling;
7175 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7176 if (!CPol)
7177 break;
7178
7179 lex();
7180
7181 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
7182 return Error(S, "dlc modifier is not supported on this GPU");
7183
7184 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
7185 return Error(S, "scc modifier is not supported on this GPU");
7186
7187 if (Seen & CPol)
7188 return Error(S, "duplicate cache policy modifier");
7189
7190 if (!Disabling)
7191 Enabled |= CPol;
7192
7193 Seen |= CPol;
7194 }
7195
7196 if (!Seen)
7197 return ParseStatus::NoMatch;
7198
7199 Operands.push_back(
7200 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7201 return ParseStatus::Success;
7202}
7203
7204ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7205 int64_t &Scope) {
7206 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7207 CPol::SCOPE_DEV, CPol::SCOPE_SYS};
7208
7209 ParseStatus Res = parseStringOrIntWithPrefix(
7210 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7211 Scope);
7212
7213 if (Res.isSuccess())
7214 Scope = Scopes[Scope];
7215
7216 return Res;
7217}
7218
7219ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
7220 TH = AMDGPU::CPol::TH_RT; // default
7221
7222 StringRef Value;
7223 SMLoc StringLoc;
7224 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
7225 if (!Res.isSuccess())
7226 return Res;
7227
7228 if (Value == "TH_DEFAULT")
7229 TH = AMDGPU::CPol::TH_RT;
7230 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
7231 Value == "TH_LOAD_NT_WB") {
7232 return Error(StringLoc, "invalid th value");
7233 } else if (Value.consume_front("TH_ATOMIC_")) {
7234 TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
7235 } else if (Value.consume_front("TH_LOAD_")) {
7236 TH = AMDGPU::CPol::TH_TYPE_LOAD;
7237 } else if (Value.consume_front("TH_STORE_")) {
7238 TH = AMDGPU::CPol::TH_TYPE_STORE;
7239 } else {
7240 return Error(StringLoc, "invalid th value");
7241 }
7242
7243 if (Value == "BYPASS")
7244 TH |= AMDGPU::CPol::TH_REAL_BYPASS;
7245
7246 if (TH != 0) {
7247 if ((TH & AMDGPU::CPol::TH_TYPE_ATOMIC) != 0)
7248 TH |= StringSwitch<int64_t>(Value)
7249 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7250 .Case("RT", AMDGPU::CPol::TH_RT)
7251 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7252 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
7253 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
7254 AMDGPU::CPol::TH_ATOMIC_RETURN)
7255 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
7256 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
7257 AMDGPU::CPol::TH_ATOMIC_NT)
7258 .Default(0xffffffff);
7259 else
7260 TH |= StringSwitch<int64_t>(Value)
7261 .Case("RT", AMDGPU::CPol::TH_RT)
7262 .Case("NT", AMDGPU::CPol::TH_NT)
7263 .Case("HT", AMDGPU::CPol::TH_HT)
7264 .Case("LU", AMDGPU::CPol::TH_LU)
7265 .Case("WB", AMDGPU::CPol::TH_WB)
7266 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
7267 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
7268 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
7269 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
7270 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
7271 .Default(0xffffffff);
7272 }
7273
7274 if (TH == 0xffffffff)
7275 return Error(StringLoc, "invalid th value");
7276
7277 return ParseStatus::Success;
7278}
7279
7280static void
7281addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands,
7282 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7283 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7284 std::optional<unsigned> InsertAt = std::nullopt) {
7285 auto i = OptionalIdx.find(ImmT);
7286 if (i != OptionalIdx.end()) {
7287 unsigned Idx = i->second;
7288 const AMDGPUOperand &Op =
7289 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7290 if (InsertAt)
7291 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
7292 else
7293 Op.addImmOperands(Inst, 1);
7294 } else {
7295 if (InsertAt.has_value())
7296 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
7297 else
7298 Inst.addOperand(MCOperand::createImm(Default));
7299 }
7300}
7301
7302ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7303 StringRef &Value,
7304 SMLoc &StringLoc) {
7305 if (!trySkipId(Prefix, AsmToken::Colon))
7306 return ParseStatus::NoMatch;
7307
7308 StringLoc = getLoc();
7309 return parseId(Value, "expected an identifier") ? ParseStatus::Success
7310 : ParseStatus::Failure;
7311}
7312
7313ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7314 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7315 int64_t &IntVal) {
7316 if (!trySkipId(Name, AsmToken::Colon))
7317 return ParseStatus::NoMatch;
7318
7319 SMLoc StringLoc = getLoc();
7320
7321 StringRef Value;
7322 if (isToken(AsmToken::Identifier)) {
7323 Value = getTokenStr();
7324 lex();
7325
7326 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7327 if (Value == Ids[IntVal])
7328 break;
7329 } else if (!parseExpr(IntVal))
7330 return ParseStatus::Failure;
7331
7332 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7333 return Error(StringLoc, "invalid " + Twine(Name) + " value");
7334
7335 return ParseStatus::Success;
7336}
7337
7338ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7339 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7340 AMDGPUOperand::ImmTy Type) {
7341 SMLoc S = getLoc();
7342 int64_t IntVal;
7343
7344 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7345 if (Res.isSuccess())
7346 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7347
7348 return Res;
7349}
7350
7351//===----------------------------------------------------------------------===//
7352// MTBUF format
7353//===----------------------------------------------------------------------===//
7354
7355bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7356 int64_t MaxVal,
7357 int64_t &Fmt) {
7358 int64_t Val;
7359 SMLoc Loc = getLoc();
7360
7361 auto Res = parseIntWithPrefix(Pref, Val);
7362 if (Res.isFailure())
7363 return false;
7364 if (Res.isNoMatch())
7365 return true;
7366
7367 if (Val < 0 || Val > MaxVal) {
7368 Error(Loc, Twine("out of range ", StringRef(Pref)));
7369 return false;
7370 }
7371
7372 Fmt = Val;
7373 return true;
7374}
7375
7376ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7377 AMDGPUOperand::ImmTy ImmTy) {
7378 const char *Pref = "index_key";
7379 int64_t ImmVal = 0;
7380 SMLoc Loc = getLoc();
7381 auto Res = parseIntWithPrefix(Pref, ImmVal);
7382 if (!Res.isSuccess())
7383 return Res;
7384
7385 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7386 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7387 (ImmVal < 0 || ImmVal > 1))
7388 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7389
7390 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7391 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7392
7393 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7394 return ParseStatus::Success;
7395}
7396
7397ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
7398 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7399}
7400
7401ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
7402 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7403}
7404
7405ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
7406 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7407}
7408
7409ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
7410 StringRef Name,
7411 AMDGPUOperand::ImmTy Type) {
7412 return parseStringOrIntWithPrefix(Operands, Name,
7413 {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
7414 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
7415 "MATRIX_FMT_FP4"},
7416 Type);
7417}
7418
7419ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
7420 return tryParseMatrixFMT(Operands, "matrix_a_fmt",
7421 AMDGPUOperand::ImmTyMatrixAFMT);
7422}
7423
7424ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
7425 return tryParseMatrixFMT(Operands, "matrix_b_fmt",
7426 AMDGPUOperand::ImmTyMatrixBFMT);
7427}
7428
7429ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
7430 StringRef Name,
7431 AMDGPUOperand::ImmTy Type) {
7432 return parseStringOrIntWithPrefix(
7433 Operands, Name, {"MATRIX_SCALE_ROW0", "MATRIX_SCALE_ROW1"}, Type);
7434}
7435
7436ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
7437 return tryParseMatrixScale(Operands, "matrix_a_scale",
7438 AMDGPUOperand::ImmTyMatrixAScale);
7439}
7440
7441ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
7442 return tryParseMatrixScale(Operands, "matrix_b_scale",
7443 AMDGPUOperand::ImmTyMatrixBScale);
7444}
7445
7446ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
7447 StringRef Name,
7448 AMDGPUOperand::ImmTy Type) {
7449 return parseStringOrIntWithPrefix(
7450 Operands, Name,
7451 {"MATRIX_SCALE_FMT_E8", "MATRIX_SCALE_FMT_E5M3", "MATRIX_SCALE_FMT_E4M3"},
7452 Type);
7453}
7454
7455ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
7456 return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
7457 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7458}
7459
7460ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
7461 return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
7462 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7463}
7464
7465// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7466// values to live in a joint format operand in the MCInst encoding.
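// Example (illustrative): "dfmt:5, nfmt:2" or just "dfmt:5"; a missing part
// defaults to DFMT_DEFAULT/NFMT_DEFAULT.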
7467ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
7468 using namespace llvm::AMDGPU::MTBUFFormat;
7469
7470 int64_t Dfmt = DFMT_UNDEF;
7471 int64_t Nfmt = NFMT_UNDEF;
7472
7473 // dfmt and nfmt can appear in either order, and each is optional.
7474 for (int I = 0; I < 2; ++I) {
7475 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
7476 return ParseStatus::Failure;
7477
7478 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
7479 return ParseStatus::Failure;
7480
7481 // Skip optional comma between dfmt/nfmt
7482 // but guard against 2 commas following each other.
7483 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7484 !peekToken().is(AsmToken::Comma)) {
7485 trySkipToken(AsmToken::Comma);
7486 }
7487 }
7488
7489 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7490 return ParseStatus::NoMatch;
7491
7492 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7493 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7494
7495 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7496 return ParseStatus::Success;
7497}
7498
7499ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7500 using namespace llvm::AMDGPU::MTBUFFormat;
7501
7502 int64_t Fmt = UFMT_UNDEF;
7503
7504 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7505 return ParseStatus::Failure;
7506
7507 if (Fmt == UFMT_UNDEF)
7508 return ParseStatus::NoMatch;
7509
7510 Format = Fmt;
7511 return ParseStatus::Success;
7512}
7513
7514bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7515 int64_t &Nfmt,
7516 StringRef FormatStr,
7517 SMLoc Loc) {
7518 using namespace llvm::AMDGPU::MTBUFFormat;
7519 int64_t Format;
7520
7521 Format = getDfmt(FormatStr);
7522 if (Format != DFMT_UNDEF) {
7523 Dfmt = Format;
7524 return true;
7525 }
7526
7527 Format = getNfmt(FormatStr, getSTI());
7528 if (Format != NFMT_UNDEF) {
7529 Nfmt = Format;
7530 return true;
7531 }
7532
7533 Error(Loc, "unsupported format");
7534 return false;
7535}
7536
7537ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7538 SMLoc FormatLoc,
7539 int64_t &Format) {
7540 using namespace llvm::AMDGPU::MTBUFFormat;
7541
7542 int64_t Dfmt = DFMT_UNDEF;
7543 int64_t Nfmt = NFMT_UNDEF;
7544 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7545 return ParseStatus::Failure;
7546
7547 if (trySkipToken(AsmToken::Comma)) {
7548 StringRef Str;
7549 SMLoc Loc = getLoc();
7550 if (!parseId(Str, "expected a format string") ||
7551 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7552 return ParseStatus::Failure;
7553 if (Dfmt == DFMT_UNDEF)
7554 return Error(Loc, "duplicate numeric format");
7555 if (Nfmt == NFMT_UNDEF)
7556 return Error(Loc, "duplicate data format");
7557 }
7558
7559 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7560 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7561
7562 if (isGFX10Plus()) {
7563 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
7564 if (Ufmt == UFMT_UNDEF)
7565 return Error(FormatLoc, "unsupported format");
7566 Format = Ufmt;
7567 } else {
7568 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7569 }
7570
7571 return ParseStatus::Success;
7572}
7573
7574ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7575 SMLoc Loc,
7576 int64_t &Format) {
7577 using namespace llvm::AMDGPU::MTBUFFormat;
7578
7579 auto Id = getUnifiedFormat(FormatStr, getSTI());
7580 if (Id == UFMT_UNDEF)
7581 return ParseStatus::NoMatch;
7582
7583 if (!isGFX10Plus())
7584 return Error(Loc, "unified format is not supported on this GPU");
7585
7586 Format = Id;
7587 return ParseStatus::Success;
7588}
7589
7590ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7591 using namespace llvm::AMDGPU::MTBUFFormat;
7592 SMLoc Loc = getLoc();
7593
7594 if (!parseExpr(Format))
7595 return ParseStatus::Failure;
7596 if (!isValidFormatEncoding(Format, getSTI()))
7597 return Error(Loc, "out of range format");
7598
7599 return ParseStatus::Success;
7600}
7601
7602ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7603 using namespace llvm::AMDGPU::MTBUFFormat;
7604
7605 if (!trySkipId("format", AsmToken::Colon))
7606 return ParseStatus::NoMatch;
7607
7608 if (trySkipToken(AsmToken::LBrac)) {
7609 StringRef FormatStr;
7610 SMLoc Loc = getLoc();
7611 if (!parseId(FormatStr, "expected a format string"))
7612 return ParseStatus::Failure;
7613
7614 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7615 if (Res.isNoMatch())
7616 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7617 if (!Res.isSuccess())
7618 return Res;
7619
7620 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7621 return ParseStatus::Failure;
7622
7623 return ParseStatus::Success;
7624 }
7625
7626 return parseNumericFormat(Format);
7627}
7628
7629ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
7630 using namespace llvm::AMDGPU::MTBUFFormat;
7631
7632 int64_t Format = getDefaultFormatEncoding(getSTI());
7633 ParseStatus Res;
7634 SMLoc Loc = getLoc();
7635
7636 // Parse legacy format syntax.
7637 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
7638 if (Res.isFailure())
7639 return Res;
7640
7641 bool FormatFound = Res.isSuccess();
7642
7643 Operands.push_back(
7644 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7645
7646 if (FormatFound)
7647 trySkipToken(AsmToken::Comma);
7648
7649 if (isToken(AsmToken::EndOfStatement)) {
7650 // We are expecting an soffset operand,
7651 // but let the matcher handle the error.
7652 return ParseStatus::Success;
7653 }
7654
7655 // Parse soffset.
7656 Res = parseRegOrImm(Operands);
7657 if (!Res.isSuccess())
7658 return Res;
7659
7660 trySkipToken(AsmToken::Comma);
7661
7662 if (!FormatFound) {
7663 Res = parseSymbolicOrNumericFormat(Format);
7664 if (Res.isFailure())
7665 return Res;
7666 if (Res.isSuccess()) {
7667 auto Size = Operands.size();
7668 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7669 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7670 Op.setImm(Format);
7671 }
7672 return ParseStatus::Success;
7673 }
7674
7675 if (isId("format") && peekToken().is(AsmToken::Colon))
7676 return Error(getLoc(), "duplicate format");
7677 return ParseStatus::Success;
7678}
7679
7680ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7681 ParseStatus Res =
7682 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7683 if (Res.isNoMatch()) {
7684 Res = parseIntWithPrefix("inst_offset", Operands,
7685 AMDGPUOperand::ImmTyInstOffset);
7686 }
7687 return Res;
7688}
7689
7690ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7691 ParseStatus Res =
7692 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7693 if (Res.isNoMatch())
7694 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7695 return Res;
7696}
7697
7698ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7699 ParseStatus Res =
7700 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7701 if (Res.isNoMatch()) {
7702 Res =
7703 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7704 }
7705 return Res;
7706}
7707
7708//===----------------------------------------------------------------------===//
7709// Exp
7710//===----------------------------------------------------------------------===//
7711
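// Converts parsed 'exp' operands to an MCInst: registers and 'off'
// placeholders become the four source operands, and the enable mask gets one
// bit per enabled source (two bits per source in compressed mode).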
7712void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7713 OptionalImmIndexMap OptionalIdx;
7714
7715 unsigned OperandIdx[4];
7716 unsigned EnMask = 0;
7717 int SrcIdx = 0;
7718
7719 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7720 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7721
7722 // Add the register arguments
7723 if (Op.isReg()) {
7724 assert(SrcIdx < 4);
7725 OperandIdx[SrcIdx] = Inst.size();
7726 Op.addRegOperands(Inst, 1);
7727 ++SrcIdx;
7728 continue;
7729 }
7730
7731 if (Op.isOff()) {
7732 assert(SrcIdx < 4);
7733 OperandIdx[SrcIdx] = Inst.size();
7734 Inst.addOperand(MCOperand::createReg(MCRegister()));
7735 ++SrcIdx;
7736 continue;
7737 }
7738
7739 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7740 Op.addImmOperands(Inst, 1);
7741 continue;
7742 }
7743
7744 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7745 continue;
7746
7747 // Handle optional arguments
7748 OptionalIdx[Op.getImmTy()] = i;
7749 }
7750
7751 assert(SrcIdx == 4);
7752
7753 bool Compr = false;
7754 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7755 Compr = true;
7756 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7757 Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
7758 Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
7759 }
7760
7761 for (auto i = 0; i < SrcIdx; ++i) {
7762 if (Inst.getOperand(OperandIdx[i]).getReg()) {
7763 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7764 }
7765 }
7766
7767 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7768 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7769
7770 Inst.addOperand(MCOperand::createImm(EnMask));
7771}
7772
7773//===----------------------------------------------------------------------===//
7774// s_waitcnt
7775//===----------------------------------------------------------------------===//
7776
7777static bool
7778encodeCnt(
7779 const AMDGPU::IsaVersion ISA,
7780 int64_t &IntVal,
7781 int64_t CntVal,
7782 bool Saturate,
7783 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7784 unsigned (*decode)(const IsaVersion &Version, unsigned))
7785{
7786 bool Failed = false;
7787
7788 IntVal = encode(ISA, IntVal, CntVal);
7789 if (CntVal != decode(ISA, IntVal)) {
7790 if (Saturate) {
7791 IntVal = encode(ISA, IntVal, -1);
7792 } else {
7793 Failed = true;
7794 }
7795 }
7796 return Failed;
7797}
7798
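// Parses a single counter term of an s_waitcnt expression, e.g.
// (illustrative) "vmcnt(0)" or "lgkmcnt(1)", and merges it into the combined
// waitcnt bit mask.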
7799bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7800
7801 SMLoc CntLoc = getLoc();
7802 StringRef CntName = getTokenStr();
7803
7804 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7805 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7806 return false;
7807
7808 int64_t CntVal;
7809 SMLoc ValLoc = getLoc();
7810 if (!parseExpr(CntVal))
7811 return false;
7812
7813 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7814
7815 bool Failed = true;
7816 bool Sat = CntName.ends_with("_sat");
7817
7818 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7819 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7820 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7821 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7822 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7823 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7824 } else {
7825 Error(CntLoc, "invalid counter name " + CntName);
7826 return false;
7827 }
7828
7829 if (Failed) {
7830 Error(ValLoc, "too large value for " + CntName);
7831 return false;
7832 }
7833
7834 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7835 return false;
7836
7837 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7838 if (isToken(AsmToken::EndOfStatement)) {
7839 Error(getLoc(), "expected a counter name");
7840 return false;
7841 }
7842 }
7843
7844 return true;
7845}
7846
7847ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7848 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7849 int64_t Waitcnt = getWaitcntBitMask(ISA);
7850 SMLoc S = getLoc();
7851
7852 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7853 while (!isToken(AsmToken::EndOfStatement)) {
7854 if (!parseCnt(Waitcnt))
7855 return ParseStatus::Failure;
7856 }
7857 } else {
7858 if (!parseExpr(Waitcnt))
7859 return ParseStatus::Failure;
7860 }
7861
7862 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7863 return ParseStatus::Success;
7864}
7865
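// Parses one field of an s_delay_alu operand, e.g. (illustrative)
// "instid0(VALU_DEP_1)" or "instskip(NEXT)", and ORs the encoded value into
// Delay at the field's bit offset.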
7866bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7867 SMLoc FieldLoc = getLoc();
7868 StringRef FieldName = getTokenStr();
7869 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7870 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7871 return false;
7872
7873 SMLoc ValueLoc = getLoc();
7874 StringRef ValueName = getTokenStr();
7875 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7876 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7877 return false;
7878
7879 unsigned Shift;
7880 if (FieldName == "instid0") {
7881 Shift = 0;
7882 } else if (FieldName == "instskip") {
7883 Shift = 4;
7884 } else if (FieldName == "instid1") {
7885 Shift = 7;
7886 } else {
7887 Error(FieldLoc, "invalid field name " + FieldName);
7888 return false;
7889 }
7890
7891 int Value;
7892 if (Shift == 4) {
7893 // Parse values for instskip.
7894 Value = StringSwitch<int>(ValueName)
7895 .Case("SAME", 0)
7896 .Case("NEXT", 1)
7897 .Case("SKIP_1", 2)
7898 .Case("SKIP_2", 3)
7899 .Case("SKIP_3", 4)
7900 .Case("SKIP_4", 5)
7901 .Default(-1);
7902 } else {
7903 // Parse values for instid0 and instid1.
7904 Value = StringSwitch<int>(ValueName)
7905 .Case("NO_DEP", 0)
7906 .Case("VALU_DEP_1", 1)
7907 .Case("VALU_DEP_2", 2)
7908 .Case("VALU_DEP_3", 3)
7909 .Case("VALU_DEP_4", 4)
7910 .Case("TRANS32_DEP_1", 5)
7911 .Case("TRANS32_DEP_2", 6)
7912 .Case("TRANS32_DEP_3", 7)
7913 .Case("FMA_ACCUM_CYCLE_1", 8)
7914 .Case("SALU_CYCLE_1", 9)
7915 .Case("SALU_CYCLE_2", 10)
7916 .Case("SALU_CYCLE_3", 11)
7917 .Default(-1);
7918 }
7919 if (Value < 0) {
7920 Error(ValueLoc, "invalid value name " + ValueName);
7921 return false;
7922 }
7923
7924 Delay |= Value << Shift;
7925 return true;
7926}
7927
7928ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7929 int64_t Delay = 0;
7930 SMLoc S = getLoc();
7931
7932 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7933 do {
7934 if (!parseDelay(Delay))
7935 return ParseStatus::Failure;
7936 } while (trySkipToken(AsmToken::Pipe));
7937 } else {
7938 if (!parseExpr(Delay))
7939 return ParseStatus::Failure;
7940 }
7941
7942 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7943 return ParseStatus::Success;
7944}
7945
7946bool
7947AMDGPUOperand::isSWaitCnt() const {
7948 return isImm();
7949}
7950
7951bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7952
7953//===----------------------------------------------------------------------===//
7954// DepCtr
7955//===----------------------------------------------------------------------===//
7956
7957void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7958 StringRef DepCtrName) {
7959 switch (ErrorId) {
7960 case OPR_ID_UNKNOWN:
7961 Error(Loc, Twine("invalid counter name ", DepCtrName));
7962 return;
7963 case OPR_ID_UNSUPPORTED:
7964 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7965 return;
7966 case OPR_ID_DUPLICATE:
7967 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7968 return;
7969 case OPR_VAL_INVALID:
7970 Error(Loc, Twine("invalid value for ", DepCtrName));
7971 return;
7972 default:
7973 assert(false);
7974 }
7975}
7976
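// Parses a single named counter of an s_waitcnt_depctr operand, written as
// name(value) (counter names such as depctr_va_vdst are target-defined;
// illustrative), and merges its encoding into DepCtr.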
7977bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7978
7979 using namespace llvm::AMDGPU::DepCtr;
7980
7981 SMLoc DepCtrLoc = getLoc();
7982 StringRef DepCtrName = getTokenStr();
7983
7984 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7985 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7986 return false;
7987
7988 int64_t ExprVal;
7989 if (!parseExpr(ExprVal))
7990 return false;
7991
7992 unsigned PrevOprMask = UsedOprMask;
7993 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7994
7995 if (CntVal < 0) {
7996 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7997 return false;
7998 }
7999
8000 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8001 return false;
8002
8003 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
8004 if (isToken(AsmToken::EndOfStatement)) {
8005 Error(getLoc(), "expected a counter name");
8006 return false;
8007 }
8008 }
8009
8010 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
8011 DepCtr = (DepCtr & ~CntValMask) | CntVal;
8012 return true;
8013}
8014
8015ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
8016 using namespace llvm::AMDGPU::DepCtr;
8017
8018 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
8019 SMLoc Loc = getLoc();
8020
8021 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8022 unsigned UsedOprMask = 0;
8023 while (!isToken(AsmToken::EndOfStatement)) {
8024 if (!parseDepCtr(DepCtr, UsedOprMask))
8025 return ParseStatus::Failure;
8026 }
8027 } else {
8028 if (!parseExpr(DepCtr))
8029 return ParseStatus::Failure;
8030 }
8031
8032 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
8033 return ParseStatus::Success;
8034}
8035
8036bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8037
8038//===----------------------------------------------------------------------===//
8039// hwreg
8040//===----------------------------------------------------------------------===//
8041
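// Parses the hwreg(...) macro, e.g. (illustrative) "hwreg(HW_REG_MODE, 0, 32)";
// the bit offset and field width are optional.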
8042ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8043 OperandInfoTy &Offset,
8044 OperandInfoTy &Width) {
8045 using namespace llvm::AMDGPU::Hwreg;
8046
8047 if (!trySkipId("hwreg", AsmToken::LParen))
8048 return ParseStatus::NoMatch;
8049
8050 // The register may be specified by name or using a numeric code
8051 HwReg.Loc = getLoc();
8052 if (isToken(AsmToken::Identifier) &&
8053 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8054 HwReg.IsSymbolic = true;
8055 lex(); // skip register name
8056 } else if (!parseExpr(HwReg.Val, "a register name")) {
8057 return ParseStatus::Failure;
8058 }
8059
8060 if (trySkipToken(AsmToken::RParen))
8061 return ParseStatus::Success;
8062
8063 // parse optional params
8064 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
8065 return ParseStatus::Failure;
8066
8067 Offset.Loc = getLoc();
8068 if (!parseExpr(Offset.Val))
8069 return ParseStatus::Failure;
8070
8071 if (!skipToken(AsmToken::Comma, "expected a comma"))
8072 return ParseStatus::Failure;
8073
8074 Width.Loc = getLoc();
8075 if (!parseExpr(Width.Val) ||
8076 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8077 return ParseStatus::Failure;
8078
8079 return ParseStatus::Success;
8080}
8081
8082ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8083 using namespace llvm::AMDGPU::Hwreg;
8084
8085 int64_t ImmVal = 0;
8086 SMLoc Loc = getLoc();
8087
8088 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8089 HwregId::Default);
8090 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8091 HwregOffset::Default);
8092 struct : StructuredOpField {
8093 using StructuredOpField::StructuredOpField;
8094 bool validate(AMDGPUAsmParser &Parser) const override {
8095 if (!isUIntN(Width, Val - 1))
8096 return Error(Parser, "only values from 1 to 32 are legal");
8097 return true;
8098 }
8099 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8100 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
8101
8102 if (Res.isNoMatch())
8103 Res = parseHwregFunc(HwReg, Offset, Width);
8104
8105 if (Res.isSuccess()) {
8106 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8107 return ParseStatus::Failure;
8108 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8109 }
8110
8111 if (Res.isNoMatch() &&
8112 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8113 Res = ParseStatus::Success;
8114
8115 if (!Res.isSuccess())
8116 return ParseStatus::Failure;
8117
8118 if (!isUInt<16>(ImmVal))
8119 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8120 Operands.push_back(
8121 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8122 return ParseStatus::Success;
8123}
8124
8125bool AMDGPUOperand::isHwreg() const {
8126 return isImmTy(ImmTyHwreg);
8127}
8128
8129//===----------------------------------------------------------------------===//
8130// sendmsg
8131//===----------------------------------------------------------------------===//
8132
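// Parses the body of a sendmsg(...) macro, e.g. (illustrative)
// "sendmsg(MSG_GS, GS_OP_EMIT, 0)"; the operation and stream id parts are
// optional.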
8133bool
8134AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8135 OperandInfoTy &Op,
8136 OperandInfoTy &Stream) {
8137 using namespace llvm::AMDGPU::SendMsg;
8138
8139 Msg.Loc = getLoc();
8140 if (isToken(AsmToken::Identifier) &&
8141 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8142 Msg.IsSymbolic = true;
8143 lex(); // skip message name
8144 } else if (!parseExpr(Msg.Val, "a message name")) {
8145 return false;
8146 }
8147
8148 if (trySkipToken(AsmToken::Comma)) {
8149 Op.IsDefined = true;
8150 Op.Loc = getLoc();
8151 if (isToken(AsmToken::Identifier) &&
8152 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8153 OPR_ID_UNKNOWN) {
8154 lex(); // skip operation name
8155 } else if (!parseExpr(Op.Val, "an operation name")) {
8156 return false;
8157 }
8158
8159 if (trySkipToken(AsmToken::Comma)) {
8160 Stream.IsDefined = true;
8161 Stream.Loc = getLoc();
8162 if (!parseExpr(Stream.Val))
8163 return false;
8164 }
8165 }
8166
8167 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
8168}
8169
8170bool
8171AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
8172 const OperandInfoTy &Op,
8173 const OperandInfoTy &Stream) {
8174 using namespace llvm::AMDGPU::SendMsg;
8175
8176 // Validation strictness depends on whether the message is specified
8177 // in a symbolic or in a numeric form. In the latter case,
8178 // only the possibility of encoding is checked.
8179 bool Strict = Msg.IsSymbolic;
8180
8181 if (Strict) {
8182 if (Msg.Val == OPR_ID_UNSUPPORTED) {
8183 Error(Msg.Loc, "specified message id is not supported on this GPU");
8184 return false;
8185 }
8186 } else {
8187 if (!isValidMsgId(Msg.Val, getSTI())) {
8188 Error(Msg.Loc, "invalid message id");
8189 return false;
8190 }
8191 }
8192 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
8193 if (Op.IsDefined) {
8194 Error(Op.Loc, "message does not support operations");
8195 } else {
8196 Error(Msg.Loc, "missing message operation");
8197 }
8198 return false;
8199 }
8200 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
8201 if (Op.Val == OPR_ID_UNSUPPORTED)
8202 Error(Op.Loc, "specified operation id is not supported on this GPU");
8203 else
8204 Error(Op.Loc, "invalid operation id");
8205 return false;
8206 }
8207 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
8208 Stream.IsDefined) {
8209 Error(Stream.Loc, "message operation does not support streams");
8210 return false;
8211 }
8212 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
8213 Error(Stream.Loc, "invalid message stream id");
8214 return false;
8215 }
8216 return true;
8217}
8218
8219ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
8220 using namespace llvm::AMDGPU::SendMsg;
8221
8222 int64_t ImmVal = 0;
8223 SMLoc Loc = getLoc();
8224
8225 if (trySkipId("sendmsg", AsmToken::LParen)) {
8226 OperandInfoTy Msg(OPR_ID_UNKNOWN);
8227 OperandInfoTy Op(OP_NONE_);
8228 OperandInfoTy Stream(STREAM_ID_NONE_);
8229 if (parseSendMsgBody(Msg, Op, Stream) &&
8230 validateSendMsg(Msg, Op, Stream)) {
8231 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
8232 } else {
8233 return ParseStatus::Failure;
8234 }
8235 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
8236 if (ImmVal < 0 || !isUInt<16>(ImmVal))
8237 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8238 } else {
8239 return ParseStatus::Failure;
8240 }
8241
8242 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8243 return ParseStatus::Success;
8244}
8245
8246bool AMDGPUOperand::isSendMsg() const {
8247 return isImmTy(ImmTySendMsg);
8248}
8249
8250//===----------------------------------------------------------------------===//
8251// v_interp
8252//===----------------------------------------------------------------------===//
8253
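// Parses v_interp operands: a parameter slot ("p10", "p20" or "p0") here,
// and an attribute of the form attrN.c (e.g. "attr0.x", illustrative) below.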
8254ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8255 StringRef Str;
8256 SMLoc S = getLoc();
8257
8258 if (!parseId(Str))
8259 return ParseStatus::NoMatch;
8260
8261 int Slot = StringSwitch<int>(Str)
8262 .Case("p10", 0)
8263 .Case("p20", 1)
8264 .Case("p0", 2)
8265 .Default(-1);
8266
8267 if (Slot == -1)
8268 return Error(S, "invalid interpolation slot");
8269
8270 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8271 AMDGPUOperand::ImmTyInterpSlot));
8272 return ParseStatus::Success;
8273}
8274
8275ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8276 StringRef Str;
8277 SMLoc S = getLoc();
8278
8279 if (!parseId(Str))
8280 return ParseStatus::NoMatch;
8281
8282 if (!Str.starts_with("attr"))
8283 return Error(S, "invalid interpolation attribute");
8284
8285 StringRef Chan = Str.take_back(2);
8286 int AttrChan = StringSwitch<int>(Chan)
8287 .Case(".x", 0)
8288 .Case(".y", 1)
8289 .Case(".z", 2)
8290 .Case(".w", 3)
8291 .Default(-1);
8292 if (AttrChan == -1)
8293 return Error(S, "invalid or missing interpolation attribute channel");
8294
8295 Str = Str.drop_back(2).drop_front(4);
8296
8297 uint8_t Attr;
8298 if (Str.getAsInteger(10, Attr))
8299 return Error(S, "invalid or missing interpolation attribute number");
8300
8301 if (Attr > 32)
8302 return Error(S, "out of bounds interpolation attribute number");
8303
8304 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
8305
8306 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
8307 AMDGPUOperand::ImmTyInterpAttr));
8308 Operands.push_back(AMDGPUOperand::CreateImm(
8309 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8310 return ParseStatus::Success;
8311}
8312
8313//===----------------------------------------------------------------------===//
8314// exp
8315//===----------------------------------------------------------------------===//
8316
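// Parses an export target name, e.g. (illustrative) "mrt0" or "pos0", and
// rejects targets that are not supported on the current GPU.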
8317ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8318 using namespace llvm::AMDGPU::Exp;
8319
8320 StringRef Str;
8321 SMLoc S = getLoc();
8322
8323 if (!parseId(Str))
8324 return ParseStatus::NoMatch;
8325
8326 unsigned Id = getTgtId(Str);
8327 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
8328 return Error(S, (Id == ET_INVALID)
8329 ? "invalid exp target"
8330 : "exp target is not supported on this GPU");
8331
8332 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
8333 AMDGPUOperand::ImmTyExpTgt));
8334 return ParseStatus::Success;
8335}
8336
8337//===----------------------------------------------------------------------===//
8338// parser helpers
8339//===----------------------------------------------------------------------===//
8340
8341bool
8342AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8343 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8344}
8345
8346bool
8347AMDGPUAsmParser::isId(const StringRef Id) const {
8348 return isId(getToken(), Id);
8349}
8350
8351bool
8352AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8353 return getTokenKind() == Kind;
8354}
8355
8356StringRef AMDGPUAsmParser::getId() const {
8357 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8358}
8359
8360bool
8361AMDGPUAsmParser::trySkipId(const StringRef Id) {
8362 if (isId(Id)) {
8363 lex();
8364 return true;
8365 }
8366 return false;
8367}
8368
8369bool
8370AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8371 if (isToken(AsmToken::Identifier)) {
8372 StringRef Tok = getTokenStr();
8373 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8374 lex();
8375 return true;
8376 }
8377 }
8378 return false;
8379}
8380
8381bool
8382AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8383 if (isId(Id) && peekToken().is(Kind)) {
8384 lex();
8385 lex();
8386 return true;
8387 }
8388 return false;
8389}
8390
8391bool
8392AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8393 if (isToken(Kind)) {
8394 lex();
8395 return true;
8396 }
8397 return false;
8398}
8399
8400bool
8401AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8402 const StringRef ErrMsg) {
8403 if (!trySkipToken(Kind)) {
8404 Error(getLoc(), ErrMsg);
8405 return false;
8406 }
8407 return true;
8408}
8409
8410bool
8411AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8412 SMLoc S = getLoc();
8413
8414 const MCExpr *Expr;
8415 if (Parser.parseExpression(Expr))
8416 return false;
8417
8418 if (Expr->evaluateAsAbsolute(Imm))
8419 return true;
8420
8421 if (Expected.empty()) {
8422 Error(S, "expected absolute expression");
8423 } else {
8424 Error(S, Twine("expected ", Expected) +
8425 Twine(" or an absolute expression"));
8426 }
8427 return false;
8428}
8429
8430bool
8431AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8432 SMLoc S = getLoc();
8433
8434 const MCExpr *Expr;
8435 if (Parser.parseExpression(Expr))
8436 return false;
8437
8438 int64_t IntVal;
8439 if (Expr->evaluateAsAbsolute(IntVal)) {
8440 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8441 } else {
8442 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8443 }
8444 return true;
8445}
8446
8447bool
8448AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8449 if (isToken(AsmToken::String)) {
8450 Val = getToken().getStringContents();
8451 lex();
8452 return true;
8453 }
8454 Error(getLoc(), ErrMsg);
8455 return false;
8456}
8457
8458bool
8459AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8460 if (isToken(AsmToken::Identifier)) {
8461 Val = getTokenStr();
8462 lex();
8463 return true;
8464 }
8465 if (!ErrMsg.empty())
8466 Error(getLoc(), ErrMsg);
8467 return false;
8468}
8469
8470AsmToken
8471AMDGPUAsmParser::getToken() const {
8472 return Parser.getTok();
8473}
8474
8475AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8476 return isToken(AsmToken::EndOfStatement)
8477 ? getToken()
8478 : getLexer().peekTok(ShouldSkipSpace);
8479}
8480
8481void
8482AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8483 auto TokCount = getLexer().peekTokens(Tokens);
8484
8485 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8486 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8487}
8488
8489AsmToken::TokenKind
8490AMDGPUAsmParser::getTokenKind() const {
8491 return getLexer().getKind();
8492}
8493
8494SMLoc
8495AMDGPUAsmParser::getLoc() const {
8496 return getToken().getLoc();
8497}
8498
8499StringRef
8500AMDGPUAsmParser::getTokenStr() const {
8501 return getToken().getString();
8502}
8503
8504void
8505AMDGPUAsmParser::lex() {
8506 Parser.Lex();
8507}
8508
8509SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8510 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8511}
8512
8513// Returns whichever of the given locations comes later in the source.
8514SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8515 return a.getPointer() < b.getPointer() ? b : a;
8516}
8517
8518SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
8519 int MCOpIdx) const {
8520 for (const auto &Op : Operands) {
8521 const auto TargetOp = static_cast<AMDGPUOperand &>(*Op);
8522 if (TargetOp.getMCOpIdx() == MCOpIdx)
8523 return TargetOp.getStartLoc();
8524 }
8525 llvm_unreachable("No such MC operand!");
8526}
8527
8528SMLoc
8529AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8530 const OperandVector &Operands) const {
8531 for (unsigned i = Operands.size() - 1; i > 0; --i) {
8532 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8533 if (Test(Op))
8534 return Op.getStartLoc();
8535 }
8536 return getInstLoc(Operands);
8537}
8538
8539SMLoc
8540AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8541 const OperandVector &Operands) const {
8542 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8543 return getOperandLoc(Test, Operands);
8544}
8545
8546ParseStatus
8547AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
8548 if (!trySkipToken(AsmToken::LCurly))
8549 return ParseStatus::NoMatch;
8550
8551 bool First = true;
8552 while (!trySkipToken(AsmToken::RCurly)) {
8553 if (!First &&
8554 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
8555 return ParseStatus::Failure;
8556
8557 StringRef Id = getTokenStr();
8558 SMLoc IdLoc = getLoc();
8559 if (!skipToken(AsmToken::Identifier, "field name expected") ||
8560 !skipToken(AsmToken::Colon, "colon expected"))
8561 return ParseStatus::Failure;
8562
8563 const auto *I =
8564 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
8565 if (I == Fields.end())
8566 return Error(IdLoc, "unknown field");
8567 if ((*I)->IsDefined)
8568 return Error(IdLoc, "duplicate field");
8569
8570 // TODO: Support symbolic values.
8571 (*I)->Loc = getLoc();
8572 if (!parseExpr((*I)->Val))
8573 return ParseStatus::Failure;
8574 (*I)->IsDefined = true;
8575
8576 First = false;
8577 }
8578 return ParseStatus::Success;
8579}
8580
8581bool AMDGPUAsmParser::validateStructuredOpFields(
8582 ArrayRef<const StructuredOpField *> Fields) {
8583 return all_of(Fields, [this](const StructuredOpField *F) {
8584 return F->validate(*this);
8585 });
8586}
8587
8588//===----------------------------------------------------------------------===//
8589// swizzle
8590//===----------------------------------------------------------------------===//
8591
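// A swizzle operand is either a raw 16-bit offset or the swizzle(...) macro,
// e.g. (illustrative) "ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 16)".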
8592LLVM_READNONE
8593static unsigned
8594encodeBitmaskPerm(const unsigned AndMask,
8595 const unsigned OrMask,
8596 const unsigned XorMask) {
8597 using namespace llvm::AMDGPU::Swizzle;
8598
8599 return BITMASK_PERM_ENC |
8600 (AndMask << BITMASK_AND_SHIFT) |
8601 (OrMask << BITMASK_OR_SHIFT) |
8602 (XorMask << BITMASK_XOR_SHIFT);
8603}
8604
8605bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8606 const unsigned MaxVal,
8607 const Twine &ErrMsg, SMLoc &Loc) {
8608 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8609 return false;
8610 }
8611 Loc = getLoc();
8612 if (!parseExpr(Op)) {
8613 return false;
8614 }
8615 if (Op < MinVal || Op > MaxVal) {
8616 Error(Loc, ErrMsg);
8617 return false;
8618 }
8619
8620 return true;
8621}
8622
8623bool
8624AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8625 const unsigned MinVal,
8626 const unsigned MaxVal,
8627 const StringRef ErrMsg) {
8628 SMLoc Loc;
8629 for (unsigned i = 0; i < OpNum; ++i) {
8630 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8631 return false;
8632 }
8633
8634 return true;
8635}
8636
8637bool
8638AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8639 using namespace llvm::AMDGPU::Swizzle;
8640
8641 int64_t Lane[LANE_NUM];
8642 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8643 "expected a 2-bit lane id")) {
8644 Imm = QUAD_PERM_ENC;
8645 for (unsigned I = 0; I < LANE_NUM; ++I) {
8646 Imm |= Lane[I] << (LANE_SHIFT * I);
8647 }
8648 return true;
8649 }
8650 return false;
8651}
8652
8653bool
8654AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8655 using namespace llvm::AMDGPU::Swizzle;
8656
8657 SMLoc Loc;
8658 int64_t GroupSize;
8659 int64_t LaneIdx;
8660
8661 if (!parseSwizzleOperand(GroupSize,
8662 2, 32,
8663 "group size must be in the interval [2,32]",
8664 Loc)) {
8665 return false;
8666 }
8667 if (!isPowerOf2_64(GroupSize)) {
8668 Error(Loc, "group size must be a power of two");
8669 return false;
8670 }
8671 if (parseSwizzleOperand(LaneIdx,
8672 0, GroupSize - 1,
8673 "lane id must be in the interval [0,group size - 1]",
8674 Loc)) {
8675 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8676 return true;
8677 }
8678 return false;
8679}
8680
8681bool
8682AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8683 using namespace llvm::AMDGPU::Swizzle;
8684
8685 SMLoc Loc;
8686 int64_t GroupSize;
8687
8688 if (!parseSwizzleOperand(GroupSize,
8689 2, 32,
8690 "group size must be in the interval [2,32]",
8691 Loc)) {
8692 return false;
8693 }
8694 if (!isPowerOf2_64(GroupSize)) {
8695 Error(Loc, "group size must be a power of two");
8696 return false;
8697 }
8698
8699 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8700 return true;
8701}
8702
8703bool
8704AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8705 using namespace llvm::AMDGPU::Swizzle;
8706
8707 SMLoc Loc;
8708 int64_t GroupSize;
8709
8710 if (!parseSwizzleOperand(GroupSize,
8711 1, 16,
8712 "group size must be in the interval [1,16]",
8713 Loc)) {
8714 return false;
8715 }
8716 if (!isPowerOf2_64(GroupSize)) {
8717 Error(Loc, "group size must be a power of two");
8718 return false;
8719 }
8720
8721 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8722 return true;
8723}
8724
8725bool
8726AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8727 using namespace llvm::AMDGPU::Swizzle;
8728
8729 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8730 return false;
8731 }
8732
8733 StringRef Ctl;
8734 SMLoc StrLoc = getLoc();
8735 if (!parseString(Ctl)) {
8736 return false;
8737 }
8738 if (Ctl.size() != BITMASK_WIDTH) {
8739 Error(StrLoc, "expected a 5-character mask");
8740 return false;
8741 }
8742
8743 unsigned AndMask = 0;
8744 unsigned OrMask = 0;
8745 unsigned XorMask = 0;
8746
8747 for (size_t i = 0; i < Ctl.size(); ++i) {
8748 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8749 switch(Ctl[i]) {
8750 default:
8751 Error(StrLoc, "invalid mask");
8752 return false;
8753 case '0':
8754 break;
8755 case '1':
8756 OrMask |= Mask;
8757 break;
8758 case 'p':
8759 AndMask |= Mask;
8760 break;
8761 case 'i':
8762 AndMask |= Mask;
8763 XorMask |= Mask;
8764 break;
8765 }
8766 }
8767
8768 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8769 return true;
8770}
8771
8772bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
8773 using namespace llvm::AMDGPU::Swizzle;
8774
8775 if (!AMDGPU::isGFX9Plus(getSTI())) {
8776 Error(getLoc(), "FFT mode swizzle not supported on this GPU");
8777 return false;
8778 }
8779
8780 int64_t Swizzle;
8781 SMLoc Loc;
8782 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8783 "FFT swizzle must be in the interval [0," +
8784 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
8785 Loc))
8786 return false;
8787
8788 Imm = FFT_MODE_ENC | Swizzle;
8789 return true;
8790}
8791
8792bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
8793 using namespace llvm::AMDGPU::Swizzle;
8794
8795 if (!AMDGPU::isGFX9Plus(getSTI())) {
8796 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
8797 return false;
8798 }
8799
8800 SMLoc Loc;
8801 int64_t Direction;
8802
8803 if (!parseSwizzleOperand(Direction, 0, 1,
8804 "direction must be 0 (left) or 1 (right)", Loc))
8805 return false;
8806
8807 int64_t RotateSize;
8808 if (!parseSwizzleOperand(
8809 RotateSize, 0, ROTATE_MAX_SIZE,
8810 "number of threads to rotate must be in the interval [0," +
8811 Twine(ROTATE_MAX_SIZE) + Twine(']'),
8812 Loc))
8813 return false;
8814
8815 Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) |
8816 (RotateSize << ROTATE_SIZE_SHIFT);
8817 return true;
8818}
8819
8820bool
8821AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8822
8823 SMLoc OffsetLoc = getLoc();
8824
8825 if (!parseExpr(Imm, "a swizzle macro")) {
8826 return false;
8827 }
8828 if (!isUInt<16>(Imm)) {
8829 Error(OffsetLoc, "expected a 16-bit offset");
8830 return false;
8831 }
8832 return true;
8833}
8834
8835bool
8836AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8837 using namespace llvm::AMDGPU::Swizzle;
8838
8839 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
8840
8841 SMLoc ModeLoc = getLoc();
8842 bool Ok = false;
8843
8844 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8845 Ok = parseSwizzleQuadPerm(Imm);
8846 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8847 Ok = parseSwizzleBitmaskPerm(Imm);
8848 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8849 Ok = parseSwizzleBroadcast(Imm);
8850 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8851 Ok = parseSwizzleSwap(Imm);
8852 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8853 Ok = parseSwizzleReverse(Imm);
8854 } else if (trySkipId(IdSymbolic[ID_FFT])) {
8855 Ok = parseSwizzleFFT(Imm);
8856 } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
8857 Ok = parseSwizzleRotate(Imm);
8858 } else {
8859 Error(ModeLoc, "expected a swizzle mode");
8860 }
8861
8862 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
8863 }
8864
8865 return false;
8866}
8867
8868ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8869 SMLoc S = getLoc();
8870 int64_t Imm = 0;
8871
8872 if (trySkipId("offset")) {
8873
8874 bool Ok = false;
8875 if (skipToken(AsmToken::Colon, "expected a colon")) {
8876 if (trySkipId("swizzle")) {
8877 Ok = parseSwizzleMacro(Imm);
8878 } else {
8879 Ok = parseSwizzleOffset(Imm);
8880 }
8881 }
8882
8883 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8884
8885 return Ok ? ParseStatus::Success : ParseStatus::Failure;
8886 }
8887 return ParseStatus::NoMatch;
8888}
8889
8890bool
8891AMDGPUOperand::isSwizzle() const {
8892 return isImmTy(ImmTySwizzle);
8893}
8894
8895//===----------------------------------------------------------------------===//
8896// VGPR Index Mode
8897//===----------------------------------------------------------------------===//
8898
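// A gpr_idx operand is either a raw 4-bit immediate or the gpr_idx(...)
// macro listing the modes to enable, e.g. (illustrative) "gpr_idx(SRC0,DST)".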
8899int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8900
8901 using namespace llvm::AMDGPU::VGPRIndexMode;
8902
8903 if (trySkipToken(AsmToken::RParen)) {
8904 return OFF;
8905 }
8906
8907 int64_t Imm = 0;
8908
8909 while (true) {
8910 unsigned Mode = 0;
8911 SMLoc S = getLoc();
8912
8913 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8914 if (trySkipId(IdSymbolic[ModeId])) {
8915 Mode = 1 << ModeId;
8916 break;
8917 }
8918 }
8919
8920 if (Mode == 0) {
8921 Error(S, (Imm == 0)?
8922 "expected a VGPR index mode or a closing parenthesis" :
8923 "expected a VGPR index mode");
8924 return UNDEF;
8925 }
8926
8927 if (Imm & Mode) {
8928 Error(S, "duplicate VGPR index mode");
8929 return UNDEF;
8930 }
8931 Imm |= Mode;
8932
8933 if (trySkipToken(AsmToken::RParen))
8934 break;
8935 if (!skipToken(AsmToken::Comma,
8936 "expected a comma or a closing parenthesis"))
8937 return UNDEF;
8938 }
8939
8940 return Imm;
8941}
8942
8943ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8944
8945 using namespace llvm::AMDGPU::VGPRIndexMode;
8946
8947 int64_t Imm = 0;
8948 SMLoc S = getLoc();
8949
8950 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8951 Imm = parseGPRIdxMacro();
8952 if (Imm == UNDEF)
8953 return ParseStatus::Failure;
8954 } else {
8955 if (getParser().parseAbsoluteExpression(Imm))
8956 return ParseStatus::Failure;
8957 if (Imm < 0 || !isUInt<4>(Imm))
8958 return Error(S, "invalid immediate: only 4-bit values are legal");
8959 }
8960
8961 Operands.push_back(
8962 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8963 return ParseStatus::Success;
8964}
8965
8966bool AMDGPUOperand::isGPRIdxMode() const {
8967 return isImmTy(ImmTyGprIdxMode);
8968}
8969
8970//===----------------------------------------------------------------------===//
8971// sopp branch targets
8972//===----------------------------------------------------------------------===//
8973
8974ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8975
8976 // Make sure we are not parsing something
8977 // that looks like a label or an expression but is not.
8978 // This will improve error messages.
8979 if (isRegister() || isModifier())
8980 return ParseStatus::NoMatch;
8981
8982 if (!parseExpr(Operands))
8983 return ParseStatus::Failure;
8984
8985 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8986 assert(Opr.isImm() || Opr.isExpr());
8987 SMLoc Loc = Opr.getStartLoc();
8988
8989 // Currently we do not support arbitrary expressions as branch targets.
8990 // Only labels and absolute expressions are accepted.
8991 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8992 Error(Loc, "expected an absolute expression or a label");
8993 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8994 Error(Loc, "expected a 16-bit signed jump offset");
8995 }
8996
8997 return ParseStatus::Success;
8998}
8999
9000//===----------------------------------------------------------------------===//
9001// Boolean holding registers
9002//===----------------------------------------------------------------------===//
9003
9004ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
9005 return parseReg(Operands);
9006}
9007
9008//===----------------------------------------------------------------------===//
9009// mubuf
9010//===----------------------------------------------------------------------===//
9011
9012void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9013 const OperandVector &Operands,
9014 bool IsAtomic) {
9015 OptionalImmIndexMap OptionalIdx;
9016 unsigned FirstOperandIdx = 1;
9017 bool IsAtomicReturn = false;
9018
9019 if (IsAtomic) {
9020 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
9021 SIInstrFlags::IsAtomicRet;
9022 }
9023
9024 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
9025 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9026
9027 // Add the register arguments
9028 if (Op.isReg()) {
9029 Op.addRegOperands(Inst, 1);
9030 // Insert a tied src for atomic return dst.
9031 // This cannot be postponed as subsequent calls to
9032 // addImmOperands rely on correct number of MC operands.
9033 if (IsAtomicReturn && i == FirstOperandIdx)
9034 Op.addRegOperands(Inst, 1);
9035 continue;
9036 }
9037
9038 // Handle the case where soffset is an immediate
9039 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9040 Op.addImmOperands(Inst, 1);
9041 continue;
9042 }
9043
9044 // Handle tokens like 'offen' which are sometimes hard-coded into the
9045 // asm string. There are no MCInst operands for these.
9046 if (Op.isToken()) {
9047 continue;
9048 }
9049 assert(Op.isImm());
9050
9051 // Handle optional arguments
9052 OptionalIdx[Op.getImmTy()] = i;
9053 }
9054
9055 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
9056 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
9057 // Parse a dummy operand as a placeholder for the SWZ operand. This enforces
9058 // agreement between MCInstrDesc.getNumOperands and MCInst.getNumOperands.
9059 Inst.addOperand(MCOperand::createImm(0));
9060}
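// Illustrative MUBUF syntax handled by this converter (gfx9-style mnemonic and
// registers chosen for the example):
//   buffer_load_dword v0, v1, s[4:7], s0 offen offset:16
// 'offen' is a hard-coded token with no MCInst operand; 'offset' and the cache
// policy are the optional immediates collected via OptionalIdx above.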
9061
9062//===----------------------------------------------------------------------===//
9063// smrd
9064//===----------------------------------------------------------------------===//
9065
9066bool AMDGPUOperand::isSMRDOffset8() const {
9067 return isImmLiteral() && isUInt<8>(getImm());
9068}
9069
9070bool AMDGPUOperand::isSMEMOffset() const {
9071 // Offset range is checked later by validator.
9072 return isImmLiteral();
9073}
9074
9075bool AMDGPUOperand::isSMRDLiteralOffset() const {
9076 // 32-bit literals are only supported on CI, and we only want to use them
9077 // when the offset is > 8 bits.
9078 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
9079}
9080
9081//===----------------------------------------------------------------------===//
9082// vop3
9083//===----------------------------------------------------------------------===//
9084
9085static bool ConvertOmodMul(int64_t &Mul) {
9086 if (Mul != 1 && Mul != 2 && Mul != 4)
9087 return false;
9088
9089 Mul >>= 1;
9090 return true;
9091}
9092
9093static bool ConvertOmodDiv(int64_t &Div) {
9094 if (Div == 1) {
9095 Div = 0;
9096 return true;
9097 }
9098
9099 if (Div == 2) {
9100 Div = 3;
9101 return true;
9102 }
9103
9104 return false;
9105}
9106
9107// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9108// This is intentional and ensures compatibility with sp3.
9109// See bug 35397 for details.
9110bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9111 if (BoundCtrl == 0 || BoundCtrl == 1) {
9112 if (!isGFX11Plus())
9113 BoundCtrl = 1;
9114 return true;
9115 }
9116 return false;
9117}
9118
9119void AMDGPUAsmParser::onBeginOfFile() {
9120 if (!getParser().getStreamer().getTargetStreamer() ||
9121 getSTI().getTargetTriple().getArch() == Triple::r600)
9122 return;
9123
9124 if (!getTargetStreamer().getTargetID())
9125 getTargetStreamer().initializeTargetID(getSTI(),
9126 getSTI().getFeatureString());
9127
9128 if (isHsaAbi(getSTI()))
9129 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9130}
9131
9132/// Parse AMDGPU specific expressions.
9133///
9134/// expr ::= or(expr, ...) |
9135/// max(expr, ...)
9136///
9137bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9138 using AGVK = AMDGPUMCExpr::VariantKind;
9139
9140 if (isToken(AsmToken::Identifier)) {
9141 StringRef TokenId = getTokenStr();
9142 AGVK VK = StringSwitch<AGVK>(TokenId)
9143 .Case("max", AGVK::AGVK_Max)
9144 .Case("or", AGVK::AGVK_Or)
9145 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
9146 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9147 .Case("alignto", AGVK::AGVK_AlignTo)
9148 .Case("occupancy", AGVK::AGVK_Occupancy)
9149 .Default(AGVK::AGVK_None);
9150
9151 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
9152 SmallVector<const MCExpr *, 4> Exprs;
9153 uint64_t CommaCount = 0;
9154 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
9155 lex(); // Eat '('
9156 while (true) {
9157 if (trySkipToken(AsmToken::RParen)) {
9158 if (Exprs.empty()) {
9159 Error(getToken().getLoc(),
9160 "empty " + Twine(TokenId) + " expression");
9161 return true;
9162 }
9163 if (CommaCount + 1 != Exprs.size()) {
9164 Error(getToken().getLoc(),
9165 "mismatch of commas in " + Twine(TokenId) + " expression");
9166 return true;
9167 }
9168 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
9169 return false;
9170 }
9171 const MCExpr *Expr;
9172 if (getParser().parseExpression(Expr, EndLoc))
9173 return true;
9174 Exprs.push_back(Expr);
9175 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
9176 if (LastTokenWasComma)
9177 CommaCount++;
9178 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
9179 Error(getToken().getLoc(),
9180 "unexpected token in " + Twine(TokenId) + " expression");
9181 return true;
9182 }
9183 }
9184 }
9185 }
9186 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
9187}
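// Illustrative directive-level use of the expression forms parsed above
// (symbol names are hypothetical):
//   .set total_vgprs, max(kernel_a.vgpr_count, kernel_b.vgpr_count)
//   .set any_wave32, or(k0.wave32, k1.wave32)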
9188
9189ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9190 StringRef Name = getTokenStr();
9191 if (Name == "mul") {
9192 return parseIntWithPrefix("mul", Operands,
9193 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
9194 }
9195
9196 if (Name == "div") {
9197 return parseIntWithPrefix("div", Operands,
9198 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
9199 }
9200
9201 return ParseStatus::NoMatch;
9202}
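// Output-modifier (omod) encoding implied by ConvertOmodMul/ConvertOmodDiv:
//   (none)/mul:1/div:1 -> 0, mul:2 -> 1, mul:4 -> 2, div:2 -> 3
// e.g. v_add_f32 v0, v1, v2 mul:2   (registers chosen for illustration)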
9203
9204// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
9205// the number of src operands present, then copies that bit into src0_modifiers.
9206static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
9207 int Opc = Inst.getOpcode();
9208 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9209 if (OpSelIdx == -1)
9210 return;
9211
9212 int SrcNum;
9213 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9214 AMDGPU::OpName::src2};
9215 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
9216 ++SrcNum)
9217 ;
9218 assert(SrcNum > 0);
9219
9220 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9221
9222 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
9223 if (DstIdx == -1)
9224 return;
9225
9226 const MCOperand &DstOp = Inst.getOperand(DstIdx);
9227 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
9228 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9229 if (DstOp.isReg() &&
9230 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
9231 if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
9232 ModVal |= SISrcMods::DST_OP_SEL;
9233 } else {
9234 if ((OpSel & (1 << SrcNum)) != 0)
9235 ModVal |= SISrcMods::DST_OP_SEL;
9236 }
9237 Inst.getOperand(ModIdx).setImm(ModVal);
9238}
9239
9240void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9241 const OperandVector &Operands) {
9242 cvtVOP3P(Inst, Operands);
9243 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9244}
9245
9246void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
9247 OptionalImmIndexMap &OptionalIdx) {
9248 cvtVOP3P(Inst, Operands, OptionalIdx);
9249 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9250}
9251
9252static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9253 return
9254 // 1. This operand is input modifiers
9255 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9256 // 2. This is not last operand
9257 && Desc.NumOperands > (OpNum + 1)
9258 // 3. Next operand is register class
9259 && Desc.operands()[OpNum + 1].RegClass != -1
9260 // 4. Next register is not tied to any other operand
9261 && Desc.getOperandConstraint(OpNum + 1,
9262 MCOI::TIED_TO) == -1;
9263}
9264
9265void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
9266 unsigned Opc = Inst.getOpcode();
9267 constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9268 AMDGPU::OpName::src2};
9269 constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9270 AMDGPU::OpName::src1_modifiers,
9271 AMDGPU::OpName::src2_modifiers};
9272 for (int J = 0; J < 3; ++J) {
9273 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9274 if (OpIdx == -1)
9275 // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but
9276 // no src1. So continue instead of break.
9277 continue;
9278
9279 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9280 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9281
9282 if ((OpSel & (1 << J)) != 0)
9283 ModVal |= SISrcMods::OP_SEL_0;
9284 // op_sel[3] is encoded in src0_modifiers.
9285 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
9286 ModVal |= SISrcMods::DST_OP_SEL;
9287
9288 Inst.getOperand(ModIdx).setImm(ModVal);
9289 }
9290}
9291
9292void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9293{
9294 OptionalImmIndexMap OptionalIdx;
9295 unsigned Opc = Inst.getOpcode();
9296
9297 unsigned I = 1;
9298 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9299 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9300 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9301 }
9302
9303 for (unsigned E = Operands.size(); I != E; ++I) {
9304 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9305 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9306 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9307 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
9308 Op.isInterpAttrChan()) {
9309 Inst.addOperand(MCOperand::createImm(Op.getImm()));
9310 } else if (Op.isImmModifier()) {
9311 OptionalIdx[Op.getImmTy()] = I;
9312 } else {
9313 llvm_unreachable("unhandled operand type");
9314 }
9315 }
9316
9317 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
9318 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9319 AMDGPUOperand::ImmTyHigh);
9320
9321 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9322 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9323 AMDGPUOperand::ImmTyClamp);
9324
9325 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9326 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9327 AMDGPUOperand::ImmTyOModSI);
9328
9329 // Some v_interp instructions use op_sel[3] for dst.
9330 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9331 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9332 AMDGPUOperand::ImmTyOpSel);
9333 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9334 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9335
9336 cvtOpSelHelper(Inst, OpSel);
9337 }
9338}
9339
9340void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9341{
9342 OptionalImmIndexMap OptionalIdx;
9343 unsigned Opc = Inst.getOpcode();
9344
9345 unsigned I = 1;
9346 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9347 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9348 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9349 }
9350
9351 for (unsigned E = Operands.size(); I != E; ++I) {
9352 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9353 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9354 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9355 } else if (Op.isImmModifier()) {
9356 OptionalIdx[Op.getImmTy()] = I;
9357 } else {
9358 llvm_unreachable("unhandled operand type");
9359 }
9360 }
9361
9362 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
9363
9364 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9365 if (OpSelIdx != -1)
9366 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9367
9368 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
9369
9370 if (OpSelIdx == -1)
9371 return;
9372
9373 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9374 cvtOpSelHelper(Inst, OpSel);
9375}
9376
9377void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9378 const OperandVector &Operands) {
9379 OptionalImmIndexMap OptionalIdx;
9380 unsigned Opc = Inst.getOpcode();
9381 unsigned I = 1;
9382 int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
9383
9384 const MCInstrDesc &Desc = MII.get(Opc);
9385
9386 for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
9387 static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
9388
9389 for (unsigned E = Operands.size(); I != E; ++I) {
9390 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
9391 int NumOperands = Inst.getNumOperands();
9392 // The order of operands in the MCInst and in the parsed operands differs.
9393 // Add dummy cbsz and blgp operands at the corresponding MCInst operand
9394 // indices so that the scale values are parsed correctly.
9395 if (NumOperands == CbszOpIdx) {
9396 Inst.addOperand(MCOperand::createImm(0));
9397 Inst.addOperand(MCOperand::createImm(0));
9398 }
9399 if (isRegOrImmWithInputMods(Desc, NumOperands)) {
9400 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9401 } else if (Op.isImmModifier()) {
9402 OptionalIdx[Op.getImmTy()] = I;
9403 } else {
9404 Op.addRegOrImmOperands(Inst, 1);
9405 }
9406 }
9407
9408 // Insert CBSZ and BLGP operands for F8F6F4 variants
9409 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9410 if (CbszIdx != OptionalIdx.end()) {
9411 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
9412 Inst.getOperand(CbszOpIdx).setImm(CbszVal);
9413 }
9414
9415 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
9416 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9417 if (BlgpIdx != OptionalIdx.end()) {
9418 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
9419 Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
9420 }
9421
9422 // Add dummy src_modifiers
9423 Inst.addOperand(MCOperand::createImm(0));
9424 Inst.addOperand(MCOperand::createImm(0));
9425
9426 // Handle op_sel fields
9427
9428 unsigned OpSel = 0;
9429 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9430 if (OpselIdx != OptionalIdx.end()) {
9431 OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
9432 .getImm();
9433 }
9434
9435 unsigned OpSelHi = 0;
9436 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9437 if (OpselHiIdx != OptionalIdx.end()) {
9438 OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
9439 .getImm();
9440 }
9441 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9442 AMDGPU::OpName::src1_modifiers};
9443
9444 for (unsigned J = 0; J < 2; ++J) {
9445 unsigned ModVal = 0;
9446 if (OpSel & (1 << J))
9447 ModVal |= SISrcMods::OP_SEL_0;
9448 if (OpSelHi & (1 << J))
9449 ModVal |= SISrcMods::OP_SEL_1;
9450
9451 const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9452 Inst.getOperand(ModIdx).setImm(ModVal);
9453 }
9454}
9455
9456void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9457 OptionalImmIndexMap &OptionalIdx) {
9458 unsigned Opc = Inst.getOpcode();
9459
9460 unsigned I = 1;
9461 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9462 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9463 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9464 }
9465
9466 for (unsigned E = Operands.size(); I != E; ++I) {
9467 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9468 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9469 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9470 } else if (Op.isImmModifier()) {
9471 OptionalIdx[Op.getImmTy()] = I;
9472 } else {
9473 Op.addRegOrImmOperands(Inst, 1);
9474 }
9475 }
9476
9477 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
9478 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9479 AMDGPUOperand::ImmTyScaleSel);
9480
9481 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9482 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9483 AMDGPUOperand::ImmTyClamp);
9484
9485 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
9486 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
9487 Inst.addOperand(Inst.getOperand(0));
9488 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9489 AMDGPUOperand::ImmTyByteSel);
9490 }
9491
9492 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9493 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9494 AMDGPUOperand::ImmTyOModSI);
9495
9496 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9497 // they have a src2 register operand that is tied to the dst operand.
9498 // The assembler does not allow modifiers for this operand, so src2_modifiers
9499 // must be 0.
9500 if (isMAC(Opc)) {
9501 auto *it = Inst.begin();
9502 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
9503 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
9504 ++it;
9505 // Copy the operand to ensure it's not invalidated when Inst grows.
9506 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
9507 }
9508}
9509
9510void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9511 OptionalImmIndexMap OptionalIdx;
9512 cvtVOP3(Inst, Operands, OptionalIdx);
9513}
9514
9515void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
9516 OptionalImmIndexMap &OptIdx) {
9517 const int Opc = Inst.getOpcode();
9518 const MCInstrDesc &Desc = MII.get(Opc);
9519
9520 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
9521
9522 if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9523 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9524 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9525 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9526 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9527 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
9528 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
9529 Inst.addOperand(Inst.getOperand(0));
9530 }
9531
9532 // Adding vdst_in operand is already covered for these DPP instructions in
9533 // cvtVOP3DPP.
9534 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
9535 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
9536 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
9537 Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
9538 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
9539 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
9540 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
9541 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
9542 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
9543 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
9544 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9545 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp_gfx1250 ||
9546 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp8_gfx1250 ||
9547 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9548 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9549 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
9550 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
9551 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
9552 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
9553 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
9554 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
9555 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
9556 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
9557 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
9558 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
9559 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
9560 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {
9561 Inst.addOperand(Inst.getOperand(0));
9562 }
9563
9564 int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
9565 if (BitOp3Idx != -1) {
9566 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9567 }
9568
9569 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
9570 // instruction, and then figure out where to actually put the modifiers
9571
9572 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9573 if (OpSelIdx != -1) {
9574 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
9575 }
9576
9577 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
9578 if (OpSelHiIdx != -1) {
9579 int DefaultVal = IsPacked ? -1 : 0;
9580 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
9581 DefaultVal);
9582 }
9583
9584 int MatrixAFMTIdx =
9585 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
9586 if (MatrixAFMTIdx != -1) {
9587 addOptionalImmOperand(Inst, Operands, OptIdx,
9588 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9589 }
9590
9591 int MatrixBFMTIdx =
9592 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
9593 if (MatrixBFMTIdx != -1) {
9594 addOptionalImmOperand(Inst, Operands, OptIdx,
9595 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9596 }
9597
9598 int MatrixAScaleIdx =
9599 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
9600 if (MatrixAScaleIdx != -1) {
9601 addOptionalImmOperand(Inst, Operands, OptIdx,
9602 AMDGPUOperand::ImmTyMatrixAScale, 0);
9603 }
9604
9605 int MatrixBScaleIdx =
9606 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
9607 if (MatrixBScaleIdx != -1) {
9608 addOptionalImmOperand(Inst, Operands, OptIdx,
9609 AMDGPUOperand::ImmTyMatrixBScale, 0);
9610 }
9611
9612 int MatrixAScaleFmtIdx =
9613 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9614 if (MatrixAScaleFmtIdx != -1) {
9615 addOptionalImmOperand(Inst, Operands, OptIdx,
9616 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9617 }
9618
9619 int MatrixBScaleFmtIdx =
9620 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9621 if (MatrixBScaleFmtIdx != -1) {
9622 addOptionalImmOperand(Inst, Operands, OptIdx,
9623 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9624 }
9625
9626 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
9627 addOptionalImmOperand(Inst, Operands, OptIdx,
9628 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9629
9630 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
9631 addOptionalImmOperand(Inst, Operands, OptIdx,
9632 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9633
9634 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
9635 if (NegLoIdx != -1)
9636 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
9637
9638 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
9639 if (NegHiIdx != -1)
9640 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
9641
9642 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9643 AMDGPU::OpName::src2};
9644 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9645 AMDGPU::OpName::src1_modifiers,
9646 AMDGPU::OpName::src2_modifiers};
9647
9648 unsigned OpSel = 0;
9649 unsigned OpSelHi = 0;
9650 unsigned NegLo = 0;
9651 unsigned NegHi = 0;
9652
9653 if (OpSelIdx != -1)
9654 OpSel = Inst.getOperand(OpSelIdx).getImm();
9655
9656 if (OpSelHiIdx != -1)
9657 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
9658
9659 if (NegLoIdx != -1)
9660 NegLo = Inst.getOperand(NegLoIdx).getImm();
9661
9662 if (NegHiIdx != -1)
9663 NegHi = Inst.getOperand(NegHiIdx).getImm();
9664
9665 for (int J = 0; J < 3; ++J) {
9666 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9667 if (OpIdx == -1)
9668 break;
9669
9670 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9671
9672 if (ModIdx == -1)
9673 continue;
9674
9675 uint32_t ModVal = 0;
9676
9677 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
9678 if (SrcOp.isReg() && getMRI()
9679 ->getRegClass(AMDGPU::VGPR_16RegClassID)
9680 .contains(SrcOp.getReg())) {
9681 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
9682 if (VGPRSuffixIsHi)
9683 ModVal |= SISrcMods::OP_SEL_0;
9684 } else {
9685 if ((OpSel & (1 << J)) != 0)
9686 ModVal |= SISrcMods::OP_SEL_0;
9687 }
9688
9689 if ((OpSelHi & (1 << J)) != 0)
9690 ModVal |= SISrcMods::OP_SEL_1;
9691
9692 if ((NegLo & (1 << J)) != 0)
9693 ModVal |= SISrcMods::NEG;
9694
9695 if ((NegHi & (1 << J)) != 0)
9696 ModVal |= SISrcMods::NEG_HI;
9697
9698 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
9699 }
9700}
9701
9702void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9703 OptionalImmIndexMap OptIdx;
9704 cvtVOP3(Inst, Operands, OptIdx);
9705 cvtVOP3P(Inst, Operands, OptIdx);
9706}
9707
9708static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
9709 unsigned i, unsigned Opc,
9710 AMDGPU::OpName OpName) {
9711 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
9712 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9713 else
9714 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
9715}
9716
9717void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
9718 unsigned Opc = Inst.getOpcode();
9719
9720 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9721 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
9722 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
9723 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
9724 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
9725
9726 OptionalImmIndexMap OptIdx;
9727 for (unsigned i = 5; i < Operands.size(); ++i) {
9728 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9729 OptIdx[Op.getImmTy()] = i;
9730 }
9731
9732 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
9733 addOptionalImmOperand(Inst, Operands, OptIdx,
9734 AMDGPUOperand::ImmTyIndexKey8bit);
9735
9736 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
9737 addOptionalImmOperand(Inst, Operands, OptIdx,
9738 AMDGPUOperand::ImmTyIndexKey16bit);
9739
9740 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
9741 addOptionalImmOperand(Inst, Operands, OptIdx,
9742 AMDGPUOperand::ImmTyIndexKey32bit);
9743
9744 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9745 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
9746
9747 cvtVOP3P(Inst, Operands, OptIdx);
9748}
9749
9750//===----------------------------------------------------------------------===//
9751// VOPD
9752//===----------------------------------------------------------------------===//
9753
9754ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9755 if (!hasVOPD(getSTI()))
9756 return ParseStatus::NoMatch;
9757
9758 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
9759 SMLoc S = getLoc();
9760 lex();
9761 lex();
9762 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
9763 SMLoc OpYLoc = getLoc();
9764 StringRef OpYName;
9765 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
9766 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
9767 return ParseStatus::Success;
9768 }
9769 return Error(OpYLoc, "expected a VOPDY instruction after ::");
9770 }
9771 return ParseStatus::NoMatch;
9772}
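// VOPD syntax example (gfx11+, registers chosen for illustration): the '::'
// token parsed above separates the X and Y halves of a dual-issue instruction:
//   v_dual_mov_b32 v0, v1 :: v_dual_add_f32 v2, v3, v4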
9773
9774// Create VOPD MCInst operands using parsed assembler operands.
9775void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9776 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9777
9778 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
9779 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9780 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9781 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9782 return;
9783 }
9784 if (Op.isReg()) {
9785 Op.addRegOperands(Inst, 1);
9786 return;
9787 }
9788 if (Op.isImm()) {
9789 Op.addImmOperands(Inst, 1);
9790 return;
9791 }
9792 llvm_unreachable("Unhandled operand type in cvtVOPD");
9793 };
9794
9795 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
9796
9797 // MCInst operands are ordered as follows:
9798 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
9799
9800 for (auto CompIdx : VOPD::COMPONENTS) {
9801 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9802 }
9803
9804 for (auto CompIdx : VOPD::COMPONENTS) {
9805 const auto &CInfo = InstInfo[CompIdx];
9806 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9807 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9808 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
9809 if (CInfo.hasSrc2Acc())
9810 addOp(CInfo.getIndexOfDstInParsedOperands());
9811 }
9812
9813 int BitOp3Idx =
9814 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
9815 if (BitOp3Idx != -1) {
9816 OptionalImmIndexMap OptIdx;
9817 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
9818 if (Op.isImm())
9819 OptIdx[Op.getImmTy()] = Operands.size() - 1;
9820
9821 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9822 }
9823}
9824
9825//===----------------------------------------------------------------------===//
9826// dpp
9827//===----------------------------------------------------------------------===//
9828
9829bool AMDGPUOperand::isDPP8() const {
9830 return isImmTy(ImmTyDPP8);
9831}
9832
9833bool AMDGPUOperand::isDPPCtrl() const {
9834 using namespace AMDGPU::DPP;
9835
9836 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
9837 if (result) {
9838 int64_t Imm = getImm();
9839 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9840 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9841 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9842 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9843 (Imm == DppCtrl::WAVE_SHL1) ||
9844 (Imm == DppCtrl::WAVE_ROL1) ||
9845 (Imm == DppCtrl::WAVE_SHR1) ||
9846 (Imm == DppCtrl::WAVE_ROR1) ||
9847 (Imm == DppCtrl::ROW_MIRROR) ||
9848 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9849 (Imm == DppCtrl::BCAST15) ||
9850 (Imm == DppCtrl::BCAST31) ||
9851 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9852 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9853 }
9854 return false;
9855}
9856
9857//===----------------------------------------------------------------------===//
9858// mAI
9859//===----------------------------------------------------------------------===//
9860
9861bool AMDGPUOperand::isBLGP() const {
9862 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
9863}
9864
9865bool AMDGPUOperand::isS16Imm() const {
9866 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
9867}
9868
9869bool AMDGPUOperand::isU16Imm() const {
9870 return isImmLiteral() && isUInt<16>(getImm());
9871}
9872
9873//===----------------------------------------------------------------------===//
9874// dim
9875//===----------------------------------------------------------------------===//
9876
9877bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
9878 // We want to allow "dim:1D" etc.,
9879 // but the initial 1 is tokenized as an integer.
9880 std::string Token;
9881 if (isToken(AsmToken::Integer)) {
9882 SMLoc Loc = getToken().getEndLoc();
9883 Token = std::string(getTokenStr());
9884 lex();
9885 if (getLoc() != Loc)
9886 return false;
9887 }
9888
9889 StringRef Suffix;
9890 if (!parseId(Suffix))
9891 return false;
9892 Token += Suffix;
9893
9894 StringRef DimId = Token;
9895 DimId.consume_front("SQ_RSRC_IMG_");
9896
9897 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
9898 if (!DimInfo)
9899 return false;
9900
9901 Encoding = DimInfo->Encoding;
9902 return true;
9903}
9904
9905ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
9906 if (!isGFX10Plus())
9907 return ParseStatus::NoMatch;
9908
9909 SMLoc S = getLoc();
9910
9911 if (!trySkipId("dim", AsmToken::Colon))
9912 return ParseStatus::NoMatch;
9913
9914 unsigned Encoding;
9915 SMLoc Loc = getLoc();
9916 if (!parseDimId(Encoding))
9917 return Error(Loc, "invalid dim value");
9918
9919 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9920 AMDGPUOperand::ImmTyDim));
9921 return ParseStatus::Success;
9922}
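// Illustrative 'dim' forms accepted by parseDimId/parseDim (gfx10+); both spell
// the same encoding because the SQ_RSRC_IMG_ prefix is stripped above:
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:1D
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D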
9923
9924//===----------------------------------------------------------------------===//
9925// dpp
9926//===----------------------------------------------------------------------===//
9927
9928ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9929 SMLoc S = getLoc();
9930
9931 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9932 return ParseStatus::NoMatch;
9933
9934 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9935
9936 int64_t Sels[8];
9937
9938 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9939 return ParseStatus::Failure;
9940
9941 for (size_t i = 0; i < 8; ++i) {
9942 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9943 return ParseStatus::Failure;
9944
9945 SMLoc Loc = getLoc();
9946 if (getParser().parseAbsoluteExpression(Sels[i]))
9947 return ParseStatus::Failure;
9948 if (0 > Sels[i] || 7 < Sels[i])
9949 return Error(Loc, "expected a 3-bit value");
9950 }
9951
9952 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9953 return ParseStatus::Failure;
9954
9955 unsigned DPP8 = 0;
9956 for (size_t i = 0; i < 8; ++i)
9957 DPP8 |= (Sels[i] << (i * 3));
9958
9959 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9960 return ParseStatus::Success;
9961}
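// dpp8 example (registers chosen for illustration): eight 3-bit lane selectors
// packed LSB-first by the loop above, so this reverses each group of eight lanes:
//   v_mov_b32_dpp v0, v1 dpp8:[7,6,5,4,3,2,1,0]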
9962
9963bool
9964AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9965 const OperandVector &Operands) {
9966 if (Ctrl == "row_newbcast")
9967 return isGFX90A();
9968
9969 if (Ctrl == "row_share" ||
9970 Ctrl == "row_xmask")
9971 return isGFX10Plus();
9972
9973 if (Ctrl == "wave_shl" ||
9974 Ctrl == "wave_shr" ||
9975 Ctrl == "wave_rol" ||
9976 Ctrl == "wave_ror" ||
9977 Ctrl == "row_bcast")
9978 return isVI() || isGFX9();
9979
9980 return Ctrl == "row_mirror" ||
9981 Ctrl == "row_half_mirror" ||
9982 Ctrl == "quad_perm" ||
9983 Ctrl == "row_shl" ||
9984 Ctrl == "row_shr" ||
9985 Ctrl == "row_ror";
9986}
9987
9988int64_t
9989AMDGPUAsmParser::parseDPPCtrlPerm() {
9990 // quad_perm:[%d,%d,%d,%d]
9991
9992 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9993 return -1;
9994
9995 int64_t Val = 0;
9996 for (int i = 0; i < 4; ++i) {
9997 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9998 return -1;
9999
10000 int64_t Temp;
10001 SMLoc Loc = getLoc();
10002 if (getParser().parseAbsoluteExpression(Temp))
10003 return -1;
10004 if (Temp < 0 || Temp > 3) {
10005 Error(Loc, "expected a 2-bit value");
10006 return -1;
10007 }
10008
10009 Val += (Temp << i * 2);
10010 }
10011
10012 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10013 return -1;
10014
10015 return Val;
10016}
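// quad_perm packing (follows the loop above): quad_perm:[3,2,1,0] yields
// 3 | (2<<2) | (1<<4) | (0<<6) = 0x1b, i.e. each group of four lanes is reversed.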
10017
10018int64_t
10019AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10020 using namespace AMDGPU::DPP;
10021
10022 // sel:%d
10023
10024 int64_t Val;
10025 SMLoc Loc = getLoc();
10026
10027 if (getParser().parseAbsoluteExpression(Val))
10028 return -1;
10029
10030 struct DppCtrlCheck {
10031 int64_t Ctrl;
10032 int Lo;
10033 int Hi;
10034 };
10035
10036 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
10037 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10038 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10039 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10040 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10041 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10042 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10043 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10044 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10045 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10046 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10047 .Default({-1, 0, 0});
10048
10049 bool Valid;
10050 if (Check.Ctrl == -1) {
10051 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
10052 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
10053 } else {
10054 Valid = Check.Lo <= Val && Val <= Check.Hi;
10055 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
10056 }
10057
10058 if (!Valid) {
10059 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
10060 return -1;
10061 }
10062
10063 return Val;
10064}
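// Examples of the sel forms validated above:
//   row_shl:1 .. row_shl:15     -> DppCtrl::ROW_SHL0 | n
//   row_bcast:15 / row_bcast:31 -> DppCtrl::BCAST15 / DppCtrl::BCAST31
// (row_bcast accepts only these two values; all other values are rejected.)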
10065
10066ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10067 using namespace AMDGPU::DPP;
10068
10069 if (!isToken(AsmToken::Identifier) ||
10070 !isSupportedDPPCtrl(getTokenStr(), Operands))
10071 return ParseStatus::NoMatch;
10072
10073 SMLoc S = getLoc();
10074 int64_t Val = -1;
10075 StringRef Ctrl;
10076
10077 parseId(Ctrl);
10078
10079 if (Ctrl == "row_mirror") {
10080 Val = DppCtrl::ROW_MIRROR;
10081 } else if (Ctrl == "row_half_mirror") {
10082 Val = DppCtrl::ROW_HALF_MIRROR;
10083 } else {
10084 if (skipToken(AsmToken::Colon, "expected a colon")) {
10085 if (Ctrl == "quad_perm") {
10086 Val = parseDPPCtrlPerm();
10087 } else {
10088 Val = parseDPPCtrlSel(Ctrl);
10089 }
10090 }
10091 }
10092
10093 if (Val == -1)
10094 return ParseStatus::Failure;
10095
10096 Operands.push_back(
10097 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10098 return ParseStatus::Success;
10099}
10100
10101void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
10102 bool IsDPP8) {
10103 OptionalImmIndexMap OptionalIdx;
10104 unsigned Opc = Inst.getOpcode();
10105 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10106
10107 // MAC instructions are special because they have 'old'
10108 // operand which is not tied to dst (but assumed to be).
10109 // They also have dummy unused src2_modifiers.
10110 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
10111 int Src2ModIdx =
10112 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
10113 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10114 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
10115
10116 unsigned I = 1;
10117 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10118 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10119 }
10120
10121 int Fi = 0;
10122 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
10123 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10124 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10125 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10126 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
10127
10128 for (unsigned E = Operands.size(); I != E; ++I) {
10129
10130 if (IsMAC) {
10131 int NumOperands = Inst.getNumOperands();
10132 if (OldIdx == NumOperands) {
10133 // Handle old operand
10134 constexpr int DST_IDX = 0;
10135 Inst.addOperand(Inst.getOperand(DST_IDX));
10136 } else if (Src2ModIdx == NumOperands) {
10137 // Add unused dummy src2_modifiers
10138 Inst.addOperand(MCOperand::createImm(0));
10139 }
10140 }
10141
10142 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
10143 Inst.addOperand(Inst.getOperand(0));
10144 }
10145
10146 if (IsVOP3CvtSrDpp) {
10147 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
10148 Inst.addOperand(MCOperand::createImm(0));
10149 Inst.addOperand(MCOperand::createReg(MCRegister()));
10150 }
10151 }
10152
10153 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10154 MCOI::TIED_TO);
10155 if (TiedTo != -1) {
10156 assert((unsigned)TiedTo < Inst.getNumOperands());
10157 // handle tied old or src2 for MAC instructions
10158 Inst.addOperand(Inst.getOperand(TiedTo));
10159 }
10160 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10161 // Add the register arguments
10162 if (IsDPP8 && Op.isDppFI()) {
10163 Fi = Op.getImm();
10164 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10165 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10166 } else if (Op.isReg()) {
10167 Op.addRegOperands(Inst, 1);
10168 } else if (Op.isImm() &&
10169 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
10170 Op.addImmOperands(Inst, 1);
10171 } else if (Op.isImm()) {
10172 OptionalIdx[Op.getImmTy()] = I;
10173 } else {
10174 llvm_unreachable("unhandled operand type");
10175 }
10176 }
10177
10178 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
10179 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10180 AMDGPUOperand::ImmTyClamp);
10181
10182 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
10183 if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
10184 Inst.addOperand(Inst.getOperand(0));
10185 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10186 AMDGPUOperand::ImmTyByteSel);
10187 }
10188
10189 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10190 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
10191
10192 if (Desc.TSFlags & SIInstrFlags::VOP3P)
10193 cvtVOP3P(Inst, Operands, OptionalIdx);
10194 else if (Desc.TSFlags & SIInstrFlags::VOP3)
10195 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10196 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
10197 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
10198 }
10199
10200 if (IsDPP8) {
10201 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
10202 using namespace llvm::AMDGPU::DPP;
10203 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10204 } else {
10205 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
10206 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10207 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10208 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10209
10210 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
10211 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10212 AMDGPUOperand::ImmTyDppFI);
10213 }
10214}
10215
10216void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
10217 OptionalImmIndexMap OptionalIdx;
10218
10219 unsigned I = 1;
10220 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10221 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10222 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10223 }
10224
10225 int Fi = 0;
10226 for (unsigned E = Operands.size(); I != E; ++I) {
10227 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10228 MCOI::TIED_TO);
10229 if (TiedTo != -1) {
10230 assert((unsigned)TiedTo < Inst.getNumOperands());
10231 // handle tied old or src2 for MAC instructions
10232 Inst.addOperand(Inst.getOperand(TiedTo));
10233 }
10234 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10235 // Add the register arguments
10236 if (Op.isReg() && validateVccOperand(Op.getReg())) {
10237 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
10238 // Skip it.
10239 continue;
10240 }
10241
10242 if (IsDPP8) {
10243 if (Op.isDPP8()) {
10244 Op.addImmOperands(Inst, 1);
10245 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10246 Op.addRegWithFPInputModsOperands(Inst, 2);
10247 } else if (Op.isDppFI()) {
10248 Fi = Op.getImm();
10249 } else if (Op.isReg()) {
10250 Op.addRegOperands(Inst, 1);
10251 } else {
10252 llvm_unreachable("Invalid operand type");
10253 }
10254 } else {
10255 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10256 Op.addRegWithFPInputModsOperands(Inst, 2);
10257 } else if (Op.isReg()) {
10258 Op.addRegOperands(Inst, 1);
10259 } else if (Op.isDPPCtrl()) {
10260 Op.addImmOperands(Inst, 1);
10261 } else if (Op.isImm()) {
10262 // Handle optional arguments
10263 OptionalIdx[Op.getImmTy()] = I;
10264 } else {
10265 llvm_unreachable("Invalid operand type");
10266 }
10267 }
10268 }
10269
10270 if (IsDPP8) {
10271 using namespace llvm::AMDGPU::DPP;
10272 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10273 } else {
10274 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10275 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10276 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10277 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
10278 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10279 AMDGPUOperand::ImmTyDppFI);
10280 }
10281 }
10282}
10283
10284//===----------------------------------------------------------------------===//
10285// sdwa
10286//===----------------------------------------------------------------------===//
10287
10288ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
10289 StringRef Prefix,
10290 AMDGPUOperand::ImmTy Type) {
10291 return parseStringOrIntWithPrefix(
10292 Operands, Prefix,
10293 {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
10294 Type);
10295}
10296
10297ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
10298 return parseStringOrIntWithPrefix(
10299 Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
10300 AMDGPUOperand::ImmTySDWADstUnused);
10301}
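// Illustrative SDWA selector syntax handled by the two parsers above (registers
// chosen for the example):
//   v_add_f32_sdwa v0, v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:DWORD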
10302
10303void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
10304 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
10305}
10306
10307void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
10308 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
10309}
10310
10311void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
10312 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
10313}
10314
10315void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
10316 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
10317}
10318
10319void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
10320 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
10321}
10322
10323void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
10324 uint64_t BasicInstType,
10325 bool SkipDstVcc,
10326 bool SkipSrcVcc) {
10327 using namespace llvm::AMDGPU::SDWA;
10328
10329 OptionalImmIndexMap OptionalIdx;
10330 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10331 bool SkippedVcc = false;
10332
10333 unsigned I = 1;
10334 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10335 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10336 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10337 }
10338
10339 for (unsigned E = Operands.size(); I != E; ++I) {
10340 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10341 if (SkipVcc && !SkippedVcc && Op.isReg() &&
10342 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
10343 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
10344 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
10345 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
10346 // Skip VCC only if we didn't skip it on previous iteration.
10347 // Note that src0 and src1 occupy 2 slots each because of modifiers.
10348 if (BasicInstType == SIInstrFlags::VOP2 &&
10349 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
10350 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
10351 SkippedVcc = true;
10352 continue;
10353 }
10354 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
10355 SkippedVcc = true;
10356 continue;
10357 }
10358 }
10359 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10360 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10361 } else if (Op.isImm()) {
10362 // Handle optional arguments
10363 OptionalIdx[Op.getImmTy()] = I;
10364 } else {
10365 llvm_unreachable("Invalid operand type");
10366 }
10367 SkippedVcc = false;
10368 }
10369
10370 const unsigned Opc = Inst.getOpcode();
10371 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10372 Opc != AMDGPU::V_NOP_sdwa_vi) {
10373 // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
10374 switch (BasicInstType) {
10375 case SIInstrFlags::VOP1:
10376 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10377 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10378 AMDGPUOperand::ImmTyClamp, 0);
10379
10380 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10381 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10382 AMDGPUOperand::ImmTyOModSI, 0);
10383
10384 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
10385 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10386 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10387
10388 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
10389 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10390 AMDGPUOperand::ImmTySDWADstUnused,
10391 DstUnused::UNUSED_PRESERVE);
10392
10393 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10394 break;
10395
10396 case SIInstrFlags::VOP2:
10397 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10398 AMDGPUOperand::ImmTyClamp, 0);
10399
10400 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
10401 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
10402
10403 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10404 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10405 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10406 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10407 break;
10408
10409 case SIInstrFlags::VOPC:
10410 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
10411 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10412 AMDGPUOperand::ImmTyClamp, 0);
10413 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10414 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10415 break;
10416
10417 default:
10418 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
10419 }
10420 }
10421
10422 // special case v_mac_{f16, f32}:
10423 // it has src2 register operand that is tied to dst operand
10424 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10425 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10426 auto *it = Inst.begin();
10427 std::advance(
10428 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
10429 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
10430 }
10431}
10432
10433/// Force static initialization.
10434extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
10435LLVMInitializeAMDGPUAsmParser() {
10436 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
10437 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
10438}
10439
10440#define GET_MATCHER_IMPLEMENTATION
10441#define GET_MNEMONIC_SPELL_CHECKER
10442#define GET_MNEMONIC_CHECKER
10443#include "AMDGPUGenAsmMatcher.inc"
10444
10445ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10446 unsigned MCK) {
10447 switch (MCK) {
10448 case MCK_addr64:
10449 return parseTokenOp("addr64", Operands);
10450 case MCK_done:
10451 return parseTokenOp("done", Operands);
10452 case MCK_idxen:
10453 return parseTokenOp("idxen", Operands);
10454 case MCK_lds:
10455 return parseTokenOp("lds", Operands);
10456 case MCK_offen:
10457 return parseTokenOp("offen", Operands);
10458 case MCK_off:
10459 return parseTokenOp("off", Operands);
10460 case MCK_row_95_en:
10461 return parseTokenOp("row_en", Operands);
10462 case MCK_gds:
10463 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10464 case MCK_tfe:
10465 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10466 }
10467 return tryCustomParseOperand(Operands, MCK);
10468}
10469
10470// This function should be defined after auto-generated include so that we have
10471// MatchClassKind enum defined
10472unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10473 unsigned Kind) {
10474 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10475 // But MatchInstructionImpl() expects a token and fails to validate the
10476 // operand. This method checks whether we were given an immediate operand but
10477 // expected the corresponding token.
10478 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10479 switch (Kind) {
10480 case MCK_addr64:
10481 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10482 case MCK_gds:
10483 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10484 case MCK_lds:
10485 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10486 case MCK_idxen:
10487 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10488 case MCK_offen:
10489 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10490 case MCK_tfe:
10491 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10492 case MCK_SSrc_b32:
10493 // When operands have expression values, they will return true for isToken,
10494 // because it is not possible to distinguish between a token and an
10495 // expression at parse time. MatchInstructionImpl() will always try to
10496 // match an operand as a token, when isToken returns true, and when the
10497 // name of the expression is not a valid token, the match will fail,
10498 // so we need to handle it here.
10499 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10500 case MCK_SSrc_f32:
10501 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10502 case MCK_SOPPBrTarget:
10503 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10504 case MCK_VReg32OrOff:
10505 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10506 case MCK_InterpSlot:
10507 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10508 case MCK_InterpAttr:
10509 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10510 case MCK_InterpAttrChan:
10511 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10512 case MCK_SReg_64:
10513 case MCK_SReg_64_XEXEC:
10514 // Null is defined as a 32-bit register but
10515 // it should also be enabled with 64-bit operands or larger.
10516 // The following code enables it for SReg_64 and larger operands
10517 // used as source and destination. Remaining source
10518 // operands are handled in isInlinableImm.
10519 case MCK_SReg_96:
10520 case MCK_SReg_128:
10521 case MCK_SReg_256:
10522 case MCK_SReg_512:
10523 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10524 default:
10525 return Match_InvalidOperand;
10526 }
10527}
10528
10529//===----------------------------------------------------------------------===//
10530// endpgm
10531//===----------------------------------------------------------------------===//
10532
10533ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10534 SMLoc S = getLoc();
10535 int64_t Imm = 0;
10536
10537 if (!parseExpr(Imm)) {
10538 // The operand is optional, if not present default to 0
10539 Imm = 0;
10540 }
10541
10542 if (!isUInt<16>(Imm))
10543 return Error(S, "expected a 16-bit value");
10544
10545 Operands.push_back(
10546 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10547 return ParseStatus::Success;
10548}
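// s_endpgm takes an optional immediate, defaulting to 0 when omitted; the value
// must fit in 16 bits:
//   s_endpgm
//   s_endpgm 3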
10549
10550bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10551
10552//===----------------------------------------------------------------------===//
10553// Split Barrier
10554//===----------------------------------------------------------------------===//
10555
10556bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
BinaryOperator * Mul
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & BFloat()
Definition APFloat.h:295
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5975
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition MCAsmMacro.h:103
bool is(TokenKind K) const
Definition MCAsmMacro.h:75
Register getReg() const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:353
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
SMLoc getLoc() const
Definition MCInst.h:208
void setLoc(SMLoc loc)
Definition MCInst.h:207
unsigned getOpcode() const
Definition MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition MCInst.h:232
void addOperand(const MCOperand Op)
Definition MCInst.h:215
iterator begin()
Definition MCInst.h:227
size_t size() const
Definition MCInst.h:226
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Definition MCInstrInfo.h:80
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
void setImm(int64_t Val)
Definition MCInst.h:89
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
bool isImm() const
Definition MCInst.h:66
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
const MCExpr * getExpr() const
Definition MCInst.h:118
bool isExpr() const
Definition MCInst.h:69
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr bool isValid() const
Definition MCRegister.h:84
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
void setRedefinable(bool Value)
Mark this symbol as redefinable.
Definition MCSymbol.h:210
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCTargetAsmParser - Generic interface to target specific assembly parsers.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Definition Register.h:100
Represents a location in source code.
Definition SMLoc.h:22
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:35
constexpr const char * getPointer() const
Definition SMLoc.h:33
constexpr bool isValid() const
Definition SMLoc.h:28
SMLoc Start
Definition SMLoc.h:49
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:864
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition StringRef.h:667
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:573
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:611
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:273
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:637
bool contains(StringRef key) const
Check if the set contains the given key.
Definition StringSet.h:60
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
static constexpr CustomOperand Operands[]
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlineValue(MCRegister Reg)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY bool isLitExpr(const MCExpr *Expr)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
LLVM_READONLY int64_t getLitValue(const MCExpr *Expr)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:233
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:203
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:210
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:224
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:221
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:226
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:212
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:207
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:202
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:209
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:211
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:220
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:218
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:213
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:206
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:227
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:238
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:239
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:214
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:205
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:223
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:219
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:225
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:215
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:240
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:222
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:204
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:230
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
bool isGFX1250Plus(const MCSubtargetInfo &STI)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1430
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ OPERAND_IMMEDIATE
Definition MCInstrDesc.h:61
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
@ Valid
The data is already valid.
Context & getContext() const
Definition BasicBlock.h:99
bool isNull(StringRef S)
Definition YAMLTraits.h:570
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
Definition Error.h:1113
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
StringMapEntry< Value * > ValueName
Definition Value.h:56
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition Alignment.h:206
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
void PrintError(const Twine &Msg)
Definition Error.cpp:104
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
Definition InstrProf.h:267
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
Op::Description Desc
Target & getTheR600Target()
The target for R600 GPUs.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1770
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:27
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
#define N
RegisterKind Kind
StringLiteral Name
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
Instruction set architecture version.
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...