1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
14#include "SIDefines.h"
15#include "SIInstrInfo.h"
16#include "SIRegisterInfo.h"
21#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/StringSet.h"
24#include "llvm/ADT/Twine.h"
27#include "llvm/MC/MCAsmInfo.h"
28#include "llvm/MC/MCContext.h"
29#include "llvm/MC/MCExpr.h"
30#include "llvm/MC/MCInst.h"
31#include "llvm/MC/MCInstrDesc.h"
36#include "llvm/MC/MCSymbol.h"
43#include <optional>
44
45using namespace llvm;
46using namespace llvm::AMDGPU;
47using namespace llvm::amdhsa;
48
49namespace {
50
51class AMDGPUAsmParser;
52
53enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
54
55//===----------------------------------------------------------------------===//
56// Operand
57//===----------------------------------------------------------------------===//
58
59class AMDGPUOperand : public MCParsedAsmOperand {
60 enum KindTy {
61 Token,
62 Immediate,
63 Register,
64 Expression
65 } Kind;
66
67 SMLoc StartLoc, EndLoc;
68 const AMDGPUAsmParser *AsmParser;
69
70public:
71 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
72 : Kind(Kind_), AsmParser(AsmParser_) {}
73
74 using Ptr = std::unique_ptr<AMDGPUOperand>;
75
76 struct Modifiers {
77 bool Abs = false;
78 bool Neg = false;
79 bool Sext = false;
80 bool Lit = false;
81
82 bool hasFPModifiers() const { return Abs || Neg; }
83 bool hasIntModifiers() const { return Sext; }
84 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
85
86 int64_t getFPModifiersOperand() const {
87 int64_t Operand = 0;
88 Operand |= Abs ? SISrcMods::ABS : 0u;
89 Operand |= Neg ? SISrcMods::NEG : 0u;
90 return Operand;
91 }
92
93 int64_t getIntModifiersOperand() const {
94 int64_t Operand = 0;
95 Operand |= Sext ? SISrcMods::SEXT : 0u;
96 return Operand;
97 }
98
99 int64_t getModifiersOperand() const {
100 assert(!(hasFPModifiers() && hasIntModifiers())
101 && "fp and int modifiers should not be used simultaneously");
102 if (hasFPModifiers()) {
103 return getFPModifiersOperand();
104 } else if (hasIntModifiers()) {
105 return getIntModifiersOperand();
106 } else {
107 return 0;
108 }
109 }
110
111 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
112 };
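// Editorial illustration (not part of the original source): for a source
// operand written as "-|v0|" both Neg and Abs are set, so
// getModifiersOperand() returns SISrcMods::NEG | SISrcMods::ABS, while
// "sext(v0)" sets only Sext and yields SISrcMods::SEXT.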
113
114 enum ImmTy {
115 ImmTyNone,
116 ImmTyGDS,
117 ImmTyLDS,
118 ImmTyOffen,
119 ImmTyIdxen,
120 ImmTyAddr64,
121 ImmTyOffset,
122 ImmTyInstOffset,
123 ImmTyOffset0,
124 ImmTyOffset1,
125 ImmTySMEMOffsetMod,
126 ImmTyCPol,
127 ImmTyTFE,
128 ImmTyD16,
129 ImmTyClampSI,
130 ImmTyOModSI,
131 ImmTySDWADstSel,
132 ImmTySDWASrc0Sel,
133 ImmTySDWASrc1Sel,
134 ImmTySDWADstUnused,
135 ImmTyDMask,
136 ImmTyDim,
137 ImmTyUNorm,
138 ImmTyDA,
139 ImmTyR128A16,
140 ImmTyA16,
141 ImmTyLWE,
142 ImmTyExpTgt,
143 ImmTyExpCompr,
144 ImmTyExpVM,
145 ImmTyFORMAT,
146 ImmTyHwreg,
147 ImmTyOff,
148 ImmTySendMsg,
149 ImmTyInterpSlot,
150 ImmTyInterpAttr,
151 ImmTyInterpAttrChan,
152 ImmTyOpSel,
153 ImmTyOpSelHi,
154 ImmTyNegLo,
155 ImmTyNegHi,
156 ImmTyIndexKey8bit,
157 ImmTyIndexKey16bit,
158 ImmTyDPP8,
159 ImmTyDppCtrl,
160 ImmTyDppRowMask,
161 ImmTyDppBankMask,
162 ImmTyDppBoundCtrl,
163 ImmTyDppFI,
164 ImmTySwizzle,
165 ImmTyGprIdxMode,
166 ImmTyHigh,
167 ImmTyBLGP,
168 ImmTyCBSZ,
169 ImmTyABID,
170 ImmTyEndpgm,
171 ImmTyWaitVDST,
172 ImmTyWaitEXP,
173 ImmTyWaitVAVDst,
174 ImmTyWaitVMVSrc,
175 };
176
177 // Immediate operand kind.
178 // It helps to identify the location of an offending operand after an error.
179 // Note that regular literals and mandatory literals (KImm) must be handled
180 // differently. When looking for an offending operand, we should usually
181 // ignore mandatory literals because they are part of the instruction and
182 // cannot be changed. Report location of mandatory operands only for VOPD,
183 // when both OpX and OpY have a KImm and there are no other literals.
184 enum ImmKindTy {
185 ImmKindTyNone,
186 ImmKindTyLiteral,
187 ImmKindTyMandatoryLiteral,
188 ImmKindTyConst,
189 };
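// Editorial examples, assuming the usual literal classification: the
// non-inlinable literal in "v_add_f32 v0, 0x12345678, v1" is tracked as
// ImmKindTyLiteral, the inline constant 1.0 in "v_add_f32 v0, 1.0, v1" as
// ImmKindTyConst, and the mandatory K operand of v_madmk_f32 as
// ImmKindTyMandatoryLiteral.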
190
191private:
192 struct TokOp {
193 const char *Data;
194 unsigned Length;
195 };
196
197 struct ImmOp {
198 int64_t Val;
199 ImmTy Type;
200 bool IsFPImm;
201 mutable ImmKindTy Kind;
202 Modifiers Mods;
203 };
204
205 struct RegOp {
206 unsigned RegNo;
207 Modifiers Mods;
208 };
209
210 union {
211 TokOp Tok;
212 ImmOp Imm;
213 RegOp Reg;
214 const MCExpr *Expr;
215 };
216
217public:
218 bool isToken() const override { return Kind == Token; }
219
220 bool isSymbolRefExpr() const {
221 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
222 }
223
224 bool isImm() const override {
225 return Kind == Immediate;
226 }
227
228 void setImmKindNone() const {
229 assert(isImm());
230 Imm.Kind = ImmKindTyNone;
231 }
232
233 void setImmKindLiteral() const {
234 assert(isImm());
235 Imm.Kind = ImmKindTyLiteral;
236 }
237
238 void setImmKindMandatoryLiteral() const {
239 assert(isImm());
240 Imm.Kind = ImmKindTyMandatoryLiteral;
241 }
242
243 void setImmKindConst() const {
244 assert(isImm());
245 Imm.Kind = ImmKindTyConst;
246 }
247
248 bool IsImmKindLiteral() const {
249 return isImm() && Imm.Kind == ImmKindTyLiteral;
250 }
251
252 bool IsImmKindMandatoryLiteral() const {
253 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
254 }
255
256 bool isImmKindConst() const {
257 return isImm() && Imm.Kind == ImmKindTyConst;
258 }
259
260 bool isInlinableImm(MVT type) const;
261 bool isLiteralImm(MVT type) const;
262
263 bool isRegKind() const {
264 return Kind == Register;
265 }
266
267 bool isReg() const override {
268 return isRegKind() && !hasModifiers();
269 }
270
271 bool isRegOrInline(unsigned RCID, MVT type) const {
272 return isRegClass(RCID) || isInlinableImm(type);
273 }
274
275 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
276 return isRegOrInline(RCID, type) || isLiteralImm(type);
277 }
278
279 bool isRegOrImmWithInt16InputMods() const {
280 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
281 }
282
283 bool isRegOrImmWithIntT16InputMods() const {
284 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
285 }
286
287 bool isRegOrImmWithInt32InputMods() const {
288 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
289 }
290
291 bool isRegOrInlineImmWithInt16InputMods() const {
292 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
293 }
294
295 bool isRegOrInlineImmWithInt32InputMods() const {
296 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
297 }
298
299 bool isRegOrImmWithInt64InputMods() const {
300 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
301 }
302
303 bool isRegOrImmWithFP16InputMods() const {
304 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
305 }
306
307 bool isRegOrImmWithFPT16InputMods() const {
308 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
309 }
310
311 bool isRegOrImmWithFP32InputMods() const {
312 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
313 }
314
315 bool isRegOrImmWithFP64InputMods() const {
316 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
317 }
318
319 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
320 return isRegOrInline(
321 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
322 }
323
324 bool isRegOrInlineImmWithFP32InputMods() const {
325 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
326 }
327
328 bool isPackedFP16InputMods() const {
329 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
330 }
331
332 bool isVReg() const {
333 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
334 isRegClass(AMDGPU::VReg_64RegClassID) ||
335 isRegClass(AMDGPU::VReg_96RegClassID) ||
336 isRegClass(AMDGPU::VReg_128RegClassID) ||
337 isRegClass(AMDGPU::VReg_160RegClassID) ||
338 isRegClass(AMDGPU::VReg_192RegClassID) ||
339 isRegClass(AMDGPU::VReg_256RegClassID) ||
340 isRegClass(AMDGPU::VReg_512RegClassID) ||
341 isRegClass(AMDGPU::VReg_1024RegClassID);
342 }
343
344 bool isVReg32() const {
345 return isRegClass(AMDGPU::VGPR_32RegClassID);
346 }
347
348 bool isVReg32OrOff() const {
349 return isOff() || isVReg32();
350 }
351
352 bool isNull() const {
353 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
354 }
355
356 bool isVRegWithInputMods() const;
357 template <bool IsFake16> bool isT16VRegWithInputMods() const;
358
359 bool isSDWAOperand(MVT type) const;
360 bool isSDWAFP16Operand() const;
361 bool isSDWAFP32Operand() const;
362 bool isSDWAInt16Operand() const;
363 bool isSDWAInt32Operand() const;
364
365 bool isImmTy(ImmTy ImmT) const {
366 return isImm() && Imm.Type == ImmT;
367 }
368
369 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
370
371 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
372
373 bool isImmModifier() const {
374 return isImm() && Imm.Type != ImmTyNone;
375 }
376
377 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
378 bool isDMask() const { return isImmTy(ImmTyDMask); }
379 bool isDim() const { return isImmTy(ImmTyDim); }
380 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
381 bool isOff() const { return isImmTy(ImmTyOff); }
382 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
383 bool isOffen() const { return isImmTy(ImmTyOffen); }
384 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
385 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
386 bool isOffset() const { return isImmTy(ImmTyOffset); }
387 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
388 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
389 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
390 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
391 bool isGDS() const { return isImmTy(ImmTyGDS); }
392 bool isLDS() const { return isImmTy(ImmTyLDS); }
393 bool isCPol() const { return isImmTy(ImmTyCPol); }
394 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
395 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
396 bool isTFE() const { return isImmTy(ImmTyTFE); }
397 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
398 bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
399 bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
400 bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
401 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
402 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
403 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
404 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
405 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
406 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
407 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
408 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
409 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
410 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
411 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
412 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
413
414 bool isRegOrImm() const {
415 return isReg() || isImm();
416 }
417
418 bool isRegClass(unsigned RCID) const;
419
420 bool isInlineValue() const;
421
422 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
423 return isRegOrInline(RCID, type) && !hasModifiers();
424 }
425
426 bool isSCSrcB16() const {
427 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
428 }
429
430 bool isSCSrcV2B16() const {
431 return isSCSrcB16();
432 }
433
434 bool isSCSrc_b32() const {
435 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
436 }
437
438 bool isSCSrc_b64() const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
440 }
441
442 bool isBoolReg() const;
443
444 bool isSCSrcF16() const {
445 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
446 }
447
448 bool isSCSrcV2F16() const {
449 return isSCSrcF16();
450 }
451
452 bool isSCSrcF32() const {
453 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
454 }
455
456 bool isSCSrcF64() const {
457 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
458 }
459
460 bool isSSrc_b32() const {
461 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
462 }
463
464 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
465
466 bool isSSrcV2B16() const {
467 llvm_unreachable("cannot happen");
468 return isSSrc_b16();
469 }
470
471 bool isSSrc_b64() const {
472 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
473 // See isVSrc64().
474 return isSCSrc_b64() || isLiteralImm(MVT::i64);
475 }
476
477 bool isSSrc_f32() const {
478 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
479 }
480
481 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
482
483 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
484
485 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
486
487 bool isSSrcV2F16() const {
488 llvm_unreachable("cannot happen");
489 return isSSrc_f16();
490 }
491
492 bool isSSrcV2FP32() const {
493 llvm_unreachable("cannot happen");
494 return isSSrc_f32();
495 }
496
497 bool isSCSrcV2FP32() const {
498 llvm_unreachable("cannot happen");
499 return isSCSrcF32();
500 }
501
502 bool isSSrcV2INT32() const {
503 llvm_unreachable("cannot happen");
504 return isSSrc_b32();
505 }
506
507 bool isSCSrcV2INT32() const {
508 llvm_unreachable("cannot happen");
509 return isSCSrc_b32();
510 }
511
512 bool isSSrcOrLds_b32() const {
513 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
514 isLiteralImm(MVT::i32) || isExpr();
515 }
516
517 bool isVCSrc_b32() const {
518 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
519 }
520
521 bool isVCSrcB64() const {
522 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
523 }
524
525 bool isVCSrcTB16() const {
526 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
527 }
528
529 bool isVCSrcTB16_Lo128() const {
530 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
531 }
532
533 bool isVCSrcFake16B16_Lo128() const {
534 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
535 }
536
537 bool isVCSrc_b16() const {
538 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
539 }
540
541 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
542
543 bool isVCSrc_f32() const {
544 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
545 }
546
547 bool isVCSrcF64() const {
548 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
549 }
550
551 bool isVCSrcTBF16() const {
552 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
553 }
554
555 bool isVCSrcTF16() const {
556 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
557 }
558
559 bool isVCSrcTBF16_Lo128() const {
560 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
561 }
562
563 bool isVCSrcTF16_Lo128() const {
564 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
565 }
566
567 bool isVCSrcFake16BF16_Lo128() const {
568 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
569 }
570
571 bool isVCSrcFake16F16_Lo128() const {
572 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
573 }
574
575 bool isVCSrc_bf16() const {
576 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
577 }
578
579 bool isVCSrc_f16() const {
580 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
581 }
582
583 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
584
585 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
586
587 bool isVSrc_b32() const {
588 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
589 }
590
591 bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
592
593 bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
594
595 bool isVSrcT_b16_Lo128() const {
596 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
597 }
598
599 bool isVSrcFake16_b16_Lo128() const {
600 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
601 }
602
603 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
604
605 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
606
607 bool isVCSrcV2FP32() const {
608 return isVCSrcF64();
609 }
610
611 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
612
613 bool isVCSrcV2INT32() const {
614 return isVCSrcB64();
615 }
616
617 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
618
619 bool isVSrc_f32() const {
620 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
621 }
622
623 bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
624
625 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
626
627 bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
628
629 bool isVSrcT_bf16_Lo128() const {
630 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
631 }
632
633 bool isVSrcT_f16_Lo128() const {
634 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
635 }
636
637 bool isVSrcFake16_bf16_Lo128() const {
638 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
639 }
640
641 bool isVSrcFake16_f16_Lo128() const {
642 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
643 }
644
645 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
646
647 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
648
649 bool isVSrc_v2bf16() const {
650 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
651 }
652
653 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
654
655 bool isVISrcB32() const {
656 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
657 }
658
659 bool isVISrcB16() const {
660 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
661 }
662
663 bool isVISrcV2B16() const {
664 return isVISrcB16();
665 }
666
667 bool isVISrcF32() const {
668 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
669 }
670
671 bool isVISrcF16() const {
672 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
673 }
674
675 bool isVISrcV2F16() const {
676 return isVISrcF16() || isVISrcB32();
677 }
678
679 bool isVISrc_64_bf16() const {
680 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
681 }
682
683 bool isVISrc_64_f16() const {
684 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
685 }
686
687 bool isVISrc_64_b32() const {
688 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
689 }
690
691 bool isVISrc_64B64() const {
692 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
693 }
694
695 bool isVISrc_64_f64() const {
696 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
697 }
698
699 bool isVISrc_64V2FP32() const {
700 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
701 }
702
703 bool isVISrc_64V2INT32() const {
704 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
705 }
706
707 bool isVISrc_256_b32() const {
708 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
709 }
710
711 bool isVISrc_256_f32() const {
712 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
713 }
714
715 bool isVISrc_256B64() const {
716 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
717 }
718
719 bool isVISrc_256_f64() const {
720 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
721 }
722
723 bool isVISrc_128B16() const {
724 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
725 }
726
727 bool isVISrc_128V2B16() const {
728 return isVISrc_128B16();
729 }
730
731 bool isVISrc_128_b32() const {
732 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
733 }
734
735 bool isVISrc_128_f32() const {
736 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
737 }
738
739 bool isVISrc_256V2FP32() const {
740 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
741 }
742
743 bool isVISrc_256V2INT32() const {
744 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
745 }
746
747 bool isVISrc_512_b32() const {
748 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
749 }
750
751 bool isVISrc_512B16() const {
752 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
753 }
754
755 bool isVISrc_512V2B16() const {
756 return isVISrc_512B16();
757 }
758
759 bool isVISrc_512_f32() const {
760 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
761 }
762
763 bool isVISrc_512F16() const {
764 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
765 }
766
767 bool isVISrc_512V2F16() const {
768 return isVISrc_512F16() || isVISrc_512_b32();
769 }
770
771 bool isVISrc_1024_b32() const {
772 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
773 }
774
775 bool isVISrc_1024B16() const {
776 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
777 }
778
779 bool isVISrc_1024V2B16() const {
780 return isVISrc_1024B16();
781 }
782
783 bool isVISrc_1024_f32() const {
784 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
785 }
786
787 bool isVISrc_1024F16() const {
788 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
789 }
790
791 bool isVISrc_1024V2F16() const {
792 return isVISrc_1024F16() || isVISrc_1024_b32();
793 }
794
795 bool isAISrcB32() const {
796 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
797 }
798
799 bool isAISrcB16() const {
800 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
801 }
802
803 bool isAISrcV2B16() const {
804 return isAISrcB16();
805 }
806
807 bool isAISrcF32() const {
808 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
809 }
810
811 bool isAISrcF16() const {
812 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
813 }
814
815 bool isAISrcV2F16() const {
816 return isAISrcF16() || isAISrcB32();
817 }
818
819 bool isAISrc_64B64() const {
820 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
821 }
822
823 bool isAISrc_64_f64() const {
824 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
825 }
826
827 bool isAISrc_128_b32() const {
828 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
829 }
830
831 bool isAISrc_128B16() const {
832 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
833 }
834
835 bool isAISrc_128V2B16() const {
836 return isAISrc_128B16();
837 }
838
839 bool isAISrc_128_f32() const {
840 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
841 }
842
843 bool isAISrc_128F16() const {
844 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
845 }
846
847 bool isAISrc_128V2F16() const {
848 return isAISrc_128F16() || isAISrc_128_b32();
849 }
850
851 bool isVISrc_128_bf16() const {
852 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
853 }
854
855 bool isVISrc_128_f16() const {
856 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
857 }
858
859 bool isVISrc_128V2F16() const {
860 return isVISrc_128_f16() || isVISrc_128_b32();
861 }
862
863 bool isAISrc_256B64() const {
864 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
865 }
866
867 bool isAISrc_256_f64() const {
868 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
869 }
870
871 bool isAISrc_512_b32() const {
872 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
873 }
874
875 bool isAISrc_512B16() const {
876 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
877 }
878
879 bool isAISrc_512V2B16() const {
880 return isAISrc_512B16();
881 }
882
883 bool isAISrc_512_f32() const {
884 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
885 }
886
887 bool isAISrc_512F16() const {
888 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
889 }
890
891 bool isAISrc_512V2F16() const {
892 return isAISrc_512F16() || isAISrc_512_b32();
893 }
894
895 bool isAISrc_1024_b32() const {
896 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
897 }
898
899 bool isAISrc_1024B16() const {
900 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
901 }
902
903 bool isAISrc_1024V2B16() const {
904 return isAISrc_1024B16();
905 }
906
907 bool isAISrc_1024_f32() const {
908 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
909 }
910
911 bool isAISrc_1024F16() const {
912 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
913 }
914
915 bool isAISrc_1024V2F16() const {
916 return isAISrc_1024F16() || isAISrc_1024_b32();
917 }
918
919 bool isKImmFP32() const {
920 return isLiteralImm(MVT::f32);
921 }
922
923 bool isKImmFP16() const {
924 return isLiteralImm(MVT::f16);
925 }
926
927 bool isMem() const override {
928 return false;
929 }
930
931 bool isExpr() const {
932 return Kind == Expression;
933 }
934
935 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
936
937 bool isSWaitCnt() const;
938 bool isDepCtr() const;
939 bool isSDelayALU() const;
940 bool isHwreg() const;
941 bool isSendMsg() const;
942 bool isSplitBarrier() const;
943 bool isSwizzle() const;
944 bool isSMRDOffset8() const;
945 bool isSMEMOffset() const;
946 bool isSMRDLiteralOffset() const;
947 bool isDPP8() const;
948 bool isDPPCtrl() const;
949 bool isBLGP() const;
950 bool isCBSZ() const;
951 bool isABID() const;
952 bool isGPRIdxMode() const;
953 bool isS16Imm() const;
954 bool isU16Imm() const;
955 bool isEndpgm() const;
956 bool isWaitVDST() const;
957 bool isWaitEXP() const;
958 bool isWaitVAVDst() const;
959 bool isWaitVMVSrc() const;
960
961 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
962 return std::bind(P, *this);
963 }
964
965 StringRef getToken() const {
966 assert(isToken());
967 return StringRef(Tok.Data, Tok.Length);
968 }
969
970 int64_t getImm() const {
971 assert(isImm());
972 return Imm.Val;
973 }
974
975 void setImm(int64_t Val) {
976 assert(isImm());
977 Imm.Val = Val;
978 }
979
980 ImmTy getImmTy() const {
981 assert(isImm());
982 return Imm.Type;
983 }
984
985 MCRegister getReg() const override {
986 assert(isRegKind());
987 return Reg.RegNo;
988 }
989
990 SMLoc getStartLoc() const override {
991 return StartLoc;
992 }
993
994 SMLoc getEndLoc() const override {
995 return EndLoc;
996 }
997
998 SMRange getLocRange() const {
999 return SMRange(StartLoc, EndLoc);
1000 }
1001
1002 Modifiers getModifiers() const {
1003 assert(isRegKind() || isImmTy(ImmTyNone));
1004 return isRegKind() ? Reg.Mods : Imm.Mods;
1005 }
1006
1007 void setModifiers(Modifiers Mods) {
1008 assert(isRegKind() || isImmTy(ImmTyNone));
1009 if (isRegKind())
1010 Reg.Mods = Mods;
1011 else
1012 Imm.Mods = Mods;
1013 }
1014
1015 bool hasModifiers() const {
1016 return getModifiers().hasModifiers();
1017 }
1018
1019 bool hasFPModifiers() const {
1020 return getModifiers().hasFPModifiers();
1021 }
1022
1023 bool hasIntModifiers() const {
1024 return getModifiers().hasIntModifiers();
1025 }
1026
1027 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1028
1029 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1030
1031 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1032
1033 void addRegOperands(MCInst &Inst, unsigned N) const;
1034
1035 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1036 if (isRegKind())
1037 addRegOperands(Inst, N);
1038 else
1039 addImmOperands(Inst, N);
1040 }
1041
1042 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1043 Modifiers Mods = getModifiers();
1044 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1045 if (isRegKind()) {
1046 addRegOperands(Inst, N);
1047 } else {
1048 addImmOperands(Inst, N, false);
1049 }
1050 }
1051
1052 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1053 assert(!hasIntModifiers());
1054 addRegOrImmWithInputModsOperands(Inst, N);
1055 }
1056
1057 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1058 assert(!hasFPModifiers());
1059 addRegOrImmWithInputModsOperands(Inst, N);
1060 }
1061
1062 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1063 Modifiers Mods = getModifiers();
1064 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1065 assert(isRegKind());
1066 addRegOperands(Inst, N);
1067 }
1068
1069 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1070 assert(!hasIntModifiers());
1071 addRegWithInputModsOperands(Inst, N);
1072 }
1073
1074 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1075 assert(!hasFPModifiers());
1076 addRegWithInputModsOperands(Inst, N);
1077 }
1078
1079 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1080 // clang-format off
1081 switch (Type) {
1082 case ImmTyNone: OS << "None"; break;
1083 case ImmTyGDS: OS << "GDS"; break;
1084 case ImmTyLDS: OS << "LDS"; break;
1085 case ImmTyOffen: OS << "Offen"; break;
1086 case ImmTyIdxen: OS << "Idxen"; break;
1087 case ImmTyAddr64: OS << "Addr64"; break;
1088 case ImmTyOffset: OS << "Offset"; break;
1089 case ImmTyInstOffset: OS << "InstOffset"; break;
1090 case ImmTyOffset0: OS << "Offset0"; break;
1091 case ImmTyOffset1: OS << "Offset1"; break;
1092 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1093 case ImmTyCPol: OS << "CPol"; break;
1094 case ImmTyIndexKey8bit: OS << "index_key"; break;
1095 case ImmTyIndexKey16bit: OS << "index_key"; break;
1096 case ImmTyTFE: OS << "TFE"; break;
1097 case ImmTyD16: OS << "D16"; break;
1098 case ImmTyFORMAT: OS << "FORMAT"; break;
1099 case ImmTyClampSI: OS << "ClampSI"; break;
1100 case ImmTyOModSI: OS << "OModSI"; break;
1101 case ImmTyDPP8: OS << "DPP8"; break;
1102 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1103 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1104 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1105 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1106 case ImmTyDppFI: OS << "DppFI"; break;
1107 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1108 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1109 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1110 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1111 case ImmTyDMask: OS << "DMask"; break;
1112 case ImmTyDim: OS << "Dim"; break;
1113 case ImmTyUNorm: OS << "UNorm"; break;
1114 case ImmTyDA: OS << "DA"; break;
1115 case ImmTyR128A16: OS << "R128A16"; break;
1116 case ImmTyA16: OS << "A16"; break;
1117 case ImmTyLWE: OS << "LWE"; break;
1118 case ImmTyOff: OS << "Off"; break;
1119 case ImmTyExpTgt: OS << "ExpTgt"; break;
1120 case ImmTyExpCompr: OS << "ExpCompr"; break;
1121 case ImmTyExpVM: OS << "ExpVM"; break;
1122 case ImmTyHwreg: OS << "Hwreg"; break;
1123 case ImmTySendMsg: OS << "SendMsg"; break;
1124 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1125 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1126 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1127 case ImmTyOpSel: OS << "OpSel"; break;
1128 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1129 case ImmTyNegLo: OS << "NegLo"; break;
1130 case ImmTyNegHi: OS << "NegHi"; break;
1131 case ImmTySwizzle: OS << "Swizzle"; break;
1132 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1133 case ImmTyHigh: OS << "High"; break;
1134 case ImmTyBLGP: OS << "BLGP"; break;
1135 case ImmTyCBSZ: OS << "CBSZ"; break;
1136 case ImmTyABID: OS << "ABID"; break;
1137 case ImmTyEndpgm: OS << "Endpgm"; break;
1138 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1139 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1140 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1141 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1142 }
1143 // clang-format on
1144 }
1145
1146 void print(raw_ostream &OS) const override {
1147 switch (Kind) {
1148 case Register:
1149 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1150 break;
1151 case Immediate:
1152 OS << '<' << getImm();
1153 if (getImmTy() != ImmTyNone) {
1154 OS << " type: "; printImmTy(OS, getImmTy());
1155 }
1156 OS << " mods: " << Imm.Mods << '>';
1157 break;
1158 case Token:
1159 OS << '\'' << getToken() << '\'';
1160 break;
1161 case Expression:
1162 OS << "<expr " << *Expr << '>';
1163 break;
1164 }
1165 }
1166
1167 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1168 int64_t Val, SMLoc Loc,
1169 ImmTy Type = ImmTyNone,
1170 bool IsFPImm = false) {
1171 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1172 Op->Imm.Val = Val;
1173 Op->Imm.IsFPImm = IsFPImm;
1174 Op->Imm.Kind = ImmKindTyNone;
1175 Op->Imm.Type = Type;
1176 Op->Imm.Mods = Modifiers();
1177 Op->StartLoc = Loc;
1178 Op->EndLoc = Loc;
1179 return Op;
1180 }
1181
1182 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1183 StringRef Str, SMLoc Loc,
1184 bool HasExplicitEncodingSize = true) {
1185 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1186 Res->Tok.Data = Str.data();
1187 Res->Tok.Length = Str.size();
1188 Res->StartLoc = Loc;
1189 Res->EndLoc = Loc;
1190 return Res;
1191 }
1192
1193 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1194 unsigned RegNo, SMLoc S,
1195 SMLoc E) {
1196 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1197 Op->Reg.RegNo = RegNo;
1198 Op->Reg.Mods = Modifiers();
1199 Op->StartLoc = S;
1200 Op->EndLoc = E;
1201 return Op;
1202 }
1203
1204 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1205 const class MCExpr *Expr, SMLoc S) {
1206 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1207 Op->Expr = Expr;
1208 Op->StartLoc = S;
1209 Op->EndLoc = S;
1210 return Op;
1211 }
1212};
1213
1214raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1215 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1216 return OS;
1217}
1218
1219//===----------------------------------------------------------------------===//
1220// AsmParser
1221//===----------------------------------------------------------------------===//
1222
1223// Holds info related to the current kernel, e.g. count of SGPRs used.
1224// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1225// .amdgpu_hsa_kernel or at EOF.
1226class KernelScopeInfo {
1227 int SgprIndexUnusedMin = -1;
1228 int VgprIndexUnusedMin = -1;
1229 int AgprIndexUnusedMin = -1;
1230 MCContext *Ctx = nullptr;
1231 MCSubtargetInfo const *MSTI = nullptr;
1232
1233 void usesSgprAt(int i) {
1234 if (i >= SgprIndexUnusedMin) {
1235 SgprIndexUnusedMin = ++i;
1236 if (Ctx) {
1237 MCSymbol* const Sym =
1238 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1239 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1240 }
1241 }
1242 }
1243
1244 void usesVgprAt(int i) {
1245 if (i >= VgprIndexUnusedMin) {
1246 VgprIndexUnusedMin = ++i;
1247 if (Ctx) {
1248 MCSymbol* const Sym =
1249 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1250 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1251 VgprIndexUnusedMin);
1252 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1253 }
1254 }
1255 }
1256
1257 void usesAgprAt(int i) {
1258 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1259 if (!hasMAIInsts(*MSTI))
1260 return;
1261
1262 if (i >= AgprIndexUnusedMin) {
1263 AgprIndexUnusedMin = ++i;
1264 if (Ctx) {
1265 MCSymbol* const Sym =
1266 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1267 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1268
1269 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1270 MCSymbol* const vSym =
1271 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1272 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1273 VgprIndexUnusedMin);
1274 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1275 }
1276 }
1277 }
1278
1279public:
1280 KernelScopeInfo() = default;
1281
1282 void initialize(MCContext &Context) {
1283 Ctx = &Context;
1284 MSTI = Ctx->getSubtargetInfo();
1285
1286 usesSgprAt(SgprIndexUnusedMin = -1);
1287 usesVgprAt(VgprIndexUnusedMin = -1);
1288 if (hasMAIInsts(*MSTI)) {
1289 usesAgprAt(AgprIndexUnusedMin = -1);
1290 }
1291 }
1292
1293 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1294 unsigned RegWidth) {
1295 switch (RegKind) {
1296 case IS_SGPR:
1297 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1298 break;
1299 case IS_AGPR:
1300 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1301 break;
1302 case IS_VGPR:
1303 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1304 break;
1305 default:
1306 break;
1307 }
1308 }
1309};
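// Editorial example of the bookkeeping above (hypothetical input): parsing
//   v_mov_b32 v7, s5
// inside a kernel scope calls usesRegister(IS_VGPR, 7, 32) and
// usesRegister(IS_SGPR, 5, 32), so .kernel.vgpr_count and .kernel.sgpr_count
// evaluate to 8 and 6 (assuming no AGPR use and a non-gfx90a target).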
1310
1311class AMDGPUAsmParser : public MCTargetAsmParser {
1312 MCAsmParser &Parser;
1313
1314 unsigned ForcedEncodingSize = 0;
1315 bool ForcedDPP = false;
1316 bool ForcedSDWA = false;
1317 KernelScopeInfo KernelScope;
1318
1319 /// @name Auto-generated Match Functions
1320 /// {
1321
1322#define GET_ASSEMBLER_HEADER
1323#include "AMDGPUGenAsmMatcher.inc"
1324
1325 /// }
1326
1327private:
1328 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1329 bool OutOfRangeError(SMRange Range);
1330 /// Calculate VGPR/SGPR blocks required for given target, reserved
1331 /// registers, and user-specified NextFreeXGPR values.
1332 ///
1333 /// \param Features [in] Target features, used for bug corrections.
1334 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1335 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1336 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1337 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1338 /// descriptor field, if valid.
1339 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1340 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1341 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1342 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1343 /// \param VGPRBlocks [out] Result VGPR block count.
1344 /// \param SGPRBlocks [out] Result SGPR block count.
1345 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1346 bool FlatScrUsed, bool XNACKUsed,
1347 std::optional<bool> EnableWavefrontSize32,
1348 unsigned NextFreeVGPR, SMRange VGPRRange,
1349 unsigned NextFreeSGPR, SMRange SGPRRange,
1350 unsigned &VGPRBlocks, unsigned &SGPRBlocks);
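// Editorial sketch of the granule arithmetic involved (the granule sizes
// depend on the target): with a 4-register VGPR allocation granule,
// NextFreeVGPR = 7 is encoded as VGPRBlocks = ceil(7/4) - 1 = 1; with the
// 8-register SGPR granule, NextFreeSGPR = 10 becomes
// SGPRBlocks = ceil(10/8) - 1 = 1.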
1351 bool ParseDirectiveAMDGCNTarget();
1352 bool ParseDirectiveAMDHSACodeObjectVersion();
1353 bool ParseDirectiveAMDHSAKernel();
1354 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1355 bool ParseDirectiveAMDKernelCodeT();
1356 // TODO: Possibly make subtargetHasRegister const.
1357 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1358 bool ParseDirectiveAMDGPUHsaKernel();
1359
1360 bool ParseDirectiveISAVersion();
1361 bool ParseDirectiveHSAMetadata();
1362 bool ParseDirectivePALMetadataBegin();
1363 bool ParseDirectivePALMetadata();
1364 bool ParseDirectiveAMDGPULDS();
1365
1366 /// Common code to parse out a block of text (typically YAML) between start and
1367 /// end directives.
1368 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1369 const char *AssemblerDirectiveEnd,
1370 std::string &CollectString);
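// Editorial illustration: this is the helper used to collect blocks such as
//   .amdgpu_metadata
//     ...YAML document...
//   .end_amdgpu_metadata
// into CollectString before the enclosed text is parsed.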
1371
1372 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1373 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1374 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1375 unsigned &RegNum, unsigned &RegWidth,
1376 bool RestoreOnFailure = false);
1377 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1378 unsigned &RegNum, unsigned &RegWidth,
1379 SmallVectorImpl<AsmToken> &Tokens);
1380 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1381 unsigned &RegWidth,
1382 SmallVectorImpl<AsmToken> &Tokens);
1383 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1384 unsigned &RegWidth,
1385 SmallVectorImpl<AsmToken> &Tokens);
1386 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1387 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1388 bool ParseRegRange(unsigned& Num, unsigned& Width);
1389 unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
1390 unsigned RegWidth, SMLoc Loc);
1391
1392 bool isRegister();
1393 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1394 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1395 void initializeGprCountSymbol(RegisterKind RegKind);
1396 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1397 unsigned RegWidth);
1398 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1399 bool IsAtomic);
1400
1401public:
1402 enum AMDGPUMatchResultTy {
1403 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1404 };
1405 enum OperandMode {
1406 OperandMode_Default,
1407 OperandMode_NSA,
1408 };
1409
1410 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1411
1412 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1413 const MCInstrInfo &MII,
1414 const MCTargetOptions &Options)
1415 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1417
1418 if (getFeatureBits().none()) {
1419 // Set default features.
1420 copySTI().ToggleFeature("southern-islands");
1421 }
1422
1423 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1424
1425 {
1426 // TODO: make those pre-defined variables read-only.
1427 // Currently there is no suitable machinery in the core llvm-mc for this.
1428 // MCSymbol::isRedefinable is intended for another purpose, and
1429 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1430 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1431 MCContext &Ctx = getContext();
1432 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1433 MCSymbol *Sym =
1434 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1435 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1436 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1437 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1438 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1439 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1440 } else {
1441 MCSymbol *Sym =
1442 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1443 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1444 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1445 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1446 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1447 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1448 }
1449 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1450 initializeGprCountSymbol(IS_VGPR);
1451 initializeGprCountSymbol(IS_SGPR);
1452 } else
1453 KernelScope.initialize(getContext());
1454 }
1455 }
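// Editorial note: the symbols defined in the constructor above are ordinary
// absolute symbols, so assembly source can test them, e.g. (illustrative)
//   .if .amdgcn.gfx_generation_number >= 10
//     ; gfx10+ only code
//   .endif
// under the HSA ABI, or .option.machine_version_major otherwise.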
1456
1457 bool hasMIMG_R128() const {
1458 return AMDGPU::hasMIMG_R128(getSTI());
1459 }
1460
1461 bool hasPackedD16() const {
1462 return AMDGPU::hasPackedD16(getSTI());
1463 }
1464
1465 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1466
1467 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1468
1469 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1470
1471 bool isSI() const {
1472 return AMDGPU::isSI(getSTI());
1473 }
1474
1475 bool isCI() const {
1476 return AMDGPU::isCI(getSTI());
1477 }
1478
1479 bool isVI() const {
1480 return AMDGPU::isVI(getSTI());
1481 }
1482
1483 bool isGFX9() const {
1484 return AMDGPU::isGFX9(getSTI());
1485 }
1486
1487 // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1488 bool isGFX90A() const {
1489 return AMDGPU::isGFX90A(getSTI());
1490 }
1491
1492 bool isGFX940() const {
1493 return AMDGPU::isGFX940(getSTI());
1494 }
1495
1496 bool isGFX9Plus() const {
1497 return AMDGPU::isGFX9Plus(getSTI());
1498 }
1499
1500 bool isGFX10() const {
1501 return AMDGPU::isGFX10(getSTI());
1502 }
1503
1504 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1505
1506 bool isGFX11() const {
1507 return AMDGPU::isGFX11(getSTI());
1508 }
1509
1510 bool isGFX11Plus() const {
1511 return AMDGPU::isGFX11Plus(getSTI());
1512 }
1513
1514 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1515
1516 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1517
1518 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1519
1520 bool isGFX10_BEncoding() const {
1521 return AMDGPU::isGFX10_BEncoding(getSTI());
1522 }
1523
1524 bool hasInv2PiInlineImm() const {
1525 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1526 }
1527
1528 bool hasFlatOffsets() const {
1529 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1530 }
1531
1532 bool hasArchitectedFlatScratch() const {
1533 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1534 }
1535
1536 bool hasSGPR102_SGPR103() const {
1537 return !isVI() && !isGFX9();
1538 }
1539
1540 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1541
1542 bool hasIntClamp() const {
1543 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1544 }
1545
1546 bool hasPartialNSAEncoding() const {
1547 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1548 }
1549
1550 unsigned getNSAMaxSize(bool HasSampler = false) const {
1551 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1552 }
1553
1554 unsigned getMaxNumUserSGPRs() const {
1555 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1556 }
1557
1558 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1559
1560 AMDGPUTargetStreamer &getTargetStreamer() {
1561 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1562 return static_cast<AMDGPUTargetStreamer &>(TS);
1563 }
1564
1565 const MCRegisterInfo *getMRI() const {
1566 // We need this const_cast because for some reason getContext() is not const
1567 // in MCAsmParser.
1568 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1569 }
1570
1571 const MCInstrInfo *getMII() const {
1572 return &MII;
1573 }
1574
1575 const FeatureBitset &getFeatureBits() const {
1576 return getSTI().getFeatureBits();
1577 }
1578
1579 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1580 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1581 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1582
1583 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1584 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1585 bool isForcedDPP() const { return ForcedDPP; }
1586 bool isForcedSDWA() const { return ForcedSDWA; }
1587 ArrayRef<unsigned> getMatchedVariants() const;
1588 StringRef getMatchedVariantName() const;
1589
1590 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1591 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1592 bool RestoreOnFailure);
1593 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1594 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1595 SMLoc &EndLoc) override;
1596 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1597 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1598 unsigned Kind) override;
1599 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1600 OperandVector &Operands, MCStreamer &Out,
1601 uint64_t &ErrorInfo,
1602 bool MatchingInlineAsm) override;
1603 bool ParseDirective(AsmToken DirectiveID) override;
1604 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1605 OperandMode Mode = OperandMode_Default);
1606 StringRef parseMnemonicSuffix(StringRef Name);
1607 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1608 SMLoc NameLoc, OperandVector &Operands) override;
1609 //bool ProcessInstruction(MCInst &Inst);
1610
1612
1613 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1614
1615 ParseStatus
1616 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1617 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1618 std::function<bool(int64_t &)> ConvertResult = nullptr);
1619
1620 ParseStatus parseOperandArrayWithPrefix(
1621 const char *Prefix, OperandVector &Operands,
1622 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1623 bool (*ConvertResult)(int64_t &) = nullptr);
1624
1625 ParseStatus
1626 parseNamedBit(StringRef Name, OperandVector &Operands,
1627 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1628 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1629 ParseStatus parseCPol(OperandVector &Operands);
1630 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1631 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1632 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1633 SMLoc &StringLoc);
1634
1635 bool isModifier();
1636 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1637 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1638 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1639 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1640 bool parseSP3NegModifier();
1641 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1642 bool HasLit = false);
1644 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1645 bool HasLit = false);
1646 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1647 bool AllowImm = true);
1648 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1649 bool AllowImm = true);
1650 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1651 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1652 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1653 ParseStatus tryParseIndexKey(OperandVector &Operands,
1654 AMDGPUOperand::ImmTy ImmTy);
1655 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1656 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1657
1658 ParseStatus parseDfmtNfmt(int64_t &Format);
1659 ParseStatus parseUfmt(int64_t &Format);
1660 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1661 int64_t &Format);
1662 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1663 int64_t &Format);
1664 ParseStatus parseFORMAT(OperandVector &Operands);
1665 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1666 ParseStatus parseNumericFormat(int64_t &Format);
1667 ParseStatus parseFlatOffset(OperandVector &Operands);
1668 ParseStatus parseR128A16(OperandVector &Operands);
1670 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1671 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1672
1673 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1674
1675 bool parseCnt(int64_t &IntVal);
1676 ParseStatus parseSWaitCnt(OperandVector &Operands);
1677
1678 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1679 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1680 ParseStatus parseDepCtr(OperandVector &Operands);
1681
1682 bool parseDelay(int64_t &Delay);
1683 ParseStatus parseSDelayALU(OperandVector &Operands);
1684
1685 ParseStatus parseHwreg(OperandVector &Operands);
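// Editorial syntax reminder: besides a raw immediate, parseHwreg accepts the
// symbolic form used by s_getreg_b32/s_setreg_b32, e.g. (illustrative)
//   s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
// where the fields are the register id, bit offset and bit width.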
1686
1687private:
1688 struct OperandInfoTy {
1689 SMLoc Loc;
1690 int64_t Val;
1691 bool IsSymbolic = false;
1692 bool IsDefined = false;
1693
1694 OperandInfoTy(int64_t Val) : Val(Val) {}
1695 };
1696
1697 struct StructuredOpField : OperandInfoTy {
1698 StringLiteral Id;
1699 StringLiteral Desc;
1700 unsigned Width;
1701 bool IsDefined = false;
1702
1703 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1704 int64_t Default)
1705 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1706 virtual ~StructuredOpField() = default;
1707
1708 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1709 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1710 return false;
1711 }
1712
1713 virtual bool validate(AMDGPUAsmParser &Parser) const {
1714 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1715 return Error(Parser, "not supported on this GPU");
1716 if (!isUIntN(Width, Val))
1717 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1718 return true;
1719 }
1720 };
1721
1722 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1723 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1724
1725 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1726 bool validateSendMsg(const OperandInfoTy &Msg,
1727 const OperandInfoTy &Op,
1728 const OperandInfoTy &Stream);
1729
1730 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1731 OperandInfoTy &Width);
1732
1733 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1734 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1735 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1736
1737 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1738 const OperandVector &Operands) const;
1739 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1740 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1741 SMLoc getLitLoc(const OperandVector &Operands,
1742 bool SearchMandatoryLiterals = false) const;
1743 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1744 SMLoc getConstLoc(const OperandVector &Operands) const;
1745 SMLoc getInstLoc(const OperandVector &Operands) const;
1746
1747 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1748 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1749 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1750 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1751 bool validateSOPLiteral(const MCInst &Inst) const;
1752 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1753 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1754 const OperandVector &Operands);
1755 bool validateIntClampSupported(const MCInst &Inst);
1756 bool validateMIMGAtomicDMask(const MCInst &Inst);
1757 bool validateMIMGGatherDMask(const MCInst &Inst);
1758 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1759 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1760 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1761 bool validateMIMGD16(const MCInst &Inst);
1762 bool validateMIMGMSAA(const MCInst &Inst);
1763 bool validateOpSel(const MCInst &Inst);
1764 bool validateNeg(const MCInst &Inst, int OpName);
1765 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1766 bool validateVccOperand(unsigned Reg) const;
1767 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1768 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1769 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1770 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1771 bool validateAGPRLdSt(const MCInst &Inst) const;
1772 bool validateVGPRAlign(const MCInst &Inst) const;
1773 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1774 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1775 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1776 bool validateDivScale(const MCInst &Inst);
1777 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1778 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1779 const SMLoc &IDLoc);
1780 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1781 const unsigned CPol);
1782 bool validateExeczVcczOperands(const OperandVector &Operands);
1783 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1784 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1785 unsigned getConstantBusLimit(unsigned Opcode) const;
1786 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1787 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1788 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1789
1790 bool isSupportedMnemo(StringRef Mnemo,
1791 const FeatureBitset &FBS);
1792 bool isSupportedMnemo(StringRef Mnemo,
1793 const FeatureBitset &FBS,
1794 ArrayRef<unsigned> Variants);
1795 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1796
1797 bool isId(const StringRef Id) const;
1798 bool isId(const AsmToken &Token, const StringRef Id) const;
1799 bool isToken(const AsmToken::TokenKind Kind) const;
1800 StringRef getId() const;
1801 bool trySkipId(const StringRef Id);
1802 bool trySkipId(const StringRef Pref, const StringRef Id);
1803 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1804 bool trySkipToken(const AsmToken::TokenKind Kind);
1805 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1806 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1807 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1808
1809 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1810 AsmToken::TokenKind getTokenKind() const;
1811 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1812 bool parseExpr(OperandVector &Operands);
1813 StringRef getTokenStr() const;
1814 AsmToken peekToken(bool ShouldSkipSpace = true);
1815 AsmToken getToken() const;
1816 SMLoc getLoc() const;
1817 void lex();
1818
1819public:
1820 void onBeginOfFile() override;
1821 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1822
1823 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1824
1825 ParseStatus parseExpTgt(OperandVector &Operands);
1826 ParseStatus parseSendMsg(OperandVector &Operands);
1827 ParseStatus parseInterpSlot(OperandVector &Operands);
1828 ParseStatus parseInterpAttr(OperandVector &Operands);
1829 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1830 ParseStatus parseBoolReg(OperandVector &Operands);
1831
1832 bool parseSwizzleOperand(int64_t &Op,
1833 const unsigned MinVal,
1834 const unsigned MaxVal,
1835 const StringRef ErrMsg,
1836 SMLoc &Loc);
1837 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1838 const unsigned MinVal,
1839 const unsigned MaxVal,
1840 const StringRef ErrMsg);
1841 ParseStatus parseSwizzle(OperandVector &Operands);
1842 bool parseSwizzleOffset(int64_t &Imm);
1843 bool parseSwizzleMacro(int64_t &Imm);
1844 bool parseSwizzleQuadPerm(int64_t &Imm);
1845 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1846 bool parseSwizzleBroadcast(int64_t &Imm);
1847 bool parseSwizzleSwap(int64_t &Imm);
1848 bool parseSwizzleReverse(int64_t &Imm);
1849
1850 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1851 int64_t parseGPRIdxMacro();
1852
1853 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1854 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1855
1856 ParseStatus parseOModSI(OperandVector &Operands);
1857
1858 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1859 OptionalImmIndexMap &OptionalIdx);
1860 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1861 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1862 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1863 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1864
1865 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1866 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1867 OptionalImmIndexMap &OptionalIdx);
1868 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1869 OptionalImmIndexMap &OptionalIdx);
1870
1871 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1872 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1873
1874 bool parseDimId(unsigned &Encoding);
1876 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1878 ParseStatus parseDPPCtrl(OperandVector &Operands);
1879 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1880 int64_t parseDPPCtrlSel(StringRef Ctrl);
1881 int64_t parseDPPCtrlPerm();
1882 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1883 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1884 cvtDPP(Inst, Operands, true);
1885 }
1886 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1887 bool IsDPP8 = false);
1888 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1889 cvtVOP3DPP(Inst, Operands, true);
1890 }
1891
1892 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1893 AMDGPUOperand::ImmTy Type);
1894 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1895 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1896 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1897 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1898 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1899 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1900 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1901 uint64_t BasicInstType,
1902 bool SkipDstVcc = false,
1903 bool SkipSrcVcc = false);
1904
1905 ParseStatus parseEndpgm(OperandVector &Operands);
1906
1908};
1909
1910} // end anonymous namespace
1911
1912 // May be called with an integer type of equivalent bitwidth.
1913static const fltSemantics *getFltSemantics(unsigned Size) {
1914 switch (Size) {
1915 case 4:
1916 return &APFloat::IEEEsingle();
1917 case 8:
1918 return &APFloat::IEEEdouble();
1919 case 2:
1920 return &APFloat::IEEEhalf();
1921 default:
1922 llvm_unreachable("unsupported fp type");
1923 }
1924}
1925 
1926 static const fltSemantics *getFltSemantics(MVT VT) {
1927 return getFltSemantics(VT.getSizeInBits() / 8);
1928}
1929 
1930 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1931 switch (OperandType) {
1932 // When a floating-point immediate is used as an operand of type i16, the
1933 // 32-bit representation of the constant truncated to the 16 LSBs should be used.
1953 return &APFloat::IEEEsingle();
1959 return &APFloat::IEEEdouble();
1968 return &APFloat::IEEEhalf();
1976 return &APFloat::BFloat();
1977 default:
1978 llvm_unreachable("unsupported fp type");
1979 }
1980}
1981
1982//===----------------------------------------------------------------------===//
1983// Operand
1984//===----------------------------------------------------------------------===//
1985
1986static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1987 bool Lost;
1988
1989 // Convert literal to single precision
1990 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1991 APFloat::rmNearestTiesToEven,
1992 &Lost);
1993 // We allow precision loss but not overflow or underflow
1994 if (Status != APFloat::opOK &&
1995 Lost &&
1996 ((Status & APFloat::opOverflow) != 0 ||
1997 (Status & APFloat::opUnderflow) != 0)) {
1998 return false;
1999 }
2000
2001 return true;
2002}
2003
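// For example, with Size == 32: -2147483648 (0xFFFFFFFF80000000 as int64)
// passes the signed check and 0xFFFFFFFF passes the unsigned check, while
// 0x180000000 fits neither and is not a safe truncation.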
2004static bool isSafeTruncation(int64_t Val, unsigned Size) {
2005 return isUIntN(Size, Val) || isIntN(Size, Val);
2006}
2007
2008static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2009 if (VT.getScalarType() == MVT::i16)
2010 return isInlinableLiteral32(Val, HasInv2Pi);
2011
2012 if (VT.getScalarType() == MVT::f16)
2013 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2014
2015 assert(VT.getScalarType() == MVT::bf16);
2016
2017 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2018}
2019
2020bool AMDGPUOperand::isInlinableImm(MVT type) const {
2021
2022 // This is a hack to enable named inline values like
2023 // shared_base with both 32-bit and 64-bit operands.
2024 // Note that these values are defined as
2025 // 32-bit operands only.
2026 if (isInlineValue()) {
2027 return true;
2028 }
2029
2030 if (!isImmTy(ImmTyNone)) {
2031 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2032 return false;
2033 }
2034 // TODO: We should avoid using host float here. It would be better to
2035 // check the float bit values which is what a few other places do.
2036 // We've had bot failures before due to weird NaN support on mips hosts.
2037
2038 APInt Literal(64, Imm.Val);
2039
2040 if (Imm.IsFPImm) { // We got fp literal token
2041 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2042 return AMDGPU::isInlinableLiteral64(Imm.Val,
2043 AsmParser->hasInv2PiInlineImm());
2044 }
2045
2046 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2047 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2048 return false;
2049
2050 if (type.getScalarSizeInBits() == 16) {
2051 bool Lost = false;
2052 switch (type.getScalarType().SimpleTy) {
2053 default:
2054 llvm_unreachable("unknown 16-bit type");
2055 case MVT::bf16:
2056 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2057 &Lost);
2058 break;
2059 case MVT::f16:
2060 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2061 &Lost);
2062 break;
2063 case MVT::i16:
2064 FPLiteral.convert(APFloatBase::IEEEsingle(),
2065 APFloat::rmNearestTiesToEven, &Lost);
2066 break;
2067 }
2068 // We need to use the 32-bit representation here because when a
2069 // floating-point inline constant is used as an i16 operand, its 32-bit
2070 // representation is what gets encoded. We need that 32-bit value to check
2071 // whether it is an FP inline constant.
2072 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2073 return isInlineableLiteralOp16(ImmVal, type,
2074 AsmParser->hasInv2PiInlineImm());
2075 }
2076
2077 // Check if single precision literal is inlinable
2078 return AMDGPU::isInlinableLiteral32(
2079 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2080 AsmParser->hasInv2PiInlineImm());
2081 }
2082
2083 // We got int literal token.
2084 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2085 return AMDGPU::isInlinableLiteral64(Imm.Val,
2086 AsmParser->hasInv2PiInlineImm());
2087 }
2088
2089 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2090 return false;
2091 }
2092
2093 if (type.getScalarSizeInBits() == 16) {
2094 return isInlineableLiteralOp16(
2095 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2096 type, AsmParser->hasInv2PiInlineImm());
2097 }
2098
2099 return AMDGPU::isInlinableLiteral32(
2100 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2101 AsmParser->hasInv2PiInlineImm());
2102}
2103
2104bool AMDGPUOperand::isLiteralImm(MVT type) const {
2105 // Check that this immediate can be added as literal
2106 if (!isImmTy(ImmTyNone)) {
2107 return false;
2108 }
2109
2110 if (!Imm.IsFPImm) {
2111 // We got int literal token.
2112
2113 if (type == MVT::f64 && hasFPModifiers()) {
2114 // FP modifiers cannot be applied to int literals while preserving the same
2115 // semantics for VOP1/2/C and VOP3, because of integer truncation. To avoid
2116 // ambiguity, these cases are disabled.
2117 return false;
2118 }
2119
2120 unsigned Size = type.getSizeInBits();
2121 if (Size == 64)
2122 Size = 32;
2123
2124 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2125 // types.
2126 return isSafeTruncation(Imm.Val, Size);
2127 }
2128
2129 // We got fp literal token
2130 if (type == MVT::f64) { // Expected 64-bit fp operand
2131 // We would set the low 32 bits of the literal to zeroes, but we accept such literals
2132 return true;
2133 }
2134
2135 if (type == MVT::i64) { // Expected 64-bit int operand
2136 // We don't allow fp literals in 64-bit integer instructions. It is
2137 // unclear how we should encode them.
2138 return false;
2139 }
2140
2141 // We allow fp literals with f16x2 operands assuming that the specified
2142 // literal goes into the lower half and the upper half is zero. We also
2143 // require that the literal may be losslessly converted to f16.
2144 //
2145 // For i16x2 operands, we assume that the specified literal is encoded as a
2146 // single-precision float. This is pretty odd, but it matches SP3 and what
2147 // happens in hardware.
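// For example, the fp literal 1.5 is accepted for a v2f16 operand since it
// converts exactly to the f16 value 0x3E00, while something like 1.0e-10
// underflows f16 and is rejected.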
2148 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2149 : (type == MVT::v2i16) ? MVT::f32
2150 : (type == MVT::v2f32) ? MVT::f32
2151 : type;
2152
2153 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2154 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2155}
2156
2157bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2158 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2159}
2160
2161bool AMDGPUOperand::isVRegWithInputMods() const {
2162 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2163 // GFX90A allows DPP on 64-bit operands.
2164 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2165 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2166}
2167
2168template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2169 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2170 : AMDGPU::VGPR_16_Lo128RegClassID);
2171}
2172
2173bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2174 if (AsmParser->isVI())
2175 return isVReg32();
2176 else if (AsmParser->isGFX9Plus())
2177 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2178 else
2179 return false;
2180}
2181
2182bool AMDGPUOperand::isSDWAFP16Operand() const {
2183 return isSDWAOperand(MVT::f16);
2184}
2185
2186bool AMDGPUOperand::isSDWAFP32Operand() const {
2187 return isSDWAOperand(MVT::f32);
2188}
2189
2190bool AMDGPUOperand::isSDWAInt16Operand() const {
2191 return isSDWAOperand(MVT::i16);
2192}
2193
2194bool AMDGPUOperand::isSDWAInt32Operand() const {
2195 return isSDWAOperand(MVT::i32);
2196}
2197
2198bool AMDGPUOperand::isBoolReg() const {
2199 auto FB = AsmParser->getFeatureBits();
2200 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2201 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2202}
2203
2204uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2205{
2206 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2207 assert(Size == 2 || Size == 4 || Size == 8);
2208
2209 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
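// For Size == 4 this is 0x80000000: "abs" clears the sign bit below and
// "neg" toggles it.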
2210
2211 if (Imm.Mods.Abs) {
2212 Val &= ~FpSignMask;
2213 }
2214 if (Imm.Mods.Neg) {
2215 Val ^= FpSignMask;
2216 }
2217
2218 return Val;
2219}
2220
2221void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2222 if (isExpr()) {
2224 return;
2225 }
2226
2227 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2228 Inst.getNumOperands())) {
2229 addLiteralImmOperand(Inst, Imm.Val,
2230 ApplyModifiers &
2231 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2232 } else {
2233 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2235 setImmKindNone();
2236 }
2237}
2238
2239void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2240 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2241 auto OpNum = Inst.getNumOperands();
2242 // Check that this operand accepts literals
2243 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2244
2245 if (ApplyModifiers) {
2246 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2247 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2248 Val = applyInputFPModifiers(Val, Size);
2249 }
2250
2251 APInt Literal(64, Val);
2252 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2253
2254 if (Imm.IsFPImm) { // We got fp literal token
2255 switch (OpTy) {
2261 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2262 AsmParser->hasInv2PiInlineImm())) {
2263 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2264 setImmKindConst();
2265 return;
2266 }
2267
2268 // Non-inlineable
2269 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2270 // For fp operands we check if low 32 bits are zeros
2271 if (Literal.getLoBits(32) != 0) {
2272 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2273 "Can't encode literal as exact 64-bit floating-point operand. "
2274 "Low 32-bits will be set to zero");
2275 Val &= 0xffffffff00000000u;
2276 }
2277
2279 setImmKindLiteral();
2280 return;
2281 }
2282
2283 // We don't allow fp literals in 64-bit integer instructions. It is
2284 // unclear how we should encode them. This case should be checked earlier
2285 // in predicate methods (isLiteralImm())
2286 llvm_unreachable("fp literal in 64-bit integer instruction.");
2287
2295 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2296 // This is 1/(2*pi), which would be truncated to bf16 with a loss of
2297 // precision. The constant represents the idiomatic fp32 value of
2298 // 1/(2*pi) = 0.15915494, since bf16 is in effect fp32 with the low 16 bits
2299 // cleared. Prevent the rounding below.
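// (The fp32 encoding of 1/(2*pi) is 0x3E22F983; clearing its low 16 bits
// gives the bf16 pattern 0x3E22, whereas a round-to-nearest conversion of
// the 64-bit constant would yield 0x3E23.)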
2300 Inst.addOperand(MCOperand::createImm(0x3e22));
2301 setImmKindLiteral();
2302 return;
2303 }
2304 [[fallthrough]];
2305
2333 bool lost;
2334 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2335 // Convert literal to single precision
2336 FPLiteral.convert(*getOpFltSemantics(OpTy),
2337 APFloat::rmNearestTiesToEven, &lost);
2338 // We allow precision loss but not overflow or underflow. This should be
2339 // checked earlier in isLiteralImm()
2340
2341 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2342 Inst.addOperand(MCOperand::createImm(ImmVal));
2343 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2344 setImmKindMandatoryLiteral();
2345 } else {
2346 setImmKindLiteral();
2347 }
2348 return;
2349 }
2350 default:
2351 llvm_unreachable("invalid operand size");
2352 }
2353
2354 return;
2355 }
2356
2357 // We got int literal token.
2358 // Only sign extend inline immediates.
2359 switch (OpTy) {
2375 if (isSafeTruncation(Val, 32) &&
2376 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2377 AsmParser->hasInv2PiInlineImm())) {
2379 setImmKindConst();
2380 return;
2381 }
2382
2383 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2384 setImmKindLiteral();
2385 return;
2386
2392 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2394 setImmKindConst();
2395 return;
2396 }
2397
2398 Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2399 : Lo_32(Val);
2400
2402 setImmKindLiteral();
2403 return;
2404
2408 if (isSafeTruncation(Val, 16) &&
2409 AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2410 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2411 setImmKindConst();
2412 return;
2413 }
2414
2415 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2416 setImmKindLiteral();
2417 return;
2418
2423 if (isSafeTruncation(Val, 16) &&
2424 AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2425 AsmParser->hasInv2PiInlineImm())) {
2427 setImmKindConst();
2428 return;
2429 }
2430
2431 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2432 setImmKindLiteral();
2433 return;
2434
2439 if (isSafeTruncation(Val, 16) &&
2440 AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2441 AsmParser->hasInv2PiInlineImm())) {
2443 setImmKindConst();
2444 return;
2445 }
2446
2447 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2448 setImmKindLiteral();
2449 return;
2450
2453 assert(isSafeTruncation(Val, 16));
2454 assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2456 return;
2457 }
2460 assert(isSafeTruncation(Val, 16));
2461 assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2462 AsmParser->hasInv2PiInlineImm()));
2463
2465 return;
2466 }
2467
2470 assert(isSafeTruncation(Val, 16));
2471 assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2472 AsmParser->hasInv2PiInlineImm()));
2473
2475 return;
2476 }
2477
2479 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2480 setImmKindMandatoryLiteral();
2481 return;
2483 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2484 setImmKindMandatoryLiteral();
2485 return;
2486 default:
2487 llvm_unreachable("invalid operand size");
2488 }
2489}
2490
2491void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2492 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2493}
2494
2495bool AMDGPUOperand::isInlineValue() const {
2496 return isRegKind() && ::isInlineValue(getReg());
2497}
2498
2499//===----------------------------------------------------------------------===//
2500// AsmParser
2501//===----------------------------------------------------------------------===//
2502
2503static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2504 if (Is == IS_VGPR) {
2505 switch (RegWidth) {
2506 default: return -1;
2507 case 32:
2508 return AMDGPU::VGPR_32RegClassID;
2509 case 64:
2510 return AMDGPU::VReg_64RegClassID;
2511 case 96:
2512 return AMDGPU::VReg_96RegClassID;
2513 case 128:
2514 return AMDGPU::VReg_128RegClassID;
2515 case 160:
2516 return AMDGPU::VReg_160RegClassID;
2517 case 192:
2518 return AMDGPU::VReg_192RegClassID;
2519 case 224:
2520 return AMDGPU::VReg_224RegClassID;
2521 case 256:
2522 return AMDGPU::VReg_256RegClassID;
2523 case 288:
2524 return AMDGPU::VReg_288RegClassID;
2525 case 320:
2526 return AMDGPU::VReg_320RegClassID;
2527 case 352:
2528 return AMDGPU::VReg_352RegClassID;
2529 case 384:
2530 return AMDGPU::VReg_384RegClassID;
2531 case 512:
2532 return AMDGPU::VReg_512RegClassID;
2533 case 1024:
2534 return AMDGPU::VReg_1024RegClassID;
2535 }
2536 } else if (Is == IS_TTMP) {
2537 switch (RegWidth) {
2538 default: return -1;
2539 case 32:
2540 return AMDGPU::TTMP_32RegClassID;
2541 case 64:
2542 return AMDGPU::TTMP_64RegClassID;
2543 case 128:
2544 return AMDGPU::TTMP_128RegClassID;
2545 case 256:
2546 return AMDGPU::TTMP_256RegClassID;
2547 case 512:
2548 return AMDGPU::TTMP_512RegClassID;
2549 }
2550 } else if (Is == IS_SGPR) {
2551 switch (RegWidth) {
2552 default: return -1;
2553 case 32:
2554 return AMDGPU::SGPR_32RegClassID;
2555 case 64:
2556 return AMDGPU::SGPR_64RegClassID;
2557 case 96:
2558 return AMDGPU::SGPR_96RegClassID;
2559 case 128:
2560 return AMDGPU::SGPR_128RegClassID;
2561 case 160:
2562 return AMDGPU::SGPR_160RegClassID;
2563 case 192:
2564 return AMDGPU::SGPR_192RegClassID;
2565 case 224:
2566 return AMDGPU::SGPR_224RegClassID;
2567 case 256:
2568 return AMDGPU::SGPR_256RegClassID;
2569 case 288:
2570 return AMDGPU::SGPR_288RegClassID;
2571 case 320:
2572 return AMDGPU::SGPR_320RegClassID;
2573 case 352:
2574 return AMDGPU::SGPR_352RegClassID;
2575 case 384:
2576 return AMDGPU::SGPR_384RegClassID;
2577 case 512:
2578 return AMDGPU::SGPR_512RegClassID;
2579 }
2580 } else if (Is == IS_AGPR) {
2581 switch (RegWidth) {
2582 default: return -1;
2583 case 32:
2584 return AMDGPU::AGPR_32RegClassID;
2585 case 64:
2586 return AMDGPU::AReg_64RegClassID;
2587 case 96:
2588 return AMDGPU::AReg_96RegClassID;
2589 case 128:
2590 return AMDGPU::AReg_128RegClassID;
2591 case 160:
2592 return AMDGPU::AReg_160RegClassID;
2593 case 192:
2594 return AMDGPU::AReg_192RegClassID;
2595 case 224:
2596 return AMDGPU::AReg_224RegClassID;
2597 case 256:
2598 return AMDGPU::AReg_256RegClassID;
2599 case 288:
2600 return AMDGPU::AReg_288RegClassID;
2601 case 320:
2602 return AMDGPU::AReg_320RegClassID;
2603 case 352:
2604 return AMDGPU::AReg_352RegClassID;
2605 case 384:
2606 return AMDGPU::AReg_384RegClassID;
2607 case 512:
2608 return AMDGPU::AReg_512RegClassID;
2609 case 1024:
2610 return AMDGPU::AReg_1024RegClassID;
2611 }
2612 }
2613 return -1;
2614}
2615
2618 .Case("exec", AMDGPU::EXEC)
2619 .Case("vcc", AMDGPU::VCC)
2620 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2621 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2622 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2623 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2624 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2625 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2626 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2627 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2628 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2629 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2630 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2631 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2632 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2633 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2634 .Case("m0", AMDGPU::M0)
2635 .Case("vccz", AMDGPU::SRC_VCCZ)
2636 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2637 .Case("execz", AMDGPU::SRC_EXECZ)
2638 .Case("src_execz", AMDGPU::SRC_EXECZ)
2639 .Case("scc", AMDGPU::SRC_SCC)
2640 .Case("src_scc", AMDGPU::SRC_SCC)
2641 .Case("tba", AMDGPU::TBA)
2642 .Case("tma", AMDGPU::TMA)
2643 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2644 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2645 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2646 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2647 .Case("vcc_lo", AMDGPU::VCC_LO)
2648 .Case("vcc_hi", AMDGPU::VCC_HI)
2649 .Case("exec_lo", AMDGPU::EXEC_LO)
2650 .Case("exec_hi", AMDGPU::EXEC_HI)
2651 .Case("tma_lo", AMDGPU::TMA_LO)
2652 .Case("tma_hi", AMDGPU::TMA_HI)
2653 .Case("tba_lo", AMDGPU::TBA_LO)
2654 .Case("tba_hi", AMDGPU::TBA_HI)
2655 .Case("pc", AMDGPU::PC_REG)
2656 .Case("null", AMDGPU::SGPR_NULL)
2657 .Default(AMDGPU::NoRegister);
2658}
2659
2660bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2661 SMLoc &EndLoc, bool RestoreOnFailure) {
2662 auto R = parseRegister();
2663 if (!R) return true;
2664 assert(R->isReg());
2665 RegNo = R->getReg();
2666 StartLoc = R->getStartLoc();
2667 EndLoc = R->getEndLoc();
2668 return false;
2669}
2670
2671bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2672 SMLoc &EndLoc) {
2673 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2674}
2675
2676ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2677 SMLoc &EndLoc) {
2678 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2679 bool PendingErrors = getParser().hasPendingError();
2680 getParser().clearPendingErrors();
2681 if (PendingErrors)
2682 return ParseStatus::Failure;
2683 if (Result)
2684 return ParseStatus::NoMatch;
2685 return ParseStatus::Success;
2686}
2687
2688bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2689 RegisterKind RegKind, unsigned Reg1,
2690 SMLoc Loc) {
2691 switch (RegKind) {
2692 case IS_SPECIAL:
2693 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2694 Reg = AMDGPU::EXEC;
2695 RegWidth = 64;
2696 return true;
2697 }
2698 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2699 Reg = AMDGPU::FLAT_SCR;
2700 RegWidth = 64;
2701 return true;
2702 }
2703 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2704 Reg = AMDGPU::XNACK_MASK;
2705 RegWidth = 64;
2706 return true;
2707 }
2708 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2709 Reg = AMDGPU::VCC;
2710 RegWidth = 64;
2711 return true;
2712 }
2713 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2714 Reg = AMDGPU::TBA;
2715 RegWidth = 64;
2716 return true;
2717 }
2718 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2719 Reg = AMDGPU::TMA;
2720 RegWidth = 64;
2721 return true;
2722 }
2723 Error(Loc, "register does not fit in the list");
2724 return false;
2725 case IS_VGPR:
2726 case IS_SGPR:
2727 case IS_AGPR:
2728 case IS_TTMP:
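// The list grows 32 bits at a time; e.g. after parsing [s0, s1] RegWidth is
// 64, so the next element must be s2.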
2729 if (Reg1 != Reg + RegWidth / 32) {
2730 Error(Loc, "registers in a list must have consecutive indices");
2731 return false;
2732 }
2733 RegWidth += 32;
2734 return true;
2735 default:
2736 llvm_unreachable("unexpected register kind");
2737 }
2738}
2739
2740 struct RegInfo {
2741 StringLiteral Name;
2742 RegisterKind Kind;
2743};
2744
2745static constexpr RegInfo RegularRegisters[] = {
2746 {{"v"}, IS_VGPR},
2747 {{"s"}, IS_SGPR},
2748 {{"ttmp"}, IS_TTMP},
2749 {{"acc"}, IS_AGPR},
2750 {{"a"}, IS_AGPR},
2751};
2752
2753static bool isRegularReg(RegisterKind Kind) {
2754 return Kind == IS_VGPR ||
2755 Kind == IS_SGPR ||
2756 Kind == IS_TTMP ||
2757 Kind == IS_AGPR;
2758}
2759
2760 static const RegInfo* getRegularRegInfo(StringRef Str) {
2761 for (const RegInfo &Reg : RegularRegisters)
2762 if (Str.starts_with(Reg.Name))
2763 return &Reg;
2764 return nullptr;
2765}
2766
2767static bool getRegNum(StringRef Str, unsigned& Num) {
2768 return !Str.getAsInteger(10, Num);
2769}
2770
2771bool
2772AMDGPUAsmParser::isRegister(const AsmToken &Token,
2773 const AsmToken &NextToken) const {
2774
2775 // A list of consecutive registers: [s0,s1,s2,s3]
2776 if (Token.is(AsmToken::LBrac))
2777 return true;
2778
2779 if (!Token.is(AsmToken::Identifier))
2780 return false;
2781
2782 // A single register like s0 or a range of registers like s[0:1]
2783
2784 StringRef Str = Token.getString();
2785 const RegInfo *Reg = getRegularRegInfo(Str);
2786 if (Reg) {
2787 StringRef RegName = Reg->Name;
2788 StringRef RegSuffix = Str.substr(RegName.size());
2789 if (!RegSuffix.empty()) {
2790 RegSuffix.consume_back(".l");
2791 RegSuffix.consume_back(".h");
2792 unsigned Num;
2793 // A single register with an index: rXX
2794 if (getRegNum(RegSuffix, Num))
2795 return true;
2796 } else {
2797 // A range of registers: r[XX:YY].
2798 if (NextToken.is(AsmToken::LBrac))
2799 return true;
2800 }
2801 }
2802
2803 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2804}
2805
2806bool
2807AMDGPUAsmParser::isRegister()
2808{
2809 return isRegister(getToken(), peekToken());
2810}
2811
2812unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2813 unsigned SubReg, unsigned RegWidth,
2814 SMLoc Loc) {
2815 assert(isRegularReg(RegKind));
2816
2817 unsigned AlignSize = 1;
2818 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2819 // SGPR and TTMP registers must be aligned.
2820 // Max required alignment is 4 dwords.
2821 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2822 }
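// For example, a 128-bit SGPR tuple must start at a multiple of 4, so s[4:7]
// is accepted below while s[2:5] is rejected.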
2823
2824 if (RegNum % AlignSize != 0) {
2825 Error(Loc, "invalid register alignment");
2826 return AMDGPU::NoRegister;
2827 }
2828
2829 unsigned RegIdx = RegNum / AlignSize;
2830 int RCID = getRegClass(RegKind, RegWidth);
2831 if (RCID == -1) {
2832 Error(Loc, "invalid or unsupported register size");
2833 return AMDGPU::NoRegister;
2834 }
2835
2836 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2837 const MCRegisterClass RC = TRI->getRegClass(RCID);
2838 if (RegIdx >= RC.getNumRegs()) {
2839 Error(Loc, "register index is out of range");
2840 return AMDGPU::NoRegister;
2841 }
2842
2843 unsigned Reg = RC.getRegister(RegIdx);
2844
2845 if (SubReg) {
2846 Reg = TRI->getSubReg(Reg, SubReg);
2847
2848 // Currently all regular registers have their .l and .h subregisters, so
2849 // we should never need to generate an error here.
2850 assert(Reg && "Invalid subregister!");
2851 }
2852
2853 return Reg;
2854}
2855
2856bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2857 int64_t RegLo, RegHi;
2858 if (!skipToken(AsmToken::LBrac, "missing register index"))
2859 return false;
2860
2861 SMLoc FirstIdxLoc = getLoc();
2862 SMLoc SecondIdxLoc;
2863
2864 if (!parseExpr(RegLo))
2865 return false;
2866
2867 if (trySkipToken(AsmToken::Colon)) {
2868 SecondIdxLoc = getLoc();
2869 if (!parseExpr(RegHi))
2870 return false;
2871 } else {
2872 RegHi = RegLo;
2873 }
2874
2875 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2876 return false;
2877
2878 if (!isUInt<32>(RegLo)) {
2879 Error(FirstIdxLoc, "invalid register index");
2880 return false;
2881 }
2882
2883 if (!isUInt<32>(RegHi)) {
2884 Error(SecondIdxLoc, "invalid register index");
2885 return false;
2886 }
2887
2888 if (RegLo > RegHi) {
2889 Error(FirstIdxLoc, "first register index should not exceed second index");
2890 return false;
2891 }
2892
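// For example, "[4:7]" yields Num = 4 and RegWidth = 128.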
2893 Num = static_cast<unsigned>(RegLo);
2894 RegWidth = 32 * ((RegHi - RegLo) + 1);
2895 return true;
2896}
2897
2898unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2899 unsigned &RegNum, unsigned &RegWidth,
2900 SmallVectorImpl<AsmToken> &Tokens) {
2901 assert(isToken(AsmToken::Identifier));
2902 unsigned Reg = getSpecialRegForName(getTokenStr());
2903 if (Reg) {
2904 RegNum = 0;
2905 RegWidth = 32;
2906 RegKind = IS_SPECIAL;
2907 Tokens.push_back(getToken());
2908 lex(); // skip register name
2909 }
2910 return Reg;
2911}
2912
2913unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2914 unsigned &RegNum, unsigned &RegWidth,
2915 SmallVectorImpl<AsmToken> &Tokens) {
2916 assert(isToken(AsmToken::Identifier));
2917 StringRef RegName = getTokenStr();
2918 auto Loc = getLoc();
2919
2920 const RegInfo *RI = getRegularRegInfo(RegName);
2921 if (!RI) {
2922 Error(Loc, "invalid register name");
2923 return AMDGPU::NoRegister;
2924 }
2925
2926 Tokens.push_back(getToken());
2927 lex(); // skip register name
2928
2929 RegKind = RI->Kind;
2930 StringRef RegSuffix = RegName.substr(RI->Name.size());
2931 unsigned SubReg = NoSubRegister;
2932 if (!RegSuffix.empty()) {
2933 // We don't know the opcode till we are done parsing, so we don't know if
2934 // registers should be 16 or 32 bit. It is therefore mandatory to put .l or
2935 // .h to correctly specify 16 bit registers. We also can't determine class
2936 // VGPR_16_Lo128 or VGPR_16, so always parse them as VGPR_16.
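// For example, "v1.l" selects the low 16-bit half (lo16) of v1 and "v1.h"
// selects the high half (hi16).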
2937 if (RegSuffix.consume_back(".l"))
2938 SubReg = AMDGPU::lo16;
2939 else if (RegSuffix.consume_back(".h"))
2940 SubReg = AMDGPU::hi16;
2941
2942 // Single 32-bit register: vXX.
2943 if (!getRegNum(RegSuffix, RegNum)) {
2944 Error(Loc, "invalid register index");
2945 return AMDGPU::NoRegister;
2946 }
2947 RegWidth = 32;
2948 } else {
2949 // Range of registers: v[XX:YY]. ":YY" is optional.
2950 if (!ParseRegRange(RegNum, RegWidth))
2951 return AMDGPU::NoRegister;
2952 }
2953
2954 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2955}
2956
2957unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2958 unsigned &RegWidth,
2959 SmallVectorImpl<AsmToken> &Tokens) {
2960 unsigned Reg = AMDGPU::NoRegister;
2961 auto ListLoc = getLoc();
2962
2963 if (!skipToken(AsmToken::LBrac,
2964 "expected a register or a list of registers")) {
2965 return AMDGPU::NoRegister;
2966 }
2967
2968 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2969
2970 auto Loc = getLoc();
2971 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2972 return AMDGPU::NoRegister;
2973 if (RegWidth != 32) {
2974 Error(Loc, "expected a single 32-bit register");
2975 return AMDGPU::NoRegister;
2976 }
2977
2978 for (; trySkipToken(AsmToken::Comma); ) {
2979 RegisterKind NextRegKind;
2980 unsigned NextReg, NextRegNum, NextRegWidth;
2981 Loc = getLoc();
2982
2983 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2984 NextRegNum, NextRegWidth,
2985 Tokens)) {
2986 return AMDGPU::NoRegister;
2987 }
2988 if (NextRegWidth != 32) {
2989 Error(Loc, "expected a single 32-bit register");
2990 return AMDGPU::NoRegister;
2991 }
2992 if (NextRegKind != RegKind) {
2993 Error(Loc, "registers in a list must be of the same kind");
2994 return AMDGPU::NoRegister;
2995 }
2996 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2997 return AMDGPU::NoRegister;
2998 }
2999
3000 if (!skipToken(AsmToken::RBrac,
3001 "expected a comma or a closing square bracket")) {
3002 return AMDGPU::NoRegister;
3003 }
3004
3005 if (isRegularReg(RegKind))
3006 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3007
3008 return Reg;
3009}
3010
3011bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3012 unsigned &RegNum, unsigned &RegWidth,
3013 SmallVectorImpl<AsmToken> &Tokens) {
3014 auto Loc = getLoc();
3015 Reg = AMDGPU::NoRegister;
3016
3017 if (isToken(AsmToken::Identifier)) {
3018 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3019 if (Reg == AMDGPU::NoRegister)
3020 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3021 } else {
3022 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3023 }
3024
3025 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3026 if (Reg == AMDGPU::NoRegister) {
3027 assert(Parser.hasPendingError());
3028 return false;
3029 }
3030
3031 if (!subtargetHasRegister(*TRI, Reg)) {
3032 if (Reg == AMDGPU::SGPR_NULL) {
3033 Error(Loc, "'null' operand is not supported on this GPU");
3034 } else {
3035 Error(Loc, "register not available on this GPU");
3036 }
3037 return false;
3038 }
3039
3040 return true;
3041}
3042
3043bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3044 unsigned &RegNum, unsigned &RegWidth,
3045 bool RestoreOnFailure /*=false*/) {
3046 Reg = AMDGPU::NoRegister;
3047 
3048 SmallVector<AsmToken, 1> Tokens;
3049 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3050 if (RestoreOnFailure) {
3051 while (!Tokens.empty()) {
3052 getLexer().UnLex(Tokens.pop_back_val());
3053 }
3054 }
3055 return true;
3056 }
3057 return false;
3058}
3059
3060std::optional<StringRef>
3061AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3062 switch (RegKind) {
3063 case IS_VGPR:
3064 return StringRef(".amdgcn.next_free_vgpr");
3065 case IS_SGPR:
3066 return StringRef(".amdgcn.next_free_sgpr");
3067 default:
3068 return std::nullopt;
3069 }
3070}
3071
3072void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3073 auto SymbolName = getGprCountSymbolName(RegKind);
3074 assert(SymbolName && "initializing invalid register kind");
3075 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3076 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3077}
3078
3079bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3080 unsigned DwordRegIndex,
3081 unsigned RegWidth) {
3082 // Symbols are only defined for GCN targets
3083 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3084 return true;
3085
3086 auto SymbolName = getGprCountSymbolName(RegKind);
3087 if (!SymbolName)
3088 return true;
3089 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3090
3091 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
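// For example, a reference to v[6:7] gives DwordRegIndex = 6 and
// RegWidth = 64, so NewMax = 7 and the symbol is raised to 8 below.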
3092 int64_t OldCount;
3093
3094 if (!Sym->isVariable())
3095 return !Error(getLoc(),
3096 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3097 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
3098 return !Error(
3099 getLoc(),
3100 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3101
3102 if (OldCount <= NewMax)
3103 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3104
3105 return true;
3106}
3107
3108std::unique_ptr<AMDGPUOperand>
3109AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3110 const auto &Tok = getToken();
3111 SMLoc StartLoc = Tok.getLoc();
3112 SMLoc EndLoc = Tok.getEndLoc();
3113 RegisterKind RegKind;
3114 unsigned Reg, RegNum, RegWidth;
3115
3116 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3117 return nullptr;
3118 }
3119 if (isHsaAbi(getSTI())) {
3120 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3121 return nullptr;
3122 } else
3123 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3124 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3125}
3126
3127ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3128 bool HasSP3AbsModifier, bool HasLit) {
3129 // TODO: add syntactic sugar for 1/(2*PI)
3130
3131 if (isRegister())
3132 return ParseStatus::NoMatch;
3133 assert(!isModifier());
3134
3135 if (!HasLit) {
3136 HasLit = trySkipId("lit");
3137 if (HasLit) {
3138 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3139 return ParseStatus::Failure;
3140 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
3141 if (S.isSuccess() &&
3142 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3143 return ParseStatus::Failure;
3144 return S;
3145 }
3146 }
3147
3148 const auto& Tok = getToken();
3149 const auto& NextTok = peekToken();
3150 bool IsReal = Tok.is(AsmToken::Real);
3151 SMLoc S = getLoc();
3152 bool Negate = false;
3153
3154 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3155 lex();
3156 IsReal = true;
3157 Negate = true;
3158 }
3159
3160 AMDGPUOperand::Modifiers Mods;
3161 Mods.Lit = HasLit;
3162
3163 if (IsReal) {
3164 // Floating-point expressions are not supported here.
3165 // Only floating-point literals with an
3166 // optional sign are accepted.
3167
3168 StringRef Num = getTokenStr();
3169 lex();
3170
3171 APFloat RealVal(APFloat::IEEEdouble());
3172 auto roundMode = APFloat::rmNearestTiesToEven;
3173 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3174 return ParseStatus::Failure;
3175 if (Negate)
3176 RealVal.changeSign();
3177
3178 Operands.push_back(
3179 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3180 AMDGPUOperand::ImmTyNone, true));
3181 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3182 Op.setModifiers(Mods);
3183
3184 return ParseStatus::Success;
3185
3186 } else {
3187 int64_t IntVal;
3188 const MCExpr *Expr;
3189 SMLoc S = getLoc();
3190
3191 if (HasSP3AbsModifier) {
3192 // This is a workaround for handling expressions
3193 // as arguments of SP3 'abs' modifier, for example:
3194 // |1.0|
3195 // |-1|
3196 // |1+x|
3197 // This syntax is not compatible with the syntax of standard
3198 // MC expressions (due to the trailing '|').
3199 SMLoc EndLoc;
3200 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3201 return ParseStatus::Failure;
3202 } else {
3203 if (Parser.parseExpression(Expr))
3204 return ParseStatus::Failure;
3205 }
3206
3207 if (Expr->evaluateAsAbsolute(IntVal)) {
3208 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3209 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3210 Op.setModifiers(Mods);
3211 } else {
3212 if (HasLit)
3213 return ParseStatus::NoMatch;
3214 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3215 }
3216
3217 return ParseStatus::Success;
3218 }
3219
3220 return ParseStatus::NoMatch;
3221}
3222
3223ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3224 if (!isRegister())
3225 return ParseStatus::NoMatch;
3226
3227 if (auto R = parseRegister()) {
3228 assert(R->isReg());
3229 Operands.push_back(std::move(R));
3230 return ParseStatus::Success;
3231 }
3232 return ParseStatus::Failure;
3233}
3234
3235ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3236 bool HasSP3AbsMod, bool HasLit) {
3237 ParseStatus Res = parseReg(Operands);
3238 if (!Res.isNoMatch())
3239 return Res;
3240 if (isModifier())
3241 return ParseStatus::NoMatch;
3242 return parseImm(Operands, HasSP3AbsMod, HasLit);
3243}
3244
3245bool
3246AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3247 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3248 const auto &str = Token.getString();
3249 return str == "abs" || str == "neg" || str == "sext";
3250 }
3251 return false;
3252}
3253
3254bool
3255AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3256 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3257}
3258
3259bool
3260AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3261 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3262}
3263
3264bool
3265AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3266 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3267}
3268
3269 // Check if this is an operand modifier or an opcode modifier
3270 // which may look like an expression but is not. We should
3271 // avoid parsing these modifiers as expressions. Currently
3272// recognized sequences are:
3273// |...|
3274// abs(...)
3275// neg(...)
3276// sext(...)
3277// -reg
3278// -|...|
3279// -abs(...)
3280// name:...
3281//
3282bool
3283AMDGPUAsmParser::isModifier() {
3284
3285 AsmToken Tok = getToken();
3286 AsmToken NextToken[2];
3287 peekTokens(NextToken);
3288
3289 return isOperandModifier(Tok, NextToken[0]) ||
3290 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3291 isOpcodeModifierWithVal(Tok, NextToken[0]);
3292}
3293
3294// Check if the current token is an SP3 'neg' modifier.
3295 // Currently this modifier is allowed in the following contexts:
3296//
3297// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3298// 2. Before an 'abs' modifier: -abs(...)
3299// 3. Before an SP3 'abs' modifier: -|...|
3300//
3301// In all other cases "-" is handled as a part
3302// of an expression that follows the sign.
3303//
3304 // Note: When "-" is followed by an integer literal N,
3305 // this is interpreted as integer negation rather
3306 // than a floating-point NEG modifier applied to N.
3307 // Besides being counter-intuitive, such use of the floating-point
3308 // NEG modifier would have resulted in different meanings
3309 // of integer literals used with VOP1/2/C and VOP3,
3310// for example:
3311// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3312// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3313// Negative fp literals with preceding "-" are
3314// handled likewise for uniformity
3315//
3316bool
3317AMDGPUAsmParser::parseSP3NegModifier() {
3318
3319 AsmToken NextToken[2];
3320 peekTokens(NextToken);
3321
3322 if (isToken(AsmToken::Minus) &&
3323 (isRegister(NextToken[0], NextToken[1]) ||
3324 NextToken[0].is(AsmToken::Pipe) ||
3325 isId(NextToken[0], "abs"))) {
3326 lex();
3327 return true;
3328 }
3329
3330 return false;
3331}
3332
3333 ParseStatus
3334 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3335 bool AllowImm) {
3336 bool Neg, SP3Neg;
3337 bool Abs, SP3Abs;
3338 bool Lit;
3339 SMLoc Loc;
3340
3341 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3342 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3343 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3344
3345 SP3Neg = parseSP3NegModifier();
3346
3347 Loc = getLoc();
3348 Neg = trySkipId("neg");
3349 if (Neg && SP3Neg)
3350 return Error(Loc, "expected register or immediate");
3351 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3352 return ParseStatus::Failure;
3353
3354 Abs = trySkipId("abs");
3355 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3356 return ParseStatus::Failure;
3357
3358 Lit = trySkipId("lit");
3359 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3360 return ParseStatus::Failure;
3361
3362 Loc = getLoc();
3363 SP3Abs = trySkipToken(AsmToken::Pipe);
3364 if (Abs && SP3Abs)
3365 return Error(Loc, "expected register or immediate");
3366
3367 ParseStatus Res;
3368 if (AllowImm) {
3369 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3370 } else {
3371 Res = parseReg(Operands);
3372 }
3373 if (!Res.isSuccess())
3374 return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3375
3376 if (Lit && !Operands.back()->isImm())
3377 Error(Loc, "expected immediate with lit modifier");
3378
3379 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3380 return ParseStatus::Failure;
3381 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3382 return ParseStatus::Failure;
3383 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3384 return ParseStatus::Failure;
3385 if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3386 return ParseStatus::Failure;
3387
3388 AMDGPUOperand::Modifiers Mods;
3389 Mods.Abs = Abs || SP3Abs;
3390 Mods.Neg = Neg || SP3Neg;
3391 Mods.Lit = Lit;
3392
3393 if (Mods.hasFPModifiers() || Lit) {
3394 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3395 if (Op.isExpr())
3396 return Error(Op.getStartLoc(), "expected an absolute expression");
3397 Op.setModifiers(Mods);
3398 }
3399 return ParseStatus::Success;
3400}
3401
3402 ParseStatus
3403 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3404 bool AllowImm) {
3405 bool Sext = trySkipId("sext");
3406 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3407 return ParseStatus::Failure;
3408
3409 ParseStatus Res;
3410 if (AllowImm) {
3411 Res = parseRegOrImm(Operands);
3412 } else {
3413 Res = parseReg(Operands);
3414 }
3415 if (!Res.isSuccess())
3416 return Sext ? ParseStatus::Failure : Res;
3417
3418 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3419 return ParseStatus::Failure;
3420
3421 AMDGPUOperand::Modifiers Mods;
3422 Mods.Sext = Sext;
3423
3424 if (Mods.hasIntModifiers()) {
3425 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3426 if (Op.isExpr())
3427 return Error(Op.getStartLoc(), "expected an absolute expression");
3428 Op.setModifiers(Mods);
3429 }
3430
3431 return ParseStatus::Success;
3432}
3433
3434ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3435 return parseRegOrImmWithFPInputMods(Operands, false);
3436}
3437
3438ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3439 return parseRegOrImmWithIntInputMods(Operands, false);
3440}
3441
3442ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3443 auto Loc = getLoc();
3444 if (trySkipId("off")) {
3445 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3446 AMDGPUOperand::ImmTyOff, false));
3447 return ParseStatus::Success;
3448 }
3449
3450 if (!isRegister())
3451 return ParseStatus::NoMatch;
3452
3453 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3454 if (Reg) {
3455 Operands.push_back(std::move(Reg));
3456 return ParseStatus::Success;
3457 }
3458
3459 return ParseStatus::Failure;
3460}
3461
3462unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3463 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3464
3465 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3466 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3467 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3468 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3469 return Match_InvalidOperand;
3470
3471 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3472 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3473 // v_mac_f32/16 allow only dst_sel == DWORD;
3474 auto OpNum =
3475 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3476 const auto &Op = Inst.getOperand(OpNum);
3477 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3478 return Match_InvalidOperand;
3479 }
3480 }
3481
3482 return Match_Success;
3483}
3484
3486 static const unsigned Variants[] = {
3490 };
3491
3492 return ArrayRef(Variants);
3493}
3494
3495// What asm variants we should check
3496ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3497 if (isForcedDPP() && isForcedVOP3()) {
3498 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3499 return ArrayRef(Variants);
3500 }
3501 if (getForcedEncodingSize() == 32) {
3502 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3503 return ArrayRef(Variants);
3504 }
3505
3506 if (isForcedVOP3()) {
3507 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3508 return ArrayRef(Variants);
3509 }
3510
3511 if (isForcedSDWA()) {
3512 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3513 AMDGPUAsmVariants::SDWA9};
3514 return ArrayRef(Variants);
3515 }
3516
3517 if (isForcedDPP()) {
3518 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3519 return ArrayRef(Variants);
3520 }
3521
3522 return getAllVariants();
3523}
3524
3525StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3526 if (isForcedDPP() && isForcedVOP3())
3527 return "e64_dpp";
3528
3529 if (getForcedEncodingSize() == 32)
3530 return "e32";
3531
3532 if (isForcedVOP3())
3533 return "e64";
3534
3535 if (isForcedSDWA())
3536 return "sdwa";
3537
3538 if (isForcedDPP())
3539 return "dpp";
3540
3541 return "";
3542}
3543
3544unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3545 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3546 for (MCPhysReg Reg : Desc.implicit_uses()) {
3547 switch (Reg) {
3548 case AMDGPU::FLAT_SCR:
3549 case AMDGPU::VCC:
3550 case AMDGPU::VCC_LO:
3551 case AMDGPU::VCC_HI:
3552 case AMDGPU::M0:
3553 return Reg;
3554 default:
3555 break;
3556 }
3557 }
3558 return AMDGPU::NoRegister;
3559}
3560
3561// NB: This code is correct only when used to check constant
3562 // bus limitations because GFX7 supports no f16 inline constants.
3563// Note that there are no cases when a GFX7 opcode violates
3564// constant bus limitations due to the use of an f16 constant.
3565bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3566 unsigned OpIdx) const {
3567 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3568
3569 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3570 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3571 return false;
3572 }
3573
3574 const MCOperand &MO = Inst.getOperand(OpIdx);
3575
3576 int64_t Val = MO.getImm();
3577 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3578
3579 switch (OpSize) { // expected operand size
3580 case 8:
3581 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3582 case 4:
3583 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3584 case 2: {
3585 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3589 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3590
3595
3600
3605
3610 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3611
3616 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3617
3618 llvm_unreachable("invalid operand type");
3619 }
3620 default:
3621 llvm_unreachable("invalid operand size");
3622 }
3623}
3624
3625unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3626 if (!isGFX10Plus())
3627 return 1;
3628
3629 switch (Opcode) {
3630 // 64-bit shift instructions can use only one scalar value input
3631 case AMDGPU::V_LSHLREV_B64_e64:
3632 case AMDGPU::V_LSHLREV_B64_gfx10:
3633 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3634 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3635 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3636 case AMDGPU::V_LSHRREV_B64_e64:
3637 case AMDGPU::V_LSHRREV_B64_gfx10:
3638 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3639 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3640 case AMDGPU::V_ASHRREV_I64_e64:
3641 case AMDGPU::V_ASHRREV_I64_gfx10:
3642 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3643 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3644 case AMDGPU::V_LSHL_B64_e64:
3645 case AMDGPU::V_LSHR_B64_e64:
3646 case AMDGPU::V_ASHR_I64_e64:
3647 return 1;
3648 default:
3649 return 2;
3650 }
3651}
3652
3653constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3654 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3655 
3656// Get regular operand indices in the same order as specified
3657// in the instruction (but append mandatory literals to the end).
3658 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3659 bool AddMandatoryLiterals = false) {
3660
3661 int16_t ImmIdx =
3662 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3663
3664 if (isVOPD(Opcode)) {
3665 int16_t ImmDeferredIdx =
3666 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3667 : -1;
3668
3669 return {getNamedOperandIdx(Opcode, OpName::src0X),
3670 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3671 getNamedOperandIdx(Opcode, OpName::src0Y),
3672 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3673 ImmDeferredIdx,
3674 ImmIdx};
3675 }
3676
3677 return {getNamedOperandIdx(Opcode, OpName::src0),
3678 getNamedOperandIdx(Opcode, OpName::src1),
3679 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3680}
3681
3682bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3683 const MCOperand &MO = Inst.getOperand(OpIdx);
3684 if (MO.isImm()) {
3685 return !isInlineConstant(Inst, OpIdx);
3686 } else if (MO.isReg()) {
3687 auto Reg = MO.getReg();
3688 if (!Reg) {
3689 return false;
3690 }
3691 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3692 auto PReg = mc2PseudoReg(Reg);
3693 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3694 } else {
3695 return true;
3696 }
3697}
3698
3699// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3700 // Writelane is special in that it can use an SGPR and M0 (which would normally
3701// count as using the constant bus twice - but in this case it is allowed since
3702// the lane selector doesn't count as a use of the constant bus). However, it is
3703// still required to abide by the 1 SGPR rule.
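// For example, "v_writelane_b32 v1, s2, m0" is accepted: with m0 as the lane
// select, the constant bus check below is skipped for this instruction.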
3704static bool checkWriteLane(const MCInst &Inst) {
3705 const unsigned Opcode = Inst.getOpcode();
3706 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3707 return false;
3708 const MCOperand &LaneSelOp = Inst.getOperand(2);
3709 if (!LaneSelOp.isReg())
3710 return false;
3711 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3712 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3713}
3714
3715bool AMDGPUAsmParser::validateConstantBusLimitations(
3716 const MCInst &Inst, const OperandVector &Operands) {
3717 const unsigned Opcode = Inst.getOpcode();
3718 const MCInstrDesc &Desc = MII.get(Opcode);
3719 unsigned LastSGPR = AMDGPU::NoRegister;
3720 unsigned ConstantBusUseCount = 0;
3721 unsigned NumLiterals = 0;
3722 unsigned LiteralSize;
3723
3724 if (!(Desc.TSFlags &
3727 !isVOPD(Opcode))
3728 return true;
3729
3730 if (checkWriteLane(Inst))
3731 return true;
3732
3733 // Check special imm operands (used by madmk, etc)
3734 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3735 ++NumLiterals;
3736 LiteralSize = 4;
3737 }
3738
3739 SmallDenseSet<unsigned> SGPRsUsed;
3740 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3741 if (SGPRUsed != AMDGPU::NoRegister) {
3742 SGPRsUsed.insert(SGPRUsed);
3743 ++ConstantBusUseCount;
3744 }
3745
3746 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3747
3748 for (int OpIdx : OpIndices) {
3749 if (OpIdx == -1)
3750 continue;
3751
3752 const MCOperand &MO = Inst.getOperand(OpIdx);
3753 if (usesConstantBus(Inst, OpIdx)) {
3754 if (MO.isReg()) {
3755 LastSGPR = mc2PseudoReg(MO.getReg());
3756 // Pairs of registers with a partial intersection like these
3757 // s0, s[0:1]
3758 // flat_scratch_lo, flat_scratch
3759 // flat_scratch_lo, flat_scratch_hi
3760 // are theoretically valid but they are disabled anyway.
3761 // Note that this code mimics SIInstrInfo::verifyInstruction
3762 if (SGPRsUsed.insert(LastSGPR).second) {
3763 ++ConstantBusUseCount;
3764 }
3765 } else { // Expression or a literal
3766
3767 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3768 continue; // special operand like VINTERP attr_chan
3769
3770 // An instruction may use only one literal.
3771 // This has been validated on the previous step.
3772 // See validateVOPLiteral.
3773 // This literal may be used as more than one operand.
3774 // If all these operands are of the same size,
3775 // this literal counts as one scalar value.
3776 // Otherwise it counts as 2 scalar values.
3777 // See "GFX10 Shader Programming", section 3.6.2.3.
3778
3779 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3780 if (Size < 4)
3781 Size = 4;
3782
3783 if (NumLiterals == 0) {
3784 NumLiterals = 1;
3785 LiteralSize = Size;
3786 } else if (LiteralSize != Size) {
3787 NumLiterals = 2;
3788 }
3789 }
3790 }
3791 }
3792 ConstantBusUseCount += NumLiterals;
3793
3794 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3795 return true;
3796
3797 SMLoc LitLoc = getLitLoc(Operands);
3798 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3799 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3800 Error(Loc, "invalid operand (violates constant bus restrictions)");
3801 return false;
3802}
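// A hedged illustration of the limit enforced above: on a subtarget whose
// constant bus limit is 1 (e.g. pre-GFX10 VALU encodings), an instruction
// such as
//   v_add_f32 v0, s1, s2
// reads two different SGPRs over the constant bus and is diagnosed with
// "invalid operand (violates constant bus restrictions)", while
//   v_add_f32 v0, s1, v2
// uses the bus only once and passes. Exact limits come from
// getConstantBusLimit() and vary by subtarget.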
3803
3804bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3805 const MCInst &Inst, const OperandVector &Operands) {
3806
3807 const unsigned Opcode = Inst.getOpcode();
3808 if (!isVOPD(Opcode))
3809 return true;
3810
3811 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3812
3813 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3814 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3815 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3816 ? Opr.getReg()
3817 : MCRegister();
3818 };
3819
3820 // On GFX12, if both OpX and OpY are V_MOV_B32 then OpY uses the SRC2 source-cache.
3821 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3822
3823 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3824 auto InvalidCompOprIdx =
3825 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3826 if (!InvalidCompOprIdx)
3827 return true;
3828
3829 auto CompOprIdx = *InvalidCompOprIdx;
3830 auto ParsedIdx =
3831 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3832 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3833 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3834
3835 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3836 if (CompOprIdx == VOPD::Component::DST) {
3837 Error(Loc, "one dst register must be even and the other odd");
3838 } else {
3839 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3840 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3841 " operands must use different VGPR banks");
3842 }
3843
3844 return false;
3845}
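// Hypothetical VOPD example of the constraint reported above: in
//   v_dual_mul_f32 v0, v1, v2 :: v_dual_add_f32 v3, v1, v5
// both components take src0 from v1, i.e. from the same VGPR bank, so the
// parser emits "src0 operands must use different VGPR banks"; the dst pair
// (v0 even, v3 odd) would itself be acceptable. The syntax shown is a sketch
// of the GFX11/GFX12 VOPD form.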
3846
3847bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3848
3849 const unsigned Opc = Inst.getOpcode();
3850 const MCInstrDesc &Desc = MII.get(Opc);
3851
3852 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3853 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3854 assert(ClampIdx != -1);
3855 return Inst.getOperand(ClampIdx).getImm() == 0;
3856 }
3857
3858 return true;
3859}
3860
3861 constexpr uint64_t MIMGFlags =
3862 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3863 
3864bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3865 const SMLoc &IDLoc) {
3866
3867 const unsigned Opc = Inst.getOpcode();
3868 const MCInstrDesc &Desc = MII.get(Opc);
3869
3870 if ((Desc.TSFlags & MIMGFlags) == 0)
3871 return true;
3872
3873 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3874 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3875 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3876
3877 assert(VDataIdx != -1);
3878
3879 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3880 return true;
3881
3882 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3883 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3884 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3885 if (DMask == 0)
3886 DMask = 1;
3887
3888 bool IsPackedD16 = false;
3889 unsigned DataSize =
3890 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3891 if (hasPackedD16()) {
3892 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3893 IsPackedD16 = D16Idx >= 0;
3894 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3895 DataSize = (DataSize + 1) / 2;
3896 }
3897
3898 if ((VDataSize / 4) == DataSize + TFESize)
3899 return true;
3900
3901 StringRef Modifiers;
3902 if (isGFX90A())
3903 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3904 else
3905 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3906
3907 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3908 return false;
3909}
3910
3911bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3912 const SMLoc &IDLoc) {
3913 const unsigned Opc = Inst.getOpcode();
3914 const MCInstrDesc &Desc = MII.get(Opc);
3915
3916 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3917 return true;
3918
3919 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3920 
3921 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3922 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3923 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3924 int RSrcOpName = Desc.TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
3925 : AMDGPU::OpName::rsrc;
3926 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
3927 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3928 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3929
3930 assert(VAddr0Idx != -1);
3931 assert(SrsrcIdx != -1);
3932 assert(SrsrcIdx > VAddr0Idx);
3933
3934 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3935 if (BaseOpcode->BVH) {
3936 if (IsA16 == BaseOpcode->A16)
3937 return true;
3938 Error(IDLoc, "image address size does not match a16");
3939 return false;
3940 }
3941
3942 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3943 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3944 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3945 unsigned ActualAddrSize =
3946 IsNSA ? SrsrcIdx - VAddr0Idx
3947 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3948
3949 unsigned ExpectedAddrSize =
3950 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3951
3952 if (IsNSA) {
3953 if (hasPartialNSAEncoding() &&
3954 ExpectedAddrSize >
3955 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
3956 int VAddrLastIdx = SrsrcIdx - 1;
3957 unsigned VAddrLastSize =
3958 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3959
3960 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3961 }
3962 } else {
3963 if (ExpectedAddrSize > 12)
3964 ExpectedAddrSize = 16;
3965
3966 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3967 // This provides backward compatibility for assembly created
3968 // before 160b/192b/224b types were directly supported.
3969 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3970 return true;
3971 }
3972
3973 if (ActualAddrSize == ExpectedAddrSize)
3974 return true;
3975
3976 Error(IDLoc, "image address size does not match dim and a16");
3977 return false;
3978}
3979
3980bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3981
3982 const unsigned Opc = Inst.getOpcode();
3983 const MCInstrDesc &Desc = MII.get(Opc);
3984
3985 if ((Desc.TSFlags & MIMGFlags) == 0)
3986 return true;
3987 if (!Desc.mayLoad() || !Desc.mayStore())
3988 return true; // Not atomic
3989
3990 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3991 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3992
3993 // This is an incomplete check because image_atomic_cmpswap
3994 // may only use 0x3 and 0xf while other atomic operations
3995 // may use 0x1 and 0x3. However these limitations are
3996 // verified when we check that dmask matches dst size.
3997 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3998}
3999
4000bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4001
4002 const unsigned Opc = Inst.getOpcode();
4003 const MCInstrDesc &Desc = MII.get(Opc);
4004
4005 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4006 return true;
4007
4008 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4009 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4010
4011 // GATHER4 instructions use dmask in a different fashion compared to
4012 // other MIMG instructions. The only useful DMASK values are
4013 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4014 // (red,red,red,red) etc.) The ISA document doesn't mention
4015 // this.
4016 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4017}
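// Illustrative gather4 usage (assembly sketch): dmask selects the single
// channel gathered from the four texels, e.g.
//   image_gather4 v[0:3], v[4:5], s[0:7], s[8:11] dmask:0x4
// gathers the blue component into all four results; any dmask other than
// 1, 2, 4 or 8 is rejected by the check above.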
4018
4019bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4020 const unsigned Opc = Inst.getOpcode();
4021 const MCInstrDesc &Desc = MII.get(Opc);
4022
4023 if ((Desc.TSFlags & MIMGFlags) == 0)
4024 return true;
4025
4026 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4027 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4028 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4029 
4030 if (!BaseOpcode->MSAA)
4031 return true;
4032
4033 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4034 assert(DimIdx != -1);
4035
4036 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4037 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4038 
4039 return DimInfo->MSAA;
4040}
4041
4042static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4043{
4044 switch (Opcode) {
4045 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4046 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4047 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4048 return true;
4049 default:
4050 return false;
4051 }
4052}
4053
4054 // movrels* opcodes should only allow VGPRs as src0.
4055 // This is specified in the .td description for vop1/vop3,
4056// but sdwa is handled differently. See isSDWAOperand.
4057bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4058 const OperandVector &Operands) {
4059
4060 const unsigned Opc = Inst.getOpcode();
4061 const MCInstrDesc &Desc = MII.get(Opc);
4062
4063 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4064 return true;
4065
4066 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4067 assert(Src0Idx != -1);
4068
4069 SMLoc ErrLoc;
4070 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4071 if (Src0.isReg()) {
4072 auto Reg = mc2PseudoReg(Src0.getReg());
4073 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4074 if (!isSGPR(Reg, TRI))
4075 return true;
4076 ErrLoc = getRegLoc(Reg, Operands);
4077 } else {
4078 ErrLoc = getConstLoc(Operands);
4079 }
4080
4081 Error(ErrLoc, "source operand must be a VGPR");
4082 return false;
4083}
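// For instance (hypothetical): "v_movrels_b32_sdwa v0, s0" is rejected here
// with "source operand must be a VGPR", whereas "v_movrels_b32_sdwa v0, v1"
// is accepted.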
4084
4085bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4086 const OperandVector &Operands) {
4087
4088 const unsigned Opc = Inst.getOpcode();
4089
4090 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4091 return true;
4092
4093 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4094 assert(Src0Idx != -1);
4095
4096 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4097 if (!Src0.isReg())
4098 return true;
4099
4100 auto Reg = mc2PseudoReg(Src0.getReg());
4101 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4102 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4103 Error(getRegLoc(Reg, Operands),
4104 "source operand must be either a VGPR or an inline constant");
4105 return false;
4106 }
4107
4108 return true;
4109}
4110
4111bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4112 const OperandVector &Operands) {
4113 unsigned Opcode = Inst.getOpcode();
4114 const MCInstrDesc &Desc = MII.get(Opcode);
4115
4116 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4117 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4118 return true;
4119
4120 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4121 if (Src2Idx == -1)
4122 return true;
4123
4124 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4125 Error(getConstLoc(Operands),
4126 "inline constants are not allowed for this operand");
4127 return false;
4128 }
4129
4130 return true;
4131}
4132
4133bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4134 const OperandVector &Operands) {
4135 const unsigned Opc = Inst.getOpcode();
4136 const MCInstrDesc &Desc = MII.get(Opc);
4137
4138 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4139 return true;
4140
4141 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4142 if (Src2Idx == -1)
4143 return true;
4144
4145 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4146 if (!Src2.isReg())
4147 return true;
4148
4149 MCRegister Src2Reg = Src2.getReg();
4150 MCRegister DstReg = Inst.getOperand(0).getReg();
4151 if (Src2Reg == DstReg)
4152 return true;
4153
4154 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4155 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4156 return true;
4157
4158 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4159 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
4160 "source 2 operand must not partially overlap with dst");
4161 return false;
4162 }
4163
4164 return true;
4165}
4166
4167bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4168 switch (Inst.getOpcode()) {
4169 default:
4170 return true;
4171 case V_DIV_SCALE_F32_gfx6_gfx7:
4172 case V_DIV_SCALE_F32_vi:
4173 case V_DIV_SCALE_F32_gfx10:
4174 case V_DIV_SCALE_F64_gfx6_gfx7:
4175 case V_DIV_SCALE_F64_vi:
4176 case V_DIV_SCALE_F64_gfx10:
4177 break;
4178 }
4179
4180 // TODO: Check that src0 = src1 or src2.
4181
4182 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4183 AMDGPU::OpName::src2_modifiers,
4184 AMDGPU::OpName::src2_modifiers}) {
4185 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4186 .getImm() &
4187 SISrcMods::ABS) {
4188 return false;
4189 }
4190 }
4191
4192 return true;
4193}
4194
4195bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4196
4197 const unsigned Opc = Inst.getOpcode();
4198 const MCInstrDesc &Desc = MII.get(Opc);
4199
4200 if ((Desc.TSFlags & MIMGFlags) == 0)
4201 return true;
4202
4203 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4204 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4205 if (isCI() || isSI())
4206 return false;
4207 }
4208
4209 return true;
4210}
4211
4212static bool IsRevOpcode(const unsigned Opcode)
4213{
4214 switch (Opcode) {
4215 case AMDGPU::V_SUBREV_F32_e32:
4216 case AMDGPU::V_SUBREV_F32_e64:
4217 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4218 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4219 case AMDGPU::V_SUBREV_F32_e32_vi:
4220 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4221 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4222 case AMDGPU::V_SUBREV_F32_e64_vi:
4223
4224 case AMDGPU::V_SUBREV_CO_U32_e32:
4225 case AMDGPU::V_SUBREV_CO_U32_e64:
4226 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4227 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4228
4229 case AMDGPU::V_SUBBREV_U32_e32:
4230 case AMDGPU::V_SUBBREV_U32_e64:
4231 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4232 case AMDGPU::V_SUBBREV_U32_e32_vi:
4233 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4234 case AMDGPU::V_SUBBREV_U32_e64_vi:
4235
4236 case AMDGPU::V_SUBREV_U32_e32:
4237 case AMDGPU::V_SUBREV_U32_e64:
4238 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4239 case AMDGPU::V_SUBREV_U32_e32_vi:
4240 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4241 case AMDGPU::V_SUBREV_U32_e64_vi:
4242
4243 case AMDGPU::V_SUBREV_F16_e32:
4244 case AMDGPU::V_SUBREV_F16_e64:
4245 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4246 case AMDGPU::V_SUBREV_F16_e32_vi:
4247 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4248 case AMDGPU::V_SUBREV_F16_e64_vi:
4249
4250 case AMDGPU::V_SUBREV_U16_e32:
4251 case AMDGPU::V_SUBREV_U16_e64:
4252 case AMDGPU::V_SUBREV_U16_e32_vi:
4253 case AMDGPU::V_SUBREV_U16_e64_vi:
4254
4255 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4256 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4257 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4258
4259 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4260 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4261
4262 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4263 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4264
4265 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4266 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4267
4268 case AMDGPU::V_LSHRREV_B32_e32:
4269 case AMDGPU::V_LSHRREV_B32_e64:
4270 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4271 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4272 case AMDGPU::V_LSHRREV_B32_e32_vi:
4273 case AMDGPU::V_LSHRREV_B32_e64_vi:
4274 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4275 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4276
4277 case AMDGPU::V_ASHRREV_I32_e32:
4278 case AMDGPU::V_ASHRREV_I32_e64:
4279 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4280 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4281 case AMDGPU::V_ASHRREV_I32_e32_vi:
4282 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4283 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4284 case AMDGPU::V_ASHRREV_I32_e64_vi:
4285
4286 case AMDGPU::V_LSHLREV_B32_e32:
4287 case AMDGPU::V_LSHLREV_B32_e64:
4288 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4289 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4290 case AMDGPU::V_LSHLREV_B32_e32_vi:
4291 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4292 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4293 case AMDGPU::V_LSHLREV_B32_e64_vi:
4294
4295 case AMDGPU::V_LSHLREV_B16_e32:
4296 case AMDGPU::V_LSHLREV_B16_e64:
4297 case AMDGPU::V_LSHLREV_B16_e32_vi:
4298 case AMDGPU::V_LSHLREV_B16_e64_vi:
4299 case AMDGPU::V_LSHLREV_B16_gfx10:
4300
4301 case AMDGPU::V_LSHRREV_B16_e32:
4302 case AMDGPU::V_LSHRREV_B16_e64:
4303 case AMDGPU::V_LSHRREV_B16_e32_vi:
4304 case AMDGPU::V_LSHRREV_B16_e64_vi:
4305 case AMDGPU::V_LSHRREV_B16_gfx10:
4306
4307 case AMDGPU::V_ASHRREV_I16_e32:
4308 case AMDGPU::V_ASHRREV_I16_e64:
4309 case AMDGPU::V_ASHRREV_I16_e32_vi:
4310 case AMDGPU::V_ASHRREV_I16_e64_vi:
4311 case AMDGPU::V_ASHRREV_I16_gfx10:
4312
4313 case AMDGPU::V_LSHLREV_B64_e64:
4314 case AMDGPU::V_LSHLREV_B64_gfx10:
4315 case AMDGPU::V_LSHLREV_B64_vi:
4316
4317 case AMDGPU::V_LSHRREV_B64_e64:
4318 case AMDGPU::V_LSHRREV_B64_gfx10:
4319 case AMDGPU::V_LSHRREV_B64_vi:
4320
4321 case AMDGPU::V_ASHRREV_I64_e64:
4322 case AMDGPU::V_ASHRREV_I64_gfx10:
4323 case AMDGPU::V_ASHRREV_I64_vi:
4324
4325 case AMDGPU::V_PK_LSHLREV_B16:
4326 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4327 case AMDGPU::V_PK_LSHLREV_B16_vi:
4328
4329 case AMDGPU::V_PK_LSHRREV_B16:
4330 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4331 case AMDGPU::V_PK_LSHRREV_B16_vi:
4332 case AMDGPU::V_PK_ASHRREV_I16:
4333 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4334 case AMDGPU::V_PK_ASHRREV_I16_vi:
4335 return true;
4336 default:
4337 return false;
4338 }
4339}
4340
4341std::optional<StringRef>
4342AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4343
4344 using namespace SIInstrFlags;
4345 const unsigned Opcode = Inst.getOpcode();
4346 const MCInstrDesc &Desc = MII.get(Opcode);
4347
4348 // lds_direct register is defined so that it can be used
4349 // with 9-bit operands only. Ignore encodings which do not accept these.
4350 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4351 if ((Desc.TSFlags & Enc) == 0)
4352 return std::nullopt;
4353
4354 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4355 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4356 if (SrcIdx == -1)
4357 break;
4358 const auto &Src = Inst.getOperand(SrcIdx);
4359 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4360
4361 if (isGFX90A() || isGFX11Plus())
4362 return StringRef("lds_direct is not supported on this GPU");
4363
4364 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4365 return StringRef("lds_direct cannot be used with this instruction");
4366
4367 if (SrcName != OpName::src0)
4368 return StringRef("lds_direct may be used as src0 only");
4369 }
4370 }
4371
4372 return std::nullopt;
4373}
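// Illustrative diagnostics for the rules above (assembly sketches):
//   v_mov_b32 v0, lds_direct        ; accepted where lds_direct is available
//   v_add_f32 v0, v1, lds_direct    ; "lds_direct may be used as src0 only"
//   v_subrev_f32 v0, lds_direct, v1 ; "lds_direct cannot be used with this instruction"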
4374
4375SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4376 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4377 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4378 if (Op.isFlatOffset())
4379 return Op.getStartLoc();
4380 }
4381 return getLoc();
4382}
4383
4384bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4385 const OperandVector &Operands) {
4386 auto Opcode = Inst.getOpcode();
4387 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4388 if (OpNum == -1)
4389 return true;
4390
4391 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4392 if ((TSFlags & SIInstrFlags::FLAT))
4393 return validateFlatOffset(Inst, Operands);
4394
4395 if ((TSFlags & SIInstrFlags::SMRD))
4396 return validateSMEMOffset(Inst, Operands);
4397
4398 const auto &Op = Inst.getOperand(OpNum);
4399 if (isGFX12Plus() &&
4400 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4401 const unsigned OffsetSize = 24;
4402 if (!isIntN(OffsetSize, Op.getImm())) {
4403 Error(getFlatOffsetLoc(Operands),
4404 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4405 return false;
4406 }
4407 } else {
4408 const unsigned OffsetSize = 16;
4409 if (!isUIntN(OffsetSize, Op.getImm())) {
4410 Error(getFlatOffsetLoc(Operands),
4411 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4412 return false;
4413 }
4414 }
4415 return true;
4416}
4417
4418bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4419 const OperandVector &Operands) {
4420 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4421 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4422 return true;
4423
4424 auto Opcode = Inst.getOpcode();
4425 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4426 assert(OpNum != -1);
4427
4428 const auto &Op = Inst.getOperand(OpNum);
4429 if (!hasFlatOffsets() && Op.getImm() != 0) {
4430 Error(getFlatOffsetLoc(Operands),
4431 "flat offset modifier is not supported on this GPU");
4432 return false;
4433 }
4434
4435 // For pre-GFX12 FLAT instructions the offset must be positive;
4436 // MSB is ignored and forced to zero.
4437 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4438 bool AllowNegative =
4439 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4440 isGFX12Plus();
4441 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4442 Error(getFlatOffsetLoc(Operands),
4443 Twine("expected a ") +
4444 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4445 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4446 return false;
4447 }
4448
4449 return true;
4450}
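// For instance (sketch; exact widths come from getNumFlatOffsetBits()):
//   global_load_dword v0, v[0:1], off offset:-8   ; negative offsets allowed for
//                                                 ; global/scratch and on GFX12+
//   flat_load_dword v0, v[0:1] offset:-8          ; rejected pre-GFX12, which
//                                                 ; expects an unsigned offset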
4451
4452SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4453 // Start with second operand because SMEM Offset cannot be dst or src0.
4454 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4455 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4456 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4457 return Op.getStartLoc();
4458 }
4459 return getLoc();
4460}
4461
4462bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4463 const OperandVector &Operands) {
4464 if (isCI() || isSI())
4465 return true;
4466
4467 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4468 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4469 return true;
4470
4471 auto Opcode = Inst.getOpcode();
4472 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4473 if (OpNum == -1)
4474 return true;
4475
4476 const auto &Op = Inst.getOperand(OpNum);
4477 if (!Op.isImm())
4478 return true;
4479
4480 uint64_t Offset = Op.getImm();
4481 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4482 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4483 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4484 return true;
4485
4486 Error(getSMEMOffsetLoc(Operands),
4487 isGFX12Plus() ? "expected a 24-bit signed offset"
4488 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4489 : "expected a 21-bit signed offset");
4490
4491 return false;
4492}
4493
4494bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4495 unsigned Opcode = Inst.getOpcode();
4496 const MCInstrDesc &Desc = MII.get(Opcode);
4497 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4498 return true;
4499
4500 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4501 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4502
4503 const int OpIndices[] = { Src0Idx, Src1Idx };
4504
4505 unsigned NumExprs = 0;
4506 unsigned NumLiterals = 0;
4507 uint32_t LiteralValue;
4508 
4509 for (int OpIdx : OpIndices) {
4510 if (OpIdx == -1) break;
4511
4512 const MCOperand &MO = Inst.getOperand(OpIdx);
4513 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4514 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4515 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4516 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4517 if (NumLiterals == 0 || LiteralValue != Value) {
4518 LiteralValue = Value;
4519 ++NumLiterals;
4520 }
4521 } else if (MO.isExpr()) {
4522 ++NumExprs;
4523 }
4524 }
4525 }
4526
4527 return NumLiterals + NumExprs <= 1;
4528}
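// Put differently (illustrative): "s_add_u32 s0, 0xdeadbeef, 0xdeadbeef"
// reuses a single 32-bit literal and passes this check, while
// "s_add_u32 s0, 0xdeadbeef, 0x12345678" would need two distinct literals
// and is reported as having more than one unique literal operand.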
4529
4530bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4531 const unsigned Opc = Inst.getOpcode();
4532 if (isPermlane16(Opc)) {
4533 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4534 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4535
4536 if (OpSel & ~3)
4537 return false;
4538 }
4539
4540 uint64_t TSFlags = MII.get(Opc).TSFlags;
4541
4542 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4543 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4544 if (OpSelIdx != -1) {
4545 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4546 return false;
4547 }
4548 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4549 if (OpSelHiIdx != -1) {
4550 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4551 return false;
4552 }
4553 }
4554
4555 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4556 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4557 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4558 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4559 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4560 if (OpSel & 3)
4561 return false;
4562 }
4563
4564 return true;
4565}
4566
4567bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
4568 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4569
4570 const unsigned Opc = Inst.getOpcode();
4571 uint64_t TSFlags = MII.get(Opc).TSFlags;
4572
4573 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4574 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4575 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4576 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4577 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4578 !(TSFlags & SIInstrFlags::IsSWMMAC))
4579 return true;
4580
4581 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4582 if (NegIdx == -1)
4583 return true;
4584
4585 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4586
4587 // Some instructions have a neg_lo or neg_hi operand, but the neg modifier is
4588 // allowed only on some of their src operands and not on others.
4589 // Conveniently, such instructions don't have a src_modifiers operand for the
4590 // src operands that don't allow neg, because those also don't allow opsel.
4591
4592 int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4593 AMDGPU::OpName::src1_modifiers,
4594 AMDGPU::OpName::src2_modifiers};
4595
4596 for (unsigned i = 0; i < 3; ++i) {
4597 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4598 if (Neg & (1 << i))
4599 return false;
4600 }
4601 }
4602
4603 return true;
4604}
4605
4606bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4607 const OperandVector &Operands) {
4608 const unsigned Opc = Inst.getOpcode();
4609 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4610 if (DppCtrlIdx >= 0) {
4611 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4612
4613 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4614 AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4615 // DP ALU DPP is supported for row_newbcast only on GFX9*
4616 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4617 Error(S, "DP ALU dpp only supports row_newbcast");
4618 return false;
4619 }
4620 }
4621
4622 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4623 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4624
4625 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4626 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4627 if (Src1Idx >= 0) {
4628 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4629 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4630 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
4631 auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg());
4632 SMLoc S = getRegLoc(Reg, Operands);
4633 Error(S, "invalid operand for instruction");
4634 return false;
4635 }
4636 if (Src1.isImm()) {
4637 Error(getInstLoc(Operands),
4638 "src1 immediate operand invalid for instruction");
4639 return false;
4640 }
4641 }
4642 }
4643
4644 return true;
4645}
4646
4647// Check if VCC register matches wavefront size
4648bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4649 auto FB = getFeatureBits();
4650 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4651 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4652}
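// i.e. a wave64 target expects the full "vcc" pair as the carry/condition
// operand, while a wave32 target expects "vcc_lo" (for example in
// "v_cndmask_b32 v0, v1, v2, vcc_lo"); this is an illustrative summary, the
// actual register choice is driven by the wavefront-size feature bits above.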
4653
4654 // Only one unique literal can be used. A VOP3 literal is only allowed on GFX10+.
4655bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4656 const OperandVector &Operands) {
4657 unsigned Opcode = Inst.getOpcode();
4658 const MCInstrDesc &Desc = MII.get(Opcode);
4659 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4660 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4661 !HasMandatoryLiteral && !isVOPD(Opcode))
4662 return true;
4663
4664 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4665
4666 unsigned NumExprs = 0;
4667 unsigned NumLiterals = 0;
4668 uint64_t LiteralValue;
4669 
4670 for (int OpIdx : OpIndices) {
4671 if (OpIdx == -1)
4672 continue;
4673
4674 const MCOperand &MO = Inst.getOperand(OpIdx);
4675 if (!MO.isImm() && !MO.isExpr())
4676 continue;
4677 if (!isSISrcOperand(Desc, OpIdx))
4678 continue;
4679
4680 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4681 uint64_t Value = static_cast<uint64_t>(MO.getImm());
4682 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4683 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4684 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4685
4686 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4687 Error(getLitLoc(Operands), "invalid operand for instruction");
4688 return false;
4689 }
4690
4691 if (IsFP64 && IsValid32Op)
4692 Value = Hi_32(Value);
4693
4694 if (NumLiterals == 0 || LiteralValue != Value) {
4695 LiteralValue = Value;
4696 ++NumLiterals;
4697 }
4698 } else if (MO.isExpr()) {
4699 ++NumExprs;
4700 }
4701 }
4702 NumLiterals += NumExprs;
4703
4704 if (!NumLiterals)
4705 return true;
4706
4707 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4708 Error(getLitLoc(Operands), "literal operands are not supported");
4709 return false;
4710 }
4711
4712 if (NumLiterals > 1) {
4713 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4714 return false;
4715 }
4716
4717 return true;
4718}
4719
4720// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4721static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4722 const MCRegisterInfo *MRI) {
4723 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4724 if (OpIdx < 0)
4725 return -1;
4726
4727 const MCOperand &Op = Inst.getOperand(OpIdx);
4728 if (!Op.isReg())
4729 return -1;
4730
4731 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4732 auto Reg = Sub ? Sub : Op.getReg();
4733 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4734 return AGPR32.contains(Reg) ? 1 : 0;
4735}
4736
4737bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4738 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4739 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4740 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4741 SIInstrFlags::DS)) == 0)
4742 return true;
4743
4744 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4745 : AMDGPU::OpName::vdata;
4746
4747 const MCRegisterInfo *MRI = getMRI();
4748 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4749 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4750
4751 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4752 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4753 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4754 return false;
4755 }
4756
4757 auto FB = getFeatureBits();
4758 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4759 if (DataAreg < 0 || DstAreg < 0)
4760 return true;
4761 return DstAreg == DataAreg;
4762 }
4763
4764 return DstAreg < 1 && DataAreg < 1;
4765}
4766
4767bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4768 auto FB = getFeatureBits();
4769 if (!FB[AMDGPU::FeatureGFX90AInsts])
4770 return true;
4771
4772 const MCRegisterInfo *MRI = getMRI();
4773 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4774 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4775 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4776 const MCOperand &Op = Inst.getOperand(I);
4777 if (!Op.isReg())
4778 continue;
4779
4780 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4781 if (!Sub)
4782 continue;
4783
4784 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4785 return false;
4786 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4787 return false;
4788 }
4789
4790 return true;
4791}
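// Example (gfx90a, illustrative): a 64-bit tuple such as v[1:2] starts at an
// odd register and is rejected with "vgpr tuples must be 64 bit aligned",
// whereas v[2:3] is accepted; the same alignment rule applies to AGPR tuples.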
4792
4793SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4794 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4795 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4796 if (Op.isBLGP())
4797 return Op.getStartLoc();
4798 }
4799 return SMLoc();
4800}
4801
4802bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4803 const OperandVector &Operands) {
4804 unsigned Opc = Inst.getOpcode();
4805 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4806 if (BlgpIdx == -1)
4807 return true;
4808 SMLoc BLGPLoc = getBLGPLoc(Operands);
4809 if (!BLGPLoc.isValid())
4810 return true;
4811 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4812 auto FB = getFeatureBits();
4813 bool UsesNeg = false;
4814 if (FB[AMDGPU::FeatureGFX940Insts]) {
4815 switch (Opc) {
4816 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4817 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4818 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4819 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4820 UsesNeg = true;
4821 }
4822 }
4823
4824 if (IsNeg == UsesNeg)
4825 return true;
4826
4827 Error(BLGPLoc,
4828 UsesNeg ? "invalid modifier: blgp is not supported"
4829 : "invalid modifier: neg is not supported");
4830
4831 return false;
4832}
4833
4834bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4835 const OperandVector &Operands) {
4836 if (!isGFX11Plus())
4837 return true;
4838
4839 unsigned Opc = Inst.getOpcode();
4840 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4841 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4842 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4843 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4844 return true;
4845
4846 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4847 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4848 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4849 if (Reg == AMDGPU::SGPR_NULL)
4850 return true;
4851
4852 SMLoc RegLoc = getRegLoc(Reg, Operands);
4853 Error(RegLoc, "src0 must be null");
4854 return false;
4855}
4856
4857bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4858 const OperandVector &Operands) {
4859 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4860 if ((TSFlags & SIInstrFlags::DS) == 0)
4861 return true;
4862 if (TSFlags & SIInstrFlags::GWS)
4863 return validateGWS(Inst, Operands);
4864 // Only validate GDS for non-GWS instructions.
4865 if (hasGDS())
4866 return true;
4867 int GDSIdx =
4868 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4869 if (GDSIdx < 0)
4870 return true;
4871 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4872 if (GDS) {
4873 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4874 Error(S, "gds modifier is not supported on this GPU");
4875 return false;
4876 }
4877 return true;
4878}
4879
4880// gfx90a has an undocumented limitation:
4881// DS_GWS opcodes must use even aligned registers.
4882bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4883 const OperandVector &Operands) {
4884 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4885 return true;
4886
4887 int Opc = Inst.getOpcode();
4888 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4889 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4890 return true;
4891
4892 const MCRegisterInfo *MRI = getMRI();
4893 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4894 int Data0Pos =
4895 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4896 assert(Data0Pos != -1);
4897 auto Reg = Inst.getOperand(Data0Pos).getReg();
4898 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4899 if (RegIdx & 1) {
4900 SMLoc RegLoc = getRegLoc(Reg, Operands);
4901 Error(RegLoc, "vgpr must be even aligned");
4902 return false;
4903 }
4904
4905 return true;
4906}
4907
4908bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4909 const OperandVector &Operands,
4910 const SMLoc &IDLoc) {
4911 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4912 AMDGPU::OpName::cpol);
4913 if (CPolPos == -1)
4914 return true;
4915
4916 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4917
4918 if (isGFX12Plus())
4919 return validateTHAndScopeBits(Inst, Operands, CPol);
4920
4921 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4922 if (TSFlags & SIInstrFlags::SMRD) {
4923 if (CPol && (isSI() || isCI())) {
4924 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4925 Error(S, "cache policy is not supported for SMRD instructions");
4926 return false;
4927 }
4928 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4929 Error(IDLoc, "invalid cache policy for SMEM instruction");
4930 return false;
4931 }
4932 }
4933
4934 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4935 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4936 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4937 SIInstrFlags::FLAT;
4938 if (!(TSFlags & AllowSCCModifier)) {
4939 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4940 StringRef CStr(S.getPointer());
4941 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4942 Error(S,
4943 "scc modifier is not supported for this instruction on this GPU");
4944 return false;
4945 }
4946 }
4947
4948 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
4949 return true;
4950
4951 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4952 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4953 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4954 : "instruction must use glc");
4955 return false;
4956 }
4957 } else {
4958 if (CPol & CPol::GLC) {
4959 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4960 StringRef CStr(S.getPointer());
4961 S = SMLoc::getFromPointer(
4962 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4963 Error(S, isGFX940() ? "instruction must not use sc0"
4964 : "instruction must not use glc");
4965 return false;
4966 }
4967 }
4968
4969 return true;
4970}
4971
4972bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
4973 const OperandVector &Operands,
4974 const unsigned CPol) {
4975 const unsigned TH = CPol & AMDGPU::CPol::TH;
4976 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
4977
4978 const unsigned Opcode = Inst.getOpcode();
4979 const MCInstrDesc &TID = MII.get(Opcode);
4980
4981 auto PrintError = [&](StringRef Msg) {
4982 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4983 Error(S, Msg);
4984 return false;
4985 };
4986
4987 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
4988 (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
4989 (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN)))
4990 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
4991
4992 if (TH == 0)
4993 return true;
4994
4995 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
4996 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
4997 (TH == AMDGPU::CPol::TH_NT_HT)))
4998 return PrintError("invalid th value for SMEM instruction");
4999
5000 if (TH == AMDGPU::CPol::TH_BYPASS) {
5001 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5002 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
5003 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5004 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5005 return PrintError("scope and th combination is not valid");
5006 }
5007
5008 bool IsStore = TID.mayStore();
5009 bool IsAtomic =
5010 TID.TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet);
5011 
5012 if (IsAtomic) {
5013 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5014 return PrintError("invalid th value for atomic instructions");
5015 } else if (IsStore) {
5016 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5017 return PrintError("invalid th value for store instructions");
5018 } else {
5019 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5020 return PrintError("invalid th value for load instructions");
5021 }
5022
5023 return true;
5024}
5025
5026bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
5027 if (!isGFX11Plus())
5028 return true;
5029 for (auto &Operand : Operands) {
5030 if (!Operand->isReg())
5031 continue;
5032 unsigned Reg = Operand->getReg();
5033 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
5034 Error(getRegLoc(Reg, Operands),
5035 "execz and vccz are not supported on this GPU");
5036 return false;
5037 }
5038 }
5039 return true;
5040}
5041
5042bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5043 const OperandVector &Operands) {
5044 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5045 if (Desc.mayStore() &&
5046 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5047 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5048 if (Loc != getInstLoc(Operands)) {
5049 Error(Loc, "TFE modifier has no meaning for store instructions");
5050 return false;
5051 }
5052 }
5053
5054 return true;
5055}
5056
5057bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5058 const SMLoc &IDLoc,
5059 const OperandVector &Operands) {
5060 if (auto ErrMsg = validateLdsDirect(Inst)) {
5061 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
5062 return false;
5063 }
5064 if (!validateSOPLiteral(Inst)) {
5065 Error(getLitLoc(Operands),
5066 "only one unique literal operand is allowed");
5067 return false;
5068 }
5069 if (!validateVOPLiteral(Inst, Operands)) {
5070 return false;
5071 }
5072 if (!validateConstantBusLimitations(Inst, Operands)) {
5073 return false;
5074 }
5075 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
5076 return false;
5077 }
5078 if (!validateIntClampSupported(Inst)) {
5079 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
5080 "integer clamping is not supported on this GPU");
5081 return false;
5082 }
5083 if (!validateOpSel(Inst)) {
5084 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5085 "invalid op_sel operand");
5086 return false;
5087 }
5088 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5089 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5090 "invalid neg_lo operand");
5091 return false;
5092 }
5093 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5094 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5095 "invalid neg_hi operand");
5096 return false;
5097 }
5098 if (!validateDPP(Inst, Operands)) {
5099 return false;
5100 }
5101 // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
5102 if (!validateMIMGD16(Inst)) {
5103 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5104 "d16 modifier is not supported on this GPU");
5105 return false;
5106 }
5107 if (!validateMIMGMSAA(Inst)) {
5108 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5109 "invalid dim; must be MSAA type");
5110 return false;
5111 }
5112 if (!validateMIMGDataSize(Inst, IDLoc)) {
5113 return false;
5114 }
5115 if (!validateMIMGAddrSize(Inst, IDLoc))
5116 return false;
5117 if (!validateMIMGAtomicDMask(Inst)) {
5118 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5119 "invalid atomic image dmask");
5120 return false;
5121 }
5122 if (!validateMIMGGatherDMask(Inst)) {
5123 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5124 "invalid image_gather dmask: only one bit must be set");
5125 return false;
5126 }
5127 if (!validateMovrels(Inst, Operands)) {
5128 return false;
5129 }
5130 if (!validateOffset(Inst, Operands)) {
5131 return false;
5132 }
5133 if (!validateMAIAccWrite(Inst, Operands)) {
5134 return false;
5135 }
5136 if (!validateMAISrc2(Inst, Operands)) {
5137 return false;
5138 }
5139 if (!validateMFMA(Inst, Operands)) {
5140 return false;
5141 }
5142 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5143 return false;
5144 }
5145
5146 if (!validateAGPRLdSt(Inst)) {
5147 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5148 ? "invalid register class: data and dst should be all VGPR or AGPR"
5149 : "invalid register class: agpr loads and stores not supported on this GPU"
5150 );
5151 return false;
5152 }
5153 if (!validateVGPRAlign(Inst)) {
5154 Error(IDLoc,
5155 "invalid register class: vgpr tuples must be 64 bit aligned");
5156 return false;
5157 }
5158 if (!validateDS(Inst, Operands)) {
5159 return false;
5160 }
5161
5162 if (!validateBLGP(Inst, Operands)) {
5163 return false;
5164 }
5165
5166 if (!validateDivScale(Inst)) {
5167 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5168 return false;
5169 }
5170 if (!validateWaitCnt(Inst, Operands)) {
5171 return false;
5172 }
5173 if (!validateExeczVcczOperands(Operands)) {
5174 return false;
5175 }
5176 if (!validateTFE(Inst, Operands)) {
5177 return false;
5178 }
5179
5180 return true;
5181}
5182
5183 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5184 const FeatureBitset &FBS,
5185 unsigned VariantID = 0);
5186
5187static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5188 const FeatureBitset &AvailableFeatures,
5189 unsigned VariantID);
5190
5191bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5192 const FeatureBitset &FBS) {
5193 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5194}
5195
5196bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5197 const FeatureBitset &FBS,
5198 ArrayRef<unsigned> Variants) {
5199 for (auto Variant : Variants) {
5200 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5201 return true;
5202 }
5203
5204 return false;
5205}
5206
5207bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5208 const SMLoc &IDLoc) {
5209 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5210
5211 // Check if requested instruction variant is supported.
5212 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5213