1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
15#include "SIDefines.h"
16#include "SIInstrInfo.h"
17#include "SIRegisterInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
37#include "llvm/MC/MCSymbol.h"
44#include <optional>
45
46using namespace llvm;
47using namespace llvm::AMDGPU;
48using namespace llvm::amdhsa;
49
50namespace {
51
52class AMDGPUAsmParser;
53
54enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
55
56//===----------------------------------------------------------------------===//
57// Operand
58//===----------------------------------------------------------------------===//
59
60class AMDGPUOperand : public MCParsedAsmOperand {
61 enum KindTy {
62 Token,
63 Immediate,
64 Register,
65 Expression
66 } Kind;
67
68 SMLoc StartLoc, EndLoc;
69 const AMDGPUAsmParser *AsmParser;
70
71public:
72 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
73 : Kind(Kind_), AsmParser(AsmParser_) {}
74
75 using Ptr = std::unique_ptr<AMDGPUOperand>;
76
77 struct Modifiers {
78 bool Abs = false;
79 bool Neg = false;
80 bool Sext = false;
81 bool Lit = false;
82
83 bool hasFPModifiers() const { return Abs || Neg; }
84 bool hasIntModifiers() const { return Sext; }
85 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
86
87 int64_t getFPModifiersOperand() const {
88 int64_t Operand = 0;
89 Operand |= Abs ? SISrcMods::ABS : 0u;
90 Operand |= Neg ? SISrcMods::NEG : 0u;
91 return Operand;
92 }
93
94 int64_t getIntModifiersOperand() const {
95 int64_t Operand = 0;
96 Operand |= Sext ? SISrcMods::SEXT : 0u;
97 return Operand;
98 }
99
100 int64_t getModifiersOperand() const {
101 assert(!(hasFPModifiers() && hasIntModifiers())
102 && "fp and int modifiers should not be used simultaneously");
103 if (hasFPModifiers())
104 return getFPModifiersOperand();
105 if (hasIntModifiers())
106 return getIntModifiersOperand();
107 return 0;
108 }
109
110 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
111 };
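 // Illustrative sketch (not part of the parser itself): how the flags above
 // fold into a src_modifiers operand, assuming the SISrcMods encoding in
 // SIDefines.h where NEG occupies bit 0 and ABS bit 1 (SEXT shares bit 0):
 //
 //   Modifiers M;
 //   M.Neg = true;                            // "-|v0|" sets both Neg and Abs
 //   M.Abs = true;
 //   int64_t Enc = M.getModifiersOperand();   // SISrcMods::NEG | SISrcMods::ABS
 //
 // FP (abs/neg) and integer (sext) modifiers are mutually exclusive, so
 // getModifiersOperand() asserts if both kinds are set at once.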
112
113 enum ImmTy {
114 ImmTyNone,
115 ImmTyGDS,
116 ImmTyLDS,
117 ImmTyOffen,
118 ImmTyIdxen,
119 ImmTyAddr64,
120 ImmTyOffset,
121 ImmTyInstOffset,
122 ImmTyOffset0,
123 ImmTyOffset1,
124 ImmTySMEMOffsetMod,
125 ImmTyCPol,
126 ImmTyTFE,
127 ImmTyD16,
128 ImmTyClamp,
129 ImmTyOModSI,
130 ImmTySDWADstSel,
131 ImmTySDWASrc0Sel,
132 ImmTySDWASrc1Sel,
133 ImmTySDWADstUnused,
134 ImmTyDMask,
135 ImmTyDim,
136 ImmTyUNorm,
137 ImmTyDA,
138 ImmTyR128A16,
139 ImmTyA16,
140 ImmTyLWE,
141 ImmTyExpTgt,
142 ImmTyExpCompr,
143 ImmTyExpVM,
144 ImmTyFORMAT,
145 ImmTyHwreg,
146 ImmTyOff,
147 ImmTySendMsg,
148 ImmTyInterpSlot,
149 ImmTyInterpAttr,
150 ImmTyInterpAttrChan,
151 ImmTyOpSel,
152 ImmTyOpSelHi,
153 ImmTyNegLo,
154 ImmTyNegHi,
155 ImmTyIndexKey8bit,
156 ImmTyIndexKey16bit,
157 ImmTyDPP8,
158 ImmTyDppCtrl,
159 ImmTyDppRowMask,
160 ImmTyDppBankMask,
161 ImmTyDppBoundCtrl,
162 ImmTyDppFI,
163 ImmTySwizzle,
164 ImmTyGprIdxMode,
165 ImmTyHigh,
166 ImmTyBLGP,
167 ImmTyCBSZ,
168 ImmTyABID,
169 ImmTyEndpgm,
170 ImmTyWaitVDST,
171 ImmTyWaitEXP,
172 ImmTyWaitVAVDst,
173 ImmTyWaitVMVSrc,
174 ImmTyByteSel,
175 };
176
177 // Immediate operand kind.
178 // It helps to identify the location of an offending operand after an error.
179 // Note that regular literals and mandatory literals (KImm) must be handled
180 // differently. When looking for an offending operand, we should usually
181 // ignore mandatory literals because they are part of the instruction and
182 // cannot be changed. Report location of mandatory operands only for VOPD,
183 // when both OpX and OpY have a KImm and there are no other literals.
184 enum ImmKindTy {
185 ImmKindTyNone,
186 ImmKindTyLiteral,
187 ImmKindTyMandatoryLiteral,
188 ImmKindTyConst,
189 };
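 // Illustrative examples only: in "v_fmaak_f32 v0, v1, v2, 0x11213141" the
 // trailing constant is a mandatory literal (ImmKindTyMandatoryLiteral); it is
 // part of the instruction format and is normally skipped when reporting an
 // offending literal. By contrast, the 0x11213141 in
 // "v_add_f32 v0, 0x11213141, v1" is an optional literal and is tracked as
 // ImmKindTyLiteral.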
190
191private:
192 struct TokOp {
193 const char *Data;
194 unsigned Length;
195 };
196
197 struct ImmOp {
198 int64_t Val;
199 ImmTy Type;
200 bool IsFPImm;
201 mutable ImmKindTy Kind;
202 Modifiers Mods;
203 };
204
205 struct RegOp {
206 unsigned RegNo;
207 Modifiers Mods;
208 };
209
210 union {
211 TokOp Tok;
212 ImmOp Imm;
213 RegOp Reg;
214 const MCExpr *Expr;
215 };
216
217public:
218 bool isToken() const override { return Kind == Token; }
219
220 bool isSymbolRefExpr() const {
221 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
222 }
223
224 bool isImm() const override {
225 return Kind == Immediate;
226 }
227
228 void setImmKindNone() const {
229 assert(isImm());
230 Imm.Kind = ImmKindTyNone;
231 }
232
233 void setImmKindLiteral() const {
234 assert(isImm());
235 Imm.Kind = ImmKindTyLiteral;
236 }
237
238 void setImmKindMandatoryLiteral() const {
239 assert(isImm());
240 Imm.Kind = ImmKindTyMandatoryLiteral;
241 }
242
243 void setImmKindConst() const {
244 assert(isImm());
245 Imm.Kind = ImmKindTyConst;
246 }
247
248 bool IsImmKindLiteral() const {
249 return isImm() && Imm.Kind == ImmKindTyLiteral;
250 }
251
252 bool IsImmKindMandatoryLiteral() const {
253 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
254 }
255
256 bool isImmKindConst() const {
257 return isImm() && Imm.Kind == ImmKindTyConst;
258 }
259
260 bool isInlinableImm(MVT type) const;
261 bool isLiteralImm(MVT type) const;
262
263 bool isRegKind() const {
264 return Kind == Register;
265 }
266
267 bool isReg() const override {
268 return isRegKind() && !hasModifiers();
269 }
270
271 bool isRegOrInline(unsigned RCID, MVT type) const {
272 return isRegClass(RCID) || isInlinableImm(type);
273 }
274
275 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
276 return isRegOrInline(RCID, type) || isLiteralImm(type);
277 }
278
279 bool isRegOrImmWithInt16InputMods() const {
280 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
281 }
282
283 bool isRegOrImmWithIntT16InputMods() const {
284 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
285 }
286
287 bool isRegOrImmWithInt32InputMods() const {
288 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
289 }
290
291 bool isRegOrInlineImmWithInt16InputMods() const {
292 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
293 }
294
295 bool isRegOrInlineImmWithInt32InputMods() const {
296 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
297 }
298
299 bool isRegOrImmWithInt64InputMods() const {
300 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
301 }
302
303 bool isRegOrImmWithFP16InputMods() const {
304 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
305 }
306
307 bool isRegOrImmWithFPT16InputMods() const {
308 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
309 }
310
311 bool isRegOrImmWithFP32InputMods() const {
312 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
313 }
314
315 bool isRegOrImmWithFP64InputMods() const {
316 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
317 }
318
319 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
320 return isRegOrInline(
321 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
322 }
323
324 bool isRegOrInlineImmWithFP32InputMods() const {
325 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
326 }
327
328 bool isPackedFP16InputMods() const {
329 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
330 }
331
332 bool isVReg() const {
333 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
334 isRegClass(AMDGPU::VReg_64RegClassID) ||
335 isRegClass(AMDGPU::VReg_96RegClassID) ||
336 isRegClass(AMDGPU::VReg_128RegClassID) ||
337 isRegClass(AMDGPU::VReg_160RegClassID) ||
338 isRegClass(AMDGPU::VReg_192RegClassID) ||
339 isRegClass(AMDGPU::VReg_256RegClassID) ||
340 isRegClass(AMDGPU::VReg_512RegClassID) ||
341 isRegClass(AMDGPU::VReg_1024RegClassID);
342 }
343
344 bool isVReg32() const {
345 return isRegClass(AMDGPU::VGPR_32RegClassID);
346 }
347
348 bool isVReg32OrOff() const {
349 return isOff() || isVReg32();
350 }
351
352 bool isNull() const {
353 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
354 }
355
356 bool isVRegWithInputMods() const;
357 template <bool IsFake16> bool isT16VRegWithInputMods() const;
358
359 bool isSDWAOperand(MVT type) const;
360 bool isSDWAFP16Operand() const;
361 bool isSDWAFP32Operand() const;
362 bool isSDWAInt16Operand() const;
363 bool isSDWAInt32Operand() const;
364
365 bool isImmTy(ImmTy ImmT) const {
366 return isImm() && Imm.Type == ImmT;
367 }
368
369 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
370
371 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
372
373 bool isImmModifier() const {
374 return isImm() && Imm.Type != ImmTyNone;
375 }
376
377 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
378 bool isDim() const { return isImmTy(ImmTyDim); }
379 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
380 bool isOff() const { return isImmTy(ImmTyOff); }
381 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
382 bool isOffen() const { return isImmTy(ImmTyOffen); }
383 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
384 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
385 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
386 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
387 bool isGDS() const { return isImmTy(ImmTyGDS); }
388 bool isLDS() const { return isImmTy(ImmTyLDS); }
389 bool isCPol() const { return isImmTy(ImmTyCPol); }
390 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
391 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
392 bool isTFE() const { return isImmTy(ImmTyTFE); }
393 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
394 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
395 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
396 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
397 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
398 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
399 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
400 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
401 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
402 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
403 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
404 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
405 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
406
407 bool isRegOrImm() const {
408 return isReg() || isImm();
409 }
410
411 bool isRegClass(unsigned RCID) const;
412
413 bool isInlineValue() const;
414
415 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
416 return isRegOrInline(RCID, type) && !hasModifiers();
417 }
418
419 bool isSCSrcB16() const {
420 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
421 }
422
423 bool isSCSrcV2B16() const {
424 return isSCSrcB16();
425 }
426
427 bool isSCSrc_b32() const {
428 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
429 }
430
431 bool isSCSrc_b64() const {
432 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
433 }
434
435 bool isBoolReg() const;
436
437 bool isSCSrcF16() const {
438 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
439 }
440
441 bool isSCSrcV2F16() const {
442 return isSCSrcF16();
443 }
444
445 bool isSCSrcF32() const {
446 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
447 }
448
449 bool isSCSrcF64() const {
450 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
451 }
452
453 bool isSSrc_b32() const {
454 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
455 }
456
457 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
458
459 bool isSSrcV2B16() const {
460 llvm_unreachable("cannot happen");
461 return isSSrc_b16();
462 }
463
464 bool isSSrc_b64() const {
465 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
466 // See isVSrc64().
467 return isSCSrc_b64() || isLiteralImm(MVT::i64);
468 }
469
470 bool isSSrc_f32() const {
471 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
472 }
473
474 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
475
476 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
477
478 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
479
480 bool isSSrcV2F16() const {
481 llvm_unreachable("cannot happen");
482 return isSSrc_f16();
483 }
484
485 bool isSSrcV2FP32() const {
486 llvm_unreachable("cannot happen");
487 return isSSrc_f32();
488 }
489
490 bool isSCSrcV2FP32() const {
491 llvm_unreachable("cannot happen");
492 return isSCSrcF32();
493 }
494
495 bool isSSrcV2INT32() const {
496 llvm_unreachable("cannot happen");
497 return isSSrc_b32();
498 }
499
500 bool isSCSrcV2INT32() const {
501 llvm_unreachable("cannot happen");
502 return isSCSrc_b32();
503 }
504
505 bool isSSrcOrLds_b32() const {
506 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
507 isLiteralImm(MVT::i32) || isExpr();
508 }
509
510 bool isVCSrc_b32() const {
511 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
512 }
513
514 bool isVCSrcB64() const {
515 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
516 }
517
518 bool isVCSrcTB16() const {
519 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
520 }
521
522 bool isVCSrcTB16_Lo128() const {
523 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
524 }
525
526 bool isVCSrcFake16B16_Lo128() const {
527 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
528 }
529
530 bool isVCSrc_b16() const {
531 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
532 }
533
534 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
535
536 bool isVCSrc_f32() const {
537 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
538 }
539
540 bool isVCSrcF64() const {
541 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
542 }
543
544 bool isVCSrcTBF16() const {
545 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
546 }
547
548 bool isVCSrcTF16() const {
549 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
550 }
551
552 bool isVCSrcTBF16_Lo128() const {
553 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
554 }
555
556 bool isVCSrcTF16_Lo128() const {
557 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
558 }
559
560 bool isVCSrcFake16BF16_Lo128() const {
561 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
562 }
563
564 bool isVCSrcFake16F16_Lo128() const {
565 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
566 }
567
568 bool isVCSrc_bf16() const {
569 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
570 }
571
572 bool isVCSrc_f16() const {
573 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
574 }
575
576 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
577
578 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
579
580 bool isVSrc_b32() const {
581 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
582 }
583
584 bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
585
586 bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
587
588 bool isVSrcT_b16_Lo128() const {
589 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
590 }
591
592 bool isVSrcFake16_b16_Lo128() const {
593 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
594 }
595
596 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
597
598 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
599
600 bool isVCSrcV2FP32() const {
601 return isVCSrcF64();
602 }
603
604 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
605
606 bool isVCSrcV2INT32() const {
607 return isVCSrcB64();
608 }
609
610 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
611
612 bool isVSrc_f32() const {
613 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
614 }
615
616 bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
617
618 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
619
620 bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
621
622 bool isVSrcT_bf16_Lo128() const {
623 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
624 }
625
626 bool isVSrcT_f16_Lo128() const {
627 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
628 }
629
630 bool isVSrcFake16_bf16_Lo128() const {
631 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
632 }
633
634 bool isVSrcFake16_f16_Lo128() const {
635 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
636 }
637
638 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
639
640 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
641
642 bool isVSrc_v2bf16() const {
643 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
644 }
645
646 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
647
648 bool isVISrcB32() const {
649 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
650 }
651
652 bool isVISrcB16() const {
653 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
654 }
655
656 bool isVISrcV2B16() const {
657 return isVISrcB16();
658 }
659
660 bool isVISrcF32() const {
661 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
662 }
663
664 bool isVISrcF16() const {
665 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
666 }
667
668 bool isVISrcV2F16() const {
669 return isVISrcF16() || isVISrcB32();
670 }
671
672 bool isVISrc_64_bf16() const {
673 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
674 }
675
676 bool isVISrc_64_f16() const {
677 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
678 }
679
680 bool isVISrc_64_b32() const {
681 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
682 }
683
684 bool isVISrc_64B64() const {
685 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
686 }
687
688 bool isVISrc_64_f64() const {
689 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
690 }
691
692 bool isVISrc_64V2FP32() const {
693 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
694 }
695
696 bool isVISrc_64V2INT32() const {
697 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
698 }
699
700 bool isVISrc_256_b32() const {
701 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
702 }
703
704 bool isVISrc_256_f32() const {
705 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
706 }
707
708 bool isVISrc_256B64() const {
709 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
710 }
711
712 bool isVISrc_256_f64() const {
713 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
714 }
715
716 bool isVISrc_128B16() const {
717 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
718 }
719
720 bool isVISrc_128V2B16() const {
721 return isVISrc_128B16();
722 }
723
724 bool isVISrc_128_b32() const {
725 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
726 }
727
728 bool isVISrc_128_f32() const {
729 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
730 }
731
732 bool isVISrc_256V2FP32() const {
733 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
734 }
735
736 bool isVISrc_256V2INT32() const {
737 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
738 }
739
740 bool isVISrc_512_b32() const {
741 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
742 }
743
744 bool isVISrc_512B16() const {
745 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
746 }
747
748 bool isVISrc_512V2B16() const {
749 return isVISrc_512B16();
750 }
751
752 bool isVISrc_512_f32() const {
753 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
754 }
755
756 bool isVISrc_512F16() const {
757 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
758 }
759
760 bool isVISrc_512V2F16() const {
761 return isVISrc_512F16() || isVISrc_512_b32();
762 }
763
764 bool isVISrc_1024_b32() const {
765 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
766 }
767
768 bool isVISrc_1024B16() const {
769 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
770 }
771
772 bool isVISrc_1024V2B16() const {
773 return isVISrc_1024B16();
774 }
775
776 bool isVISrc_1024_f32() const {
777 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
778 }
779
780 bool isVISrc_1024F16() const {
781 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
782 }
783
784 bool isVISrc_1024V2F16() const {
785 return isVISrc_1024F16() || isVISrc_1024_b32();
786 }
787
788 bool isAISrcB32() const {
789 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
790 }
791
792 bool isAISrcB16() const {
793 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
794 }
795
796 bool isAISrcV2B16() const {
797 return isAISrcB16();
798 }
799
800 bool isAISrcF32() const {
801 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
802 }
803
804 bool isAISrcF16() const {
805 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
806 }
807
808 bool isAISrcV2F16() const {
809 return isAISrcF16() || isAISrcB32();
810 }
811
812 bool isAISrc_64B64() const {
813 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
814 }
815
816 bool isAISrc_64_f64() const {
817 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
818 }
819
820 bool isAISrc_128_b32() const {
821 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
822 }
823
824 bool isAISrc_128B16() const {
825 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
826 }
827
828 bool isAISrc_128V2B16() const {
829 return isAISrc_128B16();
830 }
831
832 bool isAISrc_128_f32() const {
833 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
834 }
835
836 bool isAISrc_128F16() const {
837 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
838 }
839
840 bool isAISrc_128V2F16() const {
841 return isAISrc_128F16() || isAISrc_128_b32();
842 }
843
844 bool isVISrc_128_bf16() const {
845 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
846 }
847
848 bool isVISrc_128_f16() const {
849 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
850 }
851
852 bool isVISrc_128V2F16() const {
853 return isVISrc_128_f16() || isVISrc_128_b32();
854 }
855
856 bool isAISrc_256B64() const {
857 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
858 }
859
860 bool isAISrc_256_f64() const {
861 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
862 }
863
864 bool isAISrc_512_b32() const {
865 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
866 }
867
868 bool isAISrc_512B16() const {
869 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
870 }
871
872 bool isAISrc_512V2B16() const {
873 return isAISrc_512B16();
874 }
875
876 bool isAISrc_512_f32() const {
877 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
878 }
879
880 bool isAISrc_512F16() const {
881 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
882 }
883
884 bool isAISrc_512V2F16() const {
885 return isAISrc_512F16() || isAISrc_512_b32();
886 }
887
888 bool isAISrc_1024_b32() const {
889 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
890 }
891
892 bool isAISrc_1024B16() const {
893 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
894 }
895
896 bool isAISrc_1024V2B16() const {
897 return isAISrc_1024B16();
898 }
899
900 bool isAISrc_1024_f32() const {
901 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
902 }
903
904 bool isAISrc_1024F16() const {
905 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
906 }
907
908 bool isAISrc_1024V2F16() const {
909 return isAISrc_1024F16() || isAISrc_1024_b32();
910 }
911
912 bool isKImmFP32() const {
913 return isLiteralImm(MVT::f32);
914 }
915
916 bool isKImmFP16() const {
917 return isLiteralImm(MVT::f16);
918 }
919
920 bool isMem() const override {
921 return false;
922 }
923
924 bool isExpr() const {
925 return Kind == Expression;
926 }
927
928 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
929
930 bool isSWaitCnt() const;
931 bool isDepCtr() const;
932 bool isSDelayALU() const;
933 bool isHwreg() const;
934 bool isSendMsg() const;
935 bool isSplitBarrier() const;
936 bool isSwizzle() const;
937 bool isSMRDOffset8() const;
938 bool isSMEMOffset() const;
939 bool isSMRDLiteralOffset() const;
940 bool isDPP8() const;
941 bool isDPPCtrl() const;
942 bool isBLGP() const;
943 bool isGPRIdxMode() const;
944 bool isS16Imm() const;
945 bool isU16Imm() const;
946 bool isEndpgm() const;
947
948 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
949 return [=](){ return P(*this); };
950 }
951
952 StringRef getToken() const {
953 assert(isToken());
954 return StringRef(Tok.Data, Tok.Length);
955 }
956
957 int64_t getImm() const {
958 assert(isImm());
959 return Imm.Val;
960 }
961
962 void setImm(int64_t Val) {
963 assert(isImm());
964 Imm.Val = Val;
965 }
966
967 ImmTy getImmTy() const {
968 assert(isImm());
969 return Imm.Type;
970 }
971
972 MCRegister getReg() const override {
973 assert(isRegKind());
974 return Reg.RegNo;
975 }
976
977 SMLoc getStartLoc() const override {
978 return StartLoc;
979 }
980
981 SMLoc getEndLoc() const override {
982 return EndLoc;
983 }
984
985 SMRange getLocRange() const {
986 return SMRange(StartLoc, EndLoc);
987 }
988
989 Modifiers getModifiers() const {
990 assert(isRegKind() || isImmTy(ImmTyNone));
991 return isRegKind() ? Reg.Mods : Imm.Mods;
992 }
993
994 void setModifiers(Modifiers Mods) {
995 assert(isRegKind() || isImmTy(ImmTyNone));
996 if (isRegKind())
997 Reg.Mods = Mods;
998 else
999 Imm.Mods = Mods;
1000 }
1001
1002 bool hasModifiers() const {
1003 return getModifiers().hasModifiers();
1004 }
1005
1006 bool hasFPModifiers() const {
1007 return getModifiers().hasFPModifiers();
1008 }
1009
1010 bool hasIntModifiers() const {
1011 return getModifiers().hasIntModifiers();
1012 }
1013
1014 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1015
1016 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1017
1018 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1019
1020 void addRegOperands(MCInst &Inst, unsigned N) const;
1021
1022 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1023 if (isRegKind())
1024 addRegOperands(Inst, N);
1025 else
1026 addImmOperands(Inst, N);
1027 }
1028
1029 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1030 Modifiers Mods = getModifiers();
1031 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1032 if (isRegKind()) {
1033 addRegOperands(Inst, N);
1034 } else {
1035 addImmOperands(Inst, N, false);
1036 }
1037 }
1038
1039 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1040 assert(!hasIntModifiers());
1041 addRegOrImmWithInputModsOperands(Inst, N);
1042 }
1043
1044 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1045 assert(!hasFPModifiers());
1046 addRegOrImmWithInputModsOperands(Inst, N);
1047 }
1048
1049 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1050 Modifiers Mods = getModifiers();
1051 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1052 assert(isRegKind());
1053 addRegOperands(Inst, N);
1054 }
1055
1056 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1057 assert(!hasIntModifiers());
1058 addRegWithInputModsOperands(Inst, N);
1059 }
1060
1061 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1062 assert(!hasFPModifiers());
1063 addRegWithInputModsOperands(Inst, N);
1064 }
1065
1066 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1067 // clang-format off
1068 switch (Type) {
1069 case ImmTyNone: OS << "None"; break;
1070 case ImmTyGDS: OS << "GDS"; break;
1071 case ImmTyLDS: OS << "LDS"; break;
1072 case ImmTyOffen: OS << "Offen"; break;
1073 case ImmTyIdxen: OS << "Idxen"; break;
1074 case ImmTyAddr64: OS << "Addr64"; break;
1075 case ImmTyOffset: OS << "Offset"; break;
1076 case ImmTyInstOffset: OS << "InstOffset"; break;
1077 case ImmTyOffset0: OS << "Offset0"; break;
1078 case ImmTyOffset1: OS << "Offset1"; break;
1079 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1080 case ImmTyCPol: OS << "CPol"; break;
1081 case ImmTyIndexKey8bit: OS << "index_key"; break;
1082 case ImmTyIndexKey16bit: OS << "index_key"; break;
1083 case ImmTyTFE: OS << "TFE"; break;
1084 case ImmTyD16: OS << "D16"; break;
1085 case ImmTyFORMAT: OS << "FORMAT"; break;
1086 case ImmTyClamp: OS << "Clamp"; break;
1087 case ImmTyOModSI: OS << "OModSI"; break;
1088 case ImmTyDPP8: OS << "DPP8"; break;
1089 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1090 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1091 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1092 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1093 case ImmTyDppFI: OS << "DppFI"; break;
1094 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1095 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1096 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1097 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1098 case ImmTyDMask: OS << "DMask"; break;
1099 case ImmTyDim: OS << "Dim"; break;
1100 case ImmTyUNorm: OS << "UNorm"; break;
1101 case ImmTyDA: OS << "DA"; break;
1102 case ImmTyR128A16: OS << "R128A16"; break;
1103 case ImmTyA16: OS << "A16"; break;
1104 case ImmTyLWE: OS << "LWE"; break;
1105 case ImmTyOff: OS << "Off"; break;
1106 case ImmTyExpTgt: OS << "ExpTgt"; break;
1107 case ImmTyExpCompr: OS << "ExpCompr"; break;
1108 case ImmTyExpVM: OS << "ExpVM"; break;
1109 case ImmTyHwreg: OS << "Hwreg"; break;
1110 case ImmTySendMsg: OS << "SendMsg"; break;
1111 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1112 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1113 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1114 case ImmTyOpSel: OS << "OpSel"; break;
1115 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1116 case ImmTyNegLo: OS << "NegLo"; break;
1117 case ImmTyNegHi: OS << "NegHi"; break;
1118 case ImmTySwizzle: OS << "Swizzle"; break;
1119 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1120 case ImmTyHigh: OS << "High"; break;
1121 case ImmTyBLGP: OS << "BLGP"; break;
1122 case ImmTyCBSZ: OS << "CBSZ"; break;
1123 case ImmTyABID: OS << "ABID"; break;
1124 case ImmTyEndpgm: OS << "Endpgm"; break;
1125 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1126 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1127 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1128 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1129 case ImmTyByteSel: OS << "ByteSel"; break;
1130 }
1131 // clang-format on
1132 }
1133
1134 void print(raw_ostream &OS) const override {
1135 switch (Kind) {
1136 case Register:
1137 OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
1138 << " mods: " << Reg.Mods << '>';
1139 break;
1140 case Immediate:
1141 OS << '<' << getImm();
1142 if (getImmTy() != ImmTyNone) {
1143 OS << " type: "; printImmTy(OS, getImmTy());
1144 }
1145 OS << " mods: " << Imm.Mods << '>';
1146 break;
1147 case Token:
1148 OS << '\'' << getToken() << '\'';
1149 break;
1150 case Expression:
1151 OS << "<expr " << *Expr << '>';
1152 break;
1153 }
1154 }
1155
1156 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1157 int64_t Val, SMLoc Loc,
1158 ImmTy Type = ImmTyNone,
1159 bool IsFPImm = false) {
1160 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1161 Op->Imm.Val = Val;
1162 Op->Imm.IsFPImm = IsFPImm;
1163 Op->Imm.Kind = ImmKindTyNone;
1164 Op->Imm.Type = Type;
1165 Op->Imm.Mods = Modifiers();
1166 Op->StartLoc = Loc;
1167 Op->EndLoc = Loc;
1168 return Op;
1169 }
1170
1171 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1172 StringRef Str, SMLoc Loc,
1173 bool HasExplicitEncodingSize = true) {
1174 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1175 Res->Tok.Data = Str.data();
1176 Res->Tok.Length = Str.size();
1177 Res->StartLoc = Loc;
1178 Res->EndLoc = Loc;
1179 return Res;
1180 }
1181
1182 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1183 unsigned RegNo, SMLoc S,
1184 SMLoc E) {
1185 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1186 Op->Reg.RegNo = RegNo;
1187 Op->Reg.Mods = Modifiers();
1188 Op->StartLoc = S;
1189 Op->EndLoc = E;
1190 return Op;
1191 }
1192
1193 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1194 const class MCExpr *Expr, SMLoc S) {
1195 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1196 Op->Expr = Expr;
1197 Op->StartLoc = S;
1198 Op->EndLoc = S;
1199 return Op;
1200 }
1201};
1202
1203raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1204 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1205 return OS;
1206}
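// A minimal sketch (illustrative only, not used elsewhere in this file) of how
// the CreateImm/CreateReg/CreateToken factories above are combined when
// materializing operands during parsing; "Parser" stands for whatever
// AMDGPUAsmParser instance is driving the parse:
//
//   AMDGPUOperand::Ptr makeAbsImm(const AMDGPUAsmParser *Parser, int64_t Val,
//                                 SMLoc Loc) {
//     auto Op = AMDGPUOperand::CreateImm(Parser, Val, Loc); // ImmTyNone
//     AMDGPUOperand::Modifiers Mods;
//     Mods.Abs = true;          // represents |Val|
//     Op->setModifiers(Mods);   // legal only for registers or ImmTyNone imms
//     return Op;
//   }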
1207
1208//===----------------------------------------------------------------------===//
1209// AsmParser
1210//===----------------------------------------------------------------------===//
1211
1212// Holds info related to the current kernel, e.g. count of SGPRs used.
1213// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1214// .amdgpu_hsa_kernel or at EOF.
1215class KernelScopeInfo {
1216 int SgprIndexUnusedMin = -1;
1217 int VgprIndexUnusedMin = -1;
1218 int AgprIndexUnusedMin = -1;
1219 MCContext *Ctx = nullptr;
1220 MCSubtargetInfo const *MSTI = nullptr;
1221
1222 void usesSgprAt(int i) {
1223 if (i >= SgprIndexUnusedMin) {
1224 SgprIndexUnusedMin = ++i;
1225 if (Ctx) {
1226 MCSymbol* const Sym =
1227 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1228 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1229 }
1230 }
1231 }
1232
1233 void usesVgprAt(int i) {
1234 if (i >= VgprIndexUnusedMin) {
1235 VgprIndexUnusedMin = ++i;
1236 if (Ctx) {
1237 MCSymbol* const Sym =
1238 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1239 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1240 VgprIndexUnusedMin);
1241 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1242 }
1243 }
1244 }
1245
1246 void usesAgprAt(int i) {
1247 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1248 if (!hasMAIInsts(*MSTI))
1249 return;
1250
1251 if (i >= AgprIndexUnusedMin) {
1252 AgprIndexUnusedMin = ++i;
1253 if (Ctx) {
1254 MCSymbol* const Sym =
1255 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1256 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1257
1258 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1259 MCSymbol* const vSym =
1260 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1261 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1262 VgprIndexUnusedMin);
1263 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1264 }
1265 }
1266 }
1267
1268public:
1269 KernelScopeInfo() = default;
1270
1271 void initialize(MCContext &Context) {
1272 Ctx = &Context;
1273 MSTI = Ctx->getSubtargetInfo();
1274
1275 usesSgprAt(SgprIndexUnusedMin = -1);
1276 usesVgprAt(VgprIndexUnusedMin = -1);
1277 if (hasMAIInsts(*MSTI)) {
1278 usesAgprAt(AgprIndexUnusedMin = -1);
1279 }
1280 }
1281
1282 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1283 unsigned RegWidth) {
1284 switch (RegKind) {
1285 case IS_SGPR:
1286 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1287 break;
1288 case IS_AGPR:
1289 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1290 break;
1291 case IS_VGPR:
1292 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1293 break;
1294 default:
1295 break;
1296 }
1297 }
1298};
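// Worked example (illustrative): an instruction that reads v[4:7] leads to
// usesRegister(IS_VGPR, /*DwordRegIndex=*/4, /*RegWidth=*/128), which marks
// VGPR index 4 + divideCeil(128, 32) - 1 = 7 as used. usesVgprAt() then bumps
// VgprIndexUnusedMin to 8 and updates the .kernel.vgpr_count symbol via
// getTotalNumVGPRs, which also folds in AGPR usage on gfx90a-style targets.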
1299
1300class AMDGPUAsmParser : public MCTargetAsmParser {
1301 MCAsmParser &Parser;
1302
1303 unsigned ForcedEncodingSize = 0;
1304 bool ForcedDPP = false;
1305 bool ForcedSDWA = false;
1306 KernelScopeInfo KernelScope;
1307
1308 /// @name Auto-generated Match Functions
1309 /// {
1310
1311#define GET_ASSEMBLER_HEADER
1312#include "AMDGPUGenAsmMatcher.inc"
1313
1314 /// }
1315
1316private:
1317 void createConstantSymbol(StringRef Id, int64_t Val);
1318
1319 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1320 bool OutOfRangeError(SMRange Range);
1321 /// Calculate VGPR/SGPR blocks required for given target, reserved
1322 /// registers, and user-specified NextFreeXGPR values.
1323 ///
1324 /// \param Features [in] Target features, used for bug corrections.
1325 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1326 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1327 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1328 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1329 /// descriptor field, if valid.
1330 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1331 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1332 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1333 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1334 /// \param VGPRBlocks [out] Result VGPR block count.
1335 /// \param SGPRBlocks [out] Result SGPR block count.
1336 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1337 const MCExpr *FlatScrUsed, bool XNACKUsed,
1338 std::optional<bool> EnableWavefrontSize32,
1339 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1340 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1341 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
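 /// Rough sketch of the computation (the granule size below is illustrative
 /// only; the real values come from the target features and wavefront size):
 /// with a VGPR allocation granule of 4, NextFreeVGPR = 10 rounds up to
 /// divideCeil(10, 4) = 3 granules, and the block count emitted into the
 /// kernel descriptor is typically that value minus one.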
1342 bool ParseDirectiveAMDGCNTarget();
1343 bool ParseDirectiveAMDHSACodeObjectVersion();
1344 bool ParseDirectiveAMDHSAKernel();
1345 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1346 bool ParseDirectiveAMDKernelCodeT();
1347 // TODO: Possibly make subtargetHasRegister const.
1348 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1349 bool ParseDirectiveAMDGPUHsaKernel();
1350
1351 bool ParseDirectiveISAVersion();
1352 bool ParseDirectiveHSAMetadata();
1353 bool ParseDirectivePALMetadataBegin();
1354 bool ParseDirectivePALMetadata();
1355 bool ParseDirectiveAMDGPULDS();
1356
1357 /// Common code to parse out a block of text (typically YAML) between start and
1358 /// end directives.
1359 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1360 const char *AssemblerDirectiveEnd,
1361 std::string &CollectString);
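 /// Illustrative use: the HSA metadata directives are handled this way, with
 /// everything between the begin and end markers collected verbatim into
 /// CollectString before being handed to the YAML parser (the snippet below
 /// only shows the expected shape):
 ///
 ///   .amd_amdgpu_hsa_metadata
 ///   amdhsa.version:
 ///     - 1
 ///     - 0
 ///   .end_amd_amdgpu_hsa_metadata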
1362
1363 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1364 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1365 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1366 unsigned &RegNum, unsigned &RegWidth,
1367 bool RestoreOnFailure = false);
1368 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1369                          unsigned &RegNum, unsigned &RegWidth,
1370                          SmallVectorImpl<AsmToken> &Tokens);
1371 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1372                          unsigned &RegWidth,
1373                          SmallVectorImpl<AsmToken> &Tokens);
1374 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1375                          unsigned &RegWidth,
1376                          SmallVectorImpl<AsmToken> &Tokens);
1377 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1378 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1379 bool ParseRegRange(unsigned& Num, unsigned& Width);
1380 unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
1381 unsigned RegWidth, SMLoc Loc);
1382
1383 bool isRegister();
1384 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1385 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1386 void initializeGprCountSymbol(RegisterKind RegKind);
1387 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1388 unsigned RegWidth);
1389 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1390 bool IsAtomic);
1391
1392public:
1393 enum OperandMode {
1394 OperandMode_Default,
1395 OperandMode_NSA,
1396 };
1397
1398 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1399
1400 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1401 const MCInstrInfo &MII,
1402 const MCTargetOptions &Options)
1403 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1405
1406 if (getFeatureBits().none()) {
1407 // Set default features.
1408 copySTI().ToggleFeature("southern-islands");
1409 }
1410
1411 FeatureBitset FB = getFeatureBits();
1412 if (!FB[AMDGPU::FeatureWavefrontSize64] &&
1413 !FB[AMDGPU::FeatureWavefrontSize32]) {
1414 // If there is no default wave size, this must be a generation before gfx10;
1415 // those targets already have FeatureWavefrontSize64 in their definition.
1416 // For gfx10+, set wave32 as the default.
1417 copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
1418 }
1419
1420 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1421
1422 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1423 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1424 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1425 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1426 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1427 } else {
1428 createConstantSymbol(".option.machine_version_major", ISA.Major);
1429 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1430 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1431 }
1432 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1433 initializeGprCountSymbol(IS_VGPR);
1434 initializeGprCountSymbol(IS_SGPR);
1435 } else
1436 KernelScope.initialize(getContext());
1437
1438 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1439 createConstantSymbol(Symbol, Code);
1440
1441 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1442 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1443 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1444 }
1445
1446 bool hasMIMG_R128() const {
1447 return AMDGPU::hasMIMG_R128(getSTI());
1448 }
1449
1450 bool hasPackedD16() const {
1451 return AMDGPU::hasPackedD16(getSTI());
1452 }
1453
1454 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1455
1456 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1457
1458 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1459
1460 bool isSI() const {
1461 return AMDGPU::isSI(getSTI());
1462 }
1463
1464 bool isCI() const {
1465 return AMDGPU::isCI(getSTI());
1466 }
1467
1468 bool isVI() const {
1469 return AMDGPU::isVI(getSTI());
1470 }
1471
1472 bool isGFX9() const {
1473 return AMDGPU::isGFX9(getSTI());
1474 }
1475
1476 // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1477 bool isGFX90A() const {
1478 return AMDGPU::isGFX90A(getSTI());
1479 }
1480
1481 bool isGFX940() const {
1482 return AMDGPU::isGFX940(getSTI());
1483 }
1484
1485 bool isGFX9Plus() const {
1486 return AMDGPU::isGFX9Plus(getSTI());
1487 }
1488
1489 bool isGFX10() const {
1490 return AMDGPU::isGFX10(getSTI());
1491 }
1492
1493 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1494
1495 bool isGFX11() const {
1496 return AMDGPU::isGFX11(getSTI());
1497 }
1498
1499 bool isGFX11Plus() const {
1500 return AMDGPU::isGFX11Plus(getSTI());
1501 }
1502
1503 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1504
1505 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1506
1507 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1508
1509 bool isGFX10_BEncoding() const {
1510   return AMDGPU::isGFX10_BEncoding(getSTI());
1511 }
1512
1513 bool hasInv2PiInlineImm() const {
1514 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1515 }
1516
1517 bool hasFlatOffsets() const {
1518 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1519 }
1520
1521 bool hasArchitectedFlatScratch() const {
1522 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1523 }
1524
1525 bool hasSGPR102_SGPR103() const {
1526 return !isVI() && !isGFX9();
1527 }
1528
1529 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1530
1531 bool hasIntClamp() const {
1532 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1533 }
1534
1535 bool hasPartialNSAEncoding() const {
1536 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1537 }
1538
1539 unsigned getNSAMaxSize(bool HasSampler = false) const {
1540 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1541 }
1542
1543 unsigned getMaxNumUserSGPRs() const {
1544   return AMDGPU::getMaxNumUserSGPRs(getSTI());
1545 }
1546
1547 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1548
1549 AMDGPUTargetStreamer &getTargetStreamer() {
1550   MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1551   return static_cast<AMDGPUTargetStreamer &>(TS);
1552 }
1553
1554 const MCRegisterInfo *getMRI() const {
1555 // We need this const_cast because for some reason getContext() is not const
1556 // in MCAsmParser.
1557 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1558 }
1559
1560 const MCInstrInfo *getMII() const {
1561 return &MII;
1562 }
1563
1564 const FeatureBitset &getFeatureBits() const {
1565 return getSTI().getFeatureBits();
1566 }
1567
1568 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1569 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1570 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1571
1572 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1573 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1574 bool isForcedDPP() const { return ForcedDPP; }
1575 bool isForcedSDWA() const { return ForcedSDWA; }
1576 ArrayRef<unsigned> getMatchedVariants() const;
1577 StringRef getMatchedVariantName() const;
1578
1579 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1580 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1581 bool RestoreOnFailure);
1582 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1583 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1584                              SMLoc &EndLoc) override;
1585 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1586 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1587                                     unsigned Kind) override;
1588 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1589                              OperandVector &Operands, MCStreamer &Out,
1590                              uint64_t &ErrorInfo,
1591                              bool MatchingInlineAsm) override;
1592 bool ParseDirective(AsmToken DirectiveID) override;
1593 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1594 OperandMode Mode = OperandMode_Default);
1595 StringRef parseMnemonicSuffix(StringRef Name);
1596 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1597                       SMLoc NameLoc, OperandVector &Operands) override;
1598 //bool ProcessInstruction(MCInst &Inst);
1599
1601
1602 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1603
1605 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1606 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1607 std::function<bool(int64_t &)> ConvertResult = nullptr);
1608
1609 ParseStatus parseOperandArrayWithPrefix(
1610 const char *Prefix, OperandVector &Operands,
1611 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1612 bool (*ConvertResult)(int64_t &) = nullptr);
1613
1615 parseNamedBit(StringRef Name, OperandVector &Operands,
1616 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1617 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1619 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1620 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1621 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1622 SMLoc &StringLoc);
1623 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1626 int64_t &IntVal);
1627 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1630 AMDGPUOperand::ImmTy Type);
1631
1632 bool isModifier();
1633 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1634 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1635 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1636 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1637 bool parseSP3NegModifier();
1638 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1639 bool HasLit = false);
1641 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1642 bool HasLit = false);
1643 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1644 bool AllowImm = true);
1645 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1646 bool AllowImm = true);
1647 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1648 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1649 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1650 ParseStatus tryParseIndexKey(OperandVector &Operands,
1651 AMDGPUOperand::ImmTy ImmTy);
1652 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1653 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1654
1655 ParseStatus parseDfmtNfmt(int64_t &Format);
1656 ParseStatus parseUfmt(int64_t &Format);
1657 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1658 int64_t &Format);
1659 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1660 int64_t &Format);
1661 ParseStatus parseFORMAT(OperandVector &Operands);
1662 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1663 ParseStatus parseNumericFormat(int64_t &Format);
1664 ParseStatus parseFlatOffset(OperandVector &Operands);
1665 ParseStatus parseR128A16(OperandVector &Operands);
1667 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1668 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1669
1670 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1671
1672 bool parseCnt(int64_t &IntVal);
1673 ParseStatus parseSWaitCnt(OperandVector &Operands);
1674
1675 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1676 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1677 ParseStatus parseDepCtr(OperandVector &Operands);
1678
1679 bool parseDelay(int64_t &Delay);
1680 ParseStatus parseSDelayALU(OperandVector &Operands);
1681
1682 ParseStatus parseHwreg(OperandVector &Operands);
1683
1684private:
1685 struct OperandInfoTy {
1686 SMLoc Loc;
1687 int64_t Val;
1688 bool IsSymbolic = false;
1689 bool IsDefined = false;
1690
1691 OperandInfoTy(int64_t Val) : Val(Val) {}
1692 };
1693
1694 struct StructuredOpField : OperandInfoTy {
1695   StringLiteral Id;
1696   StringLiteral Desc;
1697   unsigned Width;
1698 bool IsDefined = false;
1699
1700 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1701 int64_t Default)
1702 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1703 virtual ~StructuredOpField() = default;
1704
1705 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1706 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1707 return false;
1708 }
1709
1710 virtual bool validate(AMDGPUAsmParser &Parser) const {
1711 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1712 return Error(Parser, "not supported on this GPU");
1713 if (!isUIntN(Width, Val))
1714 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1715 return true;
1716 }
1717 };
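 // Illustrative reading of validate(): a field declared with Width == 5
 // accepts only the values 0..31; anything wider produces
 // "invalid <Desc>: only 5-bit values are legal", and a symbolic name that
 // resolved to OPR_ID_UNSUPPORTED is rejected as not supported on the
 // current GPU.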
1718
1719 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1720 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1721
1722 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1723 bool validateSendMsg(const OperandInfoTy &Msg,
1724 const OperandInfoTy &Op,
1725 const OperandInfoTy &Stream);
1726
1727 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1728 OperandInfoTy &Width);
1729
1730 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1731 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1732 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1733
1734 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1735 const OperandVector &Operands) const;
1736 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1737 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1738 SMLoc getLitLoc(const OperandVector &Operands,
1739 bool SearchMandatoryLiterals = false) const;
1740 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1741 SMLoc getConstLoc(const OperandVector &Operands) const;
1742 SMLoc getInstLoc(const OperandVector &Operands) const;
1743
1744 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1745 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1746 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1747 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1748 bool validateSOPLiteral(const MCInst &Inst) const;
1749 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1750 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1751 const OperandVector &Operands);
1752 bool validateIntClampSupported(const MCInst &Inst);
1753 bool validateMIMGAtomicDMask(const MCInst &Inst);
1754 bool validateMIMGGatherDMask(const MCInst &Inst);
1755 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1756 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1757 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1758 bool validateMIMGD16(const MCInst &Inst);
1759 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1760 bool validateMIMGMSAA(const MCInst &Inst);
1761 bool validateOpSel(const MCInst &Inst);
1762 bool validateNeg(const MCInst &Inst, int OpName);
1763 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1764 bool validateVccOperand(unsigned Reg) const;
1765 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1766 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1767 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1768 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1769 bool validateAGPRLdSt(const MCInst &Inst) const;
1770 bool validateVGPRAlign(const MCInst &Inst) const;
1771 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1772 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1773 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1774 bool validateDivScale(const MCInst &Inst);
1775 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1776 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1777 const SMLoc &IDLoc);
1778 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1779 const unsigned CPol);
1780 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1781 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1782 unsigned getConstantBusLimit(unsigned Opcode) const;
1783 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1784 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1785 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1786
1787 bool isSupportedMnemo(StringRef Mnemo,
1788 const FeatureBitset &FBS);
1789 bool isSupportedMnemo(StringRef Mnemo,
1790 const FeatureBitset &FBS,
1791 ArrayRef<unsigned> Variants);
1792 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1793
1794 bool isId(const StringRef Id) const;
1795 bool isId(const AsmToken &Token, const StringRef Id) const;
1796 bool isToken(const AsmToken::TokenKind Kind) const;
1797 StringRef getId() const;
1798 bool trySkipId(const StringRef Id);
1799 bool trySkipId(const StringRef Pref, const StringRef Id);
1800 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1801 bool trySkipToken(const AsmToken::TokenKind Kind);
1802 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1803 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1804 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1805
1806 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1807 AsmToken::TokenKind getTokenKind() const;
1808 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1810 StringRef getTokenStr() const;
1811 AsmToken peekToken(bool ShouldSkipSpace = true);
1812 AsmToken getToken() const;
1813 SMLoc getLoc() const;
1814 void lex();
1815
1816public:
1817 void onBeginOfFile() override;
1818 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1819
1820 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1821
1822 ParseStatus parseExpTgt(OperandVector &Operands);
1823 ParseStatus parseSendMsg(OperandVector &Operands);
1824 ParseStatus parseInterpSlot(OperandVector &Operands);
1825 ParseStatus parseInterpAttr(OperandVector &Operands);
1826 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1827 ParseStatus parseBoolReg(OperandVector &Operands);
1828
1829 bool parseSwizzleOperand(int64_t &Op,
1830 const unsigned MinVal,
1831 const unsigned MaxVal,
1832 const StringRef ErrMsg,
1833 SMLoc &Loc);
1834 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1835 const unsigned MinVal,
1836 const unsigned MaxVal,
1837 const StringRef ErrMsg);
1838 ParseStatus parseSwizzle(OperandVector &Operands);
1839 bool parseSwizzleOffset(int64_t &Imm);
1840 bool parseSwizzleMacro(int64_t &Imm);
1841 bool parseSwizzleQuadPerm(int64_t &Imm);
1842 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1843 bool parseSwizzleBroadcast(int64_t &Imm);
1844 bool parseSwizzleSwap(int64_t &Imm);
1845 bool parseSwizzleReverse(int64_t &Imm);
1846
1847 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1848 int64_t parseGPRIdxMacro();
1849
1850 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1851 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1852
1853 ParseStatus parseOModSI(OperandVector &Operands);
1854
1855 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1856 OptionalImmIndexMap &OptionalIdx);
1857 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1858 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1859 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1860 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1861
1862 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1863 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1864 OptionalImmIndexMap &OptionalIdx);
1865 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1866 OptionalImmIndexMap &OptionalIdx);
1867
1868 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1869 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1870
1871 bool parseDimId(unsigned &Encoding);
1873 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1875 ParseStatus parseDPPCtrl(OperandVector &Operands);
1876 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1877 int64_t parseDPPCtrlSel(StringRef Ctrl);
1878 int64_t parseDPPCtrlPerm();
1879 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1880 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1881 cvtDPP(Inst, Operands, true);
1882 }
1883 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1884 bool IsDPP8 = false);
1885 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1886 cvtVOP3DPP(Inst, Operands, true);
1887 }
1888
1889 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1890 AMDGPUOperand::ImmTy Type);
1891 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1892 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1893 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1894 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1895 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1896 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1897 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1898 uint64_t BasicInstType,
1899 bool SkipDstVcc = false,
1900 bool SkipSrcVcc = false);
1901
1902 ParseStatus parseEndpgm(OperandVector &Operands);
1903
1905};
1906
1907} // end anonymous namespace
1908
1909// May be called with integer type with equivalent bitwidth.
1910static const fltSemantics *getFltSemantics(unsigned Size) {
1911 switch (Size) {
1912 case 4:
1913 return &APFloat::IEEEsingle();
1914 case 8:
1915 return &APFloat::IEEEdouble();
1916 case 2:
1917 return &APFloat::IEEEhalf();
1918 default:
1919 llvm_unreachable("unsupported fp type");
1920 }
1921}
1922
1923 static const fltSemantics *getFltSemantics(MVT VT) {
1924 return getFltSemantics(VT.getSizeInBits() / 8);
1925}
1926
1927 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1928 switch (OperandType) {
1929 // When a floating-point immediate is used as an operand of type i16, the 32-bit
1930 // representation of the constant truncated to the 16 LSBs should be used.
1950 return &APFloat::IEEEsingle();
1956 return &APFloat::IEEEdouble();
1965 return &APFloat::IEEEhalf();
1973 return &APFloat::BFloat();
1974 default:
1975 llvm_unreachable("unsupported fp type");
1976 }
1977}
1978
1979//===----------------------------------------------------------------------===//
1980// Operand
1981//===----------------------------------------------------------------------===//
1982
1983static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1984 bool Lost;
1985
1986 // Convert literal to single precision
1987 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1988 APFloat::rmNearestTiesToEven,
1989 &Lost);
1990 // We allow precision loss but not overflow or underflow
1991 if (Status != APFloat::opOK &&
1992 Lost &&
1993 ((Status & APFloat::opOverflow) != 0 ||
1994 (Status & APFloat::opUnderflow) != 0)) {
1995 return false;
1996 }
1997
1998 return true;
1999}
2000
2001static bool isSafeTruncation(int64_t Val, unsigned Size) {
2002 return isUIntN(Size, Val) || isIntN(Size, Val);
2003}
2004
2005static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2006 if (VT.getScalarType() == MVT::i16)
2007 return isInlinableLiteral32(Val, HasInv2Pi);
2008
2009 if (VT.getScalarType() == MVT::f16)
2010 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2011
2012 assert(VT.getScalarType() == MVT::bf16);
2013
2014 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2015}
2016
2017bool AMDGPUOperand::isInlinableImm(MVT type) const {
2018
2019 // This is a hack to enable named inline values like
2020 // shared_base with both 32-bit and 64-bit operands.
2021 // Note that these values are defined as
2022 // 32-bit operands only.
2023 if (isInlineValue()) {
2024 return true;
2025 }
2026
2027 if (!isImmTy(ImmTyNone)) {
2028 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2029 return false;
2030 }
2031 // TODO: We should avoid using host float here. It would be better to
2032 // check the float bit values, which is what a few other places do.
2033 // We've had bot failures before due to weird NaN support on MIPS hosts.
2034
2035 APInt Literal(64, Imm.Val);
2036
2037 if (Imm.IsFPImm) { // We got fp literal token
2038 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2039 return AMDGPU::isInlinableLiteral64(Imm.Val,
2040 AsmParser->hasInv2PiInlineImm());
2041 }
2042
2043 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2044 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2045 return false;
2046
2047 if (type.getScalarSizeInBits() == 16) {
2048 bool Lost = false;
2049 switch (type.getScalarType().SimpleTy) {
2050 default:
2051 llvm_unreachable("unknown 16-bit type");
2052 case MVT::bf16:
2053 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2054 &Lost);
2055 break;
2056 case MVT::f16:
2057 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2058 &Lost);
2059 break;
2060 case MVT::i16:
2061 FPLiteral.convert(APFloatBase::IEEEsingle(),
2062 APFloat::rmNearestTiesToEven, &Lost);
2063 break;
2064 }
2065 // We need to use the 32-bit representation here because when a floating-point
2066 // inline constant is used as an i16 operand, its 32-bit representation
2067 // will be used. We will need the 32-bit value to check if it is
2068 // an FP inline constant.
2069 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2070 return isInlineableLiteralOp16(ImmVal, type,
2071 AsmParser->hasInv2PiInlineImm());
2072 }
2073
2074 // Check if single precision literal is inlinable
2075 return AMDGPU::isInlinableLiteral32(
2076 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2077 AsmParser->hasInv2PiInlineImm());
2078 }
2079
2080 // We got int literal token.
2081 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2082 return AMDGPU::isInlinableLiteral64(Imm.Val,
2083 AsmParser->hasInv2PiInlineImm());
2084 }
2085
2086 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2087 return false;
2088 }
2089
2090 if (type.getScalarSizeInBits() == 16) {
2091 return isInlineableLiteralOp16(
2092 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2093 type, AsmParser->hasInv2PiInlineImm());
2094 }
2095
2096 return AMDGPU::isInlinableLiteral32(
2097 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2098 AsmParser->hasInv2PiInlineImm());
2099}
2100
2101bool AMDGPUOperand::isLiteralImm(MVT type) const {
2102 // Check that this immediate can be added as a literal
2103 if (!isImmTy(ImmTyNone)) {
2104 return false;
2105 }
2106
2107 if (!Imm.IsFPImm) {
2108 // We got int literal token.
2109
2110 if (type == MVT::f64 && hasFPModifiers()) {
2111 // FP modifiers cannot be applied to int literals while preserving the same
2112 // semantics for VOP1/2/C and VOP3, because of integer truncation. To avoid
2113 // ambiguity, disable these cases.
2114 return false;
2115 }
2116
2117 unsigned Size = type.getSizeInBits();
2118 if (Size == 64)
2119 Size = 32;
2120
2121 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2122 // types.
2123 return isSafeTruncation(Imm.Val, Size);
2124 }
2125
2126 // We got fp literal token
2127 if (type == MVT::f64) { // Expected 64-bit fp operand
2128 // The low 32 bits of the literal would be set to zeroes, but we accept such literals
2129 return true;
2130 }
2131
2132 if (type == MVT::i64) { // Expected 64-bit int operand
2133 // We don't allow fp literals in 64-bit integer instructions. It is
2134 // unclear how we should encode them.
2135 return false;
2136 }
2137
2138 // We allow fp literals with f16x2 operands assuming that the specified
2139 // literal goes into the lower half and the upper half is zero. We also
2140 // require that the literal may be losslessly converted to f16.
2141 //
2142 // For i16x2 operands, we assume that the specified literal is encoded as a
2143 // single-precision float. This is pretty odd, but it matches SP3 and what
2144 // happens in hardware.
2145 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2146 : (type == MVT::v2i16) ? MVT::f32
2147 : (type == MVT::v2f32) ? MVT::f32
2148 : type;
2149
2150 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2151 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2152}
2153
2154bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2155 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2156}
2157
2158bool AMDGPUOperand::isVRegWithInputMods() const {
2159 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2160 // GFX90A allows DPP on 64-bit operands.
2161 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2162 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2163}
2164
2165template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2166 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2167 : AMDGPU::VGPR_16_Lo128RegClassID);
2168}
2169
2170bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2171 if (AsmParser->isVI())
2172 return isVReg32();
2173 if (AsmParser->isGFX9Plus())
2174 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2175 return false;
2176}
2177
2178bool AMDGPUOperand::isSDWAFP16Operand() const {
2179 return isSDWAOperand(MVT::f16);
2180}
2181
2182bool AMDGPUOperand::isSDWAFP32Operand() const {
2183 return isSDWAOperand(MVT::f32);
2184}
2185
2186bool AMDGPUOperand::isSDWAInt16Operand() const {
2187 return isSDWAOperand(MVT::i16);
2188}
2189
2190bool AMDGPUOperand::isSDWAInt32Operand() const {
2191 return isSDWAOperand(MVT::i32);
2192}
2193
2194bool AMDGPUOperand::isBoolReg() const {
2195 auto FB = AsmParser->getFeatureBits();
2196 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2197 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2198}
2199
2200uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2201{
2202 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2203 assert(Size == 2 || Size == 4 || Size == 8);
2204
2205 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2206
2207 if (Imm.Mods.Abs) {
2208 Val &= ~FpSignMask;
2209 }
2210 if (Imm.Mods.Neg) {
2211 Val ^= FpSignMask;
2212 }
2213
2214 return Val;
2215}
2216
2217void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2218 if (isExpr()) {
2220 return;
2221 }
2222
2223 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2224 Inst.getNumOperands())) {
2225 addLiteralImmOperand(Inst, Imm.Val,
2226 ApplyModifiers &
2227 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2228 } else {
2229 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2231 setImmKindNone();
2232 }
2233}
2234
2235void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2236 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2237 auto OpNum = Inst.getNumOperands();
2238 // Check that this operand accepts literals
2239 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2240
2241 if (ApplyModifiers) {
2242 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2243 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2244 Val = applyInputFPModifiers(Val, Size);
2245 }
2246
2247 APInt Literal(64, Val);
2248 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2249
2250 if (Imm.IsFPImm) { // We got fp literal token
2251 switch (OpTy) {
2257 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2258 AsmParser->hasInv2PiInlineImm())) {
2259 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2260 setImmKindConst();
2261 return;
2262 }
2263
2264 // Non-inlineable
2265 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2266 // For fp operands we check if low 32 bits are zeros
2267 if (Literal.getLoBits(32) != 0) {
2268 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2269 "Can't encode literal as exact 64-bit floating-point operand. "
2270 "Low 32-bits will be set to zero");
2271 Val &= 0xffffffff00000000u;
2272 }
2273
2275 setImmKindLiteral();
2276 return;
2277 }
2278
2279 // We don't allow fp literals in 64-bit integer instructions. It is
2280 // unclear how we should encode them. This case should be checked earlier
2281 // in predicate methods (isLiteralImm())
2282 llvm_unreachable("fp literal in 64-bit integer instruction.");
2283
2291 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2292 // This is 1/(2*pi), which is going to be truncated to bf16 with a
2293 // loss of precision. The constant represents the idiomatic fp32 value of
2294 // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16 bits
2295 // cleared. Prevent rounding below.
2296 Inst.addOperand(MCOperand::createImm(0x3e22));
2297 setImmKindLiteral();
2298 return;
2299 }
2300 [[fallthrough]];
2301
2329 bool lost;
2330 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2331 // Convert literal to single precision
2332 FPLiteral.convert(*getOpFltSemantics(OpTy),
2333 APFloat::rmNearestTiesToEven, &lost);
2334 // We allow precision loss but not overflow or underflow. This should be
2335 // checked earlier in isLiteralImm()
2336
2337 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2338 Inst.addOperand(MCOperand::createImm(ImmVal));
2339 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2340 setImmKindMandatoryLiteral();
2341 } else {
2342 setImmKindLiteral();
2343 }
2344 return;
2345 }
2346 default:
2347 llvm_unreachable("invalid operand size");
2348 }
2349
2350 return;
2351 }
2352
2353 // We got int literal token.
2354 // Only sign extend inline immediates.
2355 switch (OpTy) {
2371 if (isSafeTruncation(Val, 32) &&
2372 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2373 AsmParser->hasInv2PiInlineImm())) {
2375 setImmKindConst();
2376 return;
2377 }
2378
2379 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2380 setImmKindLiteral();
2381 return;
2382
2388 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2390 setImmKindConst();
2391 return;
2392 }
2393
2394 Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2395 : Lo_32(Val);
2396
2398 setImmKindLiteral();
2399 return;
2400
2404 if (isSafeTruncation(Val, 16) &&
2405 AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2406 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2407 setImmKindConst();
2408 return;
2409 }
2410
2411 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2412 setImmKindLiteral();
2413 return;
2414
2419 if (isSafeTruncation(Val, 16) &&
2420 AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2421 AsmParser->hasInv2PiInlineImm())) {
2423 setImmKindConst();
2424 return;
2425 }
2426
2427 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2428 setImmKindLiteral();
2429 return;
2430
2435 if (isSafeTruncation(Val, 16) &&
2436 AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2437 AsmParser->hasInv2PiInlineImm())) {
2439 setImmKindConst();
2440 return;
2441 }
2442
2443 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2444 setImmKindLiteral();
2445 return;
2446
2449 assert(isSafeTruncation(Val, 16));
2450 assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2452 return;
2453 }
2456 assert(isSafeTruncation(Val, 16));
2457 assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2458 AsmParser->hasInv2PiInlineImm()));
2459
2461 return;
2462 }
2463
2466 assert(isSafeTruncation(Val, 16));
2467 assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2468 AsmParser->hasInv2PiInlineImm()));
2469
2471 return;
2472 }
2473
2475 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2476 setImmKindMandatoryLiteral();
2477 return;
2479 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2480 setImmKindMandatoryLiteral();
2481 return;
2482 default:
2483 llvm_unreachable("invalid operand size");
2484 }
2485}
2486
2487void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2488 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2489}
2490
2491bool AMDGPUOperand::isInlineValue() const {
2492 return isRegKind() && ::isInlineValue(getReg());
2493}
2494
2495//===----------------------------------------------------------------------===//
2496// AsmParser
2497//===----------------------------------------------------------------------===//
2498
2499void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2500 // TODO: make these pre-defined variables read-only.
2501 // Currently there is no suitable machinery in core llvm-mc for this.
2502 // MCSymbol::isRedefinable is intended for another purpose, and
2503 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
2504 MCContext &Ctx = getContext();
2505 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2506 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2507}
2508
2509static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2510 if (Is == IS_VGPR) {
2511 switch (RegWidth) {
2512 default: return -1;
2513 case 32:
2514 return AMDGPU::VGPR_32RegClassID;
2515 case 64:
2516 return AMDGPU::VReg_64RegClassID;
2517 case 96:
2518 return AMDGPU::VReg_96RegClassID;
2519 case 128:
2520 return AMDGPU::VReg_128RegClassID;
2521 case 160:
2522 return AMDGPU::VReg_160RegClassID;
2523 case 192:
2524 return AMDGPU::VReg_192RegClassID;
2525 case 224:
2526 return AMDGPU::VReg_224RegClassID;
2527 case 256:
2528 return AMDGPU::VReg_256RegClassID;
2529 case 288:
2530 return AMDGPU::VReg_288RegClassID;
2531 case 320:
2532 return AMDGPU::VReg_320RegClassID;
2533 case 352:
2534 return AMDGPU::VReg_352RegClassID;
2535 case 384:
2536 return AMDGPU::VReg_384RegClassID;
2537 case 512:
2538 return AMDGPU::VReg_512RegClassID;
2539 case 1024:
2540 return AMDGPU::VReg_1024RegClassID;
2541 }
2542 } else if (Is == IS_TTMP) {
2543 switch (RegWidth) {
2544 default: return -1;
2545 case 32:
2546 return AMDGPU::TTMP_32RegClassID;
2547 case 64:
2548 return AMDGPU::TTMP_64RegClassID;
2549 case 128:
2550 return AMDGPU::TTMP_128RegClassID;
2551 case 256:
2552 return AMDGPU::TTMP_256RegClassID;
2553 case 512:
2554 return AMDGPU::TTMP_512RegClassID;
2555 }
2556 } else if (Is == IS_SGPR) {
2557 switch (RegWidth) {
2558 default: return -1;
2559 case 32:
2560 return AMDGPU::SGPR_32RegClassID;
2561 case 64:
2562 return AMDGPU::SGPR_64RegClassID;
2563 case 96:
2564 return AMDGPU::SGPR_96RegClassID;
2565 case 128:
2566 return AMDGPU::SGPR_128RegClassID;
2567 case 160:
2568 return AMDGPU::SGPR_160RegClassID;
2569 case 192:
2570 return AMDGPU::SGPR_192RegClassID;
2571 case 224:
2572 return AMDGPU::SGPR_224RegClassID;
2573 case 256:
2574 return AMDGPU::SGPR_256RegClassID;
2575 case 288:
2576 return AMDGPU::SGPR_288RegClassID;
2577 case 320:
2578 return AMDGPU::SGPR_320RegClassID;
2579 case 352:
2580 return AMDGPU::SGPR_352RegClassID;
2581 case 384:
2582 return AMDGPU::SGPR_384RegClassID;
2583 case 512:
2584 return AMDGPU::SGPR_512RegClassID;
2585 }
2586 } else if (Is == IS_AGPR) {
2587 switch (RegWidth) {
2588 default: return -1;
2589 case 32:
2590 return AMDGPU::AGPR_32RegClassID;
2591 case 64:
2592 return AMDGPU::AReg_64RegClassID;
2593 case 96:
2594 return AMDGPU::AReg_96RegClassID;
2595 case 128:
2596 return AMDGPU::AReg_128RegClassID;
2597 case 160:
2598 return AMDGPU::AReg_160RegClassID;
2599 case 192:
2600 return AMDGPU::AReg_192RegClassID;
2601 case 224:
2602 return AMDGPU::AReg_224RegClassID;
2603 case 256:
2604 return AMDGPU::AReg_256RegClassID;
2605 case 288:
2606 return AMDGPU::AReg_288RegClassID;
2607 case 320:
2608 return AMDGPU::AReg_320RegClassID;
2609 case 352:
2610 return AMDGPU::AReg_352RegClassID;
2611 case 384:
2612 return AMDGPU::AReg_384RegClassID;
2613 case 512:
2614 return AMDGPU::AReg_512RegClassID;
2615 case 1024:
2616 return AMDGPU::AReg_1024RegClassID;
2617 }
2618 }
2619 return -1;
2620}
2621
2624 .Case("exec", AMDGPU::EXEC)
2625 .Case("vcc", AMDGPU::VCC)
2626 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2627 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2628 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2629 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2630 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2631 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2632 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2633 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2634 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2635 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2636 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2637 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2638 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2639 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2640 .Case("m0", AMDGPU::M0)
2641 .Case("vccz", AMDGPU::SRC_VCCZ)
2642 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2643 .Case("execz", AMDGPU::SRC_EXECZ)
2644 .Case("src_execz", AMDGPU::SRC_EXECZ)
2645 .Case("scc", AMDGPU::SRC_SCC)
2646 .Case("src_scc", AMDGPU::SRC_SCC)
2647 .Case("tba", AMDGPU::TBA)
2648 .Case("tma", AMDGPU::TMA)
2649 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2650 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2651 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2652 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2653 .Case("vcc_lo", AMDGPU::VCC_LO)
2654 .Case("vcc_hi", AMDGPU::VCC_HI)
2655 .Case("exec_lo", AMDGPU::EXEC_LO)
2656 .Case("exec_hi", AMDGPU::EXEC_HI)
2657 .Case("tma_lo", AMDGPU::TMA_LO)
2658 .Case("tma_hi", AMDGPU::TMA_HI)
2659 .Case("tba_lo", AMDGPU::TBA_LO)
2660 .Case("tba_hi", AMDGPU::TBA_HI)
2661 .Case("pc", AMDGPU::PC_REG)
2662 .Case("null", AMDGPU::SGPR_NULL)
2663 .Default(AMDGPU::NoRegister);
2664}
2665
2666bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2667 SMLoc &EndLoc, bool RestoreOnFailure) {
2668 auto R = parseRegister();
2669 if (!R) return true;
2670 assert(R->isReg());
2671 RegNo = R->getReg();
2672 StartLoc = R->getStartLoc();
2673 EndLoc = R->getEndLoc();
2674 return false;
2675}
2676
2677bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2678 SMLoc &EndLoc) {
2679 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2680}
2681
2682ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2683 SMLoc &EndLoc) {
2684 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2685 bool PendingErrors = getParser().hasPendingError();
2686 getParser().clearPendingErrors();
2687 if (PendingErrors)
2688 return ParseStatus::Failure;
2689 if (Result)
2690 return ParseStatus::NoMatch;
2691 return ParseStatus::Success;
2692}
2693
2694bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2695 RegisterKind RegKind, unsigned Reg1,
2696 SMLoc Loc) {
2697 switch (RegKind) {
2698 case IS_SPECIAL:
2699 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2700 Reg = AMDGPU::EXEC;
2701 RegWidth = 64;
2702 return true;
2703 }
2704 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2705 Reg = AMDGPU::FLAT_SCR;
2706 RegWidth = 64;
2707 return true;
2708 }
2709 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2710 Reg = AMDGPU::XNACK_MASK;
2711 RegWidth = 64;
2712 return true;
2713 }
2714 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2715 Reg = AMDGPU::VCC;
2716 RegWidth = 64;
2717 return true;
2718 }
2719 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2720 Reg = AMDGPU::TBA;
2721 RegWidth = 64;
2722 return true;
2723 }
2724 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2725 Reg = AMDGPU::TMA;
2726 RegWidth = 64;
2727 return true;
2728 }
2729 Error(Loc, "register does not fit in the list");
2730 return false;
2731 case IS_VGPR:
2732 case IS_SGPR:
2733 case IS_AGPR:
2734 case IS_TTMP:
2735 if (Reg1 != Reg + RegWidth / 32) {
2736 Error(Loc, "registers in a list must have consecutive indices");
2737 return false;
2738 }
2739 RegWidth += 32;
2740 return true;
2741 default:
2742 llvm_unreachable("unexpected register kind");
2743 }
2744}
2745
2746struct RegInfo {
2747 StringLiteral Name;
2748 RegisterKind Kind;
2749};
2750
2751static constexpr RegInfo RegularRegisters[] = {
2752 {{"v"}, IS_VGPR},
2753 {{"s"}, IS_SGPR},
2754 {{"ttmp"}, IS_TTMP},
2755 {{"acc"}, IS_AGPR},
2756 {{"a"}, IS_AGPR},
2757};
2758
2759static bool isRegularReg(RegisterKind Kind) {
2760 return Kind == IS_VGPR ||
2761 Kind == IS_SGPR ||
2762 Kind == IS_TTMP ||
2763 Kind == IS_AGPR;
2764}
2765
2766 static const RegInfo* getRegularRegInfo(StringRef Str) {
2767 for (const RegInfo &Reg : RegularRegisters)
2768 if (Str.starts_with(Reg.Name))
2769 return &Reg;
2770 return nullptr;
2771}
2772
2773static bool getRegNum(StringRef Str, unsigned& Num) {
2774 return !Str.getAsInteger(10, Num);
2775}
2776
2777bool
2778AMDGPUAsmParser::isRegister(const AsmToken &Token,
2779 const AsmToken &NextToken) const {
2780
2781 // A list of consecutive registers: [s0,s1,s2,s3]
2782 if (Token.is(AsmToken::LBrac))
2783 return true;
2784
2785 if (!Token.is(AsmToken::Identifier))
2786 return false;
2787
2788 // A single register like s0 or a range of registers like s[0:1]
2789
2790 StringRef Str = Token.getString();
2791 const RegInfo *Reg = getRegularRegInfo(Str);
2792 if (Reg) {
2793 StringRef RegName = Reg->Name;
2794 StringRef RegSuffix = Str.substr(RegName.size());
2795 if (!RegSuffix.empty()) {
2796 RegSuffix.consume_back(".l");
2797 RegSuffix.consume_back(".h");
2798 unsigned Num;
2799 // A single register with an index: rXX
2800 if (getRegNum(RegSuffix, Num))
2801 return true;
2802 } else {
2803 // A range of registers: r[XX:YY].
2804 if (NextToken.is(AsmToken::LBrac))
2805 return true;
2806 }
2807 }
2808
2809 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2810}
2811
2812bool
2813AMDGPUAsmParser::isRegister()
2814{
2815 return isRegister(getToken(), peekToken());
2816}
2817
2818unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2819 unsigned SubReg, unsigned RegWidth,
2820 SMLoc Loc) {
2821 assert(isRegularReg(RegKind));
2822
2823 unsigned AlignSize = 1;
2824 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2825 // SGPR and TTMP registers must be aligned.
2826 // Max required alignment is 4 dwords.
2827 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2828 }
2829
2830 if (RegNum % AlignSize != 0) {
2831 Error(Loc, "invalid register alignment");
2832 return AMDGPU::NoRegister;
2833 }
2834
2835 unsigned RegIdx = RegNum / AlignSize;
2836 int RCID = getRegClass(RegKind, RegWidth);
2837 if (RCID == -1) {
2838 Error(Loc, "invalid or unsupported register size");
2839 return AMDGPU::NoRegister;
2840 }
2841
2842 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2843 const MCRegisterClass RC = TRI->getRegClass(RCID);
2844 if (RegIdx >= RC.getNumRegs()) {
2845 Error(Loc, "register index is out of range");
2846 return AMDGPU::NoRegister;
2847 }
2848
2849 unsigned Reg = RC.getRegister(RegIdx);
2850
2851 if (SubReg) {
2852 Reg = TRI->getSubReg(Reg, SubReg);
2853
2854 // Currently all regular registers have their .l and .h subregisters, so
2855 // we should never need to generate an error here.
2856 assert(Reg && "Invalid subregister!");
2857 }
2858
2859 return Reg;
2860}
2861
2862bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2863 int64_t RegLo, RegHi;
2864 if (!skipToken(AsmToken::LBrac, "missing register index"))
2865 return false;
2866
2867 SMLoc FirstIdxLoc = getLoc();
2868 SMLoc SecondIdxLoc;
2869
2870 if (!parseExpr(RegLo))
2871 return false;
2872
2873 if (trySkipToken(AsmToken::Colon)) {
2874 SecondIdxLoc = getLoc();
2875 if (!parseExpr(RegHi))
2876 return false;
2877 } else {
2878 RegHi = RegLo;
2879 }
2880
2881 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2882 return false;
2883
2884 if (!isUInt<32>(RegLo)) {
2885 Error(FirstIdxLoc, "invalid register index");
2886 return false;
2887 }
2888
2889 if (!isUInt<32>(RegHi)) {
2890 Error(SecondIdxLoc, "invalid register index");
2891 return false;
2892 }
2893
2894 if (RegLo > RegHi) {
2895 Error(FirstIdxLoc, "first register index should not exceed second index");
2896 return false;
2897 }
2898
2899 Num = static_cast<unsigned>(RegLo);
2900 RegWidth = 32 * ((RegHi - RegLo) + 1);
2901 return true;
2902}
2903
2904unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2905 unsigned &RegNum, unsigned &RegWidth,
2906 SmallVectorImpl<AsmToken> &Tokens) {
2907 assert(isToken(AsmToken::Identifier));
2908 unsigned Reg = getSpecialRegForName(getTokenStr());
2909 if (Reg) {
2910 RegNum = 0;
2911 RegWidth = 32;
2912 RegKind = IS_SPECIAL;
2913 Tokens.push_back(getToken());
2914 lex(); // skip register name
2915 }
2916 return Reg;
2917}
2918
2919unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2920 unsigned &RegNum, unsigned &RegWidth,
2921 SmallVectorImpl<AsmToken> &Tokens) {
2922 assert(isToken(AsmToken::Identifier));
2923 StringRef RegName = getTokenStr();
2924 auto Loc = getLoc();
2925
2926 const RegInfo *RI = getRegularRegInfo(RegName);
2927 if (!RI) {
2928 Error(Loc, "invalid register name");
2929 return AMDGPU::NoRegister;
2930 }
2931
2932 Tokens.push_back(getToken());
2933 lex(); // skip register name
2934
2935 RegKind = RI->Kind;
2936 StringRef RegSuffix = RegName.substr(RI->Name.size());
2937 unsigned SubReg = NoSubRegister;
2938 if (!RegSuffix.empty()) {
2939 if (RegSuffix.consume_back(".l"))
2940 SubReg = AMDGPU::lo16;
2941 else if (RegSuffix.consume_back(".h"))
2942 SubReg = AMDGPU::hi16;
2943
2944 // Single 32-bit register: vXX.
2945 if (!getRegNum(RegSuffix, RegNum)) {
2946 Error(Loc, "invalid register index");
2947 return AMDGPU::NoRegister;
2948 }
2949 RegWidth = 32;
2950 } else {
2951 // Range of registers: v[XX:YY]. ":YY" is optional.
2952 if (!ParseRegRange(RegNum, RegWidth))
2953 return AMDGPU::NoRegister;
2954 }
2955
2956 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2957}
2958
2959unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2960 unsigned &RegWidth,
2961 SmallVectorImpl<AsmToken> &Tokens) {
2962 unsigned Reg = AMDGPU::NoRegister;
2963 auto ListLoc = getLoc();
2964
2965 if (!skipToken(AsmToken::LBrac,
2966 "expected a register or a list of registers")) {
2967 return AMDGPU::NoRegister;
2968 }
2969
2970 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2971
2972 auto Loc = getLoc();
2973 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2974 return AMDGPU::NoRegister;
2975 if (RegWidth != 32) {
2976 Error(Loc, "expected a single 32-bit register");
2977 return AMDGPU::NoRegister;
2978 }
2979
2980 for (; trySkipToken(AsmToken::Comma); ) {
2981 RegisterKind NextRegKind;
2982 unsigned NextReg, NextRegNum, NextRegWidth;
2983 Loc = getLoc();
2984
2985 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2986 NextRegNum, NextRegWidth,
2987 Tokens)) {
2988 return AMDGPU::NoRegister;
2989 }
2990 if (NextRegWidth != 32) {
2991 Error(Loc, "expected a single 32-bit register");
2992 return AMDGPU::NoRegister;
2993 }
2994 if (NextRegKind != RegKind) {
2995 Error(Loc, "registers in a list must be of the same kind");
2996 return AMDGPU::NoRegister;
2997 }
2998 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2999 return AMDGPU::NoRegister;
3000 }
3001
3002 if (!skipToken(AsmToken::RBrac,
3003 "expected a comma or a closing square bracket")) {
3004 return AMDGPU::NoRegister;
3005 }
3006
3007 if (isRegularReg(RegKind))
3008 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3009
3010 return Reg;
3011}
3012
3013bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3014 unsigned &RegNum, unsigned &RegWidth,
3015 SmallVectorImpl<AsmToken> &Tokens) {
3016 auto Loc = getLoc();
3017 Reg = AMDGPU::NoRegister;
3018
3019 if (isToken(AsmToken::Identifier)) {
3020 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3021 if (Reg == AMDGPU::NoRegister)
3022 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3023 } else {
3024 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3025 }
3026
3027 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3028 if (Reg == AMDGPU::NoRegister) {
3029 assert(Parser.hasPendingError());
3030 return false;
3031 }
3032
3033 if (!subtargetHasRegister(*TRI, Reg)) {
3034 if (Reg == AMDGPU::SGPR_NULL) {
3035 Error(Loc, "'null' operand is not supported on this GPU");
3036 } else {
3038 " register not available on this GPU");
3039 }
3040 return false;
3041 }
3042
3043 return true;
3044}
3045
3046bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3047 unsigned &RegNum, unsigned &RegWidth,
3048 bool RestoreOnFailure /*=false*/) {
3049 Reg = AMDGPU::NoRegister;
3050
3051 SmallVector<AsmToken, 1> Tokens;
3052 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3053 if (RestoreOnFailure) {
3054 while (!Tokens.empty()) {
3055 getLexer().UnLex(Tokens.pop_back_val());
3056 }
3057 }
3058 return true;
3059 }
3060 return false;
3061}
3062
3063std::optional<StringRef>
3064AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3065 switch (RegKind) {
3066 case IS_VGPR:
3067 return StringRef(".amdgcn.next_free_vgpr");
3068 case IS_SGPR:
3069 return StringRef(".amdgcn.next_free_sgpr");
3070 default:
3071 return std::nullopt;
3072 }
3073}
3074
3075void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3076 auto SymbolName = getGprCountSymbolName(RegKind);
3077 assert(SymbolName && "initializing invalid register kind");
3078 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3079 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3080}
3081
3082bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3083 unsigned DwordRegIndex,
3084 unsigned RegWidth) {
3085 // Symbols are only defined for GCN targets
3086 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3087 return true;
3088
3089 auto SymbolName = getGprCountSymbolName(RegKind);
3090 if (!SymbolName)
3091 return true;
3092 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3093
3094 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3095 int64_t OldCount;
3096
3097 if (!Sym->isVariable())
3098 return !Error(getLoc(),
3099 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3100 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
3101 return !Error(
3102 getLoc(),
3103 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3104
3105 if (OldCount <= NewMax)
3106 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3107
3108 return true;
3109}
3110
3111std::unique_ptr<AMDGPUOperand>
3112AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3113 const auto &Tok = getToken();
3114 SMLoc StartLoc = Tok.getLoc();
3115 SMLoc EndLoc = Tok.getEndLoc();
3116 RegisterKind RegKind;
3117 unsigned Reg, RegNum, RegWidth;
3118
3119 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3120 return nullptr;
3121 }
3122 if (isHsaAbi(getSTI())) {
3123 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3124 return nullptr;
3125 } else
3126 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3127 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3128}
3129
3130ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3131 bool HasSP3AbsModifier, bool HasLit) {
3132 // TODO: add syntactic sugar for 1/(2*PI)
3133
3134 if (isRegister())
3135 return ParseStatus::NoMatch;
3136 assert(!isModifier());
3137
3138 if (!HasLit) {
3139 HasLit = trySkipId("lit");
3140 if (HasLit) {
3141 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3142 return ParseStatus::Failure;
3143 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
3144 if (S.isSuccess() &&
3145 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3146 return ParseStatus::Failure;
3147 return S;
3148 }
3149 }
3150
3151 const auto& Tok = getToken();
3152 const auto& NextTok = peekToken();
3153 bool IsReal = Tok.is(AsmToken::Real);
3154 SMLoc S = getLoc();
3155 bool Negate = false;
3156
3157 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3158 lex();
3159 IsReal = true;
3160 Negate = true;
3161 }
3162
3163 AMDGPUOperand::Modifiers Mods;
3164 Mods.Lit = HasLit;
3165
3166 if (IsReal) {
3167 // Floating-point expressions are not supported.
3168 // Only floating-point literals with an
3169 // optional sign can be allowed.
3170
3171 StringRef Num = getTokenStr();
3172 lex();
3173
3174 APFloat RealVal(APFloat::IEEEdouble());
3175 auto roundMode = APFloat::rmNearestTiesToEven;
3176 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3177 return ParseStatus::Failure;
3178 if (Negate)
3179 RealVal.changeSign();
3180
3181 Operands.push_back(
3182 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3183 AMDGPUOperand::ImmTyNone, true));
3184 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3185 Op.setModifiers(Mods);
3186
3187 return ParseStatus::Success;
3188
3189 } else {
3190 int64_t IntVal;
3191 const MCExpr *Expr;
3192 SMLoc S = getLoc();
3193
3194 if (HasSP3AbsModifier) {
3195 // This is a workaround for handling expressions
3196 // as arguments of SP3 'abs' modifier, for example:
3197 // |1.0|
3198 // |-1|
3199 // |1+x|
3200 // This syntax is not compatible with the syntax of standard
3201 // MC expressions (due to the trailing '|').
3202 SMLoc EndLoc;
3203 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3204 return ParseStatus::Failure;
3205 } else {
3206 if (Parser.parseExpression(Expr))
3207 return ParseStatus::Failure;
3208 }
3209
3210 if (Expr->evaluateAsAbsolute(IntVal)) {
3211 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3212 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3213 Op.setModifiers(Mods);
3214 } else {
3215 if (HasLit)
3216 return ParseStatus::NoMatch;
3217 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3218 }
3219
3220 return ParseStatus::Success;
3221 }
3222
3223 return ParseStatus::NoMatch;
3224}
3225
3226ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3227 if (!isRegister())
3228 return ParseStatus::NoMatch;
3229
3230 if (auto R = parseRegister()) {
3231 assert(R->isReg());
3232 Operands.push_back(std::move(R));
3233 return ParseStatus::Success;
3234 }
3235 return ParseStatus::Failure;
3236}
3237
3238ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3239 bool HasSP3AbsMod, bool HasLit) {
3240 ParseStatus Res = parseReg(Operands);
3241 if (!Res.isNoMatch())
3242 return Res;
3243 if (isModifier())
3244 return ParseStatus::NoMatch;
3245 return parseImm(Operands, HasSP3AbsMod, HasLit);
3246}
3247
3248bool
3249AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3250 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3251 const auto &str = Token.getString();
3252 return str == "abs" || str == "neg" || str == "sext";
3253 }
3254 return false;
3255}
3256
3257bool
3258AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3259 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3260}
3261
3262bool
3263AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3264 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3265}
3266
3267bool
3268AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3269 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3270}
3271
3272// Check if this is an operand modifier or an opcode modifier
3273 // which may look like an expression but is not. We should
3274// avoid parsing these modifiers as expressions. Currently
3275// recognized sequences are:
3276// |...|
3277// abs(...)
3278// neg(...)
3279// sext(...)
3280// -reg
3281// -|...|
3282// -abs(...)
3283// name:...
3284//
3285bool
3286AMDGPUAsmParser::isModifier() {
3287
3288 AsmToken Tok = getToken();
3289 AsmToken NextToken[2];
3290 peekTokens(NextToken);
3291
3292 return isOperandModifier(Tok, NextToken[0]) ||
3293 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3294 isOpcodeModifierWithVal(Tok, NextToken[0]);
3295}
3296
3297// Check if the current token is an SP3 'neg' modifier.
3298 // Currently this modifier is allowed in the following contexts:
3299//
3300// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3301// 2. Before an 'abs' modifier: -abs(...)
3302// 3. Before an SP3 'abs' modifier: -|...|
3303//
3304// In all other cases "-" is handled as a part
3305// of an expression that follows the sign.
3306//
3307// Note: When "-" is followed by an integer literal,
3308// this is interpreted as integer negation rather
3309 // than a floating-point NEG modifier applied to the literal.
3310 // Besides being counter-intuitive, such use of the floating-point
3311 // NEG modifier would have resulted in different meanings
3312// of integer literals used with VOP1/2/C and VOP3,
3313// for example:
3314// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3315// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3316 // Negative fp literals with a preceding "-" are
3317 // handled likewise, for uniformity.
3318//
3319bool
3320AMDGPUAsmParser::parseSP3NegModifier() {
3321
3322 AsmToken NextToken[2];
3323 peekTokens(NextToken);
3324
3325 if (isToken(AsmToken::Minus) &&
3326 (isRegister(NextToken[0], NextToken[1]) ||
3327 NextToken[0].is(AsmToken::Pipe) ||
3328 isId(NextToken[0], "abs"))) {
3329 lex();
3330 return true;
3331 }
3332
3333 return false;
3334}
3335
3337AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3338 bool AllowImm) {
3339 bool Neg, SP3Neg;
3340 bool Abs, SP3Abs;
3341 bool Lit;
3342 SMLoc Loc;
3343
3344 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3345 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3346 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3347
3348 SP3Neg = parseSP3NegModifier();
3349
3350 Loc = getLoc();
3351 Neg = trySkipId("neg");
3352 if (Neg && SP3Neg)
3353 return Error(Loc, "expected register or immediate");
3354 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3355 return ParseStatus::Failure;
3356
3357 Abs = trySkipId("abs");
3358 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3359 return ParseStatus::Failure;
3360
3361 Lit = trySkipId("lit");
3362 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3363 return ParseStatus::Failure;
3364
3365 Loc = getLoc();
3366 SP3Abs = trySkipToken(AsmToken::Pipe);
3367 if (Abs && SP3Abs)
3368 return Error(Loc, "expected register or immediate");
3369
3370 ParseStatus Res;
3371 if (AllowImm) {
3372 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3373 } else {
3374 Res = parseReg(Operands);
3375 }
3376 if (!Res.isSuccess())
3377 return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3378
3379 if (Lit && !Operands.back()->isImm())
3380 Error(Loc, "expected immediate with lit modifier");
3381
3382 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3383 return ParseStatus::Failure;
3384 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3385 return ParseStatus::Failure;
3386 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3387 return ParseStatus::Failure;
3388 if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3389 return ParseStatus::Failure;
3390
3391 AMDGPUOperand::Modifiers Mods;
3392 Mods.Abs = Abs || SP3Abs;
3393 Mods.Neg = Neg || SP3Neg;
3394 Mods.Lit = Lit;
3395
3396 if (Mods.hasFPModifiers() || Lit) {
3397 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3398 if (Op.isExpr())
3399 return Error(Op.getStartLoc(), "expected an absolute expression");
3400 Op.setModifiers(Mods);
3401 }
3402 return ParseStatus::Success;
3403}
3404
3406AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3407 bool AllowImm) {
3408 bool Sext = trySkipId("sext");
3409 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3410 return ParseStatus::Failure;
3411
3412 ParseStatus Res;
3413 if (AllowImm) {
3414 Res = parseRegOrImm(Operands);
3415 } else {
3416 Res = parseReg(Operands);
3417 }
3418 if (!Res.isSuccess())
3419 return Sext ? ParseStatus::Failure : Res;
3420
3421 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3422 return ParseStatus::Failure;
3423
3424 AMDGPUOperand::Modifiers Mods;
3425 Mods.Sext = Sext;
3426
3427 if (Mods.hasIntModifiers()) {
3428 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3429 if (Op.isExpr())
3430 return Error(Op.getStartLoc(), "expected an absolute expression");
3431 Op.setModifiers(Mods);
3432 }
3433
3434 return ParseStatus::Success;
3435}
3436
3437ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3438 return parseRegOrImmWithFPInputMods(Operands, false);
3439}
3440
3441ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3442 return parseRegOrImmWithIntInputMods(Operands, false);
3443}
3444
3445ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3446 auto Loc = getLoc();
3447 if (trySkipId("off")) {
3448 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3449 AMDGPUOperand::ImmTyOff, false));
3450 return ParseStatus::Success;
3451 }
3452
3453 if (!isRegister())
3454 return ParseStatus::NoMatch;
3455
3456 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3457 if (Reg) {
3458 Operands.push_back(std::move(Reg));
3459 return ParseStatus::Success;
3460 }
3461
3462 return ParseStatus::Failure;
3463}
3464
3465unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3466 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3467
3468 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3469 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3470 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3471 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3472 return Match_InvalidOperand;
3473
3474 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3475 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3476 // v_mac_f32/16 allow only dst_sel == DWORD;
3477 auto OpNum =
3478 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3479 const auto &Op = Inst.getOperand(OpNum);
3480 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3481 return Match_InvalidOperand;
3482 }
3483 }
3484
3485 return Match_Success;
3486}
3487
3489 static const unsigned Variants[] = {
3493 };
3494
3495 return ArrayRef(Variants);
3496}
3497
3498// What asm variants we should check
3499ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3500 if (isForcedDPP() && isForcedVOP3()) {
3501 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3502 return ArrayRef(Variants);
3503 }
3504 if (getForcedEncodingSize() == 32) {
3505 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3506 return ArrayRef(Variants);
3507 }
3508
3509 if (isForcedVOP3()) {
3510 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3511 return ArrayRef(Variants);
3512 }
3513
3514 if (isForcedSDWA()) {
3515 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3516 AMDGPUAsmVariants::SDWA9};
3517 return ArrayRef(Variants);
3518 }
3519
3520 if (isForcedDPP()) {
3521 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3522 return ArrayRef(Variants);
3523 }
3524
3525 return getAllVariants();
3526}
3527
3528StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3529 if (isForcedDPP() && isForcedVOP3())
3530 return "e64_dpp";
3531
3532 if (getForcedEncodingSize() == 32)
3533 return "e32";
3534
3535 if (isForcedVOP3())
3536 return "e64";
3537
3538 if (isForcedSDWA())
3539 return "sdwa";
3540
3541 if (isForcedDPP())
3542 return "dpp";
3543
3544 return "";
3545}
3546
3547unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3548 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3549 for (MCPhysReg Reg : Desc.implicit_uses()) {
3550 switch (Reg) {
3551 case AMDGPU::FLAT_SCR:
3552 case AMDGPU::VCC:
3553 case AMDGPU::VCC_LO:
3554 case AMDGPU::VCC_HI:
3555 case AMDGPU::M0:
3556 return Reg;
3557 default:
3558 break;
3559 }
3560 }
3561 return AMDGPU::NoRegister;
3562}
3563
3564// NB: This code is correct only when used to check constant
3565 // bus limitations because GFX7 supports no f16 inline constants.
3566// Note that there are no cases when a GFX7 opcode violates
3567// constant bus limitations due to the use of an f16 constant.
3568bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3569 unsigned OpIdx) const {
3570 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3571
3572 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3573 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3574 return false;
3575 }
3576
3577 const MCOperand &MO = Inst.getOperand(OpIdx);
3578
3579 int64_t Val = MO.getImm();
3580 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3581
3582 switch (OpSize) { // expected operand size
3583 case 8:
3584 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3585 case 4:
3586 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3587 case 2: {
3588 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3592 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3593
3598
3603
3608
3613 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3614
3619 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3620
3621 llvm_unreachable("invalid operand type");
3622 }
3623 default:
3624 llvm_unreachable("invalid operand size");
3625 }
3626}
3627
3628unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3629 if (!isGFX10Plus())
3630 return 1;
3631
3632 switch (Opcode) {
3633 // 64-bit shift instructions can use only one scalar value input
3634 case AMDGPU::V_LSHLREV_B64_e64:
3635 case AMDGPU::V_LSHLREV_B64_gfx10:
3636 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3637 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3638 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3639 case AMDGPU::V_LSHRREV_B64_e64:
3640 case AMDGPU::V_LSHRREV_B64_gfx10:
3641 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3642 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3643 case AMDGPU::V_ASHRREV_I64_e64:
3644 case AMDGPU::V_ASHRREV_I64_gfx10:
3645 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3646 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3647 case AMDGPU::V_LSHL_B64_e64:
3648 case AMDGPU::V_LSHR_B64_e64:
3649 case AMDGPU::V_ASHR_I64_e64:
3650 return 1;
3651 default:
3652 return 2;
3653 }
3654}
3655
3656constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3657 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3658
3659// Get regular operand indices in the same order as specified
3660// in the instruction (but append mandatory literals to the end).
3661 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3662 bool AddMandatoryLiterals = false) {
3663
3664 int16_t ImmIdx =
3665 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3666
3667 if (isVOPD(Opcode)) {
3668 int16_t ImmDeferredIdx =
3669 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3670 : -1;
3671
3672 return {getNamedOperandIdx(Opcode, OpName::src0X),
3673 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3674 getNamedOperandIdx(Opcode, OpName::src0Y),
3675 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3676 ImmDeferredIdx,
3677 ImmIdx};
3678 }
3679
3680 return {getNamedOperandIdx(Opcode, OpName::src0),
3681 getNamedOperandIdx(Opcode, OpName::src1),
3682 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3683}
3684
3685bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3686 const MCOperand &MO = Inst.getOperand(OpIdx);
3687 if (MO.isImm())
3688 return !isInlineConstant(Inst, OpIdx);
3689 if (MO.isReg()) {
3690 auto Reg = MO.getReg();
3691 if (!Reg)
3692 return false;
3693 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3694 auto PReg = mc2PseudoReg(Reg);
3695 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3696 }
3697 return true;
3698}
3699
3700// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3701// Writelane is special in that it can use SGPR and M0 (which would normally
3702// count as using the constant bus twice - but in this case it is allowed since
3703// the lane selector doesn't count as a use of the constant bus). However, it is
3704// still required to abide by the 1 SGPR rule.
3705static bool checkWriteLane(const MCInst &Inst) {
3706 const unsigned Opcode = Inst.getOpcode();
3707 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3708 return false;
3709 const MCOperand &LaneSelOp = Inst.getOperand(2);
3710 if (!LaneSelOp.isReg())
3711 return false;
3712 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3713 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3714}
3715
3716bool AMDGPUAsmParser::validateConstantBusLimitations(
3717 const MCInst &Inst, const OperandVector &Operands) {
3718 const unsigned Opcode = Inst.getOpcode();
3719 const MCInstrDesc &Desc = MII.get(Opcode);
3720 unsigned LastSGPR = AMDGPU::NoRegister;
3721 unsigned ConstantBusUseCount = 0;
3722 unsigned NumLiterals = 0;
3723 unsigned LiteralSize;
3724
3725 if (!(Desc.TSFlags &
3728 !isVOPD(Opcode))
3729 return true;
3730
3731 if (checkWriteLane(Inst))
3732 return true;
3733
3734 // Check special imm operands (used by madmk, etc)
3735 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3736 ++NumLiterals;
3737 LiteralSize = 4;
3738 }
3739
3740 SmallDenseSet<unsigned> SGPRsUsed;
3741 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3742 if (SGPRUsed != AMDGPU::NoRegister) {
3743 SGPRsUsed.insert(SGPRUsed);
3744 ++ConstantBusUseCount;
3745 }
3746
3747 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3748
3749 for (int OpIdx : OpIndices) {
3750 if (OpIdx == -1)
3751 continue;
3752
3753 const MCOperand &MO = Inst.getOperand(OpIdx);
3754 if (usesConstantBus(Inst, OpIdx)) {
3755 if (MO.isReg()) {
3756 LastSGPR = mc2PseudoReg(MO.getReg());
3757 // Pairs of registers with a partial intersection like these
3758 // s0, s[0:1]
3759 // flat_scratch_lo, flat_scratch
3760 // flat_scratch_lo, flat_scratch_hi
3761 // are theoretically valid but they are disabled anyway.
3762 // Note that this code mimics SIInstrInfo::verifyInstruction
3763 if (SGPRsUsed.insert(LastSGPR).second) {
3764 ++ConstantBusUseCount;
3765 }
3766 } else { // Expression or a literal
3767
3768 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3769 continue; // special operand like VINTERP attr_chan
3770
3771 // An instruction may use only one literal.
3772 // This has been validated on the previous step.
3773 // See validateVOPLiteral.
3774 // This literal may be used as more than one operand.
3775 // If all these operands are of the same size,
3776 // this literal counts as one scalar value.
3777 // Otherwise it counts as 2 scalar values.
3778 // See "GFX10 Shader Programming", section 3.6.2.3.
3779
3780 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3781 if (Size < 4)
3782 Size = 4;
3783
3784 if (NumLiterals == 0) {
3785 NumLiterals = 1;
3786 LiteralSize = Size;
3787 } else if (LiteralSize != Size) {
3788 NumLiterals = 2;
3789 }
3790 }
3791 }
3792 }
3793 ConstantBusUseCount += NumLiterals;
3794
3795 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3796 return true;
3797
3798 SMLoc LitLoc = getLitLoc(Operands);
3799 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3800 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3801 Error(Loc, "invalid operand (violates constant bus restrictions)");
3802 return false;
3803}
3804
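// VOPD bank constraints: for the X and Y components of a dual-issue VOPD
// instruction, corresponding srcN operands must come from different VGPR
// banks, and one vdst register must be even while the other is odd.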
3805bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3806 const MCInst &Inst, const OperandVector &Operands) {
3807
3808 const unsigned Opcode = Inst.getOpcode();
3809 if (!isVOPD(Opcode))
3810 return true;
3811
3812 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3813
3814 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3815 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3816 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3817 ? Opr.getReg()
3818 : MCRegister();
3819 };
3820
3821 // On GFX12, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2 source-cache.
3822 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3823
3824 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3825 auto InvalidCompOprIdx =
3826 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3827 if (!InvalidCompOprIdx)
3828 return true;
3829
3830 auto CompOprIdx = *InvalidCompOprIdx;
3831 auto ParsedIdx =
3832 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3833 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3834 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3835
3836 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3837 if (CompOprIdx == VOPD::Component::DST) {
3838 Error(Loc, "one dst register must be even and the other odd");
3839 } else {
3840 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3841 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3842 " operands must use different VGPR banks");
3843 }
3844
3845 return false;
3846}
3847
3848bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3849
3850 const unsigned Opc = Inst.getOpcode();
3851 const MCInstrDesc &Desc = MII.get(Opc);
3852
3853 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3854 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3855 assert(ClampIdx != -1);
3856 return Inst.getOperand(ClampIdx).getImm() == 0;
3857 }
3858
3859 return true;
3860}
3861
3862constexpr uint64_t MIMGFlags =
3863 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3864
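// The vdata operand of an image instruction must be wide enough for the
// components selected by dmask (or 4 for gather4), plus one extra dword when
// tfe is set; packed d16 halves the component count.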
3865bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3866 const SMLoc &IDLoc) {
3867
3868 const unsigned Opc = Inst.getOpcode();
3869 const MCInstrDesc &Desc = MII.get(Opc);
3870
3871 if ((Desc.TSFlags & MIMGFlags) == 0)
3872 return true;
3873
3874 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3875 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3876 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3877
3878 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
3879 return true;
3880
3881 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3882 return true;
3883
3884 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3885 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3886 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3887 if (DMask == 0)
3888 DMask = 1;
3889
3890 bool IsPackedD16 = false;
3891 unsigned DataSize =
3892 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3893 if (hasPackedD16()) {
3894 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3895 IsPackedD16 = D16Idx >= 0;
3896 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3897 DataSize = (DataSize + 1) / 2;
3898 }
3899
3900 if ((VDataSize / 4) == DataSize + TFESize)
3901 return true;
3902
3903 StringRef Modifiers;
3904 if (isGFX90A())
3905 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3906 else
3907 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3908
3909 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3910 return false;
3911}
3912
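// The number of image address registers must match what dim, a16 and g16
// require. With an NSA encoding each address is a separate VGPR operand;
// otherwise all addresses are packed into a single VGPR tuple.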
3913bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3914 const SMLoc &IDLoc) {
3915 const unsigned Opc = Inst.getOpcode();
3916 const MCInstrDesc &Desc = MII.get(Opc);
3917
3918 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3919 return true;
3920
3921 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3922
3923 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3924 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3925 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3926 int RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
3927 : AMDGPU::OpName::rsrc;
3928 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
3929 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3930 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3931
3932 assert(VAddr0Idx != -1);
3933 assert(SrsrcIdx != -1);
3934 assert(SrsrcIdx > VAddr0Idx);
3935
3936 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3937 if (BaseOpcode->BVH) {
3938 if (IsA16 == BaseOpcode->A16)
3939 return true;
3940 Error(IDLoc, "image address size does not match a16");
3941 return false;
3942 }
3943
3944 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3945 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3946 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3947 unsigned ActualAddrSize =
3948 IsNSA ? SrsrcIdx - VAddr0Idx
3949 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3950
3951 unsigned ExpectedAddrSize =
3952 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3953
3954 if (IsNSA) {
3955 if (hasPartialNSAEncoding() &&
3956 ExpectedAddrSize >
3957 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
3958 int VAddrLastIdx = SrsrcIdx - 1;
3959 unsigned VAddrLastSize =
3960 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3961
3962 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3963 }
3964 } else {
3965 if (ExpectedAddrSize > 12)
3966 ExpectedAddrSize = 16;
3967
3968 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3969 // This provides backward compatibility for assembly created
3970 // before 160b/192b/224b types were directly supported.
3971 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3972 return true;
3973 }
3974
3975 if (ActualAddrSize == ExpectedAddrSize)
3976 return true;
3977
3978 Error(IDLoc, "image address size does not match dim and a16");
3979 return false;
3980}
3981
3982bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3983
3984 const unsigned Opc = Inst.getOpcode();
3985 const MCInstrDesc &Desc = MII.get(Opc);
3986
3987 if ((Desc.TSFlags & MIMGFlags) == 0)
3988 return true;
3989 if (!Desc.mayLoad() || !Desc.mayStore())
3990 return true; // Not atomic
3991
3992 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3993 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3994
3995 // This is an incomplete check because image_atomic_cmpswap
3996 // may only use 0x3 and 0xf while other atomic operations
3997 // may use 0x1 and 0x3. However these limitations are
3998 // verified when we check that dmask matches dst size.
3999 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4000}
4001
4002bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4003
4004 const unsigned Opc = Inst.getOpcode();
4005 const MCInstrDesc &Desc = MII.get(Opc);
4006
4007 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4008 return true;
4009
4010 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4011 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4012
4013 // GATHER4 instructions use dmask in a different fashion compared to
4014 // other MIMG instructions. The only useful DMASK values are
4015 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4016 // (red,red,red,red) etc.) The ISA document doesn't mention
4017 // this.
4018 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4019}
4020
4021bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4022 const OperandVector &Operands) {
4023 if (!isGFX10Plus())
4024 return true;
4025
4026 const unsigned Opc = Inst.getOpcode();
4027 const MCInstrDesc &Desc = MII.get(Opc);
4028
4029 if ((Desc.TSFlags & MIMGFlags) == 0)
4030 return true;
4031
4032 // image_bvh_intersect_ray instructions do not have dim
4033 if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH)
4034 return true;
4035
4036 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4037 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4038 if (Op.isDim())
4039 return true;
4040 }
4041 return false;
4042}
4043
4044bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4045 const unsigned Opc = Inst.getOpcode();
4046 const MCInstrDesc &Desc = MII.get(Opc);
4047
4048 if ((Desc.TSFlags & MIMGFlags) == 0)
4049 return true;
4050
4051 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4052 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4053 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4054
4055 if (!BaseOpcode->MSAA)
4056 return true;
4057
4058 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4059 assert(DimIdx != -1);
4060
4061 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4062 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4063
4064 return DimInfo->MSAA;
4065}
4066
4067static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4068{
4069 switch (Opcode) {
4070 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4071 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4072 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4073 return true;
4074 default:
4075 return false;
4076 }
4077}
4078
4079// movrels* opcodes should only allow VGPRS as src0.
4080// This is specified in .td description for vop1/vop3,
4081// but sdwa is handled differently. See isSDWAOperand.
4082bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4083 const OperandVector &Operands) {
4084
4085 const unsigned Opc = Inst.getOpcode();
4086 const MCInstrDesc &Desc = MII.get(Opc);
4087
4088 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4089 return true;
4090
4091 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4092 assert(Src0Idx != -1);
4093
4094 SMLoc ErrLoc;
4095 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4096 if (Src0.isReg()) {
4097 auto Reg = mc2PseudoReg(Src0.getReg());
4098 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4099 if (!isSGPR(Reg, TRI))
4100 return true;
4101 ErrLoc = getRegLoc(Reg, Operands);
4102 } else {
4103 ErrLoc = getConstLoc(Operands);
4104 }
4105
4106 Error(ErrLoc, "source operand must be a VGPR");
4107 return false;
4108}
4109
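// Prior to gfx90a, v_accvgpr_write does not accept SGPR sources: src0 must be
// a VGPR or an inline constant.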
4110bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4111 const OperandVector &Operands) {
4112
4113 const unsigned Opc = Inst.getOpcode();
4114
4115 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4116 return true;
4117
4118 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4119 assert(Src0Idx != -1);
4120
4121 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4122 if (!Src0.isReg())
4123 return true;
4124
4125 auto Reg = mc2PseudoReg(Src0.getReg());
4126 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4127 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4128 Error(getRegLoc(Reg, Operands),
4129 "source operand must be either a VGPR or an inline constant");
4130 return false;
4131 }
4132
4133 return true;
4134}
4135
4136bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4137 const OperandVector &Operands) {
4138 unsigned Opcode = Inst.getOpcode();
4139 const MCInstrDesc &Desc = MII.get(Opcode);
4140
4141 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4142 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4143 return true;
4144
4145 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4146 if (Src2Idx == -1)
4147 return true;
4148
4149 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4150 Error(getConstLoc(Operands),
4151 "inline constants are not allowed for this operand");
4152 return false;
4153 }
4154
4155 return true;
4156}
4157
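// For MFMA instructions with a result wider than 128 bits, src2 (the
// accumulator) must either be the same register as the dst or not overlap it
// at all; a partial overlap is rejected.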
4158bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4159 const OperandVector &Operands) {
4160 const unsigned Opc = Inst.getOpcode();
4161 const MCInstrDesc &Desc = MII.get(Opc);
4162
4163 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4164 return true;
4165
4166 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4167 if (Src2Idx == -1)
4168 return true;
4169
4170 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4171 if (!Src2.isReg())
4172 return true;
4173
4174 MCRegister Src2Reg = Src2.getReg();
4175 MCRegister DstReg = Inst.getOperand(0).getReg();
4176 if (Src2Reg == DstReg)
4177 return true;
4178
4179 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4180 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4181 return true;
4182
4183 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4184 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
4185 "source 2 operand must not partially overlap with dst");
4186 return false;
4187 }
4188
4189 return true;
4190}
4191
4192bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4193 switch (Inst.getOpcode()) {
4194 default:
4195 return true;
4196 case V_DIV_SCALE_F32_gfx6_gfx7:
4197 case V_DIV_SCALE_F32_vi:
4198 case V_DIV_SCALE_F32_gfx10:
4199 case V_DIV_SCALE_F64_gfx6_gfx7:
4200 case V_DIV_SCALE_F64_vi:
4201 case V_DIV_SCALE_F64_gfx10:
4202 break;
4203 }
4204
4205 // TODO: Check that src0 = src1 or src2.
4206
4207 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4208 AMDGPU::OpName::src1_modifiers,
4209 AMDGPU::OpName::src2_modifiers}) {
4210 if (Inst.getOperand(getNamedOperandIdx(Inst.getOpcode(), Name))
4211 .getImm() &
4212 SISrcMods::ABS) {
4213 return false;
4214 }
4215 }
4216
4217 return true;
4218}
4219
4220bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4221
4222 const unsigned Opc = Inst.getOpcode();
4223 const MCInstrDesc &Desc = MII.get(Opc);
4224
4225 if ((Desc.TSFlags & MIMGFlags) == 0)
4226 return true;
4227
4228 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4229 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4230 if (isCI() || isSI())
4231 return false;
4232 }
4233
4234 return true;
4235}
4236
4237static bool IsRevOpcode(const unsigned Opcode)
4238{
4239 switch (Opcode) {
4240 case AMDGPU::V_SUBREV_F32_e32:
4241 case AMDGPU::V_SUBREV_F32_e64:
4242 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4243 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4244 case AMDGPU::V_SUBREV_F32_e32_vi:
4245 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4246 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4247 case AMDGPU::V_SUBREV_F32_e64_vi:
4248
4249 case AMDGPU::V_SUBREV_CO_U32_e32:
4250 case AMDGPU::V_SUBREV_CO_U32_e64:
4251 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4252 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4253
4254 case AMDGPU::V_SUBBREV_U32_e32:
4255 case AMDGPU::V_SUBBREV_U32_e64:
4256 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4257 case AMDGPU::V_SUBBREV_U32_e32_vi:
4258 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4259 case AMDGPU::V_SUBBREV_U32_e64_vi:
4260
4261 case AMDGPU::V_SUBREV_U32_e32:
4262 case AMDGPU::V_SUBREV_U32_e64:
4263 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4264 case AMDGPU::V_SUBREV_U32_e32_vi:
4265 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4266 case AMDGPU::V_SUBREV_U32_e64_vi:
4267
4268 case AMDGPU::V_SUBREV_F16_e32:
4269 case AMDGPU::V_SUBREV_F16_e64:
4270 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4271 case AMDGPU::V_SUBREV_F16_e32_vi:
4272 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4273 case AMDGPU::V_SUBREV_F16_e64_vi:
4274
4275 case AMDGPU::V_SUBREV_U16_e32:
4276 case AMDGPU::V_SUBREV_U16_e64:
4277 case AMDGPU::V_SUBREV_U16_e32_vi:
4278 case AMDGPU::V_SUBREV_U16_e64_vi:
4279
4280 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4281 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4282 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4283
4284 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4285 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4286
4287 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4288 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4289
4290 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4291 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4292
4293 case AMDGPU::V_LSHRREV_B32_e32:
4294 case AMDGPU::V_LSHRREV_B32_e64:
4295 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4296 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4297 case AMDGPU::V_LSHRREV_B32_e32_vi:
4298 case AMDGPU::V_LSHRREV_B32_e64_vi:
4299 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4300 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4301
4302 case AMDGPU::V_ASHRREV_I32_e32:
4303 case AMDGPU::V_ASHRREV_I32_e64:
4304 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4305 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4306 case AMDGPU::V_ASHRREV_I32_e32_vi:
4307 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4308 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4309 case AMDGPU::V_ASHRREV_I32_e64_vi:
4310
4311 case AMDGPU::V_LSHLREV_B32_e32:
4312 case AMDGPU::V_LSHLREV_B32_e64:
4313 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4314 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4315 case AMDGPU::V_LSHLREV_B32_e32_vi:
4316 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4317 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4318 case AMDGPU::V_LSHLREV_B32_e64_vi:
4319
4320 case AMDGPU::V_LSHLREV_B16_e32:
4321 case AMDGPU::V_LSHLREV_B16_e64:
4322 case AMDGPU::V_LSHLREV_B16_e32_vi:
4323 case AMDGPU::V_LSHLREV_B16_e64_vi:
4324 case AMDGPU::V_LSHLREV_B16_gfx10:
4325
4326 case AMDGPU::V_LSHRREV_B16_e32:
4327 case AMDGPU::V_LSHRREV_B16_e64:
4328 case AMDGPU::V_LSHRREV_B16_e32_vi:
4329 case AMDGPU::V_LSHRREV_B16_e64_vi:
4330 case AMDGPU::V_LSHRREV_B16_gfx10:
4331
4332 case AMDGPU::V_ASHRREV_I16_e32:
4333 case AMDGPU::V_ASHRREV_I16_e64:
4334 case AMDGPU::V_ASHRREV_I16_e32_vi:
4335 case AMDGPU::V_ASHRREV_I16_e64_vi:
4336 case AMDGPU::V_ASHRREV_I16_gfx10:
4337
4338 case AMDGPU::V_LSHLREV_B64_e64:
4339 case AMDGPU::V_LSHLREV_B64_gfx10:
4340 case AMDGPU::V_LSHLREV_B64_vi:
4341
4342 case AMDGPU::V_LSHRREV_B64_e64:
4343 case AMDGPU::V_LSHRREV_B64_gfx10:
4344 case AMDGPU::V_LSHRREV_B64_vi:
4345
4346 case AMDGPU::V_ASHRREV_I64_e64:
4347 case AMDGPU::V_ASHRREV_I64_gfx10:
4348 case AMDGPU::V_ASHRREV_I64_vi:
4349
4350 case AMDGPU::V_PK_LSHLREV_B16:
4351 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4352 case AMDGPU::V_PK_LSHLREV_B16_vi:
4353
4354 case AMDGPU::V_PK_LSHRREV_B16:
4355 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4356 case AMDGPU::V_PK_LSHRREV_B16_vi:
4357 case AMDGPU::V_PK_ASHRREV_I16:
4358 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4359 case AMDGPU::V_PK_ASHRREV_I16_vi:
4360 return true;
4361 default:
4362 return false;
4363 }
4364}
4365
4366std::optional<StringRef>
4367AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4368
4369 using namespace SIInstrFlags;
4370 const unsigned Opcode = Inst.getOpcode();
4371 const MCInstrDesc &Desc = MII.get(Opcode);
4372
4373 // lds_direct register is defined so that it can be used
4374 // with 9-bit operands only. Ignore encodings which do not accept these.
4375 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4376 if ((Desc.TSFlags & Enc) == 0)
4377 return std::nullopt;
4378
4379 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4380 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4381 if (SrcIdx == -1)
4382 break;
4383 const auto &Src = Inst.getOperand(SrcIdx);
4384 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4385
4386 if (isGFX90A() || isGFX11Plus())
4387 return StringRef("lds_direct is not supported on this GPU");
4388
4389 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4390 return StringRef("lds_direct cannot be used with this instruction");
4391
4392 if (SrcName != OpName::src0)
4393 return StringRef("lds_direct may be used as src0 only");
4394 }
4395 }
4396
4397 return std::nullopt;
4398}
4399
4400SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4401 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4402 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4403 if (Op.isFlatOffset())
4404 return Op.getStartLoc();
4405 }
4406 return getLoc();
4407}
4408
4409bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4410 const OperandVector &Operands) {
4411 auto Opcode = Inst.getOpcode();
4412 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4413 if (OpNum == -1)
4414 return true;
4415
4416 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4417 if ((TSFlags & SIInstrFlags::FLAT))
4418 return validateFlatOffset(Inst, Operands);
4419
4420 if ((TSFlags & SIInstrFlags::SMRD))
4421 return validateSMEMOffset(Inst, Operands);
4422
4423 const auto &Op = Inst.getOperand(OpNum);
4424 if (isGFX12Plus() &&
4425 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4426 const unsigned OffsetSize = 24;
4427 if (!isIntN(OffsetSize, Op.getImm())) {
4428 Error(getFlatOffsetLoc(Operands),
4429 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4430 return false;
4431 }
4432 } else {
4433 const unsigned OffsetSize = 16;
4434 if (!isUIntN(OffsetSize, Op.getImm())) {
4435 Error(getFlatOffsetLoc(Operands),
4436 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4437 return false;
4438 }
4439 }
4440 return true;
4441}
4442
4443bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4444 const OperandVector &Operands) {
4445 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4446 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4447 return true;
4448
4449 auto Opcode = Inst.getOpcode();
4450 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4451 assert(OpNum != -1);
4452
4453 const auto &Op = Inst.getOperand(OpNum);
4454 if (!hasFlatOffsets() && Op.getImm() != 0) {
4455 Error(getFlatOffsetLoc(Operands),
4456 "flat offset modifier is not supported on this GPU");
4457 return false;
4458 }
4459
4460 // For pre-GFX12 FLAT instructions the offset must be positive;
4461 // MSB is ignored and forced to zero.
4462 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4463 bool AllowNegative =
4465 isGFX12Plus();
4466 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4467 Error(getFlatOffsetLoc(Operands),
4468 Twine("expected a ") +
4469 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4470 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4471 return false;
4472 }
4473
4474 return true;
4475}
4476
4477SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4478 // Start with second operand because SMEM Offset cannot be dst or src0.
4479 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4480 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4481 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4482 return Op.getStartLoc();
4483 }
4484 return getLoc();
4485}
4486
4487bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4488 const OperandVector &Operands) {
4489 if (isCI() || isSI())
4490 return true;
4491
4492 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4493 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4494 return true;
4495
4496 auto Opcode = Inst.getOpcode();
4497 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4498 if (OpNum == -1)
4499 return true;
4500
4501 const auto &Op = Inst.getOperand(OpNum);
4502 if (!Op.isImm())
4503 return true;
4504
4505 uint64_t Offset = Op.getImm();
4506 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4507 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4508 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4509 return true;
4510
4511 Error(getSMEMOffsetLoc(Operands),
4512 isGFX12Plus() ? "expected a 24-bit signed offset"
4513 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4514 : "expected a 21-bit signed offset");
4515
4516 return false;
4517}
4518
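// SOP2 and SOPC instructions can encode at most one 32-bit literal, so src0
// and src1 may not reference two different literals or expressions.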
4519bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4520 unsigned Opcode = Inst.getOpcode();
4521 const MCInstrDesc &Desc = MII.get(Opcode);
4522 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4523 return true;
4524
4525 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4526 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4527
4528 const int OpIndices[] = { Src0Idx, Src1Idx };
4529
4530 unsigned NumExprs = 0;
4531 unsigned NumLiterals = 0;
4532 uint32_t LiteralValue;
4533
4534 for (int OpIdx : OpIndices) {
4535 if (OpIdx == -1) break;
4536
4537 const MCOperand &MO = Inst.getOperand(OpIdx);
4538 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4539 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4540 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4541 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4542 if (NumLiterals == 0 || LiteralValue != Value) {
4543 LiteralValue = Value;
4544 ++NumLiterals;
4545 }
4546 } else if (MO.isExpr()) {
4547 ++NumExprs;
4548 }
4549 }
4550 }
4551
4552 return NumLiterals + NumExprs <= 1;
4553}
4554
4555bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4556 const unsigned Opc = Inst.getOpcode();
4557 if (isPermlane16(Opc)) {
4558 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4559 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4560
4561 if (OpSel & ~3)
4562 return false;
4563 }
4564
4565 uint64_t TSFlags = MII.get(Opc).TSFlags;
4566
4567 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4568 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4569 if (OpSelIdx != -1) {
4570 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4571 return false;
4572 }
4573 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4574 if (OpSelHiIdx != -1) {
4575 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4576 return false;
4577 }
4578 }
4579
4580 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4581 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4582 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4583 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4584 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4585 if (OpSel & 3)
4586 return false;
4587 }
4588
4589 return true;
4590}
4591
4592bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
4593 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4594
4595 const unsigned Opc = Inst.getOpcode();
4596 uint64_t TSFlags = MII.get(Opc).TSFlags;
4597
4598 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4599 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4600 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4601 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4602 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4603 !(TSFlags & SIInstrFlags::IsSWMMAC))
4604 return true;
4605
4606 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4607 if (NegIdx == -1)
4608 return true;
4609
4610 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4611
4612 // Some instructions have a neg_lo or neg_hi operand, but the neg modifier is
4613 // allowed on some src operands and not on others.
4614 // Conveniently, such instructions don't have a src_modifiers operand for the
4615 // src operands that don't allow neg, because those operands also don't allow opsel.
4616
4617 int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4618 AMDGPU::OpName::src1_modifiers,
4619 AMDGPU::OpName::src2_modifiers};
4620
4621 for (unsigned i = 0; i < 3; ++i) {
4622 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4623 if (Neg & (1 << i))
4624 return false;
4625 }
4626 }
4627
4628 return true;
4629}
4630
4631bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4632 const OperandVector &Operands) {
4633 const unsigned Opc = Inst.getOpcode();
4634 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4635 if (DppCtrlIdx >= 0) {
4636 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4637
4638 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4639 AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4640 // DP ALU DPP is supported for row_newbcast only on GFX9*
4641 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4642 Error(S, "DP ALU dpp only supports row_newbcast");
4643 return false;
4644 }
4645 }
4646
4647 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4648 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4649
4650 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4651 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4652 if (Src1Idx >= 0) {
4653 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4654 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4655 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
4656 auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg());
4657 SMLoc S = getRegLoc(Reg, Operands);
4658 Error(S, "invalid operand for instruction");
4659 return false;
4660 }
4661 if (Src1.isImm()) {
4662 Error(getInstLoc(Operands),
4663 "src1 immediate operand invalid for instruction");
4664 return false;
4665 }
4666 }
4667 }
4668
4669 return true;
4670}
4671
4672// Check if VCC register matches wavefront size
4673bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4674 auto FB = getFeatureBits();
4675 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4676 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4677}
4678
4679// At most one unique literal may be used. VOP3 literals are only allowed on GFX10+.
4680bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4681 const OperandVector &Operands) {
4682 unsigned Opcode = Inst.getOpcode();
4683 const MCInstrDesc &Desc = MII.get(Opcode);
4684 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4685 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4686 !HasMandatoryLiteral && !isVOPD(Opcode))
4687 return true;
4688
4689 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4690
4691 unsigned NumExprs = 0;
4692 unsigned NumLiterals = 0;
4693 uint64_t LiteralValue;
4694
4695 for (int OpIdx : OpIndices) {
4696 if (OpIdx == -1)
4697 continue;
4698
4699 const MCOperand &MO = Inst.getOperand(OpIdx);
4700 if (!MO.isImm() && !MO.isExpr())
4701 continue;
4702 if (!isSISrcOperand(Desc, OpIdx))
4703 continue;
4704
4705 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4706 uint64_t Value = static_cast<uint64_t>(MO.getImm());
4707 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4708 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4709 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4710
4711 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4712 Error(getLitLoc(Operands), "invalid operand for instruction");
4713 return false;
4714 }
4715
4716 if (IsFP64 && IsValid32Op)
4717 Value = Hi_32(Value);
4718
4719 if (NumLiterals == 0 || LiteralValue != Value) {
4720 LiteralValue = Value;
4721 ++NumLiterals;
4722 }
4723 } else if (MO.isExpr()) {
4724 ++NumExprs;
4725 }
4726 }
4727 NumLiterals += NumExprs;
4728
4729 if (!NumLiterals)
4730 return true;
4731
4732 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4733 Error(getLitLoc(Operands), "literal operands are not supported");
4734 return false;
4735 }
4736
4737 if (NumLiterals > 1) {
4738 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4739 return false;
4740 }
4741
4742 return true;
4743}
4744
4745// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4746static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4747 const MCRegisterInfo *MRI) {
4748 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4749 if (OpIdx < 0)
4750 return -1;
4751
4752 const MCOperand &Op = Inst.getOperand(OpIdx);
4753 if (!Op.isReg())
4754 return -1;
4755
4756 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4757 auto Reg = Sub ? Sub : Op.getReg();
4758 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4759 return AGPR32.contains(Reg) ? 1 : 0;
4760}
4761
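// Vector memory and DS loads/stores must keep the data and dst operands in
// the same register file: on gfx90a both may be VGPRs or both AGPRs, while
// other targets do not accept AGPR data operands at all.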
4762bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4763 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4764 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4765 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4766 SIInstrFlags::DS)) == 0)
4767 return true;
4768
4769 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4770 : AMDGPU::OpName::vdata;
4771
4772 const MCRegisterInfo *MRI = getMRI();
4773 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4774 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4775
4776 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4777 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4778 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4779 return false;
4780 }
4781
4782 auto FB = getFeatureBits();
4783 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4784 if (DataAreg < 0 || DstAreg < 0)
4785 return true;
4786 return DstAreg == DataAreg;
4787 }
4788
4789 return DstAreg < 1 && DataAreg < 1;
4790}
4791
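// gfx90a requires VGPR and AGPR tuples (register operands wider than 32 bits)
// to start at an even register number, i.e. to be 64-bit aligned.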
4792bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4793 auto FB = getFeatureBits();
4794 if (!FB[AMDGPU::FeatureGFX90AInsts])
4795 return true;
4796
4797 const MCRegisterInfo *MRI = getMRI();
4798 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4799 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4800 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4801 const MCOperand &Op = Inst.getOperand(I);
4802 if (!Op.isReg())
4803 continue;
4804
4805 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4806 if (!Sub)
4807 continue;
4808
4809 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4810 return false;
4811 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4812 return false;
4813 }
4814
4815 return true;
4816}
4817
4818SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4819 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4820 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4821 if (Op.isBLGP())
4822 return Op.getStartLoc();
4823 }
4824 return SMLoc();
4825}
4826
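// On gfx940, the f64 MFMA opcodes accept a 'neg:' modifier in place of
// 'blgp:'; all other opcodes with a blgp operand accept only 'blgp:'.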
4827bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4828 const OperandVector &Operands) {
4829 unsigned Opc = Inst.getOpcode();
4830 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4831 if (BlgpIdx == -1)
4832 return true;
4833 SMLoc BLGPLoc = getBLGPLoc(Operands);
4834 if (!BLGPLoc.isValid())
4835 return true;
4836 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4837 auto FB = getFeatureBits();
4838 bool UsesNeg = false;
4839 if (FB[AMDGPU::FeatureGFX940Insts]) {
4840 switch (Opc) {
4841 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4842 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4843 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4844 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4845 UsesNeg = true;
4846 }
4847 }
4848
4849 if (IsNeg == UsesNeg)
4850 return true;
4851
4852 Error(BLGPLoc,
4853 UsesNeg ? "invalid modifier: blgp is not supported"
4854 : "invalid modifier: neg is not supported");
4855
4856 return false;
4857}
4858
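// On gfx11+, the split s_waitcnt_expcnt/lgkmcnt/vmcnt/vscnt instructions
// require their register operand to be null; any other SGPR is rejected.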
4859bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4860 const OperandVector &Operands) {
4861 if (!isGFX11Plus())
4862 return true;
4863
4864 unsigned Opc = Inst.getOpcode();
4865 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4866 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4867 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4868 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4869 return true;
4870
4871 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4872 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4873 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4874 if (Reg == AMDGPU::SGPR_NULL)
4875 return true;
4876
4877 SMLoc RegLoc = getRegLoc(Reg, Operands);
4878 Error(RegLoc, "src0 must be null");
4879 return false;
4880}
4881
4882bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4883 const OperandVector &Operands) {
4884 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4885 if ((TSFlags & SIInstrFlags::DS) == 0)
4886 return true;
4887 if (TSFlags & SIInstrFlags::GWS)
4888 return validateGWS(Inst, Operands);
4889 // Only validate GDS for non-GWS instructions.
4890 if (hasGDS())
4891 return true;
4892 int GDSIdx =
4893 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4894 if (GDSIdx < 0)
4895 return true;
4896 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4897 if (GDS) {
4898 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4899 Error(S, "gds modifier is not supported on this GPU");
4900 return false;
4901 }
4902 return true;
4903}
4904
4905// gfx90a has an undocumented limitation:
4906// DS_GWS opcodes must use even aligned registers.
4907bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4908 const OperandVector &Operands) {
4909 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4910 return true;
4911
4912 int Opc = Inst.getOpcode();
4913 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4914 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4915 return true;
4916
4917 const MCRegisterInfo *MRI = getMRI();
4918 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4919 int Data0Pos =
4920 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4921 assert(Data0Pos != -1);
4922 auto Reg = Inst.getOperand(Data0Pos).getReg();
4923 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4924 if (RegIdx & 1) {
4925 SMLoc RegLoc = getRegLoc(Reg, Operands);
4926 Error(RegLoc, "vgpr must be even aligned");
4927 return false;
4928 }
4929
4930 return true;
4931}
4932
4933bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4934 const OperandVector &Operands,
4935 const SMLoc &IDLoc) {
4936 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4937 AMDGPU::OpName::cpol);
4938 if (CPolPos == -1)
4939 return true;
4940
4941 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4942
4943 if (isGFX12Plus())
4944 return validateTHAndScopeBits(Inst, Operands, CPol);
4945
4946 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4947 if (TSFlags & SIInstrFlags::SMRD) {
4948 if (CPol && (isSI() || isCI())) {
4949 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4950 Error(S, "cache policy is not supported for SMRD instructions");
4951 return false;
4952 }
4953 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4954 Error(IDLoc, "invalid cache policy for SMEM instruction");
4955 return false;
4956 }
4957 }
4958
4959 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4960 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4961 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4962 SIInstrFlags::FLAT;
4963 if (!(TSFlags & AllowSCCModifier)) {
4964 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4965 StringRef CStr(S.getPointer());
4966 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4967 Error(S,
4968 "scc modifier is not supported for this instruction on this GPU");
4969 return false;
4970 }
4971 }
4972
4973 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
4974 return true;
4975
4976 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4977 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4978 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4979 : "instruction must use glc");
4980 return false;
4981 }
4982 } else {
4983 if (CPol & CPol::GLC) {
4984 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4985 StringRef CStr(S.getPointer());
4986 S = SMLoc::getFromPointer(
4987 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4988 Error(S, isGFX940() ? "instruction must not use sc0"
4989 : "instruction must not use glc");
4990 return false;
4991 }
4992 }
4993
4994 return true;
4995}
4996
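// GFX12 cache policy: the th (temporal hint) and scope fields are validated
// together, since some th values are only valid for loads, stores, or
// atomics, and th:TH_BYPASS is tied to the scope value.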
4997bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
4998 const OperandVector &Operands,
4999 const unsigned CPol) {
5000 const unsigned TH = CPol & AMDGPU::CPol::TH;
5001 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5002
5003 const unsigned Opcode = Inst.getOpcode();
5004 const MCInstrDesc &TID = MII.get(Opcode);
5005
5006 auto PrintError = [&](StringRef Msg) {
5007 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5008 Error(S, Msg);
5009 return false;
5010 };
5011
5012 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5013 ((TH & AMDGPU::CPol::TH_ATOMIC_RETURN) !=
5014 AMDGPU::CPol::TH_ATOMIC_RETURN))
5015 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5016
5017 if (TH == 0)
5018 return true;
5019
5020 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5021 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5022 (TH == AMDGPU::CPol::TH_NT_HT)))
5023 return PrintError("invalid th value for SMEM instruction");
5024
5025 if (TH == AMDGPU::CPol::TH_BYPASS) {
5026 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5027 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
5028 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5029 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5030 return PrintError("scope and th combination is not valid");
5031 }
5032
5033 bool IsStore = TID.mayStore();
5034 bool IsAtomic =
5035 TID.TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet);
5036
5037 if (IsAtomic) {
5038 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5039 return PrintError("invalid th value for atomic instructions");
5040 } else if (IsStore) {
5041 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5042 return PrintError("invalid th value for store instructions");
5043 } else {
5044 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5045 return PrintError("invalid th value for load instructions");
5046 }
5047
5048 return true;
5049}
5050
5051bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5052 const OperandVector &Operands) {
5053 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5054 if (Desc.mayStore() &&
5055 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5056 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5057 if (Loc != getInstLoc(Operands)) {
5058 Error(Loc, "TFE modifier has no meaning for store instructions");
5059 return false;
5060 }
5061 }
5062
5063 return true;
5064}
5065
5066bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5067 const SMLoc &IDLoc,
5068 const OperandVector &Operands) {
5069 if (auto ErrMsg = validateLdsDirect(Inst)) {
5070 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
5071 return false;
5072 }
5073 if (!validateSOPLiteral(Inst)) {
5074 Error(getLitLoc(Operands),
5075 "only one unique literal operand is allowed");
5076 return false;
5077 }
5078 if (!validateVOPLiteral(Inst, Operands)) {
5079 return false;
5080 }
5081 if (!validateConstantBusLimitations(Inst, Operands)) {
5082 return false;
5083 }
5084 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
5085 return false;
5086 }
5087 if (!validateIntClampSupported(Inst)) {
5088 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5089 "integer clamping is not supported on this GPU");
5090 return false;
5091 }
5092 if (!validateOpSel(Inst)) {
5093 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5094 "invalid op_sel operand");
5095 return false;
5096 }
5097 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5098 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5099 "invalid neg_lo operand");
5100 return false;
5101 }
5102 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5103 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5104 "invalid neg_hi operand");
5105 return false;
5106 }
5107 if (!validateDPP(Inst, Operands)) {
5108 return false;
5109 }
5110 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5111 if (!validateMIMGD16(Inst)) {
5112 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5113 "d16 modifier is not supported on this GPU");
5114 return false;
5115 }
5116 if (!validateMIMGDim(Inst, Operands)) {
5117 Error(IDLoc, "missing dim operand");
5118 return false;
5119 }
5120 if (!validateMIMGMSAA(Inst)) {
5121 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5122 "invalid dim; must be MSAA type");
5123 return false;
5124 }
5125 if (!validateMIMGDataSize(Inst, IDLoc)) {
5126 return false;
5127 }
5128 if (!validateMIMGAddrSize(Inst, IDLoc))
5129 return false;
5130 if (!validateMIMGAtomicDMask(Inst)) {
5131 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5132 "invalid atomic image dmask");
5133 return false;
5134 }
5135 if (!validateMIMGGatherDMask(Inst)) {
5136 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5137 "invalid image_gather dmask: only one bit must be set");
5138 return false;
5139 }
5140 if (!validateMovrels(Inst, Operands)) {
5141 return false;
5142 }
5143 if (!validateOffset(Inst, Operands)) {
5144 return false;
5145 }
5146 if (!validateMAIAccWrite(Inst, Operands)) {
5147 return false;
5148 }
5149 if (!validateMAISrc2(Inst, Operands)) {
5150 return false;
5151 }
5152 if (!validateMFMA(Inst, Operands)) {
5153 return false;
5154 }
5155 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5156 return false;
5157 }
5158
5159 if (!validateAGPRLdSt(Inst)) {
5160 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5161 ? "invalid register class: data and dst should be all VGPR or AGPR"
5162 : "invalid register class: agpr loads and stores not supported on this GPU"
5163 );
5164 return false;
5165 }
5166 if (!validateVGPRAlign(Inst)) {
5167 Error(IDLoc,
5168 "invalid register class: vgpr tuples must be 64 bit aligned");
5169 return false;
5170 }
5171 if (!validateDS(Inst, Operands)) {
5172 return false;
5173 }
5174
5175 if (!validateBLGP(Inst, Operands)) {
5176 return false;
5177 }
5178
5179 if (!validateDivScale(Inst)) {
5180 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5181 return false;
5182 }
5183 if (!validateWaitCnt(Inst, Operands)) {
5184 return false;
5185 }
5186 if (!validateTFE(Inst, Operands)) {
5187 return false;
5188 }
5189
5190 return true;
5191}
5192
5193static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5194 const FeatureBitset &FBS,
5195 unsigned VariantID = 0);
5196
5197static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5198 const FeatureBitset &AvailableFeatures,
5199 unsigned VariantID);
5200
5201bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5202 const FeatureBitset &FBS) {
5203 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5204}
5205
5206bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5207 const FeatureBitset &FBS,
5208 ArrayRef<unsigned> Variants) {
5209 for (auto Variant : Variants) {
5210 if (