1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
12#include "SIDefines.h"
13#include "SIInstrInfo.h"
14#include "SIRegisterInfo.h"
19#include "llvm/ADT/APFloat.h"
21#include "llvm/ADT/StringSet.h"
22#include "llvm/ADT/Twine.h"
25#include "llvm/MC/MCAsmInfo.h"
26#include "llvm/MC/MCContext.h"
27#include "llvm/MC/MCExpr.h"
28#include "llvm/MC/MCInst.h"
29#include "llvm/MC/MCInstrDesc.h"
34#include "llvm/MC/MCSymbol.h"
41#include <optional>
42
43using namespace llvm;
44using namespace llvm::AMDGPU;
45using namespace llvm::amdhsa;
46
47namespace {
48
49class AMDGPUAsmParser;
50
51enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
52
53//===----------------------------------------------------------------------===//
54// Operand
55//===----------------------------------------------------------------------===//
56
57class AMDGPUOperand : public MCParsedAsmOperand {
58 enum KindTy {
59 Token,
60 Immediate,
61 Register,
62 Expression,
63 } Kind;
64
65 SMLoc StartLoc, EndLoc;
66 const AMDGPUAsmParser *AsmParser;
67
68public:
69 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
70 : Kind(Kind_), AsmParser(AsmParser_) {}
71
72 using Ptr = std::unique_ptr<AMDGPUOperand>;
73
74 struct Modifiers {
75 bool Abs = false;
76 bool Neg = false;
77 bool Sext = false;
78
79 bool hasFPModifiers() const { return Abs || Neg; }
80 bool hasIntModifiers() const { return Sext; }
81 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
82
83 int64_t getFPModifiersOperand() const {
84 int64_t Operand = 0;
85 Operand |= Abs ? SISrcMods::ABS : 0u;
86 Operand |= Neg ? SISrcMods::NEG : 0u;
87 return Operand;
88 }
89
90 int64_t getIntModifiersOperand() const {
91 int64_t Operand = 0;
92 Operand |= Sext ? SISrcMods::SEXT : 0u;
93 return Operand;
94 }
95
96 int64_t getModifiersOperand() const {
97 assert(!(hasFPModifiers() && hasIntModifiers())
98 && "fp and int modifiers should not be used simultaneously");
99 if (hasFPModifiers()) {
100 return getFPModifiersOperand();
101 } else if (hasIntModifiers()) {
102 return getIntModifiersOperand();
103 } else {
104 return 0;
105 }
106 }
107
108 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
109 };
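 // Example (illustrative, not from the original source): for a VOP3 source
 // written as "-|v1|" the parser sets Neg and Abs, so getModifiersOperand()
 // returns SISrcMods::NEG | SISrcMods::ABS; for "sext(v1)" it returns
 // SISrcMods::SEXT. Mixing FP and integer modifiers trips the assert above.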
110
111 enum ImmTy {
112 ImmTyNone,
113 ImmTyGDS,
114 ImmTyLDS,
115 ImmTyOffen,
116 ImmTyIdxen,
117 ImmTyAddr64,
118 ImmTyOffset,
119 ImmTyInstOffset,
120 ImmTyOffset0,
121 ImmTyOffset1,
122 ImmTySMEMOffsetMod,
123 ImmTyCPol,
124 ImmTyTFE,
125 ImmTyD16,
126 ImmTyClampSI,
127 ImmTyOModSI,
128 ImmTySDWADstSel,
129 ImmTySDWASrc0Sel,
130 ImmTySDWASrc1Sel,
131 ImmTySDWADstUnused,
132 ImmTyDMask,
133 ImmTyDim,
134 ImmTyUNorm,
135 ImmTyDA,
136 ImmTyR128A16,
137 ImmTyA16,
138 ImmTyLWE,
139 ImmTyExpTgt,
140 ImmTyExpCompr,
141 ImmTyExpVM,
142 ImmTyFORMAT,
143 ImmTyHwreg,
144 ImmTyOff,
145 ImmTySendMsg,
146 ImmTyInterpSlot,
147 ImmTyInterpAttr,
148 ImmTyInterpAttrChan,
149 ImmTyOpSel,
150 ImmTyOpSelHi,
151 ImmTyNegLo,
152 ImmTyNegHi,
153 ImmTyDPP8,
154 ImmTyDppCtrl,
155 ImmTyDppRowMask,
156 ImmTyDppBankMask,
157 ImmTyDppBoundCtrl,
158 ImmTyDppFI,
159 ImmTySwizzle,
160 ImmTyGprIdxMode,
161 ImmTyHigh,
162 ImmTyBLGP,
163 ImmTyCBSZ,
164 ImmTyABID,
165 ImmTyEndpgm,
166 ImmTyWaitVDST,
167 ImmTyWaitEXP,
168 };
169
170 // Immediate operand kind.
171 // It helps to identify the location of an offending operand after an error.
172 // Note that regular literals and mandatory literals (KImm) must be handled
173 // differently. When looking for an offending operand, we should usually
174 // ignore mandatory literals because they are part of the instruction and
175 // cannot be changed. Report location of mandatory operands only for VOPD,
176 // when both OpX and OpY have a KImm and there are no other literals.
177 enum ImmKindTy {
178 ImmKindTyNone,
179 ImmKindTyLiteral,
180 ImmKindTyMandatoryLiteral,
181 ImmKindTyConst,
182 };
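 // Example (illustrative, not from the original source): in
 //   v_add_f32 v0, 0x12345678, v1
 // the constant is a regular literal (ImmKindTyLiteral), whereas the trailing
 // constant of v_fmaak_f32 is a mandatory literal (ImmKindTyMandatoryLiteral)
 // and is normally ignored when locating an offending literal.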
183
184private:
185 struct TokOp {
186 const char *Data;
187 unsigned Length;
188 };
189
190 struct ImmOp {
191 int64_t Val;
192 ImmTy Type;
193 bool IsFPImm;
194 mutable ImmKindTy Kind;
195 Modifiers Mods;
196 };
197
198 struct RegOp {
199 unsigned RegNo;
200 Modifiers Mods;
201 };
202
203 union {
204 TokOp Tok;
205 ImmOp Imm;
206 RegOp Reg;
207 const MCExpr *Expr;
208 };
209
210public:
211 bool isToken() const override { return Kind == Token; }
212
213 bool isSymbolRefExpr() const {
214 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
215 }
216
217 bool isImm() const override {
218 return Kind == Immediate;
219 }
220
221 void setImmKindNone() const {
222 assert(isImm());
223 Imm.Kind = ImmKindTyNone;
224 }
225
226 void setImmKindLiteral() const {
227 assert(isImm());
228 Imm.Kind = ImmKindTyLiteral;
229 }
230
231 void setImmKindMandatoryLiteral() const {
232 assert(isImm());
233 Imm.Kind = ImmKindTyMandatoryLiteral;
234 }
235
236 void setImmKindConst() const {
237 assert(isImm());
238 Imm.Kind = ImmKindTyConst;
239 }
240
241 bool IsImmKindLiteral() const {
242 return isImm() && Imm.Kind == ImmKindTyLiteral;
243 }
244
245 bool IsImmKindMandatoryLiteral() const {
246 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
247 }
248
249 bool isImmKindConst() const {
250 return isImm() && Imm.Kind == ImmKindTyConst;
251 }
252
253 bool isInlinableImm(MVT type) const;
254 bool isLiteralImm(MVT type) const;
255
256 bool isRegKind() const {
257 return Kind == Register;
258 }
259
260 bool isReg() const override {
261 return isRegKind() && !hasModifiers();
262 }
263
264 bool isRegOrInline(unsigned RCID, MVT type) const {
265 return isRegClass(RCID) || isInlinableImm(type);
266 }
267
268 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
269 return isRegOrInline(RCID, type) || isLiteralImm(type);
270 }
271
272 bool isRegOrImmWithInt16InputMods() const {
273 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
274 }
275
276 bool isRegOrImmWithIntT16InputMods() const {
277 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
278 }
279
280 bool isRegOrImmWithInt32InputMods() const {
281 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
282 }
283
284 bool isRegOrInlineImmWithInt16InputMods() const {
285 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
286 }
287
288 bool isRegOrInlineImmWithInt32InputMods() const {
289 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
290 }
291
292 bool isRegOrImmWithInt64InputMods() const {
293 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
294 }
295
296 bool isRegOrImmWithFP16InputMods() const {
297 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
298 }
299
300 bool isRegOrImmWithFPT16InputMods() const {
301 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
302 }
303
304 bool isRegOrImmWithFP32InputMods() const {
305 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
306 }
307
308 bool isRegOrImmWithFP64InputMods() const {
309 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
310 }
311
312 bool isRegOrInlineImmWithFP16InputMods() const {
313 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
314 }
315
316 bool isRegOrInlineImmWithFP32InputMods() const {
317 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
318 }
319
320
321 bool isVReg() const {
322 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
323 isRegClass(AMDGPU::VReg_64RegClassID) ||
324 isRegClass(AMDGPU::VReg_96RegClassID) ||
325 isRegClass(AMDGPU::VReg_128RegClassID) ||
326 isRegClass(AMDGPU::VReg_160RegClassID) ||
327 isRegClass(AMDGPU::VReg_192RegClassID) ||
328 isRegClass(AMDGPU::VReg_256RegClassID) ||
329 isRegClass(AMDGPU::VReg_512RegClassID) ||
330 isRegClass(AMDGPU::VReg_1024RegClassID);
331 }
332
333 bool isVReg32() const {
334 return isRegClass(AMDGPU::VGPR_32RegClassID);
335 }
336
337 bool isVReg32OrOff() const {
338 return isOff() || isVReg32();
339 }
340
341 bool isNull() const {
342 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
343 }
344
345 bool isVRegWithInputMods() const;
346 bool isT16VRegWithInputMods() const;
347
348 bool isSDWAOperand(MVT type) const;
349 bool isSDWAFP16Operand() const;
350 bool isSDWAFP32Operand() const;
351 bool isSDWAInt16Operand() const;
352 bool isSDWAInt32Operand() const;
353
354 bool isImmTy(ImmTy ImmT) const {
355 return isImm() && Imm.Type == ImmT;
356 }
357
358 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
359
360 bool isImmModifier() const {
361 return isImm() && Imm.Type != ImmTyNone;
362 }
363
364 bool isClampSI() const { return isImmTy(ImmTyClampSI); }
365 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
366 bool isDMask() const { return isImmTy(ImmTyDMask); }
367 bool isDim() const { return isImmTy(ImmTyDim); }
368 bool isUNorm() const { return isImmTy(ImmTyUNorm); }
369 bool isDA() const { return isImmTy(ImmTyDA); }
370 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
371 bool isA16() const { return isImmTy(ImmTyA16); }
372 bool isLWE() const { return isImmTy(ImmTyLWE); }
373 bool isOff() const { return isImmTy(ImmTyOff); }
374 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
375 bool isExpVM() const { return isImmTy(ImmTyExpVM); }
376 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
377 bool isOffen() const { return isImmTy(ImmTyOffen); }
378 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
379 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
380 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
381 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
382 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
383 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
384 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
385 bool isGDS() const { return isImmTy(ImmTyGDS); }
386 bool isLDS() const { return isImmTy(ImmTyLDS); }
387 bool isCPol() const { return isImmTy(ImmTyCPol); }
388 bool isTFE() const { return isImmTy(ImmTyTFE); }
389 bool isD16() const { return isImmTy(ImmTyD16); }
390 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
391 bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
392 bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
393 bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
394 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
395 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
396 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
397 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
398 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
399 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
400 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
401 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
402 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
403 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
404 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
405 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
406 bool isHigh() const { return isImmTy(ImmTyHigh); }
407
408 bool isRegOrImm() const {
409 return isReg() || isImm();
410 }
411
412 bool isRegClass(unsigned RCID) const;
413
414 bool isInlineValue() const;
415
416 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
417 return isRegOrInline(RCID, type) && !hasModifiers();
418 }
419
420 bool isSCSrcB16() const {
421 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
422 }
423
424 bool isSCSrcV2B16() const {
425 return isSCSrcB16();
426 }
427
428 bool isSCSrcB32() const {
429 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
430 }
431
432 bool isSCSrcB64() const {
433 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
434 }
435
436 bool isBoolReg() const;
437
438 bool isSCSrcF16() const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
440 }
441
442 bool isSCSrcV2F16() const {
443 return isSCSrcF16();
444 }
445
446 bool isSCSrcF32() const {
447 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
448 }
449
450 bool isSCSrcF64() const {
451 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
452 }
453
454 bool isSSrcB32() const {
455 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
456 }
457
458 bool isSSrcB16() const {
459 return isSCSrcB16() || isLiteralImm(MVT::i16);
460 }
461
462 bool isSSrcV2B16() const {
463 llvm_unreachable("cannot happen");
464 return isSSrcB16();
465 }
466
467 bool isSSrcB64() const {
468 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
469 // See isVSrc64().
470 return isSCSrcB64() || isLiteralImm(MVT::i64);
471 }
472
473 bool isSSrcF32() const {
474 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
475 }
476
477 bool isSSrcF64() const {
478 return isSCSrcB64() || isLiteralImm(MVT::f64);
479 }
480
481 bool isSSrcF16() const {
482 return isSCSrcB16() || isLiteralImm(MVT::f16);
483 }
484
485 bool isSSrcV2F16() const {
486 llvm_unreachable("cannot happen");
487 return isSSrcF16();
488 }
489
490 bool isSSrcV2FP32() const {
491 llvm_unreachable("cannot happen");
492 return isSSrcF32();
493 }
494
495 bool isSCSrcV2FP32() const {
496 llvm_unreachable("cannot happen");
497 return isSCSrcF32();
498 }
499
500 bool isSSrcV2INT32() const {
501 llvm_unreachable("cannot happen");
502 return isSSrcB32();
503 }
504
505 bool isSCSrcV2INT32() const {
506 llvm_unreachable("cannot happen");
507 return isSCSrcB32();
508 }
509
510 bool isSSrcOrLdsB32() const {
511 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
512 isLiteralImm(MVT::i32) || isExpr();
513 }
514
515 bool isVCSrcB32() const {
516 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
517 }
518
519 bool isVCSrcB64() const {
520 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
521 }
522
523 bool isVCSrcTB16() const {
524 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
525 }
526
527 bool isVCSrcTB16_Lo128() const {
528 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
529 }
530
531 bool isVCSrcFake16B16_Lo128() const {
532 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
533 }
534
535 bool isVCSrcB16() const {
536 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
537 }
538
539 bool isVCSrcV2B16() const {
540 return isVCSrcB16();
541 }
542
543 bool isVCSrcF32() const {
544 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
545 }
546
547 bool isVCSrcF64() const {
548 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
549 }
550
551 bool isVCSrcTF16() const {
552 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
553 }
554
555 bool isVCSrcTF16_Lo128() const {
556 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
557 }
558
559 bool isVCSrcFake16F16_Lo128() const {
560 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
561 }
562
563 bool isVCSrcF16() const {
564 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
565 }
566
567 bool isVCSrcV2F16() const {
568 return isVCSrcF16();
569 }
570
571 bool isVSrcB32() const {
572 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
573 }
574
575 bool isVSrcB64() const {
576 return isVCSrcF64() || isLiteralImm(MVT::i64);
577 }
578
579 bool isVSrcTB16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
580
581 bool isVSrcTB16_Lo128() const {
582 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
583 }
584
585 bool isVSrcFake16B16_Lo128() const {
586 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
587 }
588
589 bool isVSrcB16() const {
590 return isVCSrcB16() || isLiteralImm(MVT::i16);
591 }
592
593 bool isVSrcV2B16() const {
594 return isVSrcB16() || isLiteralImm(MVT::v2i16);
595 }
596
597 bool isVCSrcV2FP32() const {
598 return isVCSrcF64();
599 }
600
601 bool isVSrcV2FP32() const {
602 return isVSrcF64() || isLiteralImm(MVT::v2f32);
603 }
604
605 bool isVCSrcV2INT32() const {
606 return isVCSrcB64();
607 }
608
609 bool isVSrcV2INT32() const {
610 return isVSrcB64() || isLiteralImm(MVT::v2i32);
611 }
612
613 bool isVSrcF32() const {
614 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
615 }
616
617 bool isVSrcF64() const {
618 return isVCSrcF64() || isLiteralImm(MVT::f64);
619 }
620
621 bool isVSrcTF16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
622
623 bool isVSrcTF16_Lo128() const {
624 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
625 }
626
627 bool isVSrcFake16F16_Lo128() const {
628 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
629 }
630
631 bool isVSrcF16() const {
632 return isVCSrcF16() || isLiteralImm(MVT::f16);
633 }
634
635 bool isVSrcV2F16() const {
636 return isVSrcF16() || isLiteralImm(MVT::v2f16);
637 }
638
639 bool isVISrcB32() const {
640 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
641 }
642
643 bool isVISrcB16() const {
644 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
645 }
646
647 bool isVISrcV2B16() const {
648 return isVISrcB16();
649 }
650
651 bool isVISrcF32() const {
652 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
653 }
654
655 bool isVISrcF16() const {
656 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
657 }
658
659 bool isVISrcV2F16() const {
660 return isVISrcF16() || isVISrcB32();
661 }
662
663 bool isVISrc_64B64() const {
664 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
665 }
666
667 bool isVISrc_64F64() const {
668 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
669 }
670
671 bool isVISrc_64V2FP32() const {
672 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
673 }
674
675 bool isVISrc_64V2INT32() const {
676 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
677 }
678
679 bool isVISrc_256B64() const {
680 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
681 }
682
683 bool isVISrc_256F64() const {
684 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
685 }
686
687 bool isVISrc_128B16() const {
688 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
689 }
690
691 bool isVISrc_128V2B16() const {
692 return isVISrc_128B16();
693 }
694
695 bool isVISrc_128B32() const {
696 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
697 }
698
699 bool isVISrc_128F32() const {
700 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
701 }
702
703 bool isVISrc_256V2FP32() const {
704 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
705 }
706
707 bool isVISrc_256V2INT32() const {
708 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
709 }
710
711 bool isVISrc_512B32() const {
712 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
713 }
714
715 bool isVISrc_512B16() const {
716 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
717 }
718
719 bool isVISrc_512V2B16() const {
720 return isVISrc_512B16();
721 }
722
723 bool isVISrc_512F32() const {
724 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
725 }
726
727 bool isVISrc_512F16() const {
728 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
729 }
730
731 bool isVISrc_512V2F16() const {
732 return isVISrc_512F16() || isVISrc_512B32();
733 }
734
735 bool isVISrc_1024B32() const {
736 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
737 }
738
739 bool isVISrc_1024B16() const {
740 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
741 }
742
743 bool isVISrc_1024V2B16() const {
744 return isVISrc_1024B16();
745 }
746
747 bool isVISrc_1024F32() const {
748 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
749 }
750
751 bool isVISrc_1024F16() const {
752 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
753 }
754
755 bool isVISrc_1024V2F16() const {
756 return isVISrc_1024F16() || isVISrc_1024B32();
757 }
758
759 bool isAISrcB32() const {
760 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
761 }
762
763 bool isAISrcB16() const {
764 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
765 }
766
767 bool isAISrcV2B16() const {
768 return isAISrcB16();
769 }
770
771 bool isAISrcF32() const {
772 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
773 }
774
775 bool isAISrcF16() const {
776 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
777 }
778
779 bool isAISrcV2F16() const {
780 return isAISrcF16() || isAISrcB32();
781 }
782
783 bool isAISrc_64B64() const {
784 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
785 }
786
787 bool isAISrc_64F64() const {
788 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
789 }
790
791 bool isAISrc_128B32() const {
792 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
793 }
794
795 bool isAISrc_128B16() const {
796 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
797 }
798
799 bool isAISrc_128V2B16() const {
800 return isAISrc_128B16();
801 }
802
803 bool isAISrc_128F32() const {
804 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
805 }
806
807 bool isAISrc_128F16() const {
808 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
809 }
810
811 bool isAISrc_128V2F16() const {
812 return isAISrc_128F16() || isAISrc_128B32();
813 }
814
815 bool isVISrc_128F16() const {
816 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
817 }
818
819 bool isVISrc_128V2F16() const {
820 return isVISrc_128F16() || isVISrc_128B32();
821 }
822
823 bool isAISrc_256B64() const {
824 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
825 }
826
827 bool isAISrc_256F64() const {
828 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
829 }
830
831 bool isAISrc_512B32() const {
832 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
833 }
834
835 bool isAISrc_512B16() const {
836 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
837 }
838
839 bool isAISrc_512V2B16() const {
840 return isAISrc_512B16();
841 }
842
843 bool isAISrc_512F32() const {
844 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
845 }
846
847 bool isAISrc_512F16() const {
848 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
849 }
850
851 bool isAISrc_512V2F16() const {
852 return isAISrc_512F16() || isAISrc_512B32();
853 }
854
855 bool isAISrc_1024B32() const {
856 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
857 }
858
859 bool isAISrc_1024B16() const {
860 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
861 }
862
863 bool isAISrc_1024V2B16() const {
864 return isAISrc_1024B16();
865 }
866
867 bool isAISrc_1024F32() const {
868 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
869 }
870
871 bool isAISrc_1024F16() const {
872 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
873 }
874
875 bool isAISrc_1024V2F16() const {
876 return isAISrc_1024F16() || isAISrc_1024B32();
877 }
878
879 bool isKImmFP32() const {
880 return isLiteralImm(MVT::f32);
881 }
882
883 bool isKImmFP16() const {
884 return isLiteralImm(MVT::f16);
885 }
886
887 bool isMem() const override {
888 return false;
889 }
890
891 bool isExpr() const {
892 return Kind == Expression;
893 }
894
895 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
896
897 bool isSWaitCnt() const;
898 bool isDepCtr() const;
899 bool isSDelayALU() const;
900 bool isHwreg() const;
901 bool isSendMsg() const;
902 bool isSwizzle() const;
903 bool isSMRDOffset8() const;
904 bool isSMEMOffset() const;
905 bool isSMRDLiteralOffset() const;
906 bool isDPP8() const;
907 bool isDPPCtrl() const;
908 bool isBLGP() const;
909 bool isCBSZ() const;
910 bool isABID() const;
911 bool isGPRIdxMode() const;
912 bool isS16Imm() const;
913 bool isU16Imm() const;
914 bool isEndpgm() const;
915 bool isWaitVDST() const;
916 bool isWaitEXP() const;
917
918 StringRef getToken() const {
919 assert(isToken());
920 return StringRef(Tok.Data, Tok.Length);
921 }
922
923 int64_t getImm() const {
924 assert(isImm());
925 return Imm.Val;
926 }
927
928 void setImm(int64_t Val) {
929 assert(isImm());
930 Imm.Val = Val;
931 }
932
933 ImmTy getImmTy() const {
934 assert(isImm());
935 return Imm.Type;
936 }
937
938 unsigned getReg() const override {
939 assert(isRegKind());
940 return Reg.RegNo;
941 }
942
943 SMLoc getStartLoc() const override {
944 return StartLoc;
945 }
946
947 SMLoc getEndLoc() const override {
948 return EndLoc;
949 }
950
951 SMRange getLocRange() const {
952 return SMRange(StartLoc, EndLoc);
953 }
954
955 Modifiers getModifiers() const {
956 assert(isRegKind() || isImmTy(ImmTyNone));
957 return isRegKind() ? Reg.Mods : Imm.Mods;
958 }
959
960 void setModifiers(Modifiers Mods) {
961 assert(isRegKind() || isImmTy(ImmTyNone));
962 if (isRegKind())
963 Reg.Mods = Mods;
964 else
965 Imm.Mods = Mods;
966 }
967
968 bool hasModifiers() const {
969 return getModifiers().hasModifiers();
970 }
971
972 bool hasFPModifiers() const {
973 return getModifiers().hasFPModifiers();
974 }
975
976 bool hasIntModifiers() const {
977 return getModifiers().hasIntModifiers();
978 }
979
980 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
981
982 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
983
984 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
985
986 void addRegOperands(MCInst &Inst, unsigned N) const;
987
988 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
989 if (isRegKind())
990 addRegOperands(Inst, N);
991 else
992 addImmOperands(Inst, N);
993 }
994
995 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
996 Modifiers Mods = getModifiers();
997 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
998 if (isRegKind()) {
999 addRegOperands(Inst, N);
1000 } else {
1001 addImmOperands(Inst, N, false);
1002 }
1003 }
1004
1005 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1006 assert(!hasIntModifiers());
1007 addRegOrImmWithInputModsOperands(Inst, N);
1008 }
1009
1010 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1011 assert(!hasFPModifiers());
1012 addRegOrImmWithInputModsOperands(Inst, N);
1013 }
1014
1015 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1016 Modifiers Mods = getModifiers();
1017 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1018 assert(isRegKind());
1019 addRegOperands(Inst, N);
1020 }
1021
1022 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1023 assert(!hasIntModifiers());
1024 addRegWithInputModsOperands(Inst, N);
1025 }
1026
1027 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1028 assert(!hasFPModifiers());
1029 addRegWithInputModsOperands(Inst, N);
1030 }
1031
1032 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1033 switch (Type) {
1034 case ImmTyNone: OS << "None"; break;
1035 case ImmTyGDS: OS << "GDS"; break;
1036 case ImmTyLDS: OS << "LDS"; break;
1037 case ImmTyOffen: OS << "Offen"; break;
1038 case ImmTyIdxen: OS << "Idxen"; break;
1039 case ImmTyAddr64: OS << "Addr64"; break;
1040 case ImmTyOffset: OS << "Offset"; break;
1041 case ImmTyInstOffset: OS << "InstOffset"; break;
1042 case ImmTyOffset0: OS << "Offset0"; break;
1043 case ImmTyOffset1: OS << "Offset1"; break;
1044 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1045 case ImmTyCPol: OS << "CPol"; break;
1046 case ImmTyTFE: OS << "TFE"; break;
1047 case ImmTyD16: OS << "D16"; break;
1048 case ImmTyFORMAT: OS << "FORMAT"; break;
1049 case ImmTyClampSI: OS << "ClampSI"; break;
1050 case ImmTyOModSI: OS << "OModSI"; break;
1051 case ImmTyDPP8: OS << "DPP8"; break;
1052 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1053 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1054 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1055 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1056 case ImmTyDppFI: OS << "DppFI"; break;
1057 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1058 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1059 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1060 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1061 case ImmTyDMask: OS << "DMask"; break;
1062 case ImmTyDim: OS << "Dim"; break;
1063 case ImmTyUNorm: OS << "UNorm"; break;
1064 case ImmTyDA: OS << "DA"; break;
1065 case ImmTyR128A16: OS << "R128A16"; break;
1066 case ImmTyA16: OS << "A16"; break;
1067 case ImmTyLWE: OS << "LWE"; break;
1068 case ImmTyOff: OS << "Off"; break;
1069 case ImmTyExpTgt: OS << "ExpTgt"; break;
1070 case ImmTyExpCompr: OS << "ExpCompr"; break;
1071 case ImmTyExpVM: OS << "ExpVM"; break;
1072 case ImmTyHwreg: OS << "Hwreg"; break;
1073 case ImmTySendMsg: OS << "SendMsg"; break;
1074 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1075 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1076 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1077 case ImmTyOpSel: OS << "OpSel"; break;
1078 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1079 case ImmTyNegLo: OS << "NegLo"; break;
1080 case ImmTyNegHi: OS << "NegHi"; break;
1081 case ImmTySwizzle: OS << "Swizzle"; break;
1082 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1083 case ImmTyHigh: OS << "High"; break;
1084 case ImmTyBLGP: OS << "BLGP"; break;
1085 case ImmTyCBSZ: OS << "CBSZ"; break;
1086 case ImmTyABID: OS << "ABID"; break;
1087 case ImmTyEndpgm: OS << "Endpgm"; break;
1088 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1089 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1090 }
1091 }
1092
1093 void print(raw_ostream &OS) const override {
1094 switch (Kind) {
1095 case Register:
1096 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1097 break;
1098 case Immediate:
1099 OS << '<' << getImm();
1100 if (getImmTy() != ImmTyNone) {
1101 OS << " type: "; printImmTy(OS, getImmTy());
1102 }
1103 OS << " mods: " << Imm.Mods << '>';
1104 break;
1105 case Token:
1106 OS << '\'' << getToken() << '\'';
1107 break;
1108 case Expression:
1109 OS << "<expr " << *Expr << '>';
1110 break;
1111 }
1112 }
1113
1114 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1115 int64_t Val, SMLoc Loc,
1116 ImmTy Type = ImmTyNone,
1117 bool IsFPImm = false) {
1118 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1119 Op->Imm.Val = Val;
1120 Op->Imm.IsFPImm = IsFPImm;
1121 Op->Imm.Kind = ImmKindTyNone;
1122 Op->Imm.Type = Type;
1123 Op->Imm.Mods = Modifiers();
1124 Op->StartLoc = Loc;
1125 Op->EndLoc = Loc;
1126 return Op;
1127 }
1128
1129 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1130 StringRef Str, SMLoc Loc,
1131 bool HasExplicitEncodingSize = true) {
1132 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1133 Res->Tok.Data = Str.data();
1134 Res->Tok.Length = Str.size();
1135 Res->StartLoc = Loc;
1136 Res->EndLoc = Loc;
1137 return Res;
1138 }
1139
1140 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1141 unsigned RegNo, SMLoc S,
1142 SMLoc E) {
1143 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1144 Op->Reg.RegNo = RegNo;
1145 Op->Reg.Mods = Modifiers();
1146 Op->StartLoc = S;
1147 Op->EndLoc = E;
1148 return Op;
1149 }
1150
1151 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1152 const class MCExpr *Expr, SMLoc S) {
1153 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1154 Op->Expr = Expr;
1155 Op->StartLoc = S;
1156 Op->EndLoc = S;
1157 return Op;
1158 }
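 // Typical usage (illustrative, not from the original source): parser
 // callbacks build operands through these factories, e.g.
 //   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, Loc,
 //                                               AMDGPUOperand::ImmTyOffset));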
1159};
1160
1161raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1162 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1163 return OS;
1164}
1165
1166//===----------------------------------------------------------------------===//
1167// AsmParser
1168//===----------------------------------------------------------------------===//
1169
1170// Holds info related to the current kernel, e.g. count of SGPRs used.
1171// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1172// .amdgpu_hsa_kernel or at EOF.
1173class KernelScopeInfo {
1174 int SgprIndexUnusedMin = -1;
1175 int VgprIndexUnusedMin = -1;
1176 int AgprIndexUnusedMin = -1;
1177 MCContext *Ctx = nullptr;
1178 MCSubtargetInfo const *MSTI = nullptr;
1179
1180 void usesSgprAt(int i) {
1181 if (i >= SgprIndexUnusedMin) {
1182 SgprIndexUnusedMin = ++i;
1183 if (Ctx) {
1184 MCSymbol* const Sym =
1185 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1186 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1187 }
1188 }
1189 }
1190
1191 void usesVgprAt(int i) {
1192 if (i >= VgprIndexUnusedMin) {
1193 VgprIndexUnusedMin = ++i;
1194 if (Ctx) {
1195 MCSymbol* const Sym =
1196 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1197 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1198 VgprIndexUnusedMin);
1199 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1200 }
1201 }
1202 }
1203
1204 void usesAgprAt(int i) {
1205 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1206 if (!hasMAIInsts(*MSTI))
1207 return;
1208
1209 if (i >= AgprIndexUnusedMin) {
1210 AgprIndexUnusedMin = ++i;
1211 if (Ctx) {
1212 MCSymbol* const Sym =
1213 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1214 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1215
1216 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1217 MCSymbol* const vSym =
1218 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1219 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1220 VgprIndexUnusedMin);
1221 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1222 }
1223 }
1224 }
1225
1226public:
1227 KernelScopeInfo() = default;
1228
1229 void initialize(MCContext &Context) {
1230 Ctx = &Context;
1231 MSTI = Ctx->getSubtargetInfo();
1232
1233 usesSgprAt(SgprIndexUnusedMin = -1);
1234 usesVgprAt(VgprIndexUnusedMin = -1);
1235 if (hasMAIInsts(*MSTI)) {
1236 usesAgprAt(AgprIndexUnusedMin = -1);
1237 }
1238 }
1239
1240 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1241 unsigned RegWidth) {
1242 switch (RegKind) {
1243 case IS_SGPR:
1244 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1245 break;
1246 case IS_AGPR:
1247 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1248 break;
1249 case IS_VGPR:
1250 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1251 break;
1252 default:
1253 break;
1254 }
1255 }
1256};
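// Example (illustrative, not from the original source): parsing "s[4:7]" leads
// to usesRegister(IS_SGPR, /*DwordRegIndex=*/4, /*RegWidth=*/128), which calls
// usesSgprAt(4 + divideCeil(128, 32) - 1) = usesSgprAt(7) and sets the
// .kernel.sgpr_count symbol to 8.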
1257
1258class AMDGPUAsmParser : public MCTargetAsmParser {
1259 MCAsmParser &Parser;
1260
1261 unsigned ForcedEncodingSize = 0;
1262 bool ForcedDPP = false;
1263 bool ForcedSDWA = false;
1264 KernelScopeInfo KernelScope;
1265
1266 /// @name Auto-generated Match Functions
1267 /// {
1268
1269#define GET_ASSEMBLER_HEADER
1270#include "AMDGPUGenAsmMatcher.inc"
1271
1272 /// }
1273
1274private:
1275 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1276 bool OutOfRangeError(SMRange Range);
1277 /// Calculate VGPR/SGPR blocks required for given target, reserved
1278 /// registers, and user-specified NextFreeXGPR values.
1279 ///
1280 /// \param Features [in] Target features, used for bug corrections.
1281 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1282 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1283 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1284 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1285 /// descriptor field, if valid.
1286 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1287 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1288 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1289 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1290 /// \param VGPRBlocks [out] Result VGPR block count.
1291 /// \param SGPRBlocks [out] Result SGPR block count.
1292 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1293 bool FlatScrUsed, bool XNACKUsed,
1294 std::optional<bool> EnableWavefrontSize32,
1295 unsigned NextFreeVGPR, SMRange VGPRRange,
1296 unsigned NextFreeSGPR, SMRange SGPRRange,
1297 unsigned &VGPRBlocks, unsigned &SGPRBlocks);
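 // Illustrative note (assumption, not from the original source): the computed
 // block counts feed the granulated GPR-count fields of the kernel descriptor,
 // so with a VGPR allocation granule of 4 a NextFreeVGPR of 10 would yield
 // VGPRBlocks = ceil(10 / 4) - 1 = 2; the actual granule depends on the target
 // and wavefront size.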
1298 bool ParseDirectiveAMDGCNTarget();
1299 bool ParseDirectiveAMDHSAKernel();
1300 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1301 bool ParseDirectiveHSACodeObjectVersion();
1302 bool ParseDirectiveHSACodeObjectISA();
1303 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1304 bool ParseDirectiveAMDKernelCodeT();
1305 // TODO: Possibly make subtargetHasRegister const.
1306 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1307 bool ParseDirectiveAMDGPUHsaKernel();
1308
1309 bool ParseDirectiveISAVersion();
1310 bool ParseDirectiveHSAMetadata();
1311 bool ParseDirectivePALMetadataBegin();
1312 bool ParseDirectivePALMetadata();
1313 bool ParseDirectiveAMDGPULDS();
1314
1315 /// Common code to parse out a block of text (typically YAML) between start and
1316 /// end directives.
1317 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1318 const char *AssemblerDirectiveEnd,
1319 std::string &CollectString);
1320
1321 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1322 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1323 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1324 unsigned &RegNum, unsigned &RegWidth,
1325 bool RestoreOnFailure = false);
1326 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1327 unsigned &RegNum, unsigned &RegWidth,
1328 SmallVectorImpl<AsmToken> &Tokens);
1329 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1330 unsigned &RegWidth,
1331 SmallVectorImpl<AsmToken> &Tokens);
1332 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1333 unsigned &RegWidth,
1334 SmallVectorImpl<AsmToken> &Tokens);
1335 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1336 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1337 bool ParseRegRange(unsigned& Num, unsigned& Width);
1338 unsigned getRegularReg(RegisterKind RegKind,
1339 unsigned RegNum,
1340 unsigned RegWidth,
1341 SMLoc Loc);
1342
1343 bool isRegister();
1344 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1345 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1346 void initializeGprCountSymbol(RegisterKind RegKind);
1347 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1348 unsigned RegWidth);
1349 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1350 bool IsAtomic);
1351
1352public:
1353 enum AMDGPUMatchResultTy {
1354 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1355 };
1356 enum OperandMode {
1357 OperandMode_Default,
1358 OperandMode_NSA,
1359 };
1360
1361 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1362
1363 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1364 const MCInstrInfo &MII,
1365 const MCTargetOptions &Options)
1366 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1368
1369 if (getFeatureBits().none()) {
1370 // Set default features.
1371 copySTI().ToggleFeature("southern-islands");
1372 }
1373
1374 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1375
1376 {
1377 // TODO: make those pre-defined variables read-only.
1378 // Currently there is no suitable machinery in the core llvm-mc for this.
1379 // MCSymbol::isRedefinable is intended for another purpose, and
1380 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1381 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1382 MCContext &Ctx = getContext();
1383 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1384 MCSymbol *Sym =
1385 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1386 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1387 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1388 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1389 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1390 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1391 } else {
1392 MCSymbol *Sym =
1393 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1394 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1395 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1396 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1397 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1398 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1399 }
1400 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1401 initializeGprCountSymbol(IS_VGPR);
1402 initializeGprCountSymbol(IS_SGPR);
1403 } else
1404 KernelScope.initialize(getContext());
1405 }
1406 }
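 // Example (illustrative, not from the original source): for -mcpu=gfx900 the
 // pre-defined symbols above evaluate to .amdgcn.gfx_generation_number = 9,
 // .amdgcn.gfx_generation_minor = 0 and .amdgcn.gfx_generation_stepping = 0
 // (or the .option.machine_version_* equivalents for non-HSA ABIs), so
 // assembly sources can test them with .if directives.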
1407
1408 bool hasMIMG_R128() const {
1409 return AMDGPU::hasMIMG_R128(getSTI());
1410 }
1411
1412 bool hasPackedD16() const {
1413 return AMDGPU::hasPackedD16(getSTI());
1414 }
1415
1416 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1417
1418 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1419
1420 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1421
1422 bool isSI() const {
1423 return AMDGPU::isSI(getSTI());
1424 }
1425
1426 bool isCI() const {
1427 return AMDGPU::isCI(getSTI());
1428 }
1429
1430 bool isVI() const {
1431 return AMDGPU::isVI(getSTI());
1432 }
1433
1434 bool isGFX9() const {
1435 return AMDGPU::isGFX9(getSTI());
1436 }
1437
1438 // TODO: isGFX90A is also true for GFX940. This needs to be cleaned up.
1439 bool isGFX90A() const {
1440 return AMDGPU::isGFX90A(getSTI());
1441 }
1442
1443 bool isGFX940() const {
1444 return AMDGPU::isGFX940(getSTI());
1445 }
1446
1447 bool isGFX9Plus() const {
1448 return AMDGPU::isGFX9Plus(getSTI());
1449 }
1450
1451 bool isGFX10() const {
1452 return AMDGPU::isGFX10(getSTI());
1453 }
1454
1455 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1456
1457 bool isGFX11() const {
1458 return AMDGPU::isGFX11(getSTI());
1459 }
1460
1461 bool isGFX11Plus() const {
1462 return AMDGPU::isGFX11Plus(getSTI());
1463 }
1464
1465 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1466
1467 bool isGFX10_BEncoding() const {
1468 return AMDGPU::isGFX10_BEncoding(getSTI());
1469 }
1470
1471 bool hasInv2PiInlineImm() const {
1472 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1473 }
1474
1475 bool hasFlatOffsets() const {
1476 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1477 }
1478
1479 bool hasArchitectedFlatScratch() const {
1480 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1481 }
1482
1483 bool hasSGPR102_SGPR103() const {
1484 return !isVI() && !isGFX9();
1485 }
1486
1487 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1488
1489 bool hasIntClamp() const {
1490 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1491 }
1492
1493 bool hasPartialNSAEncoding() const {
1494 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1495 }
1496
1497 unsigned getNSAMaxSize() const {
1498 return AMDGPU::getNSAMaxSize(getSTI());
1499 }
1500
1501 unsigned getMaxNumUserSGPRs() const {
1502 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1503 }
1504
1505 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1506
1507 AMDGPUTargetStreamer &getTargetStreamer() {
1508 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1509 return static_cast<AMDGPUTargetStreamer &>(TS);
1510 }
1511
1512 const MCRegisterInfo *getMRI() const {
1513 // We need this const_cast because for some reason getContext() is not const
1514 // in MCAsmParser.
1515 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1516 }
1517
1518 const MCInstrInfo *getMII() const {
1519 return &MII;
1520 }
1521
1522 const FeatureBitset &getFeatureBits() const {
1523 return getSTI().getFeatureBits();
1524 }
1525
1526 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1527 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1528 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1529
1530 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1531 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1532 bool isForcedDPP() const { return ForcedDPP; }
1533 bool isForcedSDWA() const { return ForcedSDWA; }
1534 ArrayRef<unsigned> getMatchedVariants() const;
1535 StringRef getMatchedVariantName() const;
1536
1537 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1538 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1539 bool RestoreOnFailure);
1540 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1541 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1542 SMLoc &EndLoc) override;
1543 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1544 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1545 unsigned Kind) override;
1546 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1547 OperandVector &Operands, MCStreamer &Out,
1548 uint64_t &ErrorInfo,
1549 bool MatchingInlineAsm) override;
1550 bool ParseDirective(AsmToken DirectiveID) override;
1551 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1552 OperandMode Mode = OperandMode_Default);
1553 StringRef parseMnemonicSuffix(StringRef Name);
1554 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1555 SMLoc NameLoc, OperandVector &Operands) override;
1556 //bool ProcessInstruction(MCInst &Inst);
1557
1559
1560 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1561
1562 ParseStatus
1563 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1564 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1565 std::function<bool(int64_t &)> ConvertResult = nullptr);
1566
1567 ParseStatus parseOperandArrayWithPrefix(
1568 const char *Prefix, OperandVector &Operands,
1569 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1570 bool (*ConvertResult)(int64_t &) = nullptr);
1571
1572 ParseStatus
1573 parseNamedBit(StringRef Name, OperandVector &Operands,
1574 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1575 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1577 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1578 SMLoc &StringLoc);
1579
1580 bool isModifier();
1581 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1582 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1583 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1584 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1585 bool parseSP3NegModifier();
1586 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1588 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1589 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1590 bool AllowImm = true);
1591 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1592 bool AllowImm = true);
1593 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1594 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1595 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1596 ParseStatus parseDfmtNfmt(int64_t &Format);
1597 ParseStatus parseUfmt(int64_t &Format);
1598 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1599 int64_t &Format);
1600 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1601 int64_t &Format);
1602 ParseStatus parseFORMAT(OperandVector &Operands);
1603 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1604 ParseStatus parseNumericFormat(int64_t &Format);
1605 ParseStatus parseFlatOffset(OperandVector &Operands);
1606 ParseStatus parseR128A16(OperandVector &Operands);
1608 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1609 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1610
1611 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1612
1613 bool parseCnt(int64_t &IntVal);
1614 ParseStatus parseSWaitCnt(OperandVector &Operands);
1615
1616 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1617 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1618 ParseStatus parseDepCtr(OperandVector &Operands);
1619
1620 bool parseDelay(int64_t &Delay);
1621 ParseStatus parseSDelayALU(OperandVector &Operands);
1622
1623 ParseStatus parseHwreg(OperandVector &Operands);
1624
1625private:
1626 struct OperandInfoTy {
1627 SMLoc Loc;
1628 int64_t Id;
1629 bool IsSymbolic = false;
1630 bool IsDefined = false;
1631
1632 OperandInfoTy(int64_t Id_) : Id(Id_) {}
1633 };
1634
1635 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1636 bool validateSendMsg(const OperandInfoTy &Msg,
1637 const OperandInfoTy &Op,
1638 const OperandInfoTy &Stream);
1639
1640 bool parseHwregBody(OperandInfoTy &HwReg,
1641 OperandInfoTy &Offset,
1642 OperandInfoTy &Width);
1643 bool validateHwreg(const OperandInfoTy &HwReg,
1644 const OperandInfoTy &Offset,
1645 const OperandInfoTy &Width);
1646
1647 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1648 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1649 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1650
1651 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1652 const OperandVector &Operands) const;
1653 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1654 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1655 SMLoc getLitLoc(const OperandVector &Operands,
1656 bool SearchMandatoryLiterals = false) const;
1657 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1658 SMLoc getConstLoc(const OperandVector &Operands) const;
1659 SMLoc getInstLoc(const OperandVector &Operands) const;
1660
1661 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1662 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1663 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1664 bool validateSOPLiteral(const MCInst &Inst) const;
1665 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1666 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1667 const OperandVector &Operands);
1668 bool validateIntClampSupported(const MCInst &Inst);
1669 bool validateMIMGAtomicDMask(const MCInst &Inst);
1670 bool validateMIMGGatherDMask(const MCInst &Inst);
1671 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1672 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1673 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1674 bool validateMIMGD16(const MCInst &Inst);
1675 bool validateMIMGMSAA(const MCInst &Inst);
1676 bool validateOpSel(const MCInst &Inst);
1677 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1678 bool validateVccOperand(unsigned Reg) const;
1679 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1680 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1681 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1682 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1683 bool validateAGPRLdSt(const MCInst &Inst) const;
1684 bool validateVGPRAlign(const MCInst &Inst) const;
1685 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1686 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1687 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1688 bool validateDivScale(const MCInst &Inst);
1689 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1690 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1691 const SMLoc &IDLoc);
1692 bool validateExeczVcczOperands(const OperandVector &Operands);
1693 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1694 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1695 unsigned getConstantBusLimit(unsigned Opcode) const;
1696 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1697 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1698 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1699
1700 bool isSupportedMnemo(StringRef Mnemo,
1701 const FeatureBitset &FBS);
1702 bool isSupportedMnemo(StringRef Mnemo,
1703 const FeatureBitset &FBS,
1704 ArrayRef<unsigned> Variants);
1705 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1706
1707 bool isId(const StringRef Id) const;
1708 bool isId(const AsmToken &Token, const StringRef Id) const;
1709 bool isToken(const AsmToken::TokenKind Kind) const;
1710 StringRef getId() const;
1711 bool trySkipId(const StringRef Id);
1712 bool trySkipId(const StringRef Pref, const StringRef Id);
1713 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1714 bool trySkipToken(const AsmToken::TokenKind Kind);
1715 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1716 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1717 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1718
1719 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1720 AsmToken::TokenKind getTokenKind() const;
1721 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1722 bool parseExpr(OperandVector &Operands);
1723 StringRef getTokenStr() const;
1724 AsmToken peekToken(bool ShouldSkipSpace = true);
1725 AsmToken getToken() const;
1726 SMLoc getLoc() const;
1727 void lex();
1728
1729public:
1730 void onBeginOfFile() override;
1731
1732 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1733
1734 ParseStatus parseExpTgt(OperandVector &Operands);
1735 ParseStatus parseSendMsg(OperandVector &Operands);
1736 ParseStatus parseInterpSlot(OperandVector &Operands);
1737 ParseStatus parseInterpAttr(OperandVector &Operands);
1738 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1739 ParseStatus parseBoolReg(OperandVector &Operands);
1740
1741 bool parseSwizzleOperand(int64_t &Op,
1742 const unsigned MinVal,
1743 const unsigned MaxVal,
1744 const StringRef ErrMsg,
1745 SMLoc &Loc);
1746 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1747 const unsigned MinVal,
1748 const unsigned MaxVal,
1749 const StringRef ErrMsg);
1750 ParseStatus parseSwizzle(OperandVector &Operands);
1751 bool parseSwizzleOffset(int64_t &Imm);
1752 bool parseSwizzleMacro(int64_t &Imm);
1753 bool parseSwizzleQuadPerm(int64_t &Imm);
1754 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1755 bool parseSwizzleBroadcast(int64_t &Imm);
1756 bool parseSwizzleSwap(int64_t &Imm);
1757 bool parseSwizzleReverse(int64_t &Imm);
1758
1759 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1760 int64_t parseGPRIdxMacro();
1761
1762 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1763 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1764
1765 ParseStatus parseOModSI(OperandVector &Operands);
1766
1767 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1768 OptionalImmIndexMap &OptionalIdx);
1769 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1770 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1771 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1772 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1773 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1774 OptionalImmIndexMap &OptionalIdx);
1775 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1776 OptionalImmIndexMap &OptionalIdx);
1777
1778 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1779 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1780 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1781
1782 bool parseDimId(unsigned &Encoding);
1784 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1786 ParseStatus parseDPPCtrl(OperandVector &Operands);
1787 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1788 int64_t parseDPPCtrlSel(StringRef Ctrl);
1789 int64_t parseDPPCtrlPerm();
1790 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1791 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1792 cvtDPP(Inst, Operands, true);
1793 }
1794 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1795 bool IsDPP8 = false);
1796 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1797 cvtVOP3DPP(Inst, Operands, true);
1798 }
1799
1800 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1801 AMDGPUOperand::ImmTy Type);
1802 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1803 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1804 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1805 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1806 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1807 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1808 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1809 uint64_t BasicInstType,
1810 bool SkipDstVcc = false,
1811 bool SkipSrcVcc = false);
1812
1813 ParseStatus parseEndpgm(OperandVector &Operands);
1814
1816};
1817
1818} // end anonymous namespace
1819
1820// May be called with integer type with equivalent bitwidth.
1821static const fltSemantics *getFltSemantics(unsigned Size) {
1822 switch (Size) {
1823 case 4:
1824 return &APFloat::IEEEsingle();
1825 case 8:
1826 return &APFloat::IEEEdouble();
1827 case 2:
1828 return &APFloat::IEEEhalf();
1829 default:
1830 llvm_unreachable("unsupported fp type");
1831 }
1832}
1833
1834 static const fltSemantics *getFltSemantics(MVT VT) {
1835 return getFltSemantics(VT.getSizeInBits() / 8);
1836}
1837
1838 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1839 switch (OperandType) {
1852 return &APFloat::IEEEsingle();
1858 return &APFloat::IEEEdouble();
1873 return &APFloat::IEEEhalf();
1874 default:
1875 llvm_unreachable("unsupported fp type");
1876 }
1877}
1878
1879//===----------------------------------------------------------------------===//
1880// Operand
1881//===----------------------------------------------------------------------===//
1882
1883static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1884 bool Lost;
1885
1886 // Convert the literal to the floating-point type of the operand
1887 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1888 APFloat::rmNearestTiesToEven,
1889 &Lost);
1890 // We allow precision loss but not overflow or underflow
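// For example, when VT is f16, the literal 0.1 only loses precision and is
// accepted, while 1.0e10 overflows f16 and is rejected.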
1891 if (Status != APFloat::opOK &&
1892 Lost &&
1893 ((Status & APFloat::opOverflow) != 0 ||
1894 (Status & APFloat::opUnderflow) != 0)) {
1895 return false;
1896 }
1897
1898 return true;
1899}
1900
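// For example, with Size == 16, values such as 0xFFFF (valid as unsigned) and
// -1 (valid as signed) truncate safely, while 0x1FFFF fits neither form.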
1901static bool isSafeTruncation(int64_t Val, unsigned Size) {
1902 return isUIntN(Size, Val) || isIntN(Size, Val);
1903}
1904
1905static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1906 if (VT.getScalarType() == MVT::i16) {
1907 // FP immediate values are broken.
1908 return isInlinableIntLiteral(Val);
1909 }
1910
1911 // f16/v2f16 operands work correctly for all values.
1912 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1913}
1914
1915bool AMDGPUOperand::isInlinableImm(MVT type) const {
1916
1917 // This is a hack to enable named inline values like
1918 // shared_base with both 32-bit and 64-bit operands.
1919 // Note that these values are defined as
1920 // 32-bit operands only.
1921 if (isInlineValue()) {
1922 return true;
1923 }
1924
1925 if (!isImmTy(ImmTyNone)) {
1926 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1927 return false;
1928 }
1929 // TODO: We should avoid using host float here. It would be better to
1930 // check the float bit values which is what a few other places do.
1931 // We've had bot failures before due to weird NaN support on mips hosts.
1932
1933 APInt Literal(64, Imm.Val);
1934
1935 if (Imm.IsFPImm) { // We got fp literal token
1936 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1937 return AMDGPU::isInlinableLiteral64(Imm.Val,
1938 AsmParser->hasInv2PiInlineImm());
1939 }
1940
1941 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1942 if (!canLosslesslyConvertToFPType(FPLiteral, type))
1943 return false;
1944
1945 if (type.getScalarSizeInBits() == 16) {
1946 return isInlineableLiteralOp16(
1947 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1948 type, AsmParser->hasInv2PiInlineImm());
1949 }
1950
1951 // Check if single precision literal is inlinable
1952 return AMDGPU::isInlinableLiteral32(
1953 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1954 AsmParser->hasInv2PiInlineImm());
1955 }
1956
1957 // We got int literal token.
1958 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1959 return AMDGPU::isInlinableLiteral64(Imm.Val,
1960 AsmParser->hasInv2PiInlineImm());
1961 }
1962
1963 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1964 return false;
1965 }
1966
1967 if (type.getScalarSizeInBits() == 16) {
1968 return isInlineableLiteralOp16(
1969 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1970 type, AsmParser->hasInv2PiInlineImm());
1971 }
1972
1973 return AMDGPU::isInlinableLiteral32(
1974 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1975 AsmParser->hasInv2PiInlineImm());
1976}
1977
1978bool AMDGPUOperand::isLiteralImm(MVT type) const {
1979 // Check that this immediate can be added as literal
1980 if (!isImmTy(ImmTyNone)) {
1981 return false;
1982 }
1983
1984 if (!Imm.IsFPImm) {
1985 // We got int literal token.
1986
1987 if (type == MVT::f64 && hasFPModifiers()) {
1988 // Cannot apply fp modifiers to int literals preserving the same semantics
1989 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1990 // disable these cases.
1991 return false;
1992 }
1993
1994 unsigned Size = type.getSizeInBits();
1995 if (Size == 64)
1996 Size = 32;
1997
1998 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1999 // types.
2000 return isSafeTruncation(Imm.Val, Size);
2001 }
2002
2003 // We got fp literal token
2004 if (type == MVT::f64) { // Expected 64-bit fp operand
2005 // The low 32 bits of the literal would be set to zeroes, but we accept such literals
2006 return true;
2007 }
2008
2009 if (type == MVT::i64) { // Expected 64-bit int operand
2010 // We don't allow fp literals in 64-bit integer instructions. It is
2011 // unclear how we should encode them.
2012 return false;
2013 }
2014
2015 // We allow fp literals with f16x2 operands assuming that the specified
2016 // literal goes into the lower half and the upper half is zero. We also
2017 // require that the literal may be losslessly converted to f16.
2018 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
2019 (type == MVT::v2i16)? MVT::i16 :
2020 (type == MVT::v2f32)? MVT::f32 : type;
2021
2022 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2023 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2024}
2025
2026bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2027 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2028}
2029
2030bool AMDGPUOperand::isVRegWithInputMods() const {
2031 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2032 // GFX90A allows DPP on 64-bit operands.
2033 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2034 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2035}
2036
2037bool AMDGPUOperand::isT16VRegWithInputMods() const {
2038 return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID);
2039}
2040
2041bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2042 if (AsmParser->isVI())
2043 return isVReg32();
2044 else if (AsmParser->isGFX9Plus())
2045 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2046 else
2047 return false;
2048}
2049
2050bool AMDGPUOperand::isSDWAFP16Operand() const {
2051 return isSDWAOperand(MVT::f16);
2052}
2053
2054bool AMDGPUOperand::isSDWAFP32Operand() const {
2055 return isSDWAOperand(MVT::f32);
2056}
2057
2058bool AMDGPUOperand::isSDWAInt16Operand() const {
2059 return isSDWAOperand(MVT::i16);
2060}
2061
2062bool AMDGPUOperand::isSDWAInt32Operand() const {
2063 return isSDWAOperand(MVT::i32);
2064}
2065
2066bool AMDGPUOperand::isBoolReg() const {
2067 auto FB = AsmParser->getFeatureBits();
2068 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2069 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2070}
2071
2072uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2073{
2074 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2075 assert(Size == 2 || Size == 4 || Size == 8);
2076
2077 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
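// For example, with Size == 4 the mask is 0x80000000: 'abs' clears the sign
// bit (0xC0000000, i.e. -2.0f, becomes 0x40000000, i.e. 2.0f) and 'neg' flips it.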
2078
2079 if (Imm.Mods.Abs) {
2080 Val &= ~FpSignMask;
2081 }
2082 if (Imm.Mods.Neg) {
2083 Val ^= FpSignMask;
2084 }
2085
2086 return Val;
2087}
2088
2089void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2090 if (isExpr()) {
2091 Inst.addOperand(MCOperand::createExpr(Expr));
2092 return;
2093 }
2094
2095 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2096 Inst.getNumOperands())) {
2097 addLiteralImmOperand(Inst, Imm.Val,
2098 ApplyModifiers &
2099 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2100 } else {
2101 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2102 Inst.addOperand(MCOperand::createImm(Imm.Val));
2103 setImmKindNone();
2104 }
2105}
2106
2107void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2108 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2109 auto OpNum = Inst.getNumOperands();
2110 // Check that this operand accepts literals
2111 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2112
2113 if (ApplyModifiers) {
2114 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2115 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2116 Val = applyInputFPModifiers(Val, Size);
2117 }
2118
2119 APInt Literal(64, Val);
2120 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2121
2122 if (Imm.IsFPImm) { // We got fp literal token
2123 switch (OpTy) {
2129 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2130 AsmParser->hasInv2PiInlineImm())) {
2131 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2132 setImmKindConst();
2133 return;
2134 }
2135
2136 // Non-inlineable
2137 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2138 // For fp operands we check if low 32 bits are zeros
2139 if (Literal.getLoBits(32) != 0) {
2140 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2141 "Can't encode literal as exact 64-bit floating-point operand. "
2142 "Low 32-bits will be set to zero");
2143 }
2144
2145 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2146 setImmKindLiteral();
2147 return;
2148 }
2149
2150 // We don't allow fp literals in 64-bit integer instructions. It is
2151 // unclear how we should encode them. This case should be checked earlier
2152 // in predicate methods (isLiteralImm())
2153 llvm_unreachable("fp literal in 64-bit integer instruction.");
2154
2181 bool lost;
2182 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2183 // Convert the literal to the floating-point type of the operand
2184 FPLiteral.convert(*getOpFltSemantics(OpTy),
2185 APFloat::rmNearestTiesToEven, &lost);
2186 // We allow precision loss but not overflow or underflow. This should be
2187 // checked earlier in isLiteralImm()
2188
2189 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2190 Inst.addOperand(MCOperand::createImm(ImmVal));
2191 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2192 setImmKindMandatoryLiteral();
2193 } else {
2194 setImmKindLiteral();
2195 }
2196 return;
2197 }
2198 default:
2199 llvm_unreachable("invalid operand size");
2200 }
2201
2202 return;
2203 }
2204
2205 // We got int literal token.
2206 // Only sign extend inline immediates.
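// For example, 0xFFF0 for a 16-bit operand is treated as the inline
// constant -16, while 0x7FFF is not inlinable and is emitted as a literal.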
2207 switch (OpTy) {
2221 if (isSafeTruncation(Val, 32) &&
2222 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2223 AsmParser->hasInv2PiInlineImm())) {
2224 Inst.addOperand(MCOperand::createImm(Val));
2225 setImmKindConst();
2226 return;
2227 }
2228
2229 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2230 setImmKindLiteral();
2231 return;
2232
2238 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2239 Inst.addOperand(MCOperand::createImm(Val));
2240 setImmKindConst();
2241 return;
2242 }
2243
2244 Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2245 setImmKindLiteral();
2246 return;
2247
2255 if (isSafeTruncation(Val, 16) &&
2256 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2257 AsmParser->hasInv2PiInlineImm())) {
2258 Inst.addOperand(MCOperand::createImm(Val));
2259 setImmKindConst();
2260 return;
2261 }
2262
2263 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2264 setImmKindLiteral();
2265 return;
2266
2271 assert(isSafeTruncation(Val, 16));
2272 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2273 AsmParser->hasInv2PiInlineImm()));
2274
2275 Inst.addOperand(MCOperand::createImm(Val));
2276 return;
2277 }
2278 case AMDGPU::OPERAND_KIMM32:
2279 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2280 setImmKindMandatoryLiteral();
2281 return;
2282 case AMDGPU::OPERAND_KIMM16:
2283 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2284 setImmKindMandatoryLiteral();
2285 return;
2286 default:
2287 llvm_unreachable("invalid operand size");
2288 }
2289}
2290
2291void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2292 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2293}
2294
2295bool AMDGPUOperand::isInlineValue() const {
2296 return isRegKind() && ::isInlineValue(getReg());
2297}
2298
2299//===----------------------------------------------------------------------===//
2300// AsmParser
2301//===----------------------------------------------------------------------===//
2302
2303static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2304 if (Is == IS_VGPR) {
2305 switch (RegWidth) {
2306 default: return -1;
2307 case 32:
2308 return AMDGPU::VGPR_32RegClassID;
2309 case 64:
2310 return AMDGPU::VReg_64RegClassID;
2311 case 96:
2312 return AMDGPU::VReg_96RegClassID;
2313 case 128:
2314 return AMDGPU::VReg_128RegClassID;
2315 case 160:
2316 return AMDGPU::VReg_160RegClassID;
2317 case 192:
2318 return AMDGPU::VReg_192RegClassID;
2319 case 224:
2320 return AMDGPU::VReg_224RegClassID;
2321 case 256:
2322 return AMDGPU::VReg_256RegClassID;
2323 case 288:
2324 return AMDGPU::VReg_288RegClassID;
2325 case 320:
2326 return AMDGPU::VReg_320RegClassID;
2327 case 352:
2328 return AMDGPU::VReg_352RegClassID;
2329 case 384:
2330 return AMDGPU::VReg_384RegClassID;
2331 case 512:
2332 return AMDGPU::VReg_512RegClassID;
2333 case 1024:
2334 return AMDGPU::VReg_1024RegClassID;
2335 }
2336 } else if (Is == IS_TTMP) {
2337 switch (RegWidth) {
2338 default: return -1;
2339 case 32:
2340 return AMDGPU::TTMP_32RegClassID;
2341 case 64:
2342 return AMDGPU::TTMP_64RegClassID;
2343 case 128:
2344 return AMDGPU::TTMP_128RegClassID;
2345 case 256:
2346 return AMDGPU::TTMP_256RegClassID;
2347 case 512:
2348 return AMDGPU::TTMP_512RegClassID;
2349 }
2350 } else if (Is == IS_SGPR) {
2351 switch (RegWidth) {
2352 default: return -1;
2353 case 32:
2354 return AMDGPU::SGPR_32RegClassID;
2355 case 64:
2356 return AMDGPU::SGPR_64RegClassID;
2357 case 96:
2358 return AMDGPU::SGPR_96RegClassID;
2359 case 128:
2360 return AMDGPU::SGPR_128RegClassID;
2361 case 160:
2362 return AMDGPU::SGPR_160RegClassID;
2363 case 192:
2364 return AMDGPU::SGPR_192RegClassID;
2365 case 224:
2366 return AMDGPU::SGPR_224RegClassID;
2367 case 256:
2368 return AMDGPU::SGPR_256RegClassID;
2369 case 288:
2370 return AMDGPU::SGPR_288RegClassID;
2371 case 320:
2372 return AMDGPU::SGPR_320RegClassID;
2373 case 352:
2374 return AMDGPU::SGPR_352RegClassID;
2375 case 384:
2376 return AMDGPU::SGPR_384RegClassID;
2377 case 512:
2378 return AMDGPU::SGPR_512RegClassID;
2379 }
2380 } else if (Is == IS_AGPR) {
2381 switch (RegWidth) {
2382 default: return -1;
2383 case 32:
2384 return AMDGPU::AGPR_32RegClassID;
2385 case 64:
2386 return AMDGPU::AReg_64RegClassID;
2387 case 96:
2388 return AMDGPU::AReg_96RegClassID;
2389 case 128:
2390 return AMDGPU::AReg_128RegClassID;
2391 case 160:
2392 return AMDGPU::AReg_160RegClassID;
2393 case 192:
2394 return AMDGPU::AReg_192RegClassID;
2395 case 224:
2396 return AMDGPU::AReg_224RegClassID;
2397 case 256:
2398 return AMDGPU::AReg_256RegClassID;
2399 case 288:
2400 return AMDGPU::AReg_288RegClassID;
2401 case 320:
2402 return AMDGPU::AReg_320RegClassID;
2403 case 352:
2404 return AMDGPU::AReg_352RegClassID;
2405 case 384:
2406 return AMDGPU::AReg_384RegClassID;
2407 case 512:
2408 return AMDGPU::AReg_512RegClassID;
2409 case 1024:
2410 return AMDGPU::AReg_1024RegClassID;
2411 }
2412 }
2413 return -1;
2414}
2415
2416 static unsigned getSpecialRegForName(StringRef RegName) {
2417 return StringSwitch<unsigned>(RegName)
2418 .Case("exec", AMDGPU::EXEC)
2419 .Case("vcc", AMDGPU::VCC)
2420 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2421 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2422 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2423 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2424 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2425 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2426 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2427 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2428 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2429 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2430 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2431 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2432 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2433 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2434 .Case("m0", AMDGPU::M0)
2435 .Case("vccz", AMDGPU::SRC_VCCZ)
2436 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2437 .Case("execz", AMDGPU::SRC_EXECZ)
2438 .Case("src_execz", AMDGPU::SRC_EXECZ)
2439 .Case("scc", AMDGPU::SRC_SCC)
2440 .Case("src_scc", AMDGPU::SRC_SCC)
2441 .Case("tba", AMDGPU::TBA)
2442 .Case("tma", AMDGPU::TMA)
2443 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2444 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2445 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2446 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2447 .Case("vcc_lo", AMDGPU::VCC_LO)
2448 .Case("vcc_hi", AMDGPU::VCC_HI)
2449 .Case("exec_lo", AMDGPU::EXEC_LO)
2450 .Case("exec_hi", AMDGPU::EXEC_HI)
2451 .Case("tma_lo", AMDGPU::TMA_LO)
2452 .Case("tma_hi", AMDGPU::TMA_HI)
2453 .Case("tba_lo", AMDGPU::TBA_LO)
2454 .Case("tba_hi", AMDGPU::TBA_HI)
2455 .Case("pc", AMDGPU::PC_REG)
2456 .Case("null", AMDGPU::SGPR_NULL)
2457 .Default(AMDGPU::NoRegister);
2458}
2459
2460bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2461 SMLoc &EndLoc, bool RestoreOnFailure) {
2462 auto R = parseRegister();
2463 if (!R) return true;
2464 assert(R->isReg());
2465 RegNo = R->getReg();
2466 StartLoc = R->getStartLoc();
2467 EndLoc = R->getEndLoc();
2468 return false;
2469}
2470
2471bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2472 SMLoc &EndLoc) {
2473 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2474}
2475
2476ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2477 SMLoc &EndLoc) {
2478 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2479 bool PendingErrors = getParser().hasPendingError();
2480 getParser().clearPendingErrors();
2481 if (PendingErrors)
2482 return ParseStatus::Failure;
2483 if (Result)
2484 return ParseStatus::NoMatch;
2485 return ParseStatus::Success;
2486}
2487
2488bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2489 RegisterKind RegKind, unsigned Reg1,
2490 SMLoc Loc) {
2491 switch (RegKind) {
2492 case IS_SPECIAL:
2493 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2494 Reg = AMDGPU::EXEC;
2495 RegWidth = 64;
2496 return true;
2497 }
2498 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2499 Reg = AMDGPU::FLAT_SCR;
2500 RegWidth = 64;
2501 return true;
2502 }
2503 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2504 Reg = AMDGPU::XNACK_MASK;
2505 RegWidth = 64;
2506 return true;
2507 }
2508 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2509 Reg = AMDGPU::VCC;
2510 RegWidth = 64;
2511 return true;
2512 }
2513 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2514 Reg = AMDGPU::TBA;
2515 RegWidth = 64;
2516 return true;
2517 }
2518 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2519 Reg = AMDGPU::TMA;
2520 RegWidth = 64;
2521 return true;
2522 }
2523 Error(Loc, "register does not fit in the list");
2524 return false;
2525 case IS_VGPR:
2526 case IS_SGPR:
2527 case IS_AGPR:
2528 case IS_TTMP:
2529 if (Reg1 != Reg + RegWidth / 32) {
2530 Error(Loc, "registers in a list must have consecutive indices");
2531 return false;
2532 }
2533 RegWidth += 32;
2534 return true;
2535 default:
2536 llvm_unreachable("unexpected register kind");
2537 }
2538}
2539
2540struct RegInfo {
2541 StringLiteral Name;
2542 RegisterKind Kind;
2543};
2544
2545static constexpr RegInfo RegularRegisters[] = {
2546 {{"v"}, IS_VGPR},
2547 {{"s"}, IS_SGPR},
2548 {{"ttmp"}, IS_TTMP},
2549 {{"acc"}, IS_AGPR},
2550 {{"a"}, IS_AGPR},
2551};
2552
2553static bool isRegularReg(RegisterKind Kind) {
2554 return Kind == IS_VGPR ||
2555 Kind == IS_SGPR ||
2556 Kind == IS_TTMP ||
2557 Kind == IS_AGPR;
2558}
2559
2561 for (const RegInfo &Reg : RegularRegisters)
2562 if (Str.startswith(Reg.Name))
2563 return &Reg;
2564 return nullptr;
2565}
2566
2567static bool getRegNum(StringRef Str, unsigned& Num) {
2568 return !Str.getAsInteger(10, Num);
2569}
2570
2571bool
2572AMDGPUAsmParser::isRegister(const AsmToken &Token,
2573 const AsmToken &NextToken) const {
2574
2575 // A list of consecutive registers: [s0,s1,s2,s3]
2576 if (Token.is(AsmToken::LBrac))
2577 return true;
2578
2579 if (!Token.is(AsmToken::Identifier))
2580 return false;
2581
2582 // A single register like s0 or a range of registers like s[0:1]
2583
2584 StringRef Str = Token.getString();
2585 const RegInfo *Reg = getRegularRegInfo(Str);
2586 if (Reg) {
2587 StringRef RegName = Reg->Name;
2588 StringRef RegSuffix = Str.substr(RegName.size());
2589 if (!RegSuffix.empty()) {
2590 unsigned Num;
2591 // A single register with an index: rXX
2592 if (getRegNum(RegSuffix, Num))
2593 return true;
2594 } else {
2595 // A range of registers: r[XX:YY].
2596 if (NextToken.is(AsmToken::LBrac))
2597 return true;
2598 }
2599 }
2600
2601 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2602}
2603
2604bool
2605AMDGPUAsmParser::isRegister()
2606{
2607 return isRegister(getToken(), peekToken());
2608}
2609
2610unsigned
2611AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2612 unsigned RegNum,
2613 unsigned RegWidth,
2614 SMLoc Loc) {
2615
2616 assert(isRegularReg(RegKind));
2617
2618 unsigned AlignSize = 1;
2619 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2620 // SGPR and TTMP registers must be aligned.
2621 // Max required alignment is 4 dwords.
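// For example, a 64-bit SGPR pair must start at an even index: s[2:3] is
// accepted, while s[1:2] is rejected below as misaligned.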
2622 AlignSize = std::min(RegWidth / 32, 4u);
2623 }
2624
2625 if (RegNum % AlignSize != 0) {
2626 Error(Loc, "invalid register alignment");
2627 return AMDGPU::NoRegister;
2628 }
2629
2630 unsigned RegIdx = RegNum / AlignSize;
2631 int RCID = getRegClass(RegKind, RegWidth);
2632 if (RCID == -1) {
2633 Error(Loc, "invalid or unsupported register size");
2634 return AMDGPU::NoRegister;
2635 }
2636
2637 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2638 const MCRegisterClass RC = TRI->getRegClass(RCID);
2639 if (RegIdx >= RC.getNumRegs()) {
2640 Error(Loc, "register index is out of range");
2641 return AMDGPU::NoRegister;
2642 }
2643
2644 return RC.getRegister(RegIdx);
2645}
2646
2647bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2648 int64_t RegLo, RegHi;
2649 if (!skipToken(AsmToken::LBrac, "missing register index"))
2650 return false;
2651
2652 SMLoc FirstIdxLoc = getLoc();
2653 SMLoc SecondIdxLoc;
2654
2655 if (!parseExpr(RegLo))
2656 return false;
2657
2658 if (trySkipToken(AsmToken::Colon)) {
2659 SecondIdxLoc = getLoc();
2660 if (!parseExpr(RegHi))
2661 return false;
2662 } else {
2663 RegHi = RegLo;
2664 }
2665
2666 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2667 return false;
2668
2669 if (!isUInt<32>(RegLo)) {
2670 Error(FirstIdxLoc, "invalid register index");
2671 return false;
2672 }
2673
2674 if (!isUInt<32>(RegHi)) {
2675 Error(SecondIdxLoc, "invalid register index");
2676 return false;
2677 }
2678
2679 if (RegLo > RegHi) {
2680 Error(FirstIdxLoc, "first register index should not exceed second index");
2681 return false;
2682 }
2683
2684 Num = static_cast<unsigned>(RegLo);
2685 RegWidth = 32 * ((RegHi - RegLo) + 1);
2686 return true;
2687}
2688
2689unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2690 unsigned &RegNum, unsigned &RegWidth,
2691 SmallVectorImpl<AsmToken> &Tokens) {
2692 assert(isToken(AsmToken::Identifier));
2693 unsigned Reg = getSpecialRegForName(getTokenStr());
2694 if (Reg) {
2695 RegNum = 0;
2696 RegWidth = 32;
2697 RegKind = IS_SPECIAL;
2698 Tokens.push_back(getToken());
2699 lex(); // skip register name
2700 }
2701 return Reg;
2702}
2703
2704unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2705 unsigned &RegNum, unsigned &RegWidth,
2706 SmallVectorImpl<AsmToken> &Tokens) {
2707 assert(isToken(AsmToken::Identifier));
2708 StringRef RegName = getTokenStr();
2709 auto Loc = getLoc();
2710
2711 const RegInfo *RI = getRegularRegInfo(RegName);
2712 if (!RI) {
2713 Error(Loc, "invalid register name");
2714 return AMDGPU::NoRegister;
2715 }
2716
2717 Tokens.push_back(getToken());
2718 lex(); // skip register name
2719
2720 RegKind = RI->Kind;
2721 StringRef RegSuffix = RegName.substr(RI->Name.size());
2722 if (!RegSuffix.empty()) {
2723 // Single 32-bit register: vXX.
2724 if (!getRegNum(RegSuffix, RegNum)) {
2725 Error(Loc, "invalid register index");
2726 return AMDGPU::NoRegister;
2727 }
2728 RegWidth = 32;
2729 } else {
2730 // Range of registers: v[XX:YY]. ":YY" is optional.
2731 if (!ParseRegRange(RegNum, RegWidth))
2732 return AMDGPU::NoRegister;
2733 }
2734
2735 return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2736}
2737
2738unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2739 unsigned &RegWidth,
2740 SmallVectorImpl<AsmToken> &Tokens) {
2741 unsigned Reg = AMDGPU::NoRegister;
2742 auto ListLoc = getLoc();
2743
2744 if (!skipToken(AsmToken::LBrac,
2745 "expected a register or a list of registers")) {
2746 return AMDGPU::NoRegister;
2747 }
2748
2749 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2750
2751 auto Loc = getLoc();
2752 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2753 return AMDGPU::NoRegister;
2754 if (RegWidth != 32) {
2755 Error(Loc, "expected a single 32-bit register");
2756 return AMDGPU::NoRegister;
2757 }
2758
2759 for (; trySkipToken(AsmToken::Comma); ) {
2760 RegisterKind NextRegKind;
2761 unsigned NextReg, NextRegNum, NextRegWidth;
2762 Loc = getLoc();
2763
2764 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2765 NextRegNum, NextRegWidth,
2766 Tokens)) {
2767 return AMDGPU::NoRegister;
2768 }
2769 if (NextRegWidth != 32) {
2770 Error(Loc, "expected a single 32-bit register");
2771 return AMDGPU::NoRegister;
2772 }
2773 if (NextRegKind != RegKind) {
2774 Error(Loc, "registers in a list must be of the same kind");
2775 return AMDGPU::NoRegister;
2776 }
2777 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2778 return AMDGPU::NoRegister;
2779 }
2780
2781 if (!skipToken(AsmToken::RBrac,
2782 "expected a comma or a closing square bracket")) {
2783 return AMDGPU::NoRegister;
2784 }
2785
2786 if (isRegularReg(RegKind))
2787 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2788
2789 return Reg;
2790}
2791
2792bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2793 unsigned &RegNum, unsigned &RegWidth,
2794 SmallVectorImpl<AsmToken> &Tokens) {
2795 auto Loc = getLoc();
2796 Reg = AMDGPU::NoRegister;
2797
2798 if (isToken(AsmToken::Identifier)) {
2799 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2800 if (Reg == AMDGPU::NoRegister)
2801 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2802 } else {
2803 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2804 }
2805
2806 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2807 if (Reg == AMDGPU::NoRegister) {
2808 assert(Parser.hasPendingError());
2809 return false;
2810 }
2811
2812 if (!subtargetHasRegister(*TRI, Reg)) {
2813 if (Reg == AMDGPU::SGPR_NULL) {
2814 Error(Loc, "'null' operand is not supported on this GPU");
2815 } else {
2816 Error(Loc, "register not available on this GPU");
2817 }
2818 return false;
2819 }
2820
2821 return true;
2822}
2823
2824bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2825 unsigned &RegNum, unsigned &RegWidth,
2826 bool RestoreOnFailure /*=false*/) {
2827 Reg = AMDGPU::NoRegister;
2828
2830 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2831 if (RestoreOnFailure) {
2832 while (!Tokens.empty()) {
2833 getLexer().UnLex(Tokens.pop_back_val());
2834 }
2835 }
2836 return true;
2837 }
2838 return false;
2839}
2840
2841std::optional<StringRef>
2842AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2843 switch (RegKind) {
2844 case IS_VGPR:
2845 return StringRef(".amdgcn.next_free_vgpr");
2846 case IS_SGPR:
2847 return StringRef(".amdgcn.next_free_sgpr");
2848 default:
2849 return std::nullopt;
2850 }
2851}
2852
2853void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2854 auto SymbolName = getGprCountSymbolName(RegKind);
2855 assert(SymbolName && "initializing invalid register kind");
2856 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2857 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2858}
2859
2860bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2861 unsigned DwordRegIndex,
2862 unsigned RegWidth) {
2863 // Symbols are only defined for GCN targets
2864 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2865 return true;
2866
2867 auto SymbolName = getGprCountSymbolName(RegKind);
2868 if (!SymbolName)
2869 return true;
2870 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2871
2872 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
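// For example, after parsing v[4:7] (DwordRegIndex = 4, RegWidth = 128),
// NewMax is 7 and .amdgcn.next_free_vgpr is raised to 8 if it is currently smaller.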
2873 int64_t OldCount;
2874
2875 if (!Sym->isVariable())
2876 return !Error(getLoc(),
2877 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2878 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2879 return !Error(
2880 getLoc(),
2881 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2882
2883 if (OldCount <= NewMax)
2884 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2885
2886 return true;
2887}
2888
2889std::unique_ptr<AMDGPUOperand>
2890AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2891 const auto &Tok = getToken();
2892 SMLoc StartLoc = Tok.getLoc();
2893 SMLoc EndLoc = Tok.getEndLoc();
2894 RegisterKind RegKind;
2895 unsigned Reg, RegNum, RegWidth;
2896
2897 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2898 return nullptr;
2899 }
2900 if (isHsaAbi(getSTI())) {
2901 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2902 return nullptr;
2903 } else
2904 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2905 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2906}
2907
2908ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
2909 bool HasSP3AbsModifier) {
2910 // TODO: add syntactic sugar for 1/(2*PI)
2911
2912 if (isRegister())
2913 return ParseStatus::NoMatch;
2914 assert(!isModifier());
2915
2916 const auto& Tok = getToken();
2917 const auto& NextTok = peekToken();
2918 bool IsReal = Tok.is(AsmToken::Real);
2919 SMLoc S = getLoc();
2920 bool Negate = false;
2921
2922 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2923 lex();
2924 IsReal = true;
2925 Negate = true;
2926 }
2927
2928 if (IsReal) {
2929 // Floating-point expressions are not supported.
2930 // Only floating-point literals with an
2931 // optional sign are allowed.
2932
2933 StringRef Num = getTokenStr();
2934 lex();
2935
2936 APFloat RealVal(APFloat::IEEEdouble());
2937 auto roundMode = APFloat::rmNearestTiesToEven;
2938 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
2939 return ParseStatus::Failure;
2940 if (Negate)
2941 RealVal.changeSign();
2942
2943 Operands.push_back(
2944 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2945 AMDGPUOperand::ImmTyNone, true));
2946
2947 return ParseStatus::Success;
2948
2949 } else {
2950 int64_t IntVal;
2951 const MCExpr *Expr;
2952 SMLoc S = getLoc();
2953
2954 if (HasSP3AbsModifier) {
2955 // This is a workaround for handling expressions
2956 // as arguments of SP3 'abs' modifier, for example:
2957 // |1.0|
2958 // |-1|
2959 // |1+x|
2960 // This syntax is not compatible with syntax of standard
2961 // MC expressions (due to the trailing '|').
2962 SMLoc EndLoc;
2963 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2964 return ParseStatus::Failure;
2965 } else {
2966 if (Parser.parseExpression(Expr))
2967 return ParseStatus::Failure;
2968 }
2969
2970 if (Expr->evaluateAsAbsolute(IntVal)) {
2971 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2972 } else {
2973 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2974 }
2975
2976 return ParseStatus::Success;
2977 }
2978
2979 return ParseStatus::NoMatch;
2980}
2981
2982ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2983 if (!isRegister())
2984 return ParseStatus::NoMatch;
2985
2986 if (auto R = parseRegister()) {
2987 assert(R->isReg());
2988 Operands.push_back(std::move(R));
2989 return ParseStatus::Success;
2990 }
2991 return ParseStatus::Failure;
2992}
2993
2994ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
2995 bool HasSP3AbsMod) {
2996 ParseStatus Res = parseReg(Operands);
2997 if (!Res.isNoMatch())
2998 return Res;
2999 if (isModifier())
3000 return ParseStatus::NoMatch;
3001 return parseImm(Operands, HasSP3AbsMod);
3002}
3003
3004bool
3005AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3006 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3007 const auto &str = Token.getString();
3008 return str == "abs" || str == "neg" || str == "sext";
3009 }
3010 return false;
3011}
3012
3013bool
3014AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3015 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3016}
3017
3018bool
3019AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3020 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3021}
3022
3023bool
3024AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3025 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3026}
3027
3028// Check if this is an operand modifier or an opcode modifier
3029// which may look like an expression but it is not. We should
3030// avoid parsing these modifiers as expressions. Currently
3031// recognized sequences are:
3032// |...|
3033// abs(...)
3034// neg(...)
3035// sext(...)
3036// -reg
3037// -|...|
3038// -abs(...)
3039// name:...
3040//
3041bool
3042AMDGPUAsmParser::isModifier() {
3043
3044 AsmToken Tok = getToken();
3045 AsmToken NextToken[2];
3046 peekTokens(NextToken);
3047
3048 return isOperandModifier(Tok, NextToken[0]) ||
3049 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3050 isOpcodeModifierWithVal(Tok, NextToken[0]);
3051}
3052
3053// Check if the current token is an SP3 'neg' modifier.
3054// Currently this modifier is allowed in the following context:
3055//
3056// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3057// 2. Before an 'abs' modifier: -abs(...)
3058// 3. Before an SP3 'abs' modifier: -|...|
3059//
3060// In all other cases "-" is handled as a part
3061// of an expression that follows the sign.
3062//
3063// Note: When "-" is followed by an integer literal,
3064// this is interpreted as integer negation rather
3065// than a floating-point NEG modifier applied to N.
3066 // Besides being counter-intuitive, such use of the floating-point
3067 // NEG modifier would have resulted in different meanings
3068// of integer literals used with VOP1/2/C and VOP3,
3069// for example:
3070// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3071// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3072// Negative fp literals with preceding "-" are
3073// handled likewise for uniformity
3074//
3075bool
3076AMDGPUAsmParser::parseSP3NegModifier() {
3077
3078 AsmToken NextToken[2];
3079 peekTokens(NextToken);
3080
3081 if (isToken(AsmToken::Minus) &&
3082 (isRegister(NextToken[0], NextToken[1]) ||
3083 NextToken[0].is(AsmToken::Pipe) ||
3084 isId(NextToken[0], "abs"))) {
3085 lex();
3086 return true;
3087 }
3088
3089 return false;
3090}
3091
3093AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3094 bool AllowImm) {
3095 bool Neg, SP3Neg;
3096 bool Abs, SP3Abs;
3097 SMLoc Loc;
3098
3099 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3100 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3101 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3102
3103 SP3Neg = parseSP3NegModifier();
3104
3105 Loc = getLoc();
3106 Neg = trySkipId("neg");
3107 if (Neg && SP3Neg)
3108 return Error(Loc, "expected register or immediate");
3109 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3110 return ParseStatus::Failure;
3111
3112 Abs = trySkipId("abs");
3113 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3114 return ParseStatus::Failure;
3115
3116 Loc = getLoc();
3117 SP3Abs = trySkipToken(AsmToken::Pipe);
3118 if (Abs && SP3Abs)
3119 return Error(Loc, "expected register or immediate");
3120
3121 ParseStatus Res;
3122 if (AllowImm) {
3123 Res = parseRegOrImm(Operands, SP3Abs);
3124 } else {
3125 Res = parseReg(Operands);
3126 }
3127 if (!Res.isSuccess())
3128 return (SP3Neg || Neg || SP3Abs || Abs) ? ParseStatus::Failure : Res;
3129
3130 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3131 return ParseStatus::Failure;
3132 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3133 return ParseStatus::Failure;
3134 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3135 return ParseStatus::Failure;
3136
3137 AMDGPUOperand::Modifiers Mods;
3138 Mods.Abs = Abs || SP3Abs;
3139 Mods.Neg = Neg || SP3Neg;
3140
3141 if (Mods.hasFPModifiers()) {
3142 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3143 if (Op.isExpr())
3144 return Error(Op.getStartLoc(), "expected an absolute expression");
3145 Op.setModifiers(Mods);
3146 }
3147 return ParseStatus::Success;
3148}
3149
3151AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3152 bool AllowImm) {
3153 bool Sext = trySkipId("sext");
3154 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3155 return ParseStatus::Failure;
3156
3157 ParseStatus Res;
3158 if (AllowImm) {
3159 Res = parseRegOrImm(Operands);
3160 } else {
3161 Res = parseReg(Operands);
3162 }
3163 if (!Res.isSuccess())
3164 return Sext ? ParseStatus::Failure : Res;
3165
3166 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3167 return ParseStatus::Failure;
3168
3169 AMDGPUOperand::Modifiers Mods;
3170 Mods.Sext = Sext;
3171
3172 if (Mods.hasIntModifiers()) {
3173 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3174 if (Op.isExpr())
3175 return Error(Op.getStartLoc(), "expected an absolute expression");
3176 Op.setModifiers(Mods);
3177 }
3178
3179 return ParseStatus::Success;
3180}
3181
3182ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3183 return parseRegOrImmWithFPInputMods(Operands, false);
3184}
3185
3186ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3187 return parseRegOrImmWithIntInputMods(Operands, false);
3188}
3189
3190ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3191 auto Loc = getLoc();
3192 if (trySkipId("off")) {
3193 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3194 AMDGPUOperand::ImmTyOff, false));
3195 return ParseStatus::Success;
3196 }
3197
3198 if (!isRegister())
3199 return ParseStatus::NoMatch;
3200
3201 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3202 if (Reg) {
3203 Operands.push_back(std::move(Reg));
3204 return ParseStatus::Success;
3205 }
3206
3207 return ParseStatus::Failure;
3208}
3209
3210unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3211 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3212
3213 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3214 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3215 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3216 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3217 return Match_InvalidOperand;
3218
3219 if ((TSFlags & SIInstrFlags::VOP3) &&
3220 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3221 getForcedEncodingSize() != 64)
3222 return Match_PreferE32;
3223
3224 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3225 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3226 // v_mac_f32/16 allow only dst_sel == DWORD;
3227 auto OpNum =
3228 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3229 const auto &Op = Inst.getOperand(OpNum);
3230 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3231 return Match_InvalidOperand;
3232 }
3233 }
3234
3235 return Match_Success;
3236}
3237
3238 static ArrayRef<unsigned> getAllVariants() {
3239 static const unsigned Variants[] = {
3240 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3241 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP,
3242 AMDGPUAsmVariants::VOP3_DPP
3243 };
3244
3245 return ArrayRef(Variants);
3246}
3247
3248// What asm variants we should check
3249ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3250 if (isForcedDPP() && isForcedVOP3()) {
3251 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3252 return ArrayRef(Variants);
3253 }
3254 if (getForcedEncodingSize() == 32) {
3255 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3256 return ArrayRef(Variants);
3257 }
3258
3259 if (isForcedVOP3()) {
3260 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3261 return ArrayRef(Variants);
3262 }
3263
3264 if (isForcedSDWA()) {
3265 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3266 AMDGPUAsmVariants::SDWA9};
3267 return ArrayRef(Variants);
3268 }
3269
3270 if (isForcedDPP()) {
3271 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3272 return ArrayRef(Variants);
3273 }
3274
3275 return getAllVariants();
3276}
3277
3278StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3279 if (isForcedDPP() && isForcedVOP3())
3280 return "e64_dpp";
3281
3282 if (getForcedEncodingSize() == 32)
3283 return "e32";
3284
3285 if (isForcedVOP3())
3286 return "e64";
3287
3288 if (isForcedSDWA())
3289 return "sdwa";
3290
3291 if (isForcedDPP())
3292 return "dpp";
3293
3294 return "";
3295}
3296
3297unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3298 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3299 for (MCPhysReg Reg : Desc.implicit_uses()) {
3300 switch (Reg) {
3301 case AMDGPU::FLAT_SCR:
3302 case AMDGPU::VCC:
3303 case AMDGPU::VCC_LO:
3304 case AMDGPU::VCC_HI:
3305 case AMDGPU::M0:
3306 return Reg;
3307 default:
3308 break;
3309 }
3310 }
3311 return AMDGPU::NoRegister;
3312}
3313
3314// NB: This code is correct only when used to check constant
3315 // bus limitations because GFX7 supports no f16 inline constants.
3316// Note that there are no cases when a GFX7 opcode violates
3317// constant bus limitations due to the use of an f16 constant.
3318bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3319 unsigned OpIdx) const {
3320 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3321
3322 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3323 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3324 return false;
3325 }
3326
3327 const MCOperand &MO = Inst.getOperand(OpIdx);
3328
3329 int64_t Val = MO.getImm();
3330 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3331
3332 switch (OpSize) { // expected operand size
3333 case 8:
3334 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3335 case 4:
3336 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3337 case 2: {
3338 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3343
3348
3352 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3353
3354 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3355 }
3356 default:
3357 llvm_unreachable("invalid operand size");
3358 }
3359}
3360
3361unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3362 if (!isGFX10Plus())
3363 return 1;
3364
3365 switch (Opcode) {
3366 // 64-bit shift instructions can use only one scalar value input
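// (e.g. a v_lshlrev_b64 that reads two different SGPRs exceeds this limit
// and is rejected.)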
3367 case AMDGPU::V_LSHLREV_B64_e64:
3368 case AMDGPU::V_LSHLREV_B64_gfx10:
3369 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3370 case AMDGPU::V_LSHRREV_B64_e64:
3371 case AMDGPU::V_LSHRREV_B64_gfx10:
3372 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3373 case AMDGPU::V_ASHRREV_I64_e64:
3374 case AMDGPU::V_ASHRREV_I64_gfx10:
3375 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3376 case AMDGPU::V_LSHL_B64_e64:
3377 case AMDGPU::V_LSHR_B64_e64:
3378 case AMDGPU::V_ASHR_I64_e64:
3379 return 1;
3380 default:
3381 return 2;
3382 }
3383}
3384
3385constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3386 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3387
3388// Get regular operand indices in the same order as specified
3389// in the instruction (but append mandatory literals to the end).
3390 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3391 bool AddMandatoryLiterals = false) {
3392
3393 int16_t ImmIdx =
3394 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3395
3396 if (isVOPD(Opcode)) {
3397 int16_t ImmDeferredIdx =
3398 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3399 : -1;
3400
3401 return {getNamedOperandIdx(Opcode, OpName::src0X),
3402 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3403 getNamedOperandIdx(Opcode, OpName::src0Y),
3404 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3405 ImmDeferredIdx,
3406 ImmIdx};
3407 }
3408
3409 return {getNamedOperandIdx(Opcode, OpName::src0),
3410 getNamedOperandIdx(Opcode, OpName::src1),
3411 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3412}
3413
3414bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3415 const MCOperand &MO = Inst.getOperand(OpIdx);
3416 if (MO.isImm()) {
3417 return !isInlineConstant(Inst, OpIdx);
3418 } else if (MO.isReg()) {
3419 auto Reg = MO.getReg();
3420 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3421 auto PReg = mc2PseudoReg(Reg);
3422 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3423 } else {
3424 return true;
3425 }
3426}
3427
3428bool AMDGPUAsmParser::validateConstantBusLimitations(
3429 const MCInst &Inst, const OperandVector &Operands) {
3430 const unsigned Opcode = Inst.getOpcode();
3431 const MCInstrDesc &Desc = MII.get(Opcode);
3432 unsigned LastSGPR = AMDGPU::NoRegister;
3433 unsigned ConstantBusUseCount = 0;
3434 unsigned NumLiterals = 0;
3435 unsigned LiteralSize;
3436
3437 if (!(Desc.TSFlags &
3440 !isVOPD(Opcode))
3441 return true;
3442
3443 // Check special imm operands (used by madmk, etc)
3444 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3445 ++NumLiterals;
3446 LiteralSize = 4;
3447 }
3448
3449 SmallDenseSet<unsigned> SGPRsUsed;
3450 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3451 if (SGPRUsed != AMDGPU::NoRegister) {
3452 SGPRsUsed.insert(SGPRUsed);
3453 ++ConstantBusUseCount;
3454 }
3455
3456 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3457
3458 for (int OpIdx : OpIndices) {
3459 if (OpIdx == -1)
3460 continue;
3461
3462 const MCOperand &MO = Inst.getOperand(OpIdx);
3463 if (usesConstantBus(Inst, OpIdx)) {
3464 if (MO.isReg()) {
3465 LastSGPR = mc2PseudoReg(MO.getReg());
3466 // Pairs of registers with a partial intersection like these
3467 // s0, s[0:1]
3468 // flat_scratch_lo, flat_scratch
3469 // flat_scratch_lo, flat_scratch_hi
3470 // are theoretically valid but they are disabled anyway.
3471 // Note that this code mimics SIInstrInfo::verifyInstruction
3472 if (SGPRsUsed.insert(LastSGPR).second) {
3473 ++ConstantBusUseCount;
3474 }
3475 } else { // Expression or a literal
3476
3477 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3478 continue; // special operand like VINTERP attr_chan
3479
3480 // An instruction may use only one literal.
3481 // This has been validated in a previous step.
3482 // See validateVOPLiteral.
3483 // This literal may be used as more than one operand.
3484 // If all these operands are of the same size,
3485 // this literal counts as one scalar value.
3486 // Otherwise it counts as 2 scalar values.
3487 // See "GFX10 Shader Programming", section 3.6.2.3.
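// For example, the same 32-bit literal used for two 32-bit source operands
// counts as one value, but if it also feeds a 64-bit operand it counts as two.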
3488
3489 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3490 if (Size < 4)
3491 Size = 4;
3492
3493 if (NumLiterals == 0) {
3494 NumLiterals = 1;
3495 LiteralSize = Size;
3496 } else if (LiteralSize != Size) {
3497 NumLiterals = 2;
3498 }
3499 }
3500 }
3501 }
3502 ConstantBusUseCount += NumLiterals;
3503
3504 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3505 return true;
3506
3507 SMLoc LitLoc = getLitLoc(Operands);
3508 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3509 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3510 Error(Loc, "invalid operand (violates constant bus restrictions)");
3511 return false;
3512}
3513
3514bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3515 const MCInst &Inst, const OperandVector &Operands) {
3516
3517 const unsigned Opcode = Inst.getOpcode();
3518 if (!isVOPD(Opcode))
3519 return true;
3520
3521 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3522
3523 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3524 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3525 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3526 ? Opr.getReg()
3527 : MCRegister();
3528 };
3529
3530 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3531 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(getVRegIdx);
3532 if (!InvalidCompOprIdx)
3533 return true;
3534
3535 auto CompOprIdx = *InvalidCompOprIdx;
3536 auto ParsedIdx =
3537 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3538 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3539 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3540
3541 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3542 if (CompOprIdx == VOPD::Component::DST) {
3543 Error(Loc, "one dst register must be even and the other odd");
3544 } else {
3545 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3546 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3547 " operands must use different VGPR banks");
3548 }
3549
3550 return false;
3551}
3552
3553bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3554
3555 const unsigned Opc = Inst.getOpcode();
3556 const MCInstrDesc &Desc = MII.get(Opc);
3557
3558 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3559 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3560 assert(ClampIdx != -1);
3561 return Inst.getOperand(ClampIdx).getImm() == 0;
3562 }
3563
3564 return true;
3565}
3566
3567bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3568 const SMLoc &IDLoc) {
3569
3570 const unsigned Opc = Inst.getOpcode();
3571 const MCInstrDesc &Desc = MII.get(Opc);
3572
3573 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3574 return true;
3575
3576 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3577 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3578 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3579
3580 assert(VDataIdx != -1);
3581
3582 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3583 return true;
3584
3585 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3586 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3587 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3588 if (DMask == 0)
3589 DMask = 1;
3590
3591 bool IsPackedD16 = false;
3592 unsigned DataSize =
3593 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3594 if (hasPackedD16()) {
3595 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3596 IsPackedD16 = D16Idx >= 0;
3597 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3598 DataSize = (DataSize + 1) / 2;
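// For example, with d16 set and dmask == 0xf, four 16-bit components pack
// into two dwords of vdata.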
3599 }
3600
3601 if ((VDataSize / 4) == DataSize + TFESize)
3602 return true;
3603
3604 StringRef Modifiers;
3605 if (isGFX90A())
3606 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3607 else
3608 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3609
3610 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3611 return false;
3612}
3613
3614bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3615 const SMLoc &IDLoc) {
3616 const unsigned Opc = Inst.getOpcode();
3617 const MCInstrDesc &Desc = MII.get(Opc);
3618
3619 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3620 return true;
3621
3622 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3623
3624 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3625 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3626 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3627 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3628 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3629 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3630
3631 assert(VAddr0Idx != -1);
3632 assert(SrsrcIdx != -1);
3633 assert(SrsrcIdx > VAddr0Idx);
3634
3635 bool IsA16 = Inst.getOperand(A16Idx).getImm();
3636 if (BaseOpcode->BVH) {
3637 if (IsA16 == BaseOpcode->A16)
3638 return true;
3639 Error(IDLoc, "image address size does not match a16");
3640 return false;
3641 }
3642
3643 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3644 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3645 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3646 unsigned ActualAddrSize =
3647 IsNSA ? SrsrcIdx - VAddr0Idx
3648 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3649
3650 unsigned ExpectedAddrSize =
3651 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3652
3653 if (IsNSA) {
3654 if (hasPartialNSAEncoding() && ExpectedAddrSize > getNSAMaxSize()) {
3655 int VAddrLastIdx = SrsrcIdx - 1;
3656 unsigned VAddrLastSize =
3657 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3658
3659 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3660 }
3661 } else {
3662 if (ExpectedAddrSize > 12)
3663 ExpectedAddrSize = 16;
3664
3665 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3666 // This provides backward compatibility for assembly created
3667 // before 160b/192b/224b types were directly supported.
3668 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3669 return true;
3670 }
3671
3672 if (ActualAddrSize == ExpectedAddrSize)
3673 return true;
3674
3675 Error(IDLoc, "image address size does not match dim and a16");
3676 return false;
3677}
3678
3679bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3680
3681 const unsigned Opc = Inst.getOpcode();
3682 const MCInstrDesc &Desc = MII.get(Opc);
3683
3684 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3685 return true;
3686 if (!Desc.mayLoad() || !Desc.mayStore())
3687 return true; // Not atomic
3688
3689 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3690 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3691
3692 // This is an incomplete check because image_atomic_cmpswap
3693 // may only use 0x3 and 0xf while other atomic operations
3694 // may use 0x1 and 0x3. However these limitations are
3695 // verified when we check that dmask matches dst size.
3696 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3697}
3698
3699bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3700
3701 const unsigned Opc = Inst.getOpcode();
3702 const MCInstrDesc &Desc = MII.get(Opc);
3703
3704 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3705 return true;
3706
3707 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3708 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3709
3710 // GATHER4 instructions use dmask in a different fashion compared to
3711 // other MIMG instructions. The only useful DMASK values are
3712 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3713 // (red,red,red,red) etc.) The ISA document doesn't mention
3714 // this.
3715 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3716}
3717
3718bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3719 const unsigned Opc = Inst.getOpcode();
3720 const MCInstrDesc &Desc = MII.get(Opc);
3721
3722 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3723 return true;
3724
3725 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3726 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3727 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3728
3729 if (!BaseOpcode->MSAA)
3730 return true;
3731
3732 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3733 assert(DimIdx != -1);
3734
3735 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3736 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3737
3738 return DimInfo->MSAA;
3739}
3740
3741static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3742{
3743 switch (Opcode) {
3744 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3745 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3746 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3747 return true;
3748 default:
3749 return false;
3750 }
3751}
3752
3753 // movrels* opcodes should only allow VGPRs as src0.
3754// This is specified in .td description for vop1/vop3,
3755// but sdwa is handled differently. See isSDWAOperand.
3756bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3757 const OperandVector &Operands) {
3758
3759 const unsigned Opc = Inst.getOpcode();
3760 const MCInstrDesc &Desc = MII.get(Opc);
3761
3762 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3763 return true;
3764
3765 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3766 assert(Src0Idx != -1);
3767
3768 SMLoc ErrLoc;
3769 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3770 if (Src0.isReg()) {
3771 auto Reg = mc2PseudoReg(Src0.getReg());
3772 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3773 if (!isSGPR(Reg, TRI))
3774 return true;
3775 ErrLoc = getRegLoc(Reg, Operands);
3776 } else {
3777 ErrLoc = getConstLoc(Operands);
3778 }
3779
3780 Error(ErrLoc, "source operand must be a VGPR");
3781 return false;
3782}
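// Example (illustrative; SDWA syntax assumed): "v_movrels_b32_sdwa v0, v1" is
// accepted, whereas "v_movrels_b32_sdwa v0, s1" fails this check with
// "source operand must be a VGPR".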
3783
3784bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3785 const OperandVector &Operands) {
3786
3787 const unsigned Opc = Inst.getOpcode();
3788
3789 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3790 return true;
3791
3792 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3793 assert(Src0Idx != -1);
3794
3795 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3796 if (!Src0.isReg())
3797 return true;
3798
3799 auto Reg = mc2PseudoReg(Src0.getReg());
3800 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3801 if (!isGFX90A() && isSGPR(Reg, TRI)) {
3802 Error(getRegLoc(Reg, Operands),
3803 "source operand must be either a VGPR or an inline constant");
3804 return false;
3805 }
3806
3807 return true;
3808}
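// Example (illustrative register choice): on gfx908, "v_accvgpr_write_b32 a0, s0"
// is rejected with "source operand must be either a VGPR or an inline constant",
// while "v_accvgpr_write_b32 a0, v0" or "v_accvgpr_write_b32 a0, 1" are accepted;
// gfx90a lifts the SGPR restriction.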
3809
3810bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
3811 const OperandVector &Operands) {
3812 unsigned Opcode = Inst.getOpcode();
3813 const MCInstrDesc &Desc = MII.get(Opcode);
3814
3815 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
3816 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
3817 return true;
3818
3819 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
3820 if (Src2Idx == -1)
3821 return true;
3822
3823 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
3824 Error(getConstLoc(Operands),
3825 "inline constants are not allowed for this operand");
3826 return false;
3827 }
3828
3829 return true;
3830}
3831
3832bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3833 const OperandVector &Operands) {
3834 const unsigned Opc = Inst.getOpcode();
3835 const MCInstrDesc &Desc = MII.get(Opc);
3836
3837 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3838 return true;
3839
3840 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3841 if (Src2Idx == -1)
3842 return true;
3843
3844 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3845 if (!Src2.isReg())
3846 return true;
3847
3848 MCRegister Src2Reg = Src2.getReg();
3849 MCRegister DstReg = Inst.getOperand(0).getReg();
3850 if (Src2Reg == DstReg)
3851 return true;
3852
3853 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3854 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
3855 return true;
3856
3857 if (TRI->regsOverlap(Src2Reg, DstReg)) {
3858 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3859 "source 2 operand must not partially overlap with dst");
3860 return false;
3861 }
3862
3863 return true;
3864}
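// Example (illustrative mnemonic and registers assumed): for an MFMA whose
// destination is wider than 128 bits, src2 may equal the destination, e.g.
//   v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[2:3], a[0:15]
// but a partially overlapping accumulator such as a[8:23] would be rejected
// with "source 2 operand must not partially overlap with dst".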
3865
3866bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3867 switch (Inst.getOpcode()) {
3868 default:
3869 return true;
3870 case V_DIV_SCALE_F32_gfx6_gfx7:
3871 case V_DIV_SCALE_F32_vi:
3872 case V_DIV_SCALE_F32_gfx10:
3873 case V_DIV_SCALE_F64_gfx6_gfx7:
3874 case V_DIV_SCALE_F64_vi:
3875 case V_DIV_SCALE_F64_gfx10:
3876 break;
3877 }
3878
3879 // TODO: Check that src0 = src1 or src2.
3880
3881 for (auto Name : {AMDGPU::OpName::src0_modifiers,
3882 AMDGPU::OpName::src2_modifiers,
3883 AMDGPU::OpName::src2_modifiers}) {
3884 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3885 .getImm() &
3886 SISrcMods::ABS) {
3887 return false;
3888 }
3889 }
3890
3891 return true;
3892}
3893
3894bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3895
3896 const unsigned Opc = Inst.getOpcode();
3897 const MCInstrDesc &Desc = MII.get(Opc);
3898
3899 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3900 return true;
3901
3902 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3903 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3904 if (isCI() || isSI())
3905 return false;
3906 }
3907
3908 return true;
3909}
3910
3911static bool IsRevOpcode(const unsigned Opcode)
3912{
3913 switch (Opcode) {
3914 case AMDGPU::V_SUBREV_F32_e32:
3915 case AMDGPU::V_SUBREV_F32_e64:
3916 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3917 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3918 case AMDGPU::V_SUBREV_F32_e32_vi:
3919 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3920 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3921 case AMDGPU::V_SUBREV_F32_e64_vi:
3922
3923 case AMDGPU::V_SUBREV_CO_U32_e32:
3924 case AMDGPU::V_SUBREV_CO_U32_e64:
3925 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3926 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3927
3928 case AMDGPU::V_SUBBREV_U32_e32:
3929 case AMDGPU::V_SUBBREV_U32_e64:
3930 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3931 case AMDGPU::V_SUBBREV_U32_e32_vi:
3932 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3933 case AMDGPU::V_SUBBREV_U32_e64_vi:
3934
3935 case AMDGPU::V_SUBREV_U32_e32:
3936 case AMDGPU::V_SUBREV_U32_e64:
3937 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3938 case AMDGPU::V_SUBREV_U32_e32_vi:
3939 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3940 case AMDGPU::V_SUBREV_U32_e64_vi:
3941
3942 case AMDGPU::V_SUBREV_F16_e32:
3943 case AMDGPU::V_SUBREV_F16_e64:
3944 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3945 case AMDGPU::V_SUBREV_F16_e32_vi:
3946 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3947 case AMDGPU::V_SUBREV_F16_e64_vi:
3948
3949 case AMDGPU::V_SUBREV_U16_e32:
3950 case AMDGPU::V_SUBREV_U16_e64:
3951 case AMDGPU::V_SUBREV_U16_e32_vi:
3952 case AMDGPU::V_SUBREV_U16_e64_vi:
3953
3954 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3955 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3956 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3957
3958 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3959 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3960
3961 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3962 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3963
3964 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3965 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3966
3967 case AMDGPU::V_LSHRREV_B32_e32:
3968 case AMDGPU::V_LSHRREV_B32_e64:
3969 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3970 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3971 case AMDGPU::V_LSHRREV_B32_e32_vi:
3972 case AMDGPU::V_LSHRREV_B32_e64_vi:
3973 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3974 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3975
3976 case AMDGPU::V_ASHRREV_I32_e32:
3977 case AMDGPU::V_ASHRREV_I32_e64:
3978 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3979 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3980 case AMDGPU::V_ASHRREV_I32_e32_vi:
3981 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3982 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3983 case AMDGPU::V_ASHRREV_I32_e64_vi:
3984
3985 case AMDGPU::V_LSHLREV_B32_e32:
3986 case AMDGPU::V_LSHLREV_B32_e64:
3987 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3988 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3989 case AMDGPU::V_LSHLREV_B32_e32_vi:
3990 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3991 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3992 case AMDGPU::V_LSHLREV_B32_e64_vi:
3993
3994 case AMDGPU::V_LSHLREV_B16_e32:
3995 case AMDGPU::V_LSHLREV_B16_e64:
3996 case AMDGPU::V_LSHLREV_B16_e32_vi:
3997 case AMDGPU::V_LSHLREV_B16_e64_vi:
3998 case AMDGPU::V_LSHLREV_B16_gfx10:
3999
4000 case AMDGPU::V_LSHRREV_B16_e32:
4001 case AMDGPU::V_LSHRREV_B16_e64:
4002 case AMDGPU::V_LSHRREV_B16_e32_vi:
4003 case AMDGPU::V_LSHRREV_B16_e64_vi:
4004 case AMDGPU::V_LSHRREV_B16_gfx10:
4005
4006 case AMDGPU::V_ASHRREV_I16_e32:
4007 case AMDGPU::V_ASHRREV_I16_e64:
4008 case AMDGPU::V_ASHRREV_I16_e32_vi:
4009 case AMDGPU::V_ASHRREV_I16_e64_vi:
4010 case AMDGPU::V_ASHRREV_I16_gfx10:
4011
4012 case AMDGPU::V_LSHLREV_B64_e64:
4013 case AMDGPU::V_LSHLREV_B64_gfx10:
4014 case AMDGPU::V_LSHLREV_B64_vi:
4015
4016 case AMDGPU::V_LSHRREV_B64_e64:
4017 case AMDGPU::V_LSHRREV_B64_gfx10:
4018 case AMDGPU::V_LSHRREV_B64_vi:
4019
4020 case AMDGPU::V_ASHRREV_I64_e64:
4021 case AMDGPU::V_ASHRREV_I64_gfx10:
4022 case AMDGPU::V_ASHRREV_I64_vi:
4023
4024 case AMDGPU::V_PK_LSHLREV_B16:
4025 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4026 case AMDGPU::V_PK_LSHLREV_B16_vi:
4027
4028 case AMDGPU::V_PK_LSHRREV_B16:
4029 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4030 case AMDGPU::V_PK_LSHRREV_B16_vi:
4031 case AMDGPU::V_PK_ASHRREV_I16:
4032 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4033 case AMDGPU::V_PK_ASHRREV_I16_vi:
4034 return true;
4035 default:
4036 return false;
4037 }
4038}
4039
4040std::optional<StringRef>
4041AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4042
4043 using namespace SIInstrFlags;
4044 const unsigned Opcode = Inst.getOpcode();
4045 const MCInstrDesc &Desc = MII.get(Opcode);
4046
4047 // lds_direct register is defined so that it can be used
4048 // with 9-bit operands only. Ignore encodings which do not accept these.
4049 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4050 if ((Desc.TSFlags & Enc) == 0)
4051 return std::nullopt;
4052
4053 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4054 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4055 if (SrcIdx == -1)
4056 break;
4057 const auto &Src = Inst.getOperand(SrcIdx);
4058 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4059
4060 if (isGFX90A() || isGFX11Plus())
4061 return StringRef("lds_direct is not supported on this GPU");
4062
4063 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4064 return StringRef("lds_direct cannot be used with this instruction");
4065
4066 if (SrcName != OpName::src0)
4067 return StringRef("lds_direct may be used as src0 only");
4068 }
4069 }
4070
4071 return std::nullopt;
4072}
4073
4074SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4075 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4076 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4077 if (Op.isFlatOffset())
4078 return Op.getStartLoc();
4079 }
4080 return getLoc();
4081}
4082
4083bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4084 const OperandVector &Operands) {
4085 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4086 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4087 return true;
4088
4089 auto Opcode = Inst.getOpcode();
4090 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4091 assert(OpNum != -1);
4092
4093 const auto &Op = Inst.getOperand(OpNum);
4094 if (!hasFlatOffsets() && Op.getImm() != 0) {
4095 Error(getFlatOffsetLoc(Operands),
4096 "flat offset modifier is not supported on this GPU");
4097 return false;
4098 }
4099
4100 // For FLAT segment the offset must be positive;
4101 // MSB is ignored and forced to zero.
4102 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4103 bool AllowNegative =
4104 TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
4105 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4106 Error(getFlatOffsetLoc(Operands),
4107 Twine("expected a ") +
4108 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4109 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4110 return false;
4111 }
4112
4113 return true;
4114}
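// Example (illustrative; offset widths vary by subtarget): a negative offset on
// a plain FLAT access, e.g. "flat_load_dword v0, v[0:1] offset:-8", is rejected
// with the "unsigned offset" diagnostic, while global/scratch forms such as
// "global_load_dword v0, v[0:1], off offset:-8" may use a signed offset in range.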
4115
4116SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4117 // Start with second operand because SMEM Offset cannot be dst or src0.
4118 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4119 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4120 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4121 return Op.getStartLoc();
4122 }
4123 return getLoc();
4124}
4125
4126bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4127 const OperandVector &Operands) {
4128 if (isCI() || isSI())
4129 return true;
4130
4131 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4132 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4133 return true;
4134
4135 auto Opcode = Inst.getOpcode();
4136 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4137 if (OpNum == -1)
4138 return true;
4139
4140 const auto &Op = Inst.getOperand(OpNum);
4141 if (!Op.isImm())
4142 return true;
4143
4144 uint64_t Offset = Op.getImm();
4145 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4146 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4147 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4148 return true;
4149
4150 Error(getSMEMOffsetLoc(Operands),
4151 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4152 "expected a 21-bit signed offset");
4153
4154 return false;
4155}
4156
4157bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4158 unsigned Opcode = Inst.getOpcode();
4159 const MCInstrDesc &Desc = MII.get(Opcode);
4160 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4161 return true;
4162
4163 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4164 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4165
4166 const int OpIndices[] = { Src0Idx, Src1Idx };
4167
4168 unsigned NumExprs = 0;
4169 unsigned NumLiterals = 0;
4170 uint32_t LiteralValue;
4171
4172 for (int OpIdx : OpIndices) {
4173 if (OpIdx == -1) break;
4174
4175 const MCOperand &MO = Inst.getOperand(OpIdx);
4176 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4177 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4178 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4179 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4180 if (NumLiterals == 0 || LiteralValue != Value) {
4181 LiteralValue = Value;
4182 ++NumLiterals;
4183 }
4184 } else if (MO.isExpr()) {
4185 ++NumExprs;
4186 }
4187 }
4188 }
4189
4190 return NumLiterals + NumExprs <= 1;
4191}
4192
4193bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4194 const unsigned Opc = Inst.getOpcode();
4195 if (isPermlane16(Opc)) {
4196 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4197 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4198
4199 if (OpSel & ~3)
4200 return false;
4201 }
4202
4203 uint64_t TSFlags = MII.get(Opc).TSFlags;
4204
4205 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4206 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4207 if (OpSelIdx != -1) {
4208 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4209 return false;
4210 }
4211 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4212 if (OpSelHiIdx != -1) {
4213 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4214 return false;
4215 }
4216 }
4217
4218 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4219 if ((TSFlags & SIInstrFlags::IsDOT) && (TSFlags & SIInstrFlags::VOP3) &&
4220 !(TSFlags & SIInstrFlags::VOP3P)) {
4221 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4222 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4223 if (OpSel & 3)
4224 return false;
4225 }
4226
4227 return true;
4228}
4229
4230bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4231 const OperandVector &Operands) {
4232 const unsigned Opc = Inst.getOpcode();
4233 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4234 if (DppCtrlIdx < 0)
4235 return true;
4236 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4237
4238 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4239 AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4240 // DP ALU DPP is supported for row_newbcast only on GFX9*
4241 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4242 Error(S, "DP ALU dpp only supports row_newbcast");
4243 return false;
4244 }
4245
4246 return true;
4247}
4248
4249// Check if VCC register matches wavefront size
4250bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4251 auto FB = getFeatureBits();
4252 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4253 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4254}
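// Example (illustrative): with wavesize=64 a carry-out operand must name the
// full "vcc" pair, e.g. "v_add_co_u32 v0, vcc, v1, v2"; with wavesize=32 the
// same instruction must use "vcc_lo" instead.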
4255
4256// One unique literal can be used. VOP3 literal is only allowed in GFX10+
4257bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4258 const OperandVector &Operands) {
4259 unsigned Opcode = Inst.getOpcode();
4260 const MCInstrDesc &Desc = MII.get(Opcode);
4261 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4262 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4263 !HasMandatoryLiteral && !isVOPD(Opcode))
4264 return true;
4265
4266 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4267
4268 unsigned NumExprs = 0;
4269 unsigned NumLiterals = 0;
4270 uint32_t LiteralValue;
4271
4272 for (int OpIdx : OpIndices) {
4273 if (OpIdx == -1)
4274 continue;
4275
4276 const MCOperand &MO = Inst.getOperand(OpIdx);
4277 if (!MO.isImm() && !MO.isExpr())
4278 continue;
4279 if (!isSISrcOperand(Desc, OpIdx))
4280 continue;
4281
4282 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4283 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4284 if (NumLiterals == 0 || LiteralValue != Value) {
4285 LiteralValue = Value;
4286 ++NumLiterals;
4287 }
4288 } else if (MO.isExpr()) {
4289 ++NumExprs;
4290 }
4291 }
4292 NumLiterals += NumExprs;
4293
4294 if (!NumLiterals)
4295 return true;
4296
4297 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4298 Error(getLitLoc(Operands), "literal operands are not supported");
4299 return false;
4300 }
4301
4302 if (NumLiterals > 1) {
4303 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4304 return false;
4305 }
4306
4307 return true;
4308}
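// Example (illustrative): "v_fma_f32 v0, v1, 0x3e800000, 0x3f000000" carries
// two distinct literals and is rejected with "only one unique literal operand
// is allowed"; repeating the same 32-bit literal in both source positions
// counts as a single literal, and any VOP3 literal additionally requires a
// target with FeatureVOP3Literal (GFX10+).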
4309
4310// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4311static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4312 const MCRegisterInfo *MRI) {
4313 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4314 if (OpIdx < 0)
4315 return -1;
4316
4317 const MCOperand &Op = Inst.getOperand(OpIdx);
4318 if (!Op.isReg())
4319 return -1;
4320
4321 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4322 auto Reg = Sub ? Sub : Op.getReg();
4323 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4324 return AGPR32.contains(Reg) ? 1 : 0;
4325}
4326
4327bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4328 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4329 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4330 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4331 SIInstrFlags::DS)) == 0)
4332 return true;
4333
4334 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4335 : AMDGPU::OpName::vdata;
4336
4337 const MCRegisterInfo *MRI = getMRI();
4338 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4339 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4340
4341 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4342 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4343 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4344 return false;
4345 }
4346
4347 auto FB = getFeatureBits();
4348 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4349 if (DataAreg < 0 || DstAreg < 0)
4350 return true;
4351 return DstAreg == DataAreg;
4352 }
4353
4354 return DstAreg < 1 && DataAreg < 1;
4355}
4356
4357bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4358 auto FB = getFeatureBits();
4359 if (!FB[AMDGPU::FeatureGFX90AInsts])
4360 return true;
4361
4362 const MCRegisterInfo *MRI = getMRI();
4363 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4364 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4365 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4366 const MCOperand &Op = Inst.getOperand(I);
4367 if (!Op.isReg())
4368 continue;
4369
4370 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4371 if (!Sub)
4372 continue;
4373
4374 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4375 return false;
4376 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4377 return false;
4378 }
4379
4380 return true;
4381}
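// Example (illustrative): on gfx90a a 64-bit VGPR tuple must start on an even
// register, so "v_add_f64 v[2:3], v[4:5], v[6:7]" is accepted while
// "v_add_f64 v[1:2], v[4:5], v[6:7]" triggers
// "vgpr tuples must be 64 bit aligned".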
4382
4383SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4384 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4385 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4386 if (Op.isBLGP())
4387 return Op.getStartLoc();
4388 }
4389 return SMLoc();
4390}
4391
4392bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4393 const OperandVector &Operands) {
4394 unsigned Opc = Inst.getOpcode();
4395 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4396 if (BlgpIdx == -1)
4397 return true;
4398 SMLoc BLGPLoc = getBLGPLoc(Operands);
4399 if (!BLGPLoc.isValid())
4400 return true;
4401 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4402 auto FB = getFeatureBits();
4403 bool UsesNeg = false;
4404 if (FB[AMDGPU::FeatureGFX940Insts]) {
4405 switch (Opc) {
4406 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4407 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4408 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4409 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4410 UsesNeg = true;
4411 }
4412 }
4413
4414 if (IsNeg == UsesNeg)
4415 return true;
4416
4417 Error(BLGPLoc,
4418 UsesNeg ? "invalid modifier: blgp is not supported"
4419 : "invalid modifier: neg is not supported");
4420
4421 return false;
4422}
4423
4424bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4425 const OperandVector &Operands) {
4426 if (!isGFX11Plus())
4427 return true;
4428
4429 unsigned Opc = Inst.getOpcode();
4430 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4431 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4432 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4433 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4434 return true;
4435
4436 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4437 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4438 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4439 if (Reg == AMDGPU::SGPR_NULL)
4440 return true;
4441
4442 SMLoc RegLoc = getRegLoc(Reg, Operands);
4443 Error(RegLoc, "src0 must be null");
4444 return false;
4445}
4446
4447bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4448 const OperandVector &Operands) {
4449 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4450 if ((TSFlags & SIInstrFlags::DS) == 0)
4451 return true;
4452 if (TSFlags & SIInstrFlags::GWS)
4453 return validateGWS(Inst, Operands);
4454 // Only validate GDS for non-GWS instructions.
4455 if (hasGDS())
4456 return true;
4457 int GDSIdx =
4458 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4459 if (GDSIdx < 0)
4460 return true;
4461 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4462 if (GDS) {
4463 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4464 Error(S, "gds modifier is not supported on this GPU");
4465 return false;
4466 }
4467 return true;
4468}
4469
4470// gfx90a has an undocumented limitation:
4471// DS_GWS opcodes must use even aligned registers.
4472bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4473 const OperandVector &Operands) {
4474 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4475 return true;
4476
4477 int Opc = Inst.getOpcode();
4478 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4479 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4480 return true;
4481
4482 const MCRegisterInfo *MRI = getMRI();
4483 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4484 int Data0Pos =
4485 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4486 assert(Data0Pos != -1);
4487 auto Reg = Inst.getOperand(Data0Pos).getReg();
4488 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4489 if (RegIdx & 1) {
4490 SMLoc RegLoc = getRegLoc(Reg, Operands);
4491 Error(RegLoc, "vgpr must be even aligned");
4492 return false;
4493 }
4494
4495 return true;
4496}
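// Example (illustrative syntax assumed): on gfx90a "ds_gws_init v2 gds" uses an
// even-aligned data register and is accepted, while "ds_gws_init v3 gds" is
// rejected with "vgpr must be even aligned".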
4497
4498bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4499 const OperandVector &Operands,
4500 const SMLoc &IDLoc) {
4501 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4502 AMDGPU::OpName::cpol);
4503 if (CPolPos == -1)
4504 return true;
4505
4506 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4507
4508 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4509 if (TSFlags & SIInstrFlags::SMRD) {
4510 if (CPol && (isSI() || isCI())) {
4511 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4512 Error(S, "cache policy is not supported for SMRD instructions");
4513 return false;
4514 }
4515 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4516 Error(IDLoc, "invalid cache policy for SMEM instruction");
4517 return false;
4518 }
4519 }
4520
4521 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4522 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4523 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4524 SIInstrFlags::FLAT;
4525 if (!(TSFlags & AllowSCCModifier)) {
4526 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4527 StringRef CStr(S.getPointer());
4528 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4529 Error(S,
4530 "scc modifier is not supported for this instruction on this GPU");
4531 return false;
4532 }
4533 }
4534
4535 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4536 return true;
4537
4538 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4539 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4540 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4541 : "instruction must use glc");
4542 return false;
4543 }
4544 } else {
4545 if (CPol & CPol::GLC) {
4546 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4547 StringRef CStr(S.getPointer());
4548 S = SMLoc::getFromPointer(
4549 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4550 Error(S, isGFX940() ? "instruction must not use sc0"
4551 : "instruction must not use glc");
4552 return false;
4553 }
4554 }
4555
4556 return true;
4557}
4558
4559bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4560 if (!isGFX11Plus())
4561 return true;
4562 for (auto &Operand : Operands) {
4563 if (!Operand->isReg())
4564 continue;
4565 unsigned Reg = Operand->getReg();
4566 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4567 Error(getRegLoc(Reg, Operands),
4568 "execz and vccz are not supported on this GPU");
4569 return false;
4570 }
4571 }
4572 return true;
4573}
4574
4575bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
4576 const OperandVector &Operands) {
4577 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4578 if (Desc.mayStore() &&
4579 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4580 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
4581 if (Loc != getInstLoc(Operands)) {
4582 Error(Loc, "TFE modifier has no meaning for store instructions");
4583 return false;
4584 }
4585 }
4586
4587 return true;
4588}
4589
4590bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4591 const SMLoc &IDLoc,
4592 const OperandVector &Operands) {
4593 if (auto ErrMsg = validateLdsDirect(Inst)) {
4594 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4595 return false;
4596 }
4597 if (!validateSOPLiteral(Inst)) {
4598 Error(getLitLoc(Operands),
4599 "only one unique literal operand is allowed");
4600 return false;
4601 }
4602 if (!validateVOPLiteral(Inst, Operands)) {
4603 return false;
4604 }
4605 if (!validateConstantBusLimitations(Inst, Operands)) {
4606 return false;
4607 }
4608 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
4609 return false;
4610 }
4611 if (!validateIntClampSupported(Inst)) {
4612 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4613 "integer clamping is not supported on this GPU");
4614 return false;
4615 }
4616 if (!validateOpSel(Inst)) {
4617 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4618 "invalid op_sel operand");
4619 return false;
4620 }
4621 if (!validateDPP(Inst, Operands)) {
4622 return false;
4623 }
4624 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4625 if (!validateMIMGD16(Inst)) {
4626 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4627 "d16 modifier is not supported on this GPU");
4628 return false;
4629 }
4630 if (!validateMIMGMSAA(Inst)) {
4631 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4632 "invalid dim; must be MSAA type");
4633 return false;
4634 }
4635 if (!validateMIMGDataSize(Inst, IDLoc)) {
4636 return false;
4637 }
4638 if (!validateMIMGAddrSize(Inst, IDLoc))
4639 return false;
4640 if (!validateMIMGAtomicDMask(Inst)) {
4641 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4642 "invalid atomic image dmask");
4643 return false;
4644 }
4645 if (!validateMIMGGatherDMask(Inst)) {
4646 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4647 "invalid image_gather dmask: only one bit must be set");
4648 return false;
4649 }
4650 if (!validateMovrels(Inst, Operands)) {
4651 return false;
4652 }
4653 if (!validateFlatOffset(Inst, Operands)) {
4654 return false;
4655 }
4656 if (!validateSMEMOffset(Inst, Operands)) {
4657 return false;
4658 }
4659 if (!validateMAIAccWrite(Inst, Operands)) {
4660 return false;
4661 }
4662 if (!validateMAISrc2(Inst, Operands)) {
4663 return false;
4664 }
4665 if (!validateMFMA(Inst, Operands)) {
4666 return false;
4667 }
4668 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4669 return false;
4670 }
4671
4672 if (!validateAGPRLdSt(Inst)) {
4673 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4674 ? "invalid register class: data and dst should be all VGPR or AGPR"
4675 : "invalid register class: agpr loads and stores not supported on this GPU"
4676 );
4677 return false;
4678 }
4679 if (!validateVGPRAlign(Inst)) {
4680 Error(IDLoc,
4681 "invalid register class: vgpr tuples must be 64 bit aligned");
4682 return false;
4683 }
4684 if (!validateDS(Inst, Operands)) {
4685 return false;
4686 }
4687
4688 if (!validateBLGP(Inst, Operands)) {
4689 return false;
4690 }
4691
4692 if (!validateDivScale(Inst)) {
4693 Error(IDLoc, "ABS not allowed in VOP3B instructions");
4694 return false;
4695 }
4696 if (!validateWaitCnt(Inst, Operands)) {
4697 return false;
4698 }
4699 if (!validateExeczVcczOperands(Operands)) {
4700 return false;
4701 }
4702 if (!validateTFE(Inst, Operands)) {
4703 return false;
4704 }
4705
4706 return true;
4707}
4708
4709 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4710 const FeatureBitset &FBS,
4711 unsigned VariantID = 0);
4712
4713static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4714 const FeatureBitset &AvailableFeatures,
4715 unsigned VariantID);
4716
4717bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4718 const FeatureBitset &FBS) {
4719 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4720}
4721
4722bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4723 const FeatureBitset &FBS,
4724 ArrayRef<unsigned> Variants) {
4725 for (auto Variant : Variants) {
4726 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4727 return true;
4728 }
4729
4730 return false;
4731}
4732
4733bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4734 const SMLoc &IDLoc) {
4735 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
4736
4737 // Check if requested instruction variant is supported.
4738 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4739 return false;
4740
4741 // This instruction is not supported.
4742 // Clear any other pending errors because they are no longer relevant.
4743 getParser().clearPendingErrors();
4744
4745 // Requested instruction variant is not supported.
4746 // Check if any other variants are supported.
4747 StringRef VariantName = getMatchedVariantName();
4748 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4749 return Error(IDLoc,
4750 Twine(VariantName,
4751 " variant of this instruction is not supported"));
4752 }
4753
4754 // Check if this instruction may be used with a different wavesize.
4755 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
4756 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
4757
4758 FeatureBitset FeaturesWS32 = getFeatureBits();
4759 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
4760 .flip(AMDGPU::FeatureWavefrontSize32);
4761 FeatureBitset AvailableFeaturesWS32 =
4762 ComputeAvailableFeatures(FeaturesWS32);
4763
4764 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
4765 return Error(IDLoc, "instruction requires wavesize=32");
4766 }
4767
4768 // Finally check if this instruction is supported on any other GPU.
4769 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4770 return Error(IDLoc, "instruction not supported on this GPU");
4771 }
4772
4773 // Instruction not supported on any GPU. Probably a typo.
4774 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4775 return Error(IDLoc, "invalid instruction" + Suggestion);
4776}
4777
4778 static bool isInvalidVOPDY(const OperandVector &Operands,
4779 uint64_t InvalidOprIdx) {
4780 assert(InvalidOprIdx < Operands.size());
4781 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
4782 if (Op.isToken() && InvalidOprIdx > 1) {
4783 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
4784 return PrevOp.isToken() && PrevOp.getToken() == "::";
4785 }
4786 return false;
4787}
4788
4789bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4790 OperandVector &Operands,
4791 MCStreamer &Out,
4792 uint64_t &ErrorInfo,
4793 bool MatchingInlineAsm) {
4794 MCInst Inst;
4795 unsigned Result = Match_Success;
4796 for (auto Variant : getMatchedVariants()) {
4797 uint64_t EI;
4798 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4799 Variant);
4800 // We order match statuses from least to most specific. We use most specific
4801 // status as resulting
4802 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4803 if ((R == Match_Success) ||
4804 (R == Match_PreferE32) ||
4805 (R == Match_MissingFeature && Result != Match_PreferE32) ||
4806 (R == Match_InvalidOperand && Result != Match_MissingFeature
4807 && Result != Match_PreferE32) ||
4808 (R == Match_MnemonicFail && Result != Match_InvalidOperand
4809 && Result != Match_MissingFeature
4810 && Result != Match_PreferE32)) {
4811 Result = R;
4812 ErrorInfo = EI;
4813 }
4814 if (R == Match_Success)
4815 break;
4816 }
4817
4818 if (Result == Match_Success) {
4819 if (!validateInstruction(Inst, IDLoc, Operands)) {
4820 return true;
4821 }
4822 Inst.setLoc(IDLoc);
4823 Out.emitInstruction(Inst, getSTI());
4824 return false;
4825 }
4826
4827 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4828 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4829 return true;
4830 }
4831
4832 switch (Result) {
4833 default: break;
4834 case Match_MissingFeature:
4835 // It has been verified that the specified instruction
4836 // mnemonic is valid. A match was found but it requires
4837 // features which are not supported on this GPU.
4838 return Error(IDLoc, "operands are not valid for this GPU or mode");
4839
4840 case Match_InvalidOperand: {
4841 SMLoc ErrorLoc = IDLoc;
4842 if (ErrorInfo != ~0ULL) {
4843 if (ErrorInfo >= Operands.size()) {
4844 return Error(IDLoc, "too few operands for instruction");
4845 }
4846 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4847 if (ErrorLoc == SMLoc())
4848 ErrorLoc = IDLoc;
4849
4851 return Error(ErrorLoc, "invalid VOPDY instruction");
4852 }
4853 return Error(ErrorLoc, "invalid operand for instruction");
4854 }
4855
4856 case Match_PreferE32:
4857 return Error(IDLoc, "internal error: instruction without _e64 suffix "
4858 "should be encoded as e32");
4859 case Match_MnemonicFail:
4860 llvm_unreachable("Invalid instructions should have been handled already");
4861 }
4862 llvm_unreachable("Implement any new match types added!");
4863}
4864
4865bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4866 int64_t Tmp = -1;
4867 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4868 return true;
4869 }
4870 if (getParser().parseAbsoluteExpression(Tmp)) {
4871 return true;
4872 }
4873 Ret = static_cast<uint32_t>(Tmp);
4874 return false;
4875}
4876
4877bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4878 uint32_t &Minor) {
4879 if (ParseAsAbsoluteExpression(Major))
4880 return TokError("invalid major version");
4881
4882 if (!trySkipToken(AsmToken::Comma))
4883 return TokError("minor version number required, comma expected");
4884
4885 if (ParseAsAbsoluteExpression(Minor))
4886 return TokError("invalid minor version");
4887
4888 return false;
4889}
4890
4891bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4892 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4893 return TokError("directive only supported for amdgcn architecture");
4894
4895 std::string TargetIDDirective;
4896 SMLoc TargetStart = getTok().getLoc();
4897 if (getParser().parseEscapedString(TargetIDDirective))
4898 return true;
4899
4900 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4901 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4902 return getParser().Error(TargetRange.Start,
4903 (Twine(".amdgcn_target directive's target id ") +
4904 Twine(TargetIDDirective) +
4905 Twine(" does not match the specified target id ") +
4906 Twine(getTargetStreamer().getTargetID()->toString())).str());
4907
4908 return false;
4909}
4910
4911bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4912 return Error(Range.Start, "value out of range", Range);
4913}
4914
4915bool AMDGPUAsmParser::calculateGPRBlocks(
4916 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4917 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
4918 unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
4919 SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4920 // TODO(scott.linder): These calculations are duplicated from
4921 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4922 IsaVersion Version = getIsaVersion(getSTI().getCPU());
4923
4924 unsigned NumVGPRs = NextFreeVGPR;
4925 unsigned NumSGPRs = NextFreeSGPR;
4926
4927 if (Version.Major >= 10)
4928 NumSGPRs = 0;
4929 else {
4930 unsigned MaxAddressableNumSGPRs =
4931 IsaInfo::getAddressableNumSGPRs(&getSTI());
4932
4933 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4934 NumSGPRs > MaxAddressableNumSGPRs)
4935 return OutOfRangeError(SGPRRange);
4936
4937 NumSGPRs +=
4938 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4939
4940 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4941 NumSGPRs > MaxAddressableNumSGPRs)
4942 return OutOfRangeError(SGPRRange);
4943
4944 if (Features.test(FeatureSGPRInitBug))
4945 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4946 }
4947
4948 VGPRBlocks =
4949 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4950 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4951
4952 return false;
4953}
4954
4955bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4956 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4957 return TokError("directive only supported for amdgcn architecture");
4958
4959 if (!isHsaAbi(getSTI()))
4960 return TokError("directive only supported for amdhsa OS");
4961
4962 StringRef KernelName;
4963 if (getParser().parseIdentifier(KernelName))
4964 return true;
4965
4966 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4967
4968 StringSet<> Seen;
4969
4970 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4971
4972 SMRange VGPRRange;
4973 uint64_t NextFreeVGPR = 0;
4974 uint64_t AccumOffset = 0;
4975 uint64_t SharedVGPRCount = 0;
4976 uint64_t PreloadLength = 0;
4977 uint64_t PreloadOffset = 0;
4978 SMRange SGPRRange;
4979 uint64_t NextFreeSGPR = 0;
4980
4981 // Count the number of user SGPRs implied from the enabled feature bits.
4982 unsigned ImpliedUserSGPRCount = 0;
4983
4984 // Track if the asm explicitly contains the directive for the user SGPR
4985 // count.
4986 std::optional<unsigned> ExplicitUserSGPRCount;
4987 bool ReserveVCC = true;
4988 bool ReserveFlatScr = true;
4989 std::optional<bool> EnableWavefrontSize32;
4990
4991 while (true) {
4992 while (trySkipToken(AsmToken::EndOfStatement));
4993
4994 StringRef ID;
4995 SMRange IDRange = getTok().getLocRange();
4996 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4997 return true;
4998
4999 if (ID == ".end_amdhsa_kernel")
5000 break;
5001
5002 if (!Seen.insert(ID).second)
5003 return TokError(".amdhsa_ directives cannot be repeated");
5004
5005 SMLoc ValStart = getLoc();
5006 int64_t IVal;
5007 if (getParser().parseAbsoluteExpression(IVal))
5008 return true;
5009 SMLoc ValEnd = getLoc();
5010 SMRange ValRange = SMRange(ValStart, ValEnd);
5011
5012 if (IVal < 0)
5013 return OutOfRangeError(ValRange);
5014
5015 uint64_t Val = IVal;
5016
5017#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5018 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
5019 return OutOfRangeError(RANGE); \
5020 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
5021
5022 if (ID == ".amdhsa_group_segment_fixed_size") {
5023 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
5024 return OutOfRangeError(ValRange);
5025 KD.group_segment_fixed_size = Val;
5026 } else if (ID == ".amdhsa_private_segment_fixed_size") {
5027 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
5028 return OutOfRangeError(ValRange);
5030 } else if (ID == ".amdhsa_kernarg_size") {
5031 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
5032 return OutOfRangeError(ValRange);
5033 KD.kernarg_size = Val;
5034 } else if (ID == ".amdhsa_user_sgpr_count") {
5035 ExplicitUserSGPRCount = Val;
5036 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5037 if (hasArchitectedFlatScratch())
5038 return Error(IDRange.Start,
5039 "directive is not supported with architected flat scratch",
5040 IDRange);
5041 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5042 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5043 Val, ValRange);
5044 if (Val)
5045 ImpliedUserSGPRCount += 4;
5046 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5047 if (!hasKernargPreload())
5048 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5049
5050 if (Val > getMaxNumUserSGPRs())
5051 return OutOfRangeError(ValRange);
5052 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val,
5053 ValRange);
5054 if (Val) {
5055 ImpliedUserSGPRCount += Val;
5056 PreloadLength = Val;
5057 }
5058 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5059 if (!hasKernargPreload())
5060 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5061
5062 if (Val >= 1024)
5063 return OutOfRangeError(ValRange);
5064 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val,
5065 ValRange);
5066 if (Val)
5067 PreloadOffset = Val;
5068 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5069 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5070 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
5071 ValRange);
5072 if (Val)
5073 ImpliedUserSGPRCount += 2;
5074 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5075 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5076 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
5077 ValRange);
5078 if (Val)
5079 ImpliedUserSGPRCount += 2;
5080 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5081 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5082 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5083 Val, ValRange);
5084 if (Val)
5085 ImpliedUserSGPRCount += 2;
5086 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5087 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5088 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
5089 ValRange);
5090 if (Val)
5091 ImpliedUserSGPRCount += 2;
5092 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5093 if (hasArchitectedFlatScratch())
5094 return Error(IDRange.Start,
5095 "directive is not supported with architected flat scratch",
5096 IDRange);
5097 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5098 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
5099 ValRange);
5100 if (Val)
5101 ImpliedUserSGPRCount += 2;
5102 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5103 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5104 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5105 Val, ValRange);
5106 if (Val)
5107 ImpliedUserSGPRCount += 1;
5108 } else if (ID == ".amdhsa_wavefront_size32") {
5109 if (IVersion.Major < 10)
5110 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5111 EnableWavefrontSize32 = Val;
5112 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5113 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5114 Val, ValRange);
5115 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5116 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5117 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
5118 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5119 if (hasArchitectedFlatScratch())
5120 return Error(IDRange.Start,
5121 "directive is not supported with architected flat scratch",
5122 IDRange);
5123 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5124 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5125 } else if (ID == ".amdhsa_enable_private_segment") {
5126 if (!hasArchitectedFlatScratch())
5127 return Error(
5128 IDRange.Start,
5129 "directive is not supported without architected flat scratch",
5130 IDRange);
5131 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5132 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5133 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5134 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5135 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5136 ValRange);
5137 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5138 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5139 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5140 ValRange);
5141 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5142 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5143 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5144 ValRange);
5145 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5146 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5147 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5148 ValRange);
5149 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5150 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5151 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5152 ValRange);
5153 } else if (ID == ".amdhsa_next_free_vgpr") {
5154 VGPRRange = ValRange;
5155 NextFreeVGPR = Val;
5156 } else if (ID == ".amdhsa_next_free_sgpr") {
5157 SGPRRange = ValRange;
5158 NextFreeSGPR = Val;
5159 } else if (ID == ".amdhsa_accum_offset") {
5160 if (!isGFX90A())
5161 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5162 AccumOffset = Val;
5163 } else if (ID == ".amdhsa_reserve_vcc") {
5164 if (!isUInt<1>(Val))
5165 return OutOfRangeError(ValRange);
5166 ReserveVCC = Val;
5167 } else if (ID == ".amdhsa_reserve_flat_scratch") {
5168 if (IVersion.Major < 7)
5169 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5170 if (hasArchitectedFlatScratch())
5171 return Error(IDRange.Start,
5172 "directive is not supported with architected flat scratch",
5173 IDRange);
5174 if (!isUInt<1>(Val))
5175 return OutOfRangeError(ValRange);
5176 ReserveFlatScr = Val;
5177 } else if (ID == ".amdhsa_reserve_xnack_mask") {
5178 if (IVersion.Major < 8)
5179 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5180 if (!isUInt<1>(Val))
5181 return OutOfRangeError(ValRange);
5182 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5183 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5184 IDRange);
5185 } else if (ID == ".amdhsa_float_round_mode_32") {
5186 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5187 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5188 } else if (ID == ".amdhsa_float_round_mode_16_64") {
5189 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5190 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5191 } else if (ID == ".amdhsa_float_denorm_mode_32") {
5192 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5193 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5194 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5195 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5196 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5197 ValRange);
5198 } else if (ID == ".amdhsa_dx10_clamp") {
5199 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5200 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5201 } else if (ID == ".amdhsa_ieee_mode") {
5202 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5203 Val, ValRange);
5204 } else if (ID == ".amdhsa_fp16_overflow") {
5205 if (IVersion.Major < 9)
5206 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5207 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5208 ValRange);
5209 } else if (ID == ".amdhsa_tg_split") {
5210 if (!isGFX90A())
5211 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5212 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5213 ValRange);
5214 } else if (ID == ".amdhsa_workgroup_processor_mode") {
5215 if (IVersion.Major < 10)
5216 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5217 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5218 ValRange);
5219 } else if (ID == ".amdhsa_memory_ordered") {
5220 if (IVersion.Major < 10)
5221 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5222 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5223 ValRange);
5224 } else if (ID == ".amdhsa_forward_progress") {
5225 if (IVersion.Major < 10)
5226 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5227 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5228 ValRange);
5229 } else if (ID == ".amdhsa_shared_vgpr_count") {
5230 if (IVersion.Major < 10)
5231 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5232 SharedVGPRCount = Val;
5233 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5234 COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
5235 ValRange);
5236 } else if (