LLVM 17.0.0git
AMDGPUAsmParser.cpp
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
12#include "SIDefines.h"
13#include "SIInstrInfo.h"
14#include "SIRegisterInfo.h"
19#include "llvm/ADT/APFloat.h"
21#include "llvm/ADT/StringSet.h"
22#include "llvm/ADT/Twine.h"
25#include "llvm/MC/MCAsmInfo.h"
26#include "llvm/MC/MCContext.h"
27#include "llvm/MC/MCExpr.h"
28#include "llvm/MC/MCInst.h"
29#include "llvm/MC/MCInstrDesc.h"
34#include "llvm/MC/MCSymbol.h"
41#include <optional>
42
43using namespace llvm;
44using namespace llvm::AMDGPU;
45using namespace llvm::amdhsa;
46
47namespace {
48
49class AMDGPUAsmParser;
50
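// Classification of a parsed register operand: vector, scalar, or accumulator
// GPR, a trap-temporary (TTMP) register, or a special register such as VCC/EXEC.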
51enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
52
53//===----------------------------------------------------------------------===//
54// Operand
55//===----------------------------------------------------------------------===//
56
57class AMDGPUOperand : public MCParsedAsmOperand {
58 enum KindTy {
59 Token,
60 Immediate,
61 Register,
62 Expression
63 } Kind;
64
65 SMLoc StartLoc, EndLoc;
66 const AMDGPUAsmParser *AsmParser;
67
68public:
69 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
70 : Kind(Kind_), AsmParser(AsmParser_) {}
71
72 using Ptr = std::unique_ptr<AMDGPUOperand>;
73
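// Source-operand modifiers parsed from the assembly text, e.g. '-|v0|' sets
// Neg and Abs, and 'sext(v0)' sets Sext. They are packed into the
// src_modifiers operand encoding (SISrcMods) below.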
74 struct Modifiers {
75 bool Abs = false;
76 bool Neg = false;
77 bool Sext = false;
78
79 bool hasFPModifiers() const { return Abs || Neg; }
80 bool hasIntModifiers() const { return Sext; }
81 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
82
83 int64_t getFPModifiersOperand() const {
84 int64_t Operand = 0;
85 Operand |= Abs ? SISrcMods::ABS : 0u;
86 Operand |= Neg ? SISrcMods::NEG : 0u;
87 return Operand;
88 }
89
90 int64_t getIntModifiersOperand() const {
91 int64_t Operand = 0;
92 Operand |= Sext ? SISrcMods::SEXT : 0u;
93 return Operand;
94 }
95
96 int64_t getModifiersOperand() const {
97 assert(!(hasFPModifiers() && hasIntModifiers())
98 && "fp and int modifiers should not be used simultaneously");
99 if (hasFPModifiers()) {
100 return getFPModifiersOperand();
101 } else if (hasIntModifiers()) {
102 return getIntModifiersOperand();
103 } else {
104 return 0;
105 }
106 }
107
108 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
109 };
110
111 enum ImmTy {
112 ImmTyNone,
113 ImmTyGDS,
114 ImmTyLDS,
115 ImmTyOffen,
116 ImmTyIdxen,
117 ImmTyAddr64,
118 ImmTyOffset,
119 ImmTyInstOffset,
120 ImmTyOffset0,
121 ImmTyOffset1,
122 ImmTySMEMOffsetMod,
123 ImmTyCPol,
124 ImmTySWZ,
125 ImmTyTFE,
126 ImmTyD16,
127 ImmTyClampSI,
128 ImmTyOModSI,
129 ImmTySDWADstSel,
130 ImmTySDWASrc0Sel,
131 ImmTySDWASrc1Sel,
132 ImmTySDWADstUnused,
133 ImmTyDMask,
134 ImmTyDim,
135 ImmTyUNorm,
136 ImmTyDA,
137 ImmTyR128A16,
138 ImmTyA16,
139 ImmTyLWE,
140 ImmTyExpTgt,
141 ImmTyExpCompr,
142 ImmTyExpVM,
143 ImmTyFORMAT,
144 ImmTyHwreg,
145 ImmTyOff,
146 ImmTySendMsg,
147 ImmTyInterpSlot,
148 ImmTyInterpAttr,
149 ImmTyAttrChan,
150 ImmTyOpSel,
151 ImmTyOpSelHi,
152 ImmTyNegLo,
153 ImmTyNegHi,
154 ImmTyDPP8,
155 ImmTyDppCtrl,
156 ImmTyDppRowMask,
157 ImmTyDppBankMask,
158 ImmTyDppBoundCtrl,
159 ImmTyDppFI,
160 ImmTySwizzle,
161 ImmTyGprIdxMode,
162 ImmTyHigh,
163 ImmTyBLGP,
164 ImmTyCBSZ,
165 ImmTyABID,
166 ImmTyEndpgm,
167 ImmTyWaitVDST,
168 ImmTyWaitEXP,
169 };
170
171 // Immediate operand kind.
172 // It helps to identify the location of an offending operand after an error.
173 // Note that regular literals and mandatory literals (KImm) must be handled
174 // differently. When looking for an offending operand, we should usually
175 // ignore mandatory literals because they are part of the instruction and
176 // cannot be changed. Report location of mandatory operands only for VOPD,
177 // when both OpX and OpY have a KImm and there are no other literals.
178 enum ImmKindTy {
179 ImmKindTyNone,
180 ImmKindTyLiteral,
181 ImmKindTyMandatoryLiteral,
182 ImmKindTyConst,
183 };
184
185private:
186 struct TokOp {
187 const char *Data;
188 unsigned Length;
189 };
190
191 struct ImmOp {
192 int64_t Val;
193 ImmTy Type;
194 bool IsFPImm;
195 mutable ImmKindTy Kind;
196 Modifiers Mods;
197 };
198
199 struct RegOp {
200 unsigned RegNo;
201 Modifiers Mods;
202 };
203
204 union {
205 TokOp Tok;
206 ImmOp Imm;
207 RegOp Reg;
208 const MCExpr *Expr;
209 };
210
211public:
212 bool isToken() const override { return Kind == Token; }
213
214 bool isSymbolRefExpr() const {
215 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
216 }
217
218 bool isImm() const override {
219 return Kind == Immediate;
220 }
221
222 void setImmKindNone() const {
223 assert(isImm());
224 Imm.Kind = ImmKindTyNone;
225 }
226
227 void setImmKindLiteral() const {
228 assert(isImm());
229 Imm.Kind = ImmKindTyLiteral;
230 }
231
232 void setImmKindMandatoryLiteral() const {
233 assert(isImm());
234 Imm.Kind = ImmKindTyMandatoryLiteral;
235 }
236
237 void setImmKindConst() const {
238 assert(isImm());
239 Imm.Kind = ImmKindTyConst;
240 }
241
242 bool IsImmKindLiteral() const {
243 return isImm() && Imm.Kind == ImmKindTyLiteral;
244 }
245
246 bool IsImmKindMandatoryLiteral() const {
247 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
248 }
249
250 bool isImmKindConst() const {
251 return isImm() && Imm.Kind == ImmKindTyConst;
252 }
253
254 bool isInlinableImm(MVT type) const;
255 bool isLiteralImm(MVT type) const;
256
257 bool isRegKind() const {
258 return Kind == Register;
259 }
260
261 bool isReg() const override {
262 return isRegKind() && !hasModifiers();
263 }
264
265 bool isRegOrInline(unsigned RCID, MVT type) const {
266 return isRegClass(RCID) || isInlinableImm(type);
267 }
268
269 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
270 return isRegOrInline(RCID, type) || isLiteralImm(type);
271 }
272
273 bool isRegOrImmWithInt16InputMods() const {
274 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
275 }
276
277 bool isRegOrImmWithInt32InputMods() const {
278 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
279 }
280
281 bool isRegOrInlineImmWithInt16InputMods() const {
282 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
283 }
284
285 bool isRegOrInlineImmWithInt32InputMods() const {
286 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
287 }
288
289 bool isRegOrImmWithInt64InputMods() const {
290 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
291 }
292
293 bool isRegOrImmWithFP16InputMods() const {
294 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
295 }
296
297 bool isRegOrImmWithFP32InputMods() const {
298 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
299 }
300
301 bool isRegOrImmWithFP64InputMods() const {
302 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
303 }
304
305 bool isRegOrInlineImmWithFP16InputMods() const {
306 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
307 }
308
309 bool isRegOrInlineImmWithFP32InputMods() const {
310 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
311 }
312
313
314 bool isVReg() const {
315 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
316 isRegClass(AMDGPU::VReg_64RegClassID) ||
317 isRegClass(AMDGPU::VReg_96RegClassID) ||
318 isRegClass(AMDGPU::VReg_128RegClassID) ||
319 isRegClass(AMDGPU::VReg_160RegClassID) ||
320 isRegClass(AMDGPU::VReg_192RegClassID) ||
321 isRegClass(AMDGPU::VReg_256RegClassID) ||
322 isRegClass(AMDGPU::VReg_512RegClassID) ||
323 isRegClass(AMDGPU::VReg_1024RegClassID);
324 }
325
326 bool isVReg32() const {
327 return isRegClass(AMDGPU::VGPR_32RegClassID);
328 }
329
330 bool isVReg32OrOff() const {
331 return isOff() || isVReg32();
332 }
333
334 bool isNull() const {
335 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
336 }
337
338 bool isVRegWithInputMods() const;
339 bool isT16VRegWithInputMods() const;
340
341 bool isSDWAOperand(MVT type) const;
342 bool isSDWAFP16Operand() const;
343 bool isSDWAFP32Operand() const;
344 bool isSDWAInt16Operand() const;
345 bool isSDWAInt32Operand() const;
346
347 bool isImmTy(ImmTy ImmT) const {
348 return isImm() && Imm.Type == ImmT;
349 }
350
351 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
352
353 bool isImmModifier() const {
354 return isImm() && Imm.Type != ImmTyNone;
355 }
356
357 bool isClampSI() const { return isImmTy(ImmTyClampSI); }
358 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
359 bool isDMask() const { return isImmTy(ImmTyDMask); }
360 bool isDim() const { return isImmTy(ImmTyDim); }
361 bool isUNorm() const { return isImmTy(ImmTyUNorm); }
362 bool isDA() const { return isImmTy(ImmTyDA); }
363 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
364 bool isA16() const { return isImmTy(ImmTyA16); }
365 bool isLWE() const { return isImmTy(ImmTyLWE); }
366 bool isOff() const { return isImmTy(ImmTyOff); }
367 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
368 bool isExpVM() const { return isImmTy(ImmTyExpVM); }
369 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
370 bool isOffen() const { return isImmTy(ImmTyOffen); }
371 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
372 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
373 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
374 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
375 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
376 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
377 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
378 bool isGDS() const { return isImmTy(ImmTyGDS); }
379 bool isLDS() const { return isImmTy(ImmTyLDS); }
380 bool isCPol() const { return isImmTy(ImmTyCPol); }
381 bool isSWZ() const { return isImmTy(ImmTySWZ); }
382 bool isTFE() const { return isImmTy(ImmTyTFE); }
383 bool isD16() const { return isImmTy(ImmTyD16); }
384 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
385 bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
386 bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
387 bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
388 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
389 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
390 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
391 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
392 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
393 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
394 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
395 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
396 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
397 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
398 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
399 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
400 bool isHigh() const { return isImmTy(ImmTyHigh); }
401
402 bool isRegOrImm() const {
403 return isReg() || isImm();
404 }
405
406 bool isRegClass(unsigned RCID) const;
407
408 bool isInlineValue() const;
409
410 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
411 return isRegOrInline(RCID, type) && !hasModifiers();
412 }
413
414 bool isSCSrcB16() const {
415 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
416 }
417
418 bool isSCSrcV2B16() const {
419 return isSCSrcB16();
420 }
421
422 bool isSCSrcB32() const {
423 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
424 }
425
426 bool isSCSrcB64() const {
427 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
428 }
429
430 bool isBoolReg() const;
431
432 bool isSCSrcF16() const {
433 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
434 }
435
436 bool isSCSrcV2F16() const {
437 return isSCSrcF16();
438 }
439
440 bool isSCSrcF32() const {
441 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
442 }
443
444 bool isSCSrcF64() const {
445 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
446 }
447
448 bool isSSrcB32() const {
449 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
450 }
451
452 bool isSSrcB16() const {
453 return isSCSrcB16() || isLiteralImm(MVT::i16);
454 }
455
456 bool isSSrcV2B16() const {
457 llvm_unreachable("cannot happen");
458 return isSSrcB16();
459 }
460
461 bool isSSrcB64() const {
462 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
463 // See isVSrc64().
464 return isSCSrcB64() || isLiteralImm(MVT::i64);
465 }
466
467 bool isSSrcF32() const {
468 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
469 }
470
471 bool isSSrcF64() const {
472 return isSCSrcB64() || isLiteralImm(MVT::f64);
473 }
474
475 bool isSSrcF16() const {
476 return isSCSrcB16() || isLiteralImm(MVT::f16);
477 }
478
479 bool isSSrcV2F16() const {
480 llvm_unreachable("cannot happen");
481 return isSSrcF16();
482 }
483
484 bool isSSrcV2FP32() const {
485 llvm_unreachable("cannot happen");
486 return isSSrcF32();
487 }
488
489 bool isSCSrcV2FP32() const {
490 llvm_unreachable("cannot happen");
491 return isSCSrcF32();
492 }
493
494 bool isSSrcV2INT32() const {
495 llvm_unreachable("cannot happen");
496 return isSSrcB32();
497 }
498
499 bool isSCSrcV2INT32() const {
500 llvm_unreachable("cannot happen");
501 return isSCSrcB32();
502 }
503
504 bool isSSrcOrLdsB32() const {
505 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
506 isLiteralImm(MVT::i32) || isExpr();
507 }
508
509 bool isVCSrcB32() const {
510 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
511 }
512
513 bool isVCSrcB64() const {
514 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
515 }
516
517 bool isVCSrcTB16_Lo128() const {
518 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
519 }
520
521 bool isVCSrcB16() const {
522 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
523 }
524
525 bool isVCSrcV2B16() const {
526 return isVCSrcB16();
527 }
528
529 bool isVCSrcF32() const {
530 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
531 }
532
533 bool isVCSrcF64() const {
534 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
535 }
536
537 bool isVCSrcTF16_Lo128() const {
538 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
539 }
540
541 bool isVCSrcF16() const {
542 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
543 }
544
545 bool isVCSrcV2F16() const {
546 return isVCSrcF16();
547 }
548
549 bool isVSrcB32() const {
550 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
551 }
552
553 bool isVSrcB64() const {
554 return isVCSrcF64() || isLiteralImm(MVT::i64);
555 }
556
557 bool isVSrcTB16_Lo128() const {
558 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
559 }
560
561 bool isVSrcB16() const {
562 return isVCSrcB16() || isLiteralImm(MVT::i16);
563 }
564
565 bool isVSrcV2B16() const {
566 return isVSrcB16() || isLiteralImm(MVT::v2i16);
567 }
568
569 bool isVCSrcV2FP32() const {
570 return isVCSrcF64();
571 }
572
573 bool isVSrcV2FP32() const {
574 return isVSrcF64() || isLiteralImm(MVT::v2f32);
575 }
576
577 bool isVCSrcV2INT32() const {
578 return isVCSrcB64();
579 }
580
581 bool isVSrcV2INT32() const {
582 return isVSrcB64() || isLiteralImm(MVT::v2i32);
583 }
584
585 bool isVSrcF32() const {
586 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
587 }
588
589 bool isVSrcF64() const {
590 return isVCSrcF64() || isLiteralImm(MVT::f64);
591 }
592
593 bool isVSrcTF16_Lo128() const {
594 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
595 }
596
597 bool isVSrcF16() const {
598 return isVCSrcF16() || isLiteralImm(MVT::f16);
599 }
600
601 bool isVSrcV2F16() const {
602 return isVSrcF16() || isLiteralImm(MVT::v2f16);
603 }
604
605 bool isVISrcB32() const {
606 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
607 }
608
609 bool isVISrcB16() const {
610 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
611 }
612
613 bool isVISrcV2B16() const {
614 return isVISrcB16();
615 }
616
617 bool isVISrcF32() const {
618 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
619 }
620
621 bool isVISrcF16() const {
622 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
623 }
624
625 bool isVISrcV2F16() const {
626 return isVISrcF16() || isVISrcB32();
627 }
628
629 bool isVISrc_64B64() const {
630 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
631 }
632
633 bool isVISrc_64F64() const {
634 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
635 }
636
637 bool isVISrc_64V2FP32() const {
638 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
639 }
640
641 bool isVISrc_64V2INT32() const {
642 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
643 }
644
645 bool isVISrc_256B64() const {
646 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
647 }
648
649 bool isVISrc_256F64() const {
650 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
651 }
652
653 bool isVISrc_128B16() const {
654 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
655 }
656
657 bool isVISrc_128V2B16() const {
658 return isVISrc_128B16();
659 }
660
661 bool isVISrc_128B32() const {
662 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
663 }
664
665 bool isVISrc_128F32() const {
666 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
667 }
668
669 bool isVISrc_256V2FP32() const {
670 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
671 }
672
673 bool isVISrc_256V2INT32() const {
674 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
675 }
676
677 bool isVISrc_512B32() const {
678 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
679 }
680
681 bool isVISrc_512B16() const {
682 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
683 }
684
685 bool isVISrc_512V2B16() const {
686 return isVISrc_512B16();
687 }
688
689 bool isVISrc_512F32() const {
690 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
691 }
692
693 bool isVISrc_512F16() const {
694 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
695 }
696
697 bool isVISrc_512V2F16() const {
698 return isVISrc_512F16() || isVISrc_512B32();
699 }
700
701 bool isVISrc_1024B32() const {
702 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
703 }
704
705 bool isVISrc_1024B16() const {
706 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
707 }
708
709 bool isVISrc_1024V2B16() const {
710 return isVISrc_1024B16();
711 }
712
713 bool isVISrc_1024F32() const {
714 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
715 }
716
717 bool isVISrc_1024F16() const {
718 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
719 }
720
721 bool isVISrc_1024V2F16() const {
722 return isVISrc_1024F16() || isVISrc_1024B32();
723 }
724
725 bool isAISrcB32() const {
726 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
727 }
728
729 bool isAISrcB16() const {
730 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
731 }
732
733 bool isAISrcV2B16() const {
734 return isAISrcB16();
735 }
736
737 bool isAISrcF32() const {
738 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
739 }
740
741 bool isAISrcF16() const {
742 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
743 }
744
745 bool isAISrcV2F16() const {
746 return isAISrcF16() || isAISrcB32();
747 }
748
749 bool isAISrc_64B64() const {
750 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
751 }
752
753 bool isAISrc_64F64() const {
754 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
755 }
756
757 bool isAISrc_128B32() const {
758 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
759 }
760
761 bool isAISrc_128B16() const {
762 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
763 }
764
765 bool isAISrc_128V2B16() const {
766 return isAISrc_128B16();
767 }
768
769 bool isAISrc_128F32() const {
770 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
771 }
772
773 bool isAISrc_128F16() const {
774 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
775 }
776
777 bool isAISrc_128V2F16() const {
778 return isAISrc_128F16() || isAISrc_128B32();
779 }
780
781 bool isVISrc_128F16() const {
782 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
783 }
784
785 bool isVISrc_128V2F16() const {
786 return isVISrc_128F16() || isVISrc_128B32();
787 }
788
789 bool isAISrc_256B64() const {
790 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
791 }
792
793 bool isAISrc_256F64() const {
794 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
795 }
796
797 bool isAISrc_512B32() const {
798 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
799 }
800
801 bool isAISrc_512B16() const {
802 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
803 }
804
805 bool isAISrc_512V2B16() const {
806 return isAISrc_512B16();
807 }
808
809 bool isAISrc_512F32() const {
810 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
811 }
812
813 bool isAISrc_512F16() const {
814 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
815 }
816
817 bool isAISrc_512V2F16() const {
818 return isAISrc_512F16() || isAISrc_512B32();
819 }
820
821 bool isAISrc_1024B32() const {
822 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
823 }
824
825 bool isAISrc_1024B16() const {
826 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
827 }
828
829 bool isAISrc_1024V2B16() const {
830 return isAISrc_1024B16();
831 }
832
833 bool isAISrc_1024F32() const {
834 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
835 }
836
837 bool isAISrc_1024F16() const {
838 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
839 }
840
841 bool isAISrc_1024V2F16() const {
842 return isAISrc_1024F16() || isAISrc_1024B32();
843 }
844
845 bool isKImmFP32() const {
846 return isLiteralImm(MVT::f32);
847 }
848
849 bool isKImmFP16() const {
850 return isLiteralImm(MVT::f16);
851 }
852
853 bool isMem() const override {
854 return false;
855 }
856
857 bool isExpr() const {
858 return Kind == Expression;
859 }
860
861 bool isSoppBrTarget() const {
862 return isExpr() || isImm();
863 }
864
865 bool isSWaitCnt() const;
866 bool isDepCtr() const;
867 bool isSDelayAlu() const;
868 bool isHwreg() const;
869 bool isSendMsg() const;
870 bool isSwizzle() const;
871 bool isSMRDOffset8() const;
872 bool isSMEMOffset() const;
873 bool isSMRDLiteralOffset() const;
874 bool isDPP8() const;
875 bool isDPPCtrl() const;
876 bool isBLGP() const;
877 bool isCBSZ() const;
878 bool isABID() const;
879 bool isGPRIdxMode() const;
880 bool isS16Imm() const;
881 bool isU16Imm() const;
882 bool isEndpgm() const;
883 bool isWaitVDST() const;
884 bool isWaitEXP() const;
885
886 StringRef getToken() const {
887 assert(isToken());
888 return StringRef(Tok.Data, Tok.Length);
889 }
890
891 int64_t getImm() const {
892 assert(isImm());
893 return Imm.Val;
894 }
895
896 void setImm(int64_t Val) {
897 assert(isImm());
898 Imm.Val = Val;
899 }
900
901 ImmTy getImmTy() const {
902 assert(isImm());
903 return Imm.Type;
904 }
905
906 unsigned getReg() const override {
907 assert(isRegKind());
908 return Reg.RegNo;
909 }
910
911 SMLoc getStartLoc() const override {
912 return StartLoc;
913 }
914
915 SMLoc getEndLoc() const override {
916 return EndLoc;
917 }
918
919 SMRange getLocRange() const {
920 return SMRange(StartLoc, EndLoc);
921 }
922
923 Modifiers getModifiers() const {
924 assert(isRegKind() || isImmTy(ImmTyNone));
925 return isRegKind() ? Reg.Mods : Imm.Mods;
926 }
927
928 void setModifiers(Modifiers Mods) {
929 assert(isRegKind() || isImmTy(ImmTyNone));
930 if (isRegKind())
931 Reg.Mods = Mods;
932 else
933 Imm.Mods = Mods;
934 }
935
936 bool hasModifiers() const {
937 return getModifiers().hasModifiers();
938 }
939
940 bool hasFPModifiers() const {
941 return getModifiers().hasFPModifiers();
942 }
943
944 bool hasIntModifiers() const {
945 return getModifiers().hasIntModifiers();
946 }
947
948 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
949
950 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
951
952 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
953
954 template <unsigned Bitwidth>
955 void addKImmFPOperands(MCInst &Inst, unsigned N) const;
956
957 void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
958 addKImmFPOperands<16>(Inst, N);
959 }
960
961 void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
962 addKImmFPOperands<32>(Inst, N);
963 }
964
965 void addRegOperands(MCInst &Inst, unsigned N) const;
966
967 void addBoolRegOperands(MCInst &Inst, unsigned N) const {
968 addRegOperands(Inst, N);
969 }
970
971 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
972 if (isRegKind())
973 addRegOperands(Inst, N);
974 else if (isExpr())
975 Inst.addOperand(MCOperand::createExpr(Expr));
976 else
977 addImmOperands(Inst, N);
978 }
979
980 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
981 Modifiers Mods = getModifiers();
982 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
983 if (isRegKind()) {
984 addRegOperands(Inst, N);
985 } else {
986 addImmOperands(Inst, N, false);
987 }
988 }
989
990 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
991 assert(!hasIntModifiers());
992 addRegOrImmWithInputModsOperands(Inst, N);
993 }
994
995 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
996 assert(!hasFPModifiers());
997 addRegOrImmWithInputModsOperands(Inst, N);
998 }
999
1000 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1001 Modifiers Mods = getModifiers();
1002 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1003 assert(isRegKind());
1004 addRegOperands(Inst, N);
1005 }
1006
1007 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1008 assert(!hasIntModifiers());
1009 addRegWithInputModsOperands(Inst, N);
1010 }
1011
1012 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1013 assert(!hasFPModifiers());
1014 addRegWithInputModsOperands(Inst, N);
1015 }
1016
1017 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
1018 if (isImm())
1019 addImmOperands(Inst, N);
1020 else {
1021 assert(isExpr());
1022 Inst.addOperand(MCOperand::createExpr(Expr));
1023 }
1024 }
1025
1026 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1027 switch (Type) {
1028 case ImmTyNone: OS << "None"; break;
1029 case ImmTyGDS: OS << "GDS"; break;
1030 case ImmTyLDS: OS << "LDS"; break;
1031 case ImmTyOffen: OS << "Offen"; break;
1032 case ImmTyIdxen: OS << "Idxen"; break;
1033 case ImmTyAddr64: OS << "Addr64"; break;
1034 case ImmTyOffset: OS << "Offset"; break;
1035 case ImmTyInstOffset: OS << "InstOffset"; break;
1036 case ImmTyOffset0: OS << "Offset0"; break;
1037 case ImmTyOffset1: OS << "Offset1"; break;
1038 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1039 case ImmTyCPol: OS << "CPol"; break;
1040 case ImmTySWZ: OS << "SWZ"; break;
1041 case ImmTyTFE: OS << "TFE"; break;
1042 case ImmTyD16: OS << "D16"; break;
1043 case ImmTyFORMAT: OS << "FORMAT"; break;
1044 case ImmTyClampSI: OS << "ClampSI"; break;
1045 case ImmTyOModSI: OS << "OModSI"; break;
1046 case ImmTyDPP8: OS << "DPP8"; break;
1047 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1048 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1049 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1050 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1051 case ImmTyDppFI: OS << "DppFI"; break;
1052 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1053 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1054 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1055 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1056 case ImmTyDMask: OS << "DMask"; break;
1057 case ImmTyDim: OS << "Dim"; break;
1058 case ImmTyUNorm: OS << "UNorm"; break;
1059 case ImmTyDA: OS << "DA"; break;
1060 case ImmTyR128A16: OS << "R128A16"; break;
1061 case ImmTyA16: OS << "A16"; break;
1062 case ImmTyLWE: OS << "LWE"; break;
1063 case ImmTyOff: OS << "Off"; break;
1064 case ImmTyExpTgt: OS << "ExpTgt"; break;
1065 case ImmTyExpCompr: OS << "ExpCompr"; break;
1066 case ImmTyExpVM: OS << "ExpVM"; break;
1067 case ImmTyHwreg: OS << "Hwreg"; break;
1068 case ImmTySendMsg: OS << "SendMsg"; break;
1069 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1070 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1071 case ImmTyAttrChan: OS << "AttrChan"; break;
1072 case ImmTyOpSel: OS << "OpSel"; break;
1073 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1074 case ImmTyNegLo: OS << "NegLo"; break;
1075 case ImmTyNegHi: OS << "NegHi"; break;
1076 case ImmTySwizzle: OS << "Swizzle"; break;
1077 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1078 case ImmTyHigh: OS << "High"; break;
1079 case ImmTyBLGP: OS << "BLGP"; break;
1080 case ImmTyCBSZ: OS << "CBSZ"; break;
1081 case ImmTyABID: OS << "ABID"; break;
1082 case ImmTyEndpgm: OS << "Endpgm"; break;
1083 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1084 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1085 }
1086 }
1087
1088 void print(raw_ostream &OS) const override {
1089 switch (Kind) {
1090 case Register:
1091 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1092 break;
1093 case Immediate:
1094 OS << '<' << getImm();
1095 if (getImmTy() != ImmTyNone) {
1096 OS << " type: "; printImmTy(OS, getImmTy());
1097 }
1098 OS << " mods: " << Imm.Mods << '>';
1099 break;
1100 case Token:
1101 OS << '\'' << getToken() << '\'';
1102 break;
1103 case Expression:
1104 OS << "<expr " << *Expr << '>';
1105 break;
1106 }
1107 }
1108
1109 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1110 int64_t Val, SMLoc Loc,
1111 ImmTy Type = ImmTyNone,
1112 bool IsFPImm = false) {
1113 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1114 Op->Imm.Val = Val;
1115 Op->Imm.IsFPImm = IsFPImm;
1116 Op->Imm.Kind = ImmKindTyNone;
1117 Op->Imm.Type = Type;
1118 Op->Imm.Mods = Modifiers();
1119 Op->StartLoc = Loc;
1120 Op->EndLoc = Loc;
1121 return Op;
1122 }
1123
1124 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1125 StringRef Str, SMLoc Loc,
1126 bool HasExplicitEncodingSize = true) {
1127 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1128 Res->Tok.Data = Str.data();
1129 Res->Tok.Length = Str.size();
1130 Res->StartLoc = Loc;
1131 Res->EndLoc = Loc;
1132 return Res;
1133 }
1134
1135 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1136 unsigned RegNo, SMLoc S,
1137 SMLoc E) {
1138 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1139 Op->Reg.RegNo = RegNo;
1140 Op->Reg.Mods = Modifiers();
1141 Op->StartLoc = S;
1142 Op->EndLoc = E;
1143 return Op;
1144 }
1145
1146 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1147 const class MCExpr *Expr, SMLoc S) {
1148 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1149 Op->Expr = Expr;
1150 Op->StartLoc = S;
1151 Op->EndLoc = S;
1152 return Op;
1153 }
1154};
1155
1156raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1157 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1158 return OS;
1159}
1160
1161//===----------------------------------------------------------------------===//
1162// AsmParser
1163//===----------------------------------------------------------------------===//
1164
1165// Holds info related to the current kernel, e.g. count of SGPRs used.
1166// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1167// .amdgpu_hsa_kernel or at EOF.
1168class KernelScopeInfo {
1169 int SgprIndexUnusedMin = -1;
1170 int VgprIndexUnusedMin = -1;
1171 int AgprIndexUnusedMin = -1;
1172 MCContext *Ctx = nullptr;
1173 MCSubtargetInfo const *MSTI = nullptr;
1174
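// Record that SGPR index i is in use and keep the .kernel.sgpr_count symbol
// updated with the resulting count.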
1175 void usesSgprAt(int i) {
1176 if (i >= SgprIndexUnusedMin) {
1177 SgprIndexUnusedMin = ++i;
1178 if (Ctx) {
1179 MCSymbol* const Sym =
1180 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1181 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1182 }
1183 }
1184 }
1185
1186 void usesVgprAt(int i) {
1187 if (i >= VgprIndexUnusedMin) {
1188 VgprIndexUnusedMin = ++i;
1189 if (Ctx) {
1190 MCSymbol* const Sym =
1191 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1192 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1193 VgprIndexUnusedMin);
1194 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1195 }
1196 }
1197 }
1198
1199 void usesAgprAt(int i) {
1200 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1201 if (!hasMAIInsts(*MSTI))
1202 return;
1203
1204 if (i >= AgprIndexUnusedMin) {
1205 AgprIndexUnusedMin = ++i;
1206 if (Ctx) {
1207 MCSymbol* const Sym =
1208 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1209 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1210
1211 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1212 MCSymbol* const vSym =
1213 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1214 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1215 VgprIndexUnusedMin);
1216 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1217 }
1218 }
1219 }
1220
1221public:
1222 KernelScopeInfo() = default;
1223
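// Reset the tracked register counts and (re)initialize the count symbols at
// the start of a new kernel scope.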
1224 void initialize(MCContext &Context) {
1225 Ctx = &Context;
1226 MSTI = Ctx->getSubtargetInfo();
1227
1228 usesSgprAt(SgprIndexUnusedMin = -1);
1229 usesVgprAt(VgprIndexUnusedMin = -1);
1230 if (hasMAIInsts(*MSTI)) {
1231 usesAgprAt(AgprIndexUnusedMin = -1);
1232 }
1233 }
1234
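// Called for each parsed register operand; bumps the per-kind high-water mark
// based on the register's dword index and width (in bits).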
1235 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1236 unsigned RegWidth) {
1237 switch (RegKind) {
1238 case IS_SGPR:
1239 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1240 break;
1241 case IS_AGPR:
1242 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1243 break;
1244 case IS_VGPR:
1245 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1246 break;
1247 default:
1248 break;
1249 }
1250 }
1251};
1252
1253class AMDGPUAsmParser : public MCTargetAsmParser {
1254 MCAsmParser &Parser;
1255
1256 unsigned ForcedEncodingSize = 0;
1257 bool ForcedDPP = false;
1258 bool ForcedSDWA = false;
1259 KernelScopeInfo KernelScope;
1260
1261 /// @name Auto-generated Match Functions
1262 /// {
1263
1264#define GET_ASSEMBLER_HEADER
1265#include "AMDGPUGenAsmMatcher.inc"
1266
1267 /// }
1268
1269private:
1270 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1271 bool OutOfRangeError(SMRange Range);
1272 /// Calculate VGPR/SGPR blocks required for given target, reserved
1273 /// registers, and user-specified NextFreeXGPR values.
1274 ///
1275 /// \param Features [in] Target features, used for bug corrections.
1276 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1277 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1278 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1279 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1280 /// descriptor field, if valid.
1281 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1282 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1283 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1284 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1285 /// \param VGPRBlocks [out] Result VGPR block count.
1286 /// \param SGPRBlocks [out] Result SGPR block count.
1287 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1288 bool FlatScrUsed, bool XNACKUsed,
1289 std::optional<bool> EnableWavefrontSize32,
1290 unsigned NextFreeVGPR, SMRange VGPRRange,
1291 unsigned NextFreeSGPR, SMRange SGPRRange,
1292 unsigned &VGPRBlocks, unsigned &SGPRBlocks);
1293 bool ParseDirectiveAMDGCNTarget();
1294 bool ParseDirectiveAMDHSAKernel();
1295 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1296 bool ParseDirectiveHSACodeObjectVersion();
1297 bool ParseDirectiveHSACodeObjectISA();
1298 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1299 bool ParseDirectiveAMDKernelCodeT();
1300 // TODO: Possibly make subtargetHasRegister const.
1301 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1302 bool ParseDirectiveAMDGPUHsaKernel();
1303
1304 bool ParseDirectiveISAVersion();
1305 bool ParseDirectiveHSAMetadata();
1306 bool ParseDirectivePALMetadataBegin();
1307 bool ParseDirectivePALMetadata();
1308 bool ParseDirectiveAMDGPULDS();
1309
1310 /// Common code to parse out a block of text (typically YAML) between start and
1311 /// end directives.
1312 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1313 const char *AssemblerDirectiveEnd,
1314 std::string &CollectString);
1315
1316 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1317 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1318 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1319 unsigned &RegNum, unsigned &RegWidth,
1320 bool RestoreOnFailure = false);
1321 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1322 unsigned &RegNum, unsigned &RegWidth,
1323 SmallVectorImpl<AsmToken> &Tokens);
1324 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1325 unsigned &RegWidth,
1326 SmallVectorImpl<AsmToken> &Tokens);
1327 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1328 unsigned &RegWidth,
1329 SmallVectorImpl<AsmToken> &Tokens);
1330 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1331 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1332 bool ParseRegRange(unsigned& Num, unsigned& Width);
1333 unsigned getRegularReg(RegisterKind RegKind,
1334 unsigned RegNum,
1335 unsigned RegWidth,
1336 SMLoc Loc);
1337
1338 bool isRegister();
1339 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1340 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1341 void initializeGprCountSymbol(RegisterKind RegKind);
1342 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1343 unsigned RegWidth);
1344 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1345 bool IsAtomic);
1346 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1347 bool IsGdsHardcoded);
1348
1349public:
1350 enum AMDGPUMatchResultTy {
1351 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1352 };
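// Operand parsing mode: MIMG instructions using the NSA (non-sequential
// address) encoding take their address operand as a bracketed list of VGPRs.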
1353 enum OperandMode {
1354 OperandMode_Default,
1355 OperandMode_NSA,
1356 };
1357
1358 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1359
1360 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1361 const MCInstrInfo &MII,
1362 const MCTargetOptions &Options)
1363 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1365
1366 if (getFeatureBits().none()) {
1367 // Set default features.
1368 copySTI().ToggleFeature("southern-islands");
1369 }
1370
1371 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1372
1373 {
1374 // TODO: make those pre-defined variables read-only.
1375 // Currently there is no suitable machinery in the core llvm-mc for this.
1376 // MCSymbol::isRedefinable is intended for another purpose, and
1377 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1378 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1379 MCContext &Ctx = getContext();
1380 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1381 MCSymbol *Sym =
1382 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1383 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1384 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1385 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1386 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1387 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1388 } else {
1389 MCSymbol *Sym =
1390 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1391 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1392 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1393 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1394 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1395 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1396 }
1397 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1398 initializeGprCountSymbol(IS_VGPR);
1399 initializeGprCountSymbol(IS_SGPR);
1400 } else
1401 KernelScope.initialize(getContext());
1402 }
1403 }
1404
1405 bool hasMIMG_R128() const {
1406 return AMDGPU::hasMIMG_R128(getSTI());
1407 }
1408
1409 bool hasPackedD16() const {
1410 return AMDGPU::hasPackedD16(getSTI());
1411 }
1412
1413 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1414
1415 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1416
1417 bool isSI() const {
1418 return AMDGPU::isSI(getSTI());
1419 }
1420
1421 bool isCI() const {
1422 return AMDGPU::isCI(getSTI());
1423 }
1424
1425 bool isVI() const {
1426 return AMDGPU::isVI(getSTI());
1427 }
1428
1429 bool isGFX9() const {
1430 return AMDGPU::isGFX9(getSTI());
1431 }
1432
1433 // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1434 bool isGFX90A() const {
1435 return AMDGPU::isGFX90A(getSTI());
1436 }
1437
1438 bool isGFX940() const {
1439 return AMDGPU::isGFX940(getSTI());
1440 }
1441
1442 bool isGFX9Plus() const {
1443 return AMDGPU::isGFX9Plus(getSTI());
1444 }
1445
1446 bool isGFX10() const {
1447 return AMDGPU::isGFX10(getSTI());
1448 }
1449
1450 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1451
1452 bool isGFX11() const {
1453 return AMDGPU::isGFX11(getSTI());
1454 }
1455
1456 bool isGFX11Plus() const {
1457 return AMDGPU::isGFX11Plus(getSTI());
1458 }
1459
1460 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1461
1462 bool isGFX10_BEncoding() const {
1463 return AMDGPU::isGFX10_BEncoding(getSTI());
1464 }
1465
1466 bool hasInv2PiInlineImm() const {
1467 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1468 }
1469
1470 bool hasFlatOffsets() const {
1471 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1472 }
1473
1474 bool hasArchitectedFlatScratch() const {
1475 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1476 }
1477
1478 bool hasSGPR102_SGPR103() const {
1479 return !isVI() && !isGFX9();
1480 }
1481
1482 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1483
1484 bool hasIntClamp() const {
1485 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1486 }
1487
1488 bool hasPartialNSAEncoding() const {
1489 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1490 }
1491
1492 unsigned getNSAMaxSize() const {
1493 return AMDGPU::getNSAMaxSize(getSTI());
1494 }
1495
1496 AMDGPUTargetStreamer &getTargetStreamer() {
1497 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1498 return static_cast<AMDGPUTargetStreamer &>(TS);
1499 }
1500
1501 const MCRegisterInfo *getMRI() const {
1502 // We need this const_cast because for some reason getContext() is not const
1503 // in MCAsmParser.
1504 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1505 }
1506
1507 const MCInstrInfo *getMII() const {
1508 return &MII;
1509 }
1510
1511 const FeatureBitset &getFeatureBits() const {
1512 return getSTI().getFeatureBits();
1513 }
1514
1515 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1516 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1517 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1518
1519 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1520 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1521 bool isForcedDPP() const { return ForcedDPP; }
1522 bool isForcedSDWA() const { return ForcedSDWA; }
1523 ArrayRef<unsigned> getMatchedVariants() const;
1524 StringRef getMatchedVariantName() const;
1525
1526 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1527 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1528 bool RestoreOnFailure);
1529 bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
1530 SMLoc &EndLoc) override;
1532 SMLoc &EndLoc) override;
1533 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1534 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1535 unsigned Kind) override;
1536 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1537 OperandVector &Operands, MCStreamer &Out,
1538 uint64_t &ErrorInfo,
1539 bool MatchingInlineAsm) override;
1540 bool ParseDirective(AsmToken DirectiveID) override;
1541 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1542 OperandMode Mode = OperandMode_Default);
1543 StringRef parseMnemonicSuffix(StringRef Name);
1544 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1545 SMLoc NameLoc, OperandVector &Operands) override;
1546 //bool ProcessInstruction(MCInst &Inst);
1547
1549
1550 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1551
1552 OperandMatchResultTy
1553 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1554 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1555 std::function<bool(int64_t &)> ConvertResult = nullptr);
1556
1557 OperandMatchResultTy
1558 parseOperandArrayWithPrefix(const char *Prefix,
1559 OperandVector &Operands,
1560 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1561 bool (*ConvertResult)(int64_t&) = nullptr);
1562
1563 OperandMatchResultTy
1564 parseNamedBit(StringRef Name, OperandVector &Operands,
1565 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1566 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1568 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1569 StringRef &Value,
1570 SMLoc &StringLoc);
1571
1572 bool isModifier();
1573 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1574 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1575 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1576 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1577 bool parseSP3NegModifier();
1578 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1579 OperandMatchResultTy parseReg(OperandVector &Operands);
1580 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1581 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1582 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1583 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1584 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1585 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1586 OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1587 OperandMatchResultTy parseUfmt(int64_t &Format);
1588 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1589 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1591 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1592 OperandMatchResultTy parseNumericFormat(int64_t &Format);
1596 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1597 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1598
1599 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1600 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1601 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1602 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1603
1604 bool parseCnt(int64_t &IntVal);
1605 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1606
1607 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1608 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1610
1611 bool parseDelay(int64_t &Delay);
1612 OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);
1613
1615
1616private:
1617 struct OperandInfoTy {
1618 SMLoc Loc;
1619 int64_t Id;
1620 bool IsSymbolic = false;
1621 bool IsDefined = false;
1622
1623 OperandInfoTy(int64_t Id_) : Id(Id_) {}
1624 };
1625
1626 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1627 bool validateSendMsg(const OperandInfoTy &Msg,
1628 const OperandInfoTy &Op,
1629 const OperandInfoTy &Stream);
1630
1631 bool parseHwregBody(OperandInfoTy &HwReg,
1632 OperandInfoTy &Offset,
1633 OperandInfoTy &Width);
1634 bool validateHwreg(const OperandInfoTy &HwReg,
1635 const OperandInfoTy &Offset,
1636 const OperandInfoTy &Width);
1637
1638 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1639 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1640 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1641
1642 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1643 const OperandVector &Operands) const;
1644 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1645 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1646 SMLoc getLitLoc(const OperandVector &Operands,
1647 bool SearchMandatoryLiterals = false) const;
1648 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1649 SMLoc getConstLoc(const OperandVector &Operands) const;
1650 SMLoc getInstLoc(const OperandVector &Operands) const;
1651
1652 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1653 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1654 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1655 bool validateSOPLiteral(const MCInst &Inst) const;
1656 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1657 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1658 const OperandVector &Operands);
1659 bool validateIntClampSupported(const MCInst &Inst);
1660 bool validateMIMGAtomicDMask(const MCInst &Inst);
1661 bool validateMIMGGatherDMask(const MCInst &Inst);
1662 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1663 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1664 bool validateMIMGAddrSize(const MCInst &Inst);
1665 bool validateMIMGD16(const MCInst &Inst);
1666 bool validateMIMGMSAA(const MCInst &Inst);
1667 bool validateOpSel(const MCInst &Inst);
1668 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1669 bool validateVccOperand(unsigned Reg) const;
1670 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1671 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1672 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1673 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1674 bool validateAGPRLdSt(const MCInst &Inst) const;
1675 bool validateVGPRAlign(const MCInst &Inst) const;
1676 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1677 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1678 bool validateDivScale(const MCInst &Inst);
1679 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1680 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1681 const SMLoc &IDLoc);
1682 bool validateExeczVcczOperands(const OperandVector &Operands);
1683 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1684 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1685 unsigned getConstantBusLimit(unsigned Opcode) const;
1686 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1687 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1688 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1689
1690 bool isSupportedMnemo(StringRef Mnemo,
1691 const FeatureBitset &FBS);
1692 bool isSupportedMnemo(StringRef Mnemo,
1693 const FeatureBitset &FBS,
1694 ArrayRef<unsigned> Variants);
1695 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1696
1697 bool isId(const StringRef Id) const;
1698 bool isId(const AsmToken &Token, const StringRef Id) const;
1699 bool isToken(const AsmToken::TokenKind Kind) const;
1700 StringRef getId() const;
1701 bool trySkipId(const StringRef Id);
1702 bool trySkipId(const StringRef Pref, const StringRef Id);
1703 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1704 bool trySkipToken(const AsmToken::TokenKind Kind);
1705 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1706 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1707 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1708
1709 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1710 AsmToken::TokenKind getTokenKind() const;
1711 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1712 bool parseExpr(OperandVector &Operands);
1713 StringRef getTokenStr() const;
1714 AsmToken peekToken(bool ShouldSkipSpace = true);
1715 AsmToken getToken() const;
1716 SMLoc getLoc() const;
1717 void lex();
1718
1719public:
1720 void onBeginOfFile() override;
1721
1722 OperandMatchResultTy parseCustomOperand(OperandVector &Operands,
1723 unsigned MCK);
1724
1729 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1731
1732 bool parseSwizzleOperand(int64_t &Op,
1733 const unsigned MinVal,
1734 const unsigned MaxVal,
1735 const StringRef ErrMsg,
1736 SMLoc &Loc);
1737 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1738 const unsigned MinVal,
1739 const unsigned MaxVal,
1740 const StringRef ErrMsg);
1742 bool parseSwizzleOffset(int64_t &Imm);
1743 bool parseSwizzleMacro(int64_t &Imm);
1744 bool parseSwizzleQuadPerm(int64_t &Imm);
1745 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1746 bool parseSwizzleBroadcast(int64_t &Imm);
1747 bool parseSwizzleSwap(int64_t &Imm);
1748 bool parseSwizzleReverse(int64_t &Imm);
1749
1751 int64_t parseGPRIdxMacro();
1752
1753 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1754 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1755 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1756
1757 AMDGPUOperand::Ptr defaultCPol() const;
1758
1759 AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1760 AMDGPUOperand::Ptr defaultSMEMOffset() const;
1761 AMDGPUOperand::Ptr defaultSMEMOffsetMod() const;
1762 AMDGPUOperand::Ptr defaultFlatOffset() const;
1763
1765
1766 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1767 OptionalImmIndexMap &OptionalIdx);
1768 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1769 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1770 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1771 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1772 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1773 OptionalImmIndexMap &OptionalIdx);
1774 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1775 OptionalImmIndexMap &OptionalIdx);
1776
1777 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1778 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1779
1780 void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1781 bool IsAtomic = false);
1782 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1783 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1784
1785 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1786
1787 bool parseDimId(unsigned &Encoding);
1789 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1792 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1793 int64_t parseDPPCtrlSel(StringRef Ctrl);
1794 int64_t parseDPPCtrlPerm();
1795 AMDGPUOperand::Ptr defaultDppRowMask() const;
1796 AMDGPUOperand::Ptr defaultDppBankMask() const;
1797 AMDGPUOperand::Ptr defaultDppBoundCtrl() const;
1798 AMDGPUOperand::Ptr defaultDppFI() const;
1799 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1800 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1801 cvtDPP(Inst, Operands, true);
1802 }
1803 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1804 bool IsDPP8 = false);
1805 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1806 cvtVOP3DPP(Inst, Operands, true);
1807 }
1808
1809 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1810 AMDGPUOperand::ImmTy Type);
1811 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1812 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1813 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1814 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1815 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1816 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1817 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1818 uint64_t BasicInstType,
1819 bool SkipDstVcc = false,
1820 bool SkipSrcVcc = false);
1821
1822 AMDGPUOperand::Ptr defaultBLGP() const;
1823 AMDGPUOperand::Ptr defaultCBSZ() const;
1824 AMDGPUOperand::Ptr defaultABID() const;
1825
1827 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1828
1829 AMDGPUOperand::Ptr defaultWaitVDST() const;
1830 AMDGPUOperand::Ptr defaultWaitEXP() const;
1832};
1833
1834} // end anonymous namespace
1835
1836// May be called with an integer type of equivalent bitwidth.
1837static const fltSemantics *getFltSemantics(unsigned Size) {
1838 switch (Size) {
1839 case 4:
1840 return &APFloat::IEEEsingle();
1841 case 8:
1842 return &APFloat::IEEEdouble();
1843 case 2:
1844 return &APFloat::IEEEhalf();
1845 default:
1846 llvm_unreachable("unsupported fp type");
1847 }
1848}
1849
1850static const fltSemantics *getFltSemantics(MVT VT) {
1851 return getFltSemantics(VT.getSizeInBits() / 8);
1852}
1853
1854static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1855 switch (OperandType) {
1868 return &APFloat::IEEEsingle();
1874 return &APFloat::IEEEdouble();
1889 return &APFloat::IEEEhalf();
1890 default:
1891 llvm_unreachable("unsupported fp type");
1892 }
1893}
1894
1895//===----------------------------------------------------------------------===//
1896// Operand
1897//===----------------------------------------------------------------------===//
1898
1899static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1900 bool Lost;
1901
1902 // Convert the literal to the operand's floating-point type.
1903 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1904 APFloat::rmNearestTiesToEven,
1905 &Lost);
1906 // We allow precision loss but not overflow or underflow
1907 if (Status != APFloat::opOK &&
1908 Lost &&
1909 ((Status & APFloat::opOverflow) != 0 ||
1910 (Status & APFloat::opUnderflow) != 0)) {
1911 return false;
1912 }
1913
1914 return true;
1915}
1916
1917static bool isSafeTruncation(int64_t Val, unsigned Size) {
1918 return isUIntN(Size, Val) || isIntN(Size, Val);
1919}
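// Illustration (not part of the original source): isSafeTruncation accepts a
// value if it fits in Size bits either as an unsigned or as a signed integer.
// For Size == 16: 0xFFFF fits unsigned, -1 fits signed, but 0x1FFFF fits
// neither and is rejected.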
1920
1921static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1922 if (VT.getScalarType() == MVT::i16) {
1923 // FP immediate values are broken.
1924 return isInlinableIntLiteral(Val);
1925 }
1926
1927 // f16/v2f16 operands work correctly for all values.
1928 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1929}
1930
1931bool AMDGPUOperand::isInlinableImm(MVT type) const {
1932
1933 // This is a hack to enable named inline values like
1934 // shared_base with both 32-bit and 64-bit operands.
1935 // Note that these values are defined as
1936 // 32-bit operands only.
1937 if (isInlineValue()) {
1938 return true;
1939 }
1940
1941 if (!isImmTy(ImmTyNone)) {
1942 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1943 return false;
1944 }
1945 // TODO: We should avoid using host float here. It would be better to
1946 // check the float bit values which is what a few other places do.
1947 // We've had bot failures before due to weird NaN support on mips hosts.
1948
1949 APInt Literal(64, Imm.Val);
1950
1951 if (Imm.IsFPImm) { // We got fp literal token
1952 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1954 AsmParser->hasInv2PiInlineImm());
1955 }
1956
1957 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1958 if (!canLosslesslyConvertToFPType(FPLiteral, type))
1959 return false;
1960
1961 if (type.getScalarSizeInBits() == 16) {
1963 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1964 type, AsmParser->hasInv2PiInlineImm());
1965 }
1966
1967 // Check if single precision literal is inlinable
1969 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1970 AsmParser->hasInv2PiInlineImm());
1971 }
1972
1973 // We got int literal token.
1974 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1976 AsmParser->hasInv2PiInlineImm());
1977 }
1978
1979 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1980 return false;
1981 }
1982
1983 if (type.getScalarSizeInBits() == 16) {
1985 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1986 type, AsmParser->hasInv2PiInlineImm());
1987 }
1988
1990 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1991 AsmParser->hasInv2PiInlineImm());
1992}
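// A rough sketch of the effect (an illustration, not from the original source):
// for a 32-bit integer operand, values in the hardware inline range such as 64
// or -16 are reported as inlinable, while 65 falls outside that range and must
// be encoded as a literal instead.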
1993
1994bool AMDGPUOperand::isLiteralImm(MVT type) const {
1995 // Check that this immediate can be added as literal
1996 if (!isImmTy(ImmTyNone)) {
1997 return false;
1998 }
1999
2000 if (!Imm.IsFPImm) {
2001 // We got int literal token.
2002
2003 if (type == MVT::f64 && hasFPModifiers()) {
2004 // FP modifiers cannot be applied to int literals while preserving the same
2005 // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
2006 // ambiguity, these cases are disabled.
2007 return false;
2008 }
2009
2010 unsigned Size = type.getSizeInBits();
2011 if (Size == 64)
2012 Size = 32;
2013
2014 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2015 // types.
2016 return isSafeTruncation(Imm.Val, Size);
2017 }
2018
2019 // We got fp literal token
2020 if (type == MVT::f64) { // Expected 64-bit fp operand
2021 // The low 32 bits of the literal would be set to zero, but such literals are accepted
2022 return true;
2023 }
2024
2025 if (type == MVT::i64) { // Expected 64-bit int operand
2026 // We don't allow fp literals in 64-bit integer instructions. It is
2027 // unclear how we should encode them.
2028 return false;
2029 }
2030
2031 // We allow fp literals with f16x2 operands assuming that the specified
2032 // literal goes into the lower half and the upper half is zero. We also
2033 // require that the literal may be losslessly converted to f16.
2034 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
2035 (type == MVT::v2i16)? MVT::i16 :
2036 (type == MVT::v2f32)? MVT::f32 : type;
2037
2038 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2039 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2040}
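// Example of the f16x2 rule above (illustration only, not from the original
// source): with a v2f16 operand, a literal such as 1.5 converts to f16 without
// overflow and is accepted (it occupies the low half, the high half is zero),
// whereas 1.0e10 overflows the f16 range and is rejected.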
2041
2042bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2043 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2044}
2045
2046bool AMDGPUOperand::isVRegWithInputMods() const {
2047 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2048 // GFX90A allows DPP on 64-bit operands.
2049 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2050 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
2051}
2052
2053bool AMDGPUOperand::isT16VRegWithInputMods() const {
2054 return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID);
2055}
2056
2057bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2058 if (AsmParser->isVI())
2059 return isVReg32();
2060 else if (AsmParser->isGFX9Plus())
2061 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2062 else
2063 return false;
2064}
2065
2066bool AMDGPUOperand::isSDWAFP16Operand() const {
2067 return isSDWAOperand(MVT::f16);
2068}
2069
2070bool AMDGPUOperand::isSDWAFP32Operand() const {
2071 return isSDWAOperand(MVT::f32);
2072}
2073
2074bool AMDGPUOperand::isSDWAInt16Operand() const {
2075 return isSDWAOperand(MVT::i16);
2076}
2077
2078bool AMDGPUOperand::isSDWAInt32Operand() const {
2079 return isSDWAOperand(MVT::i32);
2080}
2081
2082bool AMDGPUOperand::isBoolReg() const {
2083 auto FB = AsmParser->getFeatureBits();
2084 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2085 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2086}
2087
2088uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2089{
2090 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2091 assert(Size == 2 || Size == 4 || Size == 8);
2092
2093 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2094
2095 if (Imm.Mods.Abs) {
2096 Val &= ~FpSignMask;
2097 }
2098 if (Imm.Mods.Neg) {
2099 Val ^= FpSignMask;
2100 }
2101
2102 return Val;
2103}
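// Worked example (illustration only): for a 32-bit operand FpSignMask is
// 0x80000000. With Val = 0xBF800000 (-1.0f), the 'abs' modifier clears the
// sign bit giving 0x3F800000 (+1.0f), and the 'neg' modifier flips it, also
// giving 0x3F800000 in this case.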
2104
2105void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2106 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2107 Inst.getNumOperands())) {
2108 addLiteralImmOperand(Inst, Imm.Val,
2109 ApplyModifiers &
2110 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2111 } else {
2112 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2113 Inst.addOperand(MCOperand::createImm(Imm.Val));
2114 setImmKindNone();
2115 }
2116}
2117
2118void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2119 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2120 auto OpNum = Inst.getNumOperands();
2121 // Check that this operand accepts literals
2122 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2123
2124 if (ApplyModifiers) {
2125 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2126 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2127 Val = applyInputFPModifiers(Val, Size);
2128 }
2129
2130 APInt Literal(64, Val);
2131 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2132
2133 if (Imm.IsFPImm) { // We got fp literal token
2134 switch (OpTy) {
2140 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2141 AsmParser->hasInv2PiInlineImm())) {
2142 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2143 setImmKindConst();
2144 return;
2145 }
2146
2147 // Non-inlineable
2148 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2149 // For fp operands we check if low 32 bits are zeros
2150 if (Literal.getLoBits(32) != 0) {
2151 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2152 "Can't encode literal as exact 64-bit floating-point operand. "
2153 "Low 32-bits will be set to zero");
2154 }
2155
2156 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2157 setImmKindLiteral();
2158 return;
2159 }
2160
2161 // We don't allow fp literals in 64-bit integer instructions. It is
2162 // unclear how we should encode them. This case should be checked earlier
2163 // in predicate methods (isLiteralImm())
2164 llvm_unreachable("fp literal in 64-bit integer instruction.");
2165
2192 bool lost;
2193 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2194 // Convert the literal to the operand's floating-point type
2195 FPLiteral.convert(*getOpFltSemantics(OpTy),
2196 APFloat::rmNearestTiesToEven, &lost);
2197 // We allow precision loss but not overflow or underflow. This should be
2198 // checked earlier in isLiteralImm()
2199
2200 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2201 Inst.addOperand(MCOperand::createImm(ImmVal));
2202 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2203 setImmKindMandatoryLiteral();
2204 } else {
2205 setImmKindLiteral();
2206 }
2207 return;
2208 }
2209 default:
2210 llvm_unreachable("invalid operand size");
2211 }
2212
2213 return;
2214 }
2215
2216 // We got int literal token.
2217 // Only sign extend inline immediates.
2218 switch (OpTy) {
2232 if (isSafeTruncation(Val, 32) &&
2233 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2234 AsmParser->hasInv2PiInlineImm())) {
2236 setImmKindConst();
2237 return;
2238 }
2239
2240 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2241 setImmKindLiteral();
2242 return;
2243
2249 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2251 setImmKindConst();
2252 return;
2253 }
2254
2256 setImmKindLiteral();
2257 return;
2258
2266 if (isSafeTruncation(Val, 16) &&
2267 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2268 AsmParser->hasInv2PiInlineImm())) {
2270 setImmKindConst();
2271 return;
2272 }
2273
2274 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2275 setImmKindLiteral();
2276 return;
2277
2282 assert(isSafeTruncation(Val, 16));
2283 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2284 AsmParser->hasInv2PiInlineImm()));
2285
2287 return;
2288 }
2290 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2291 setImmKindMandatoryLiteral();
2292 return;
2294 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2295 setImmKindMandatoryLiteral();
2296 return;
2297 default:
2298 llvm_unreachable("invalid operand size");
2299 }
2300}
2301
2302template <unsigned Bitwidth>
2303void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2304 APInt Literal(64, Imm.Val);
2305 setImmKindMandatoryLiteral();
2306
2307 if (!Imm.IsFPImm) {
2308 // We got int literal token.
2309 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2310 return;
2311 }
2312
2313 bool Lost;
2314 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2315 FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2316 APFloat::rmNearestTiesToEven, &Lost);
2317 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2318}
2319
2320void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2321 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2322}
2323
2324bool AMDGPUOperand::isInlineValue() const {
2325 return isRegKind() && ::isInlineValue(getReg());
2326}
2327
2328//===----------------------------------------------------------------------===//
2329// AsmParser
2330//===----------------------------------------------------------------------===//
2331
2332static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2333 if (Is == IS_VGPR) {
2334 switch (RegWidth) {
2335 default: return -1;
2336 case 32:
2337 return AMDGPU::VGPR_32RegClassID;
2338 case 64:
2339 return AMDGPU::VReg_64RegClassID;
2340 case 96:
2341 return AMDGPU::VReg_96RegClassID;
2342 case 128:
2343 return AMDGPU::VReg_128RegClassID;
2344 case 160:
2345 return AMDGPU::VReg_160RegClassID;
2346 case 192:
2347 return AMDGPU::VReg_192RegClassID;
2348 case 224:
2349 return AMDGPU::VReg_224RegClassID;
2350 case 256:
2351 return AMDGPU::VReg_256RegClassID;
2352 case 288:
2353 return AMDGPU::VReg_288RegClassID;
2354 case 320:
2355 return AMDGPU::VReg_320RegClassID;
2356 case 352:
2357 return AMDGPU::VReg_352RegClassID;
2358 case 384:
2359 return AMDGPU::VReg_384RegClassID;
2360 case 512:
2361 return AMDGPU::VReg_512RegClassID;
2362 case 1024:
2363 return AMDGPU::VReg_1024RegClassID;
2364 }
2365 } else if (Is == IS_TTMP) {
2366 switch (RegWidth) {
2367 default: return -1;
2368 case 32:
2369 return AMDGPU::TTMP_32RegClassID;
2370 case 64:
2371 return AMDGPU::TTMP_64RegClassID;
2372 case 128:
2373 return AMDGPU::TTMP_128RegClassID;
2374 case 256:
2375 return AMDGPU::TTMP_256RegClassID;
2376 case 512:
2377 return AMDGPU::TTMP_512RegClassID;
2378 }
2379 } else if (Is == IS_SGPR) {
2380 switch (RegWidth) {
2381 default: return -1;
2382 case 32:
2383 return AMDGPU::SGPR_32RegClassID;
2384 case 64:
2385 return AMDGPU::SGPR_64RegClassID;
2386 case 96:
2387 return AMDGPU::SGPR_96RegClassID;
2388 case 128:
2389 return AMDGPU::SGPR_128RegClassID;
2390 case 160:
2391 return AMDGPU::SGPR_160RegClassID;
2392 case 192:
2393 return AMDGPU::SGPR_192RegClassID;
2394 case 224:
2395 return AMDGPU::SGPR_224RegClassID;
2396 case 256:
2397 return AMDGPU::SGPR_256RegClassID;
2398 case 288:
2399 return AMDGPU::SGPR_288RegClassID;
2400 case 320:
2401 return AMDGPU::SGPR_320RegClassID;
2402 case 352:
2403 return AMDGPU::SGPR_352RegClassID;
2404 case 384:
2405 return AMDGPU::SGPR_384RegClassID;
2406 case 512:
2407 return AMDGPU::SGPR_512RegClassID;
2408 }
2409 } else if (Is == IS_AGPR) {
2410 switch (RegWidth) {
2411 default: return -1;
2412 case 32:
2413 return AMDGPU::AGPR_32RegClassID;
2414 case 64:
2415 return AMDGPU::AReg_64RegClassID;
2416 case 96:
2417 return AMDGPU::AReg_96RegClassID;
2418 case 128:
2419 return AMDGPU::AReg_128RegClassID;
2420 case 160:
2421 return AMDGPU::AReg_160RegClassID;
2422 case 192:
2423 return AMDGPU::AReg_192RegClassID;
2424 case 224:
2425 return AMDGPU::AReg_224RegClassID;
2426 case 256:
2427 return AMDGPU::AReg_256RegClassID;
2428 case 288:
2429 return AMDGPU::AReg_288RegClassID;
2430 case 320:
2431 return AMDGPU::AReg_320RegClassID;
2432 case 352:
2433 return AMDGPU::AReg_352RegClassID;
2434 case 384:
2435 return AMDGPU::AReg_384RegClassID;
2436 case 512:
2437 return AMDGPU::AReg_512RegClassID;
2438 case 1024:
2439 return AMDGPU::AReg_1024RegClassID;
2440 }
2441 }
2442 return -1;
2443}
2444
2445static unsigned getSpecialRegForName(StringRef RegName) {
2446 return StringSwitch<unsigned>(RegName)
2447 .Case("exec", AMDGPU::EXEC)
2448 .Case("vcc", AMDGPU::VCC)
2449 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2450 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2451 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2452 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2453 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2454 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2455 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2456 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2457 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2458 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2459 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2460 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2461 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2462 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2463 .Case("m0", AMDGPU::M0)
2464 .Case("vccz", AMDGPU::SRC_VCCZ)
2465 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2466 .Case("execz", AMDGPU::SRC_EXECZ)
2467 .Case("src_execz", AMDGPU::SRC_EXECZ)
2468 .Case("scc", AMDGPU::SRC_SCC)
2469 .Case("src_scc", AMDGPU::SRC_SCC)
2470 .Case("tba", AMDGPU::TBA)
2471 .Case("tma", AMDGPU::TMA)
2472 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2473 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2474 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2475 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2476 .Case("vcc_lo", AMDGPU::VCC_LO)
2477 .Case("vcc_hi", AMDGPU::VCC_HI)
2478 .Case("exec_lo", AMDGPU::EXEC_LO)
2479 .Case("exec_hi", AMDGPU::EXEC_HI)
2480 .Case("tma_lo", AMDGPU::TMA_LO)
2481 .Case("tma_hi", AMDGPU::TMA_HI)
2482 .Case("tba_lo", AMDGPU::TBA_LO)
2483 .Case("tba_hi", AMDGPU::TBA_HI)
2484 .Case("pc", AMDGPU::PC_REG)
2485 .Case("null", AMDGPU::SGPR_NULL)
2486 .Default(AMDGPU::NoRegister);
2487}
2488
2489bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2490 SMLoc &EndLoc, bool RestoreOnFailure) {
2491 auto R = parseRegister();
2492 if (!R) return true;
2493 assert(R->isReg());
2494 RegNo = R->getReg();
2495 StartLoc = R->getStartLoc();
2496 EndLoc = R->getEndLoc();
2497 return false;
2498}
2499
2500bool AMDGPUAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2501 SMLoc &EndLoc) {
2502 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2503}
2504
2505OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(MCRegister &RegNo,
2506 SMLoc &StartLoc,
2507 SMLoc &EndLoc) {
2508 bool Result =
2509 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2510 bool PendingErrors = getParser().hasPendingError();
2511 getParser().clearPendingErrors();
2512 if (PendingErrors)
2514 if (Result)
2515 return MatchOperand_NoMatch;
2516 return MatchOperand_Success;
2517}
2518
2519bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2520 RegisterKind RegKind, unsigned Reg1,
2521 SMLoc Loc) {
2522 switch (RegKind) {
2523 case IS_SPECIAL:
2524 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2525 Reg = AMDGPU::EXEC;
2526 RegWidth = 64;
2527 return true;
2528 }
2529 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2530 Reg = AMDGPU::FLAT_SCR;
2531 RegWidth = 64;
2532 return true;
2533 }
2534 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2535 Reg = AMDGPU::XNACK_MASK;
2536 RegWidth = 64;
2537 return true;
2538 }
2539 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2540 Reg = AMDGPU::VCC;
2541 RegWidth = 64;
2542 return true;
2543 }
2544 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2545 Reg = AMDGPU::TBA;
2546 RegWidth = 64;
2547 return true;
2548 }
2549 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2550 Reg = AMDGPU::TMA;
2551 RegWidth = 64;
2552 return true;
2553 }
2554 Error(Loc, "register does not fit in the list");
2555 return false;
2556 case IS_VGPR:
2557 case IS_SGPR:
2558 case IS_AGPR:
2559 case IS_TTMP:
2560 if (Reg1 != Reg + RegWidth / 32) {
2561 Error(Loc, "registers in a list must have consecutive indices");
2562 return false;
2563 }
2564 RegWidth += 32;
2565 return true;
2566 default:
2567 llvm_unreachable("unexpected register kind");
2568 }
2569}
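// Illustration (not from the original source): while parsing the list
// [s0, s1], the first element sets Reg = s0 and RegWidth = 32; the second
// element is accepted only because its index equals Reg + RegWidth / 32
// (i.e. s1), after which RegWidth grows to 64.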
2570
2571struct RegInfo {
2572 StringLiteral Name;
2573 RegisterKind Kind;
2574};
2575
2576static constexpr RegInfo RegularRegisters[] = {
2577 {{"v"}, IS_VGPR},
2578 {{"s"}, IS_SGPR},
2579 {{"ttmp"}, IS_TTMP},
2580 {{"acc"}, IS_AGPR},
2581 {{"a"}, IS_AGPR},
2582};
2583
2584static bool isRegularReg(RegisterKind Kind) {
2585 return Kind == IS_VGPR ||
2586 Kind == IS_SGPR ||
2587 Kind == IS_TTMP ||
2588 Kind == IS_AGPR;
2589}
2590
2591static const RegInfo* getRegularRegInfo(StringRef Str) {
2592 for (const RegInfo &Reg : RegularRegisters)
2593 if (Str.startswith(Reg.Name))
2594 return &Reg;
2595 return nullptr;
2596}
2597
2598static bool getRegNum(StringRef Str, unsigned& Num) {
2599 return !Str.getAsInteger(10, Num);
2600}
2601
2602bool
2603AMDGPUAsmParser::isRegister(const AsmToken &Token,
2604 const AsmToken &NextToken) const {
2605
2606 // A list of consecutive registers: [s0,s1,s2,s3]
2607 if (Token.is(AsmToken::LBrac))
2608 return true;
2609
2610 if (!Token.is(AsmToken::Identifier))
2611 return false;
2612
2613 // A single register like s0 or a range of registers like s[0:1]
2614
2615 StringRef Str = Token.getString();
2616 const RegInfo *Reg = getRegularRegInfo(Str);
2617 if (Reg) {
2618 StringRef RegName = Reg->Name;
2619 StringRef RegSuffix = Str.substr(RegName.size());
2620 if (!RegSuffix.empty()) {
2621 unsigned Num;
2622 // A single register with an index: rXX
2623 if (getRegNum(RegSuffix, Num))
2624 return true;
2625 } else {
2626 // A range of registers: r[XX:YY].
2627 if (NextToken.is(AsmToken::LBrac))
2628 return true;
2629 }
2630 }
2631
2632 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2633}
2634
2635bool
2636AMDGPUAsmParser::isRegister()
2637{
2638 return isRegister(getToken(), peekToken());
2639}
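// Forms recognized as registers by the checks above (illustrative list):
// a plain indexed register such as "v0", a range such as "s[2:3]", a list
// such as "[v4,v5]" (the leading '[' is enough here), and special names
// such as "vcc" or "exec".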
2640
2641unsigned
2642AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2643 unsigned RegNum,
2644 unsigned RegWidth,
2645 SMLoc Loc) {
2646
2647 assert(isRegularReg(RegKind));
2648
2649 unsigned AlignSize = 1;
2650 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2651 // SGPR and TTMP registers must be aligned.
2652 // Max required alignment is 4 dwords.
2653 AlignSize = std::min(RegWidth / 32, 4u);
2654 }
2655
2656 if (RegNum % AlignSize != 0) {
2657 Error(Loc, "invalid register alignment");
2658 return AMDGPU::NoRegister;
2659 }
2660
2661 unsigned RegIdx = RegNum / AlignSize;
2662 int RCID = getRegClass(RegKind, RegWidth);
2663 if (RCID == -1) {
2664 Error(Loc, "invalid or unsupported register size");
2665 return AMDGPU::NoRegister;
2666 }
2667
2668 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2669 const MCRegisterClass RC = TRI->getRegClass(RCID);
2670 if (RegIdx >= RC.getNumRegs()) {
2671 Error(Loc, "register index is out of range");
2672 return AMDGPU::NoRegister;
2673 }
2674
2675 return RC.getRegister(RegIdx);
2676}
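// Alignment example (illustration only): for an SGPR pair the alignment is
// min(64 / 32, 4) = 2 registers, so s[2:3] is accepted while s[3:4] triggers
// "invalid register alignment" because its starting index is not a multiple
// of 2.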
2677
2678bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2679 int64_t RegLo, RegHi;
2680 if (!skipToken(AsmToken::LBrac, "missing register index"))
2681 return false;
2682
2683 SMLoc FirstIdxLoc = getLoc();
2684 SMLoc SecondIdxLoc;
2685
2686 if (!parseExpr(RegLo))
2687 return false;
2688
2689 if (trySkipToken(AsmToken::Colon)) {
2690 SecondIdxLoc = getLoc();
2691 if (!parseExpr(RegHi))
2692 return false;
2693 } else {
2694 RegHi = RegLo;
2695 }
2696
2697 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2698 return false;
2699
2700 if (!isUInt<32>(RegLo)) {
2701 Error(FirstIdxLoc, "invalid register index");
2702 return false;
2703 }
2704
2705 if (!isUInt<32>(RegHi)) {
2706 Error(SecondIdxLoc, "invalid register index");
2707 return false;
2708 }
2709
2710 if (RegLo > RegHi) {
2711 Error(FirstIdxLoc, "first register index should not exceed second index");
2712 return false;
2713 }
2714
2715 Num = static_cast<unsigned>(RegLo);
2716 RegWidth = 32 * ((RegHi - RegLo) + 1);
2717 return true;
2718}
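// Example (illustration only): the suffix "[4:7]" parses to RegLo = 4 and
// RegHi = 7, giving Num = 4 and RegWidth = 32 * (7 - 4 + 1) = 128, which later
// selects a 128-bit register class such as VReg_128 for VGPRs.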
2719
2720unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2721 unsigned &RegNum, unsigned &RegWidth,
2722 SmallVectorImpl<AsmToken> &Tokens) {
2723 assert(isToken(AsmToken::Identifier));
2724 unsigned Reg = getSpecialRegForName(getTokenStr());
2725 if (Reg) {
2726 RegNum = 0;
2727 RegWidth = 32;
2728 RegKind = IS_SPECIAL;
2729 Tokens.push_back(getToken());
2730 lex(); // skip register name
2731 }
2732 return Reg;
2733}
2734
2735unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2736 unsigned &RegNum, unsigned &RegWidth,
2737 SmallVectorImpl<AsmToken> &Tokens) {
2738 assert(isToken(AsmToken::Identifier));
2739 StringRef RegName = getTokenStr();
2740 auto Loc = getLoc();
2741
2742 const RegInfo *RI = getRegularRegInfo(RegName);
2743 if (!RI) {
2744 Error(Loc, "invalid register name");
2745 return AMDGPU::NoRegister;
2746 }
2747
2748 Tokens.push_back(getToken());
2749 lex(); // skip register name
2750
2751 RegKind = RI->Kind;
2752 StringRef RegSuffix = RegName.substr(RI->Name.size());
2753 if (!RegSuffix.empty()) {
2754 // Single 32-bit register: vXX.
2755 if (!getRegNum(RegSuffix, RegNum)) {
2756 Error(Loc, "invalid register index");
2757 return AMDGPU::NoRegister;
2758 }
2759 RegWidth = 32;
2760 } else {
2761 // Range of registers: v[XX:YY]. ":YY" is optional.
2762 if (!ParseRegRange(RegNum, RegWidth))
2763 return AMDGPU::NoRegister;
2764 }
2765
2766 return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2767}
2768
2769unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2770 unsigned &RegWidth,
2771 SmallVectorImpl<AsmToken> &Tokens) {
2772 unsigned Reg = AMDGPU::NoRegister;
2773 auto ListLoc = getLoc();
2774
2775 if (!skipToken(AsmToken::LBrac,
2776 "expected a register or a list of registers")) {
2777 return AMDGPU::NoRegister;
2778 }
2779
2780 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2781
2782 auto Loc = getLoc();
2783 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2784 return AMDGPU::NoRegister;
2785 if (RegWidth != 32) {
2786 Error(Loc, "expected a single 32-bit register");
2787 return AMDGPU::NoRegister;
2788 }
2789
2790 for (; trySkipToken(AsmToken::Comma); ) {
2791 RegisterKind NextRegKind;
2792 unsigned NextReg, NextRegNum, NextRegWidth;
2793 Loc = getLoc();
2794
2795 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2796 NextRegNum, NextRegWidth,
2797 Tokens)) {
2798 return AMDGPU::NoRegister;
2799 }
2800 if (NextRegWidth != 32) {
2801 Error(Loc, "expected a single 32-bit register");
2802 return AMDGPU::NoRegister;
2803 }
2804 if (NextRegKind != RegKind) {
2805 Error(Loc, "registers in a list must be of the same kind");
2806 return AMDGPU::NoRegister;
2807 }
2808 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2809 return AMDGPU::NoRegister;
2810 }
2811
2812 if (!skipToken(AsmToken::RBrac,
2813 "expected a comma or a closing square bracket")) {
2814 return AMDGPU::NoRegister;
2815 }
2816
2817 if (isRegularReg(RegKind))
2818 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2819
2820 return Reg;
2821}
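// Illustration (not from the original source): the list [s0,s1,s2,s3] is
// parsed as four consecutive 32-bit SGPRs, accumulated into RegWidth = 128,
// and then folded by getRegularReg into the same register as s[0:3].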
2822
2823bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2824 unsigned &RegNum, unsigned &RegWidth,
2825 SmallVectorImpl<AsmToken> &Tokens) {
2826 auto Loc = getLoc();
2827 Reg = AMDGPU::NoRegister;
2828
2829 if (isToken(AsmToken::Identifier)) {
2830 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2831 if (Reg == AMDGPU::NoRegister)
2832 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2833 } else {
2834 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2835 }
2836
2837 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2838 if (Reg == AMDGPU::NoRegister) {
2839 assert(Parser.hasPendingError());
2840 return false;
2841 }
2842
2843 if (!subtargetHasRegister(*TRI, Reg)) {
2844 if (Reg == AMDGPU::SGPR_NULL) {
2845 Error(Loc, "'null' operand is not supported on this GPU");
2846 } else {
2847 Error(Loc, "register not available on this GPU");
2848 }
2849 return false;
2850 }
2851
2852 return true;
2853}
2854
2855bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2856 unsigned &RegNum, unsigned &RegWidth,
2857 bool RestoreOnFailure /*=false*/) {
2858 Reg = AMDGPU::NoRegister;
2859
2861 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2862 if (RestoreOnFailure) {
2863 while (!Tokens.empty()) {
2864 getLexer().UnLex(Tokens.pop_back_val());
2865 }
2866 }
2867 return true;
2868 }
2869 return false;
2870}
2871
2872std::optional<StringRef>
2873AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2874 switch (RegKind) {
2875 case IS_VGPR:
2876 return StringRef(".amdgcn.next_free_vgpr");
2877 case IS_SGPR:
2878 return StringRef(".amdgcn.next_free_sgpr");
2879 default:
2880 return std::nullopt;
2881 }
2882}
2883
2884void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2885 auto SymbolName = getGprCountSymbolName(RegKind);
2886 assert(SymbolName && "initializing invalid register kind");
2887 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2888 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2889}
2890
2891bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2892 unsigned DwordRegIndex,
2893 unsigned RegWidth) {
2894 // Symbols are only defined for GCN targets
2895 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2896 return true;
2897
2898 auto SymbolName = getGprCountSymbolName(RegKind);
2899 if (!SymbolName)
2900 return true;
2901 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2902
2903 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2904 int64_t OldCount;
2905
2906 if (!Sym->isVariable())
2907 return !Error(getLoc(),
2908 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2909 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2910 return !Error(
2911 getLoc(),
2912 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2913
2914 if (OldCount <= NewMax)
2915 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2916
2917 return true;
2918}
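// Worked example (illustration only): a use of v[8:9] reaches this function
// with DwordRegIndex = 8 and RegWidth = 64, so NewMax = 8 + 2 - 1 = 9 and
// .amdgcn.next_free_vgpr is raised to 10 unless it already exceeds that value.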
2919
2920std::unique_ptr<AMDGPUOperand>
2921AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2922 const auto &Tok = getToken();
2923 SMLoc StartLoc = Tok.getLoc();
2924 SMLoc EndLoc = Tok.getEndLoc();
2925 RegisterKind RegKind;
2926 unsigned Reg, RegNum, RegWidth;
2927
2928 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2929 return nullptr;
2930 }
2931 if (isHsaAbiVersion3AndAbove(&getSTI())) {
2932 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2933 return nullptr;
2934 } else
2935 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2936 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2937}
2938
2939OperandMatchResultTy
2940AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2941 // TODO: add syntactic sugar for 1/(2*PI)
2942
2943 if (isRegister())
2944 return MatchOperand_NoMatch;
2945 assert(!isModifier());
2946
2947 const auto& Tok = getToken();
2948 const auto& NextTok = peekToken();
2949 bool IsReal = Tok.is(AsmToken::Real);
2950 SMLoc S = getLoc();
2951 bool Negate = false;
2952
2953 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2954 lex();
2955 IsReal = true;
2956 Negate = true;
2957 }
2958
2959 if (IsReal) {
2960 // Floating-point expressions are not supported;
2961 // only floating-point literals with an
2962 // optional sign are allowed.
2963
2964 StringRef Num = getTokenStr();
2965 lex();
2966
2967 APFloat RealVal(APFloat::IEEEdouble());
2968 auto roundMode = APFloat::rmNearestTiesToEven;
2969 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2971 }
2972 if (Negate)
2973 RealVal.changeSign();
2974
2975 Operands.push_back(
2976 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2977 AMDGPUOperand::ImmTyNone, true));
2978
2979 return MatchOperand_Success;
2980
2981 } else {
2982 int64_t IntVal;
2983 const MCExpr *Expr;
2984 SMLoc S = getLoc();
2985
2986 if (HasSP3AbsModifier) {
2987 // This is a workaround for handling expressions
2988 // as arguments of SP3 'abs' modifier, for example:
2989 // |1.0|
2990 // |-1|
2991 // |1+x|
2992 // This syntax is not compatible with the syntax of standard
2993 // MC expressions (due to the trailing '|').
2994 SMLoc EndLoc;
2995 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2997 } else {
2998 if (Parser.parseExpression(Expr))
3000 }
3001
3002 if (Expr->evaluateAsAbsolute(IntVal)) {
3003 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3004 } else {
3005 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3006 }
3007
3008 return MatchOperand_Success;
3009 }
3010
3011 return MatchOperand_NoMatch;
3012}
3013
3014OperandMatchResultTy
3015AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3016 if (!isRegister())
3017 return MatchOperand_NoMatch;
3018
3019 if (auto R = parseRegister()) {
3020 assert(R->isReg());
3021 Operands.push_back(std::move(R));
3022 return MatchOperand_Success;
3023 }
3025}
3026
3027OperandMatchResultTy
3028AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
3029 auto res = parseReg(Operands);
3030 if (res != MatchOperand_NoMatch) {
3031 return res;
3032 } else if (isModifier()) {
3033 return MatchOperand_NoMatch;
3034 } else {
3035 return parseImm(Operands, HasSP3AbsMod);
3036 }
3037}
3038
3039bool
3040AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3041 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3042 const auto &str = Token.getString();
3043 return str == "abs" || str == "neg" || str == "sext";
3044 }
3045 return false;
3046}
3047
3048bool
3049AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3050 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3051}
3052
3053bool
3054AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3055 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3056}
3057
3058bool
3059AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3060 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3061}
3062
3063// Check if this is an operand modifier or an opcode modifier
3064// which may look like an expression but is not. We should
3065// avoid parsing these modifiers as expressions. Currently
3066// recognized sequences are:
3067// |...|
3068// abs(...)
3069// neg(...)
3070// sext(...)
3071// -reg
3072// -|...|
3073// -abs(...)
3074// name:...
3075//
3076bool
3077AMDGPUAsmParser::isModifier() {
3078
3079 AsmToken Tok = getToken();
3080 AsmToken NextToken[2];
3081 peekTokens(NextToken);
3082
3083 return isOperandModifier(Tok, NextToken[0]) ||
3084 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3085 isOpcodeModifierWithVal(Tok, NextToken[0]);
3086}
3087
3088// Check if the current token is an SP3 'neg' modifier.
3089// Currently this modifier is allowed in the following context:
3090//
3091// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3092// 2. Before an 'abs' modifier: -abs(...)
3093// 3. Before an SP3 'abs' modifier: -|...|
3094//
3095// In all other cases "-" is handled as a part
3096// of an expression that follows the sign.
3097//
3098// Note: When "-" is followed by an integer literal,
3099// this is interpreted as integer negation rather
3100// than a floating-point NEG modifier applied to N.
3101// Besides being counter-intuitive, such use of the floating-point
3102// NEG modifier would have resulted in a different meaning
3103// of integer literals used with VOP1/2/C and VOP3,
3104// for example:
3105// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3106// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3107// Negative fp literals with preceding "-" are
3108// handled likewise for uniformity
3109//
3110bool
3111AMDGPUAsmParser::parseSP3NegModifier() {
3112
3113 AsmToken NextToken[2];
3114 peekTokens(NextToken);
3115
3116 if (isToken(AsmToken::Minus) &&
3117 (isRegister(NextToken[0], NextToken[1]) ||
3118 NextToken[0].is(AsmToken::Pipe) ||
3119 isId(NextToken[0], "abs"))) {
3120 lex();
3121 return true;
3122 }
3123
3124 return false;
3125}
3126
3127OperandMatchResultTy
3128AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3129 bool AllowImm) {
3130 bool Neg, SP3Neg;
3131 bool Abs, SP3Abs;
3132 SMLoc Loc;
3133
3134 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3135 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3136 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3138 }
3139
3140 SP3Neg = parseSP3NegModifier();
3141
3142 Loc = getLoc();
3143 Neg = trySkipId("neg");
3144 if (Neg && SP3Neg) {
3145 Error(Loc, "expected register or immediate");
3147 }
3148 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3150
3151 Abs = trySkipId("abs");
3152 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3154
3155 Loc = getLoc();
3156 SP3Abs = trySkipToken(AsmToken::Pipe);
3157 if (Abs && SP3Abs) {
3158 Error(Loc, "expected register or immediate");
3160 }
3161
3163 if (AllowImm) {
3164 Res = parseRegOrImm(Operands, SP3Abs);
3165 } else {
3166 Res = parseReg(Operands);
3167 }
3168 if (Res != MatchOperand_Success) {
3169 return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3170 }
3171
3172 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3174 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3176 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3178
3179 AMDGPUOperand::Modifiers Mods;
3180 Mods.Abs = Abs || SP3Abs;
3181 Mods.Neg = Neg || SP3Neg;
3182
3183 if (Mods.hasFPModifiers()) {
3184 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3185 if (Op.isExpr()) {
3186 Error(Op.getStartLoc(), "expected an absolute expression");
3188 }
3189 Op.setModifiers(Mods);
3190 }
3191 return MatchOperand_Success;
3192}
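// Accepted spellings handled above (illustrative): "abs(v1)" or the SP3 form
// "|v1|", "neg(v2)" or the SP3 form "-v2", and nestings such as
// "neg(abs(v3))". A doubled sign such as "--1" is rejected with
// "invalid syntax, expected 'neg' modifier".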
3193
3194OperandMatchResultTy
3195AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3196 bool AllowImm) {
3197 bool Sext = trySkipId("sext");
3198 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3200
3202 if (AllowImm) {
3203 Res = parseRegOrImm(Operands);
3204 } else {
3205 Res = parseReg(Operands);
3206 }
3207 if (Res != MatchOperand_Success) {
3208 return Sext? MatchOperand_ParseFail : Res;
3209 }
3210
3211 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3213
3214 AMDGPUOperand::Modifiers Mods;
3215 Mods.Sext = Sext;
3216
3217 if (Mods.hasIntModifiers()) {
3218 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3219 if (Op.isExpr()) {
3220 Error(Op.getStartLoc(), "expected an absolute expression");
3222 }
3223 Op.setModifiers(Mods);
3224 }
3225
3226 return MatchOperand_Success;
3227}
3228
3229OperandMatchResultTy
3230AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3231 return parseRegOrImmWithFPInputMods(Operands, false);
3232}
3233
3234OperandMatchResultTy
3235AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3236 return parseRegOrImmWithIntInputMods(Operands, false);
3237}
3238
3239OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3240 auto Loc = getLoc();
3241 if (trySkipId("off")) {
3242 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3243 AMDGPUOperand::ImmTyOff, false));
3244 return MatchOperand_Success;
3245 }
3246
3247 if (!isRegister())
3248 return MatchOperand_NoMatch;
3249
3250 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3251 if (Reg) {
3252 Operands.push_back(std::move(Reg));
3253 return MatchOperand_Success;
3254 }
3255
3257
3258}
3259
3260unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3261 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3262
3263 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3264 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3265 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3266 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3267 return Match_InvalidOperand;
3268
3269 if ((TSFlags & SIInstrFlags::VOP3) &&
3271 getForcedEncodingSize() != 64)
3272 return Match_PreferE32;
3273
3274 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3275 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3276 // v_mac_f32/16 allow only dst_sel == DWORD.
3277 auto OpNum =
3278 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3279 const auto &Op = Inst.getOperand(OpNum);
3280 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3281 return Match_InvalidOperand;
3282 }
3283 }
3284
3285 return Match_Success;
3286}
3287
3289 static const unsigned Variants[] = {
3293 };
3294
3295 return ArrayRef(Variants);
3296}
3297
3298// What asm variants we should check
3299ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3300 if (isForcedDPP() && isForcedVOP3()) {
3301 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3302 return ArrayRef(Variants);
3303 }
3304 if (getForcedEncodingSize() == 32) {
3305 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3306 return ArrayRef(Variants);
3307 }
3308
3309 if (isForcedVOP3()) {
3310 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3311 return ArrayRef(Variants);
3312 }
3313
3314 if (isForcedSDWA()) {
3315 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3317 return ArrayRef(Variants);
3318 }
3319
3320 if (isForcedDPP()) {
3321 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3322 return ArrayRef(Variants);
3323 }
3324
3325 return getAllVariants();
3326}
3327
3328StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3329 if (isForcedDPP() && isForcedVOP3())
3330 return "e64_dpp";
3331
3332 if (getForcedEncodingSize() == 32)
3333 return "e32";
3334
3335 if (isForcedVOP3())
3336 return "e64";
3337
3338 if (isForcedSDWA())
3339 return "sdwa";
3340
3341 if (isForcedDPP())
3342 return "dpp";
3343
3344 return "";
3345}
3346
3347unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3348 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3349 for (MCPhysReg Reg : Desc.implicit_uses()) {
3350 switch (Reg) {
3351 case AMDGPU::FLAT_SCR:
3352 case AMDGPU::VCC:
3353 case AMDGPU::VCC_LO:
3354 case AMDGPU::VCC_HI:
3355 case AMDGPU::M0:
3356 return Reg;
3357 default:
3358 break;
3359 }
3360 }
3361 return AMDGPU::NoRegister;
3362}
3363
3364// NB: This code is correct only when used to check constant
3365// bus limitations because GFX7 has no f16 inline constants.
3366// Note that there are no cases when a GFX7 opcode violates
3367// constant bus limitations due to the use of an f16 constant.
3368bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3369 unsigned OpIdx) const {
3370 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3371
3372 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3373 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3374 return false;
3375 }
3376
3377 const MCOperand &MO = Inst.getOperand(OpIdx);
3378
3379 int64_t Val = MO.getImm();
3380 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3381
3382 switch (OpSize) { // expected operand size
3383 case 8:
3384 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3385 case 4:
3386 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3387 case 2: {
3388 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3393
3398
3402 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3403
3404 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3405 }
3406 default:
3407 llvm_unreachable("invalid operand size");
3408 }
3409}
3410
3411unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3412 if (!isGFX10Plus())
3413 return 1;
3414
3415 switch (Opcode) {
3416 // 64-bit shift instructions can use only one scalar value input
3417 case AMDGPU::V_LSHLREV_B64_e64:
3418 case AMDGPU::V_LSHLREV_B64_gfx10:
3419 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3420 case AMDGPU::V_LSHRREV_B64_e64:
3421 case AMDGPU::V_LSHRREV_B64_gfx10:
3422 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3423 case AMDGPU::V_ASHRREV_I64_e64:
3424 case AMDGPU::V_ASHRREV_I64_gfx10:
3425 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3426 case AMDGPU::V_LSHL_B64_e64:
3427 case AMDGPU::V_LSHR_B64_e64:
3428 case AMDGPU::V_ASHR_I64_e64:
3429 return 1;
3430 default:
3431 return 2;
3432 }
3433}
3434
3435constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3436using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3437
3438// Get regular operand indices in the same order as specified
3439// in the instruction (but append mandatory literals to the end).
3440static OperandIndices getSrcOperandIndices(unsigned Opcode,
3441 bool AddMandatoryLiterals = false) {
3442
3443 int16_t ImmIdx =
3444 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3445
3446 if (isVOPD(Opcode)) {
3447 int16_t ImmDeferredIdx =
3448 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3449 : -1;
3450
3451 return {getNamedOperandIdx(Opcode, OpName::src0X),
3452 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3453 getNamedOperandIdx(Opcode, OpName::src0Y),
3454 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3455 ImmDeferredIdx,
3456 ImmIdx};
3457 }
3458
3459 return {getNamedOperandIdx(Opcode, OpName::src0),
3460 getNamedOperandIdx(Opcode, OpName::src1),
3461 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3462}
3463
3464bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3465 const MCOperand &MO = Inst.getOperand(OpIdx);
3466 if (MO.isImm()) {
3467 return !isInlineConstant(Inst, OpIdx);
3468 } else if (MO.isReg()) {
3469 auto Reg = MO.getReg();
3470 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3471 auto PReg = mc2PseudoReg(Reg);
3472 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3473 } else {
3474 return true;
3475 }
3476}
3477
3478bool AMDGPUAsmParser::validateConstantBusLimitations(
3479 const MCInst &Inst, const OperandVector &Operands) {
3480 const unsigned Opcode = Inst.getOpcode();
3481 const MCInstrDesc &Desc = MII.get(Opcode);
3482 unsigned LastSGPR = AMDGPU::NoRegister;
3483 unsigned ConstantBusUseCount = 0;
3484 unsigned NumLiterals = 0;
3485 unsigned LiteralSize;
3486
3487 if (!(Desc.TSFlags &
3490 !isVOPD(Opcode))
3491 return true;
3492
3493 // Check special imm operands (used by madmk, etc)
3494 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3495 ++NumLiterals;
3496 LiteralSize = 4;
3497 }
3498
3499 SmallDenseSet<unsigned> SGPRsUsed;
3500 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3501 if (SGPRUsed != AMDGPU::NoRegister) {
3502 SGPRsUsed.insert(SGPRUsed);
3503 ++ConstantBusUseCount;
3504 }
3505
3506 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3507
3508 for (int OpIdx : OpIndices) {
3509 if (OpIdx == -1)
3510 continue;
3511
3512 const MCOperand &MO = Inst.getOperand(OpIdx);
3513 if (usesConstantBus(Inst, OpIdx)) {
3514 if (MO.isReg()) {
3515 LastSGPR = mc2PseudoReg(MO.getReg());
3516 // Pairs of registers with partial intersections like these:
3517 // s0, s[0:1]
3518 // flat_scratch_lo, flat_scratch
3519 // flat_scratch_lo, flat_scratch_hi
3520 // are theoretically valid but they are disabled anyway.
3521 // Note that this code mimics SIInstrInfo::verifyInstruction
3522 if (SGPRsUsed.insert(LastSGPR).second) {
3523 ++ConstantBusUseCount;
3524 }
3525 } else { // Expression or a literal
3526
3527 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3528 continue; // special operand like VINTERP attr_chan
3529
3530 // An instruction may use only one literal.
3531 // This has been validated in a previous step.
3532 // See validateVOPLiteral.
3533 // This literal may be used as more than one operand.
3534 // If all these operands are of the same size,
3535 // this literal counts as one scalar value.
3536 // Otherwise it counts as 2 scalar values.
3537 // See "GFX10 Shader Programming", section 3.6.2.3.
3538
3539 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3540 if (Size < 4)
3541 Size = 4;
3542
3543 if (NumLiterals == 0) {
3544 NumLiterals = 1;
3545 LiteralSize = Size;
3546 } else if (LiteralSize != Size) {
3547 NumLiterals = 2;
3548 }
3549 }
3550 }
3551 }
3552 ConstantBusUseCount += NumLiterals;
3553
3554 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3555 return true;
3556
3557 SMLoc LitLoc = getLitLoc(Operands);
3558 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3559 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3560 Error(Loc, "invalid operand (violates constant bus restrictions)");
3561 return false;
3562}
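// Worked example (illustration only, opcode chosen for exposition): a VOP3
// instruction reading two different SGPRs, e.g. "v_add_f32_e64 v0, s1, s2",
// contributes two constant bus reads. That is within the GFX10+ limit of 2
// returned by getConstantBusLimit, but exceeds the limit of 1 on earlier
// targets and is then reported as a constant bus violation.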
3563
3564bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3565 const MCInst &Inst, const OperandVector &Operands) {
3566
3567 const unsigned Opcode = Inst.getOpcode();
3568 if (!isVOPD(Opcode))
3569 return true;
3570
3571 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3572
3573 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3574 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3575 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3576 ? Opr.getReg()
3578 };
3579
3580 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3581 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(getVRegIdx);
3582 if (!InvalidCompOprIdx)
3583 return true;
3584
3585 auto CompOprIdx = *InvalidCompOprIdx;
3586 auto ParsedIdx =
3587 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3588 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3589 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3590
3591 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3592 if (CompOprIdx == VOPD::Component::DST) {
3593 Error(Loc, "one dst register must be even and the other odd");
3594 } else {
3595 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3596 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3597 " operands must use different VGPR banks");
3598 }
3599
3600 return false;
3601}
3602
3603bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3604
3605 const unsigned Opc = Inst.getOpcode();
3606 const MCInstrDesc &Desc = MII.get(Opc);
3607
3608 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3609 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3610 assert(ClampIdx != -1);
3611 return Inst.getOperand(ClampIdx).getImm() == 0;
3612 }
3613
3614 return true;
3615}
3616
3617bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3618 const SMLoc &IDLoc) {
3619
3620 const unsigned Opc = Inst.getOpcode();
3621 const MCInstrDesc &Desc = MII.get(Opc);
3622
3623 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3624 return true;
3625
3626 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3627 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3628 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3629
3630 assert(VDataIdx != -1);
3631
3632 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3633 return true;
3634
3635 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3636 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3637 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3638 if (DMask == 0)
3639 DMask = 1;
3640
3641 bool IsPackedD16 = false;
3642 unsigned DataSize =
3643 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3644 if (hasPackedD16()) {
3645 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3646 IsPackedD16 = D16Idx >= 0;
3647 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3648 DataSize = (DataSize + 1) / 2;
3649 }
3650
3651 if ((VDataSize / 4) == DataSize + TFESize)
3652 return true;
3653
3654 StringRef Modifiers;
3655 if (isGFX90A())
3656 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3657 else
3658 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3659
3660 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3661 return false;
3662}
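// Worked example (illustration only): with dmask = 0x7 the expected data size
// is popcount(0x7) = 3 dwords; tfe adds one more, and packed d16 halves it to
// (3 + 1) / 2 = 2 dwords. The vdata register width divided by 4 must match the
// resulting DataSize + TFESize.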
3663
3664bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3665 const unsigned Opc = Inst.getOpcode();
3666 const MCInstrDesc &Desc = MII.get(Opc);
3667
3668 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3669 return true;
3670
3672
3673 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3675 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3676 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3677 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3678 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3679
3680 assert(VAddr0Idx != -1);
3681 assert(SrsrcIdx != -1);
3682 assert(SrsrcIdx > VAddr0Idx);
3683
3684 if (DimIdx == -1)
3685 return true; // intersect_ray
3686
3687 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3689 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3690 unsigned ActualAddrSize =
3691 IsNSA ? SrsrcIdx - VAddr0Idx
3692 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3693 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3694
3695 unsigned ExpectedAddrSize =
3696 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3697
3698 if (IsNSA) {
3699 if (hasPartialNSAEncoding() && ExpectedAddrSize > getNSAMaxSize()) {
3700 int VAddrLastIdx = SrsrcIdx - 1;
3701 unsigned VAddrLastSize =
3702 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3703
3704 return VAddrLastIdx - VAddr0Idx + VAddrLastSize == ExpectedAddrSize;
3705 }
3706 } else {
3707 if (ExpectedAddrSize > 12)
3708 ExpectedAddrSize = 16;
3709
3710 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3711 // This provides backward compatibility for assembly created
3712 // before 160b/192b/224b types were directly supported.
3713 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3714 return true;
3715 }
3716
3717 return ActualAddrSize == ExpectedAddrSize;
3718}
3719
3720bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3721
3722 const unsigned Opc = Inst.getOpcode();
3723 const MCInstrDesc &Desc = MII.get(Opc);
3724
3725 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3726 return true;
3727 if (!Desc.mayLoad() || !Desc.mayStore())
3728 return true; // Not atomic
3729
3730 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3731 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3732
3733 // This is an incomplete check because image_atomic_cmpswap
3734 // may only use 0x3 and 0xf while other atomic operations
3735 // may use 0x1 and 0x3. However these limitations are
3736 // verified when we check that dmask matches dst size.
3737 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3738}
3739
3740bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3741
3742 const unsigned Opc = Inst.getOpcode();
3743 const MCInstrDesc &Desc = MII.get(Opc);
3744
3745 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3746 return true;
3747
3748 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3749 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3750
3751 // GATHER4 instructions use dmask in a different fashion compared to
3752 // other MIMG instructions. The only useful DMASK values are
3753 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3754 // (red,red,red,red) etc.) The ISA document doesn't mention
3755 // this.
3756 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3757}
3758
3759bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3760 const unsigned Opc = Inst.getOpcode();
3761 const MCInstrDesc &Desc = MII.get(Opc);
3762
3763 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3764 return true;
3765
3767 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3769
3770 if (!BaseOpcode->MSAA)
3771 return true;
3772
3773 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3774 assert(DimIdx != -1);
3775
3776 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3778
3779 return DimInfo->MSAA;
3780}
3781
3782static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3783{
3784 switch (Opcode) {
3785 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3786 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3787 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3788 return true;
3789 default:
3790 return false;
3791 }
3792}
3793
3794// movrels* opcodes should only allow VGPRs as src0.
3795// This is specified in .td description for vop1/vop3,
3796// but sdwa is handled differently. See isSDWAOperand.
3797bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3798 const OperandVector &Operands) {
3799
3800 const unsigned Opc = Inst.getOpcode();
3801 const MCInstrDesc &Desc = MII.get(Opc);
3802
3803 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3804 return true;
3805
3806 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3807 assert(Src0Idx != -1);
3808
3809 SMLoc ErrLoc;
3810 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3811 if (Src0.isReg()) {
3812 auto Reg = mc2PseudoReg(Src0.getReg());
3813 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3814 if (!isSGPR(Reg, TRI))
3815 return true;
3816 ErrLoc = getRegLoc(Reg, Operands);
3817 } else {
3818 ErrLoc = getConstLoc(Operands);
3819 }
3820
3821 Error(ErrLoc, "source operand must be a VGPR");
3822 return false;
3823}
3824
3825bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3826 const OperandVector &Operands) {
3827
3828 const unsigned Opc = Inst.getOpcode();
3829
3830 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3831 return true;
3832
3833 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3834 assert(Src0Idx != -1);
3835
3836 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3837 if (!Src0.isReg())
3838 return true;
3839
3840 auto Reg = mc2PseudoReg(Src0.getReg());
3841 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3842 if (!isGFX90A() && isSGPR(Reg, TRI)) {
3843 Error(getRegLoc(Reg, Operands),
3844 "source operand must be either a VGPR or an inline constant");
3845 return false;
3846 }
3847
3848 return true;
3849}
3850
3851bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
3852 const OperandVector &Operands) {
3853 unsigned Opcode = Inst.getOpcode();
3854 const MCInstrDesc &Desc = MII.get(Opcode);
3855
3856 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
3857 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
3858 return true;
3859
3860 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
3861 if (Src2Idx == -1)
3862 return true;
3863
3864 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
3865 Error(getConstLoc(Operands),
3866 "inline constants are not allowed for this operand");
3867 return false;
3868 }
3869
3870 return true;
3871}
3872
3873bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3874 const OperandVector &Operands) {
3875 const unsigned Opc = Inst.getOpcode();
3876 const MCInstrDesc &Desc = MII.get(Opc);
3877
3878 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3879 return true;
3880
3881 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3882 if (Src2Idx == -1)
3883 return true;
3884
3885 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3886 if (!Src2.isReg())
3887 return true;
3888
3889 MCRegister Src2Reg = Src2.getReg();
3890 MCRegister DstReg = Inst.getOperand(0).getReg();
3891 if (Src2Reg == DstReg)
3892 return true;
3893
3894 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3895 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
3896 return true;
3897
3898 if (TRI->regsOverlap(Src2Reg, DstReg)) {
3899 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3900 "source 2 operand must not partially overlap with dst");
3901 return false;
3902 }
3903
3904 return true;
3905}
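// Illustrative: for an MFMA whose dst class is wider than 128 bits, src2 may
// equal dst exactly or be fully disjoint from it; a partial overlap such as
// dst v[0:15] with src2 v[8:23] is what the regsOverlap check above rejects
// (register ranges invented for illustration).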
3906
3907bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3908 switch (Inst.getOpcode()) {
3909 default:
3910 return true;
3911 case V_DIV_SCALE_F32_gfx6_gfx7:
3912 case V_DIV_SCALE_F32_vi:
3913 case V_DIV_SCALE_F32_gfx10:
3914 case V_DIV_SCALE_F64_gfx6_gfx7:
3915 case V_DIV_SCALE_F64_vi:
3916 case V_DIV_SCALE_F64_gfx10:
3917 break;
3918 }
3919
3920 // TODO: Check that src0 = src1 or src2.
3921
3922 for (auto Name : {AMDGPU::OpName::src0_modifiers,
3923 AMDGPU::OpName::src2_modifiers,
3924 AMDGPU::OpName::src2_modifiers}) {
3925 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3926 .getImm() &
3927 SISrcMods::ABS) {
3928 return false;
3929 }
3930 }
3931
3932 return true;
3933}
3934
3935bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3936
3937 const unsigned Opc = Inst.getOpcode();
3938 const MCInstrDesc &Desc = MII.get(Opc);
3939
3940 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3941 return true;
3942
3943 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3944 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3945 if (isCI() || isSI())
3946 return false;
3947 }
3948
3949 return true;
3950}
3951
3952static bool IsRevOpcode(const unsigned Opcode)
3953{
3954 switch (Opcode) {
3955 case AMDGPU::V_SUBREV_F32_e32:
3956 case AMDGPU::V_SUBREV_F32_e64:
3957 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3958 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3959 case AMDGPU::V_SUBREV_F32_e32_vi:
3960 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3961 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3962 case AMDGPU::V_SUBREV_F32_e64_vi:
3963
3964 case AMDGPU::V_SUBREV_CO_U32_e32:
3965 case AMDGPU::V_SUBREV_CO_U32_e64:
3966 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3967 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3968
3969 case AMDGPU::V_SUBBREV_U32_e32:
3970 case AMDGPU::V_SUBBREV_U32_e64:
3971 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3972 case AMDGPU::V_SUBBREV_U32_e32_vi:
3973 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3974 case AMDGPU::V_SUBBREV_U32_e64_vi:
3975
3976 case AMDGPU::V_SUBREV_U32_e32:
3977 case AMDGPU::V_SUBREV_U32_e64:
3978 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3979 case AMDGPU::V_SUBREV_U32_e32_vi:
3980 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3981 case AMDGPU::V_SUBREV_U32_e64_vi:
3982
3983 case AMDGPU::V_SUBREV_F16_e32:
3984 case AMDGPU::V_SUBREV_F16_e64:
3985 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3986 case AMDGPU::V_SUBREV_F16_e32_vi:
3987 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3988 case AMDGPU::V_SUBREV_F16_e64_vi:
3989
3990 case AMDGPU::V_SUBREV_U16_e32:
3991 case AMDGPU::V_SUBREV_U16_e64:
3992 case AMDGPU::V_SUBREV_U16_e32_vi:
3993 case AMDGPU::V_SUBREV_U16_e64_vi:
3994
3995 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3996 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3997 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3998
3999 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4000 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4001
4002 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4003 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4004
4005 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4006 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4007
4008 case AMDGPU::V_LSHRREV_B32_e32:
4009 case AMDGPU::V_LSHRREV_B32_e64:
4010 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4011 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4012 case AMDGPU::V_LSHRREV_B32_e32_vi:
4013 case AMDGPU::V_LSHRREV_B32_e64_vi:
4014 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4015 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4016
4017 case AMDGPU::V_ASHRREV_I32_e32:
4018 case AMDGPU::V_ASHRREV_I32_e64:
4019 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4020 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4021 case AMDGPU::V_ASHRREV_I32_e32_vi:
4022 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4023 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4024 case AMDGPU::V_ASHRREV_I32_e64_vi:
4025
4026 case AMDGPU::V_LSHLREV_B32_e32:
4027 case AMDGPU::V_LSHLREV_B32_e64:
4028 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4029 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4030 case AMDGPU::V_LSHLREV_B32_e32_vi:
4031 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4032 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4033 case AMDGPU::V_LSHLREV_B32_e64_vi:
4034
4035 case AMDGPU::V_LSHLREV_B16_e32:
4036 case AMDGPU::V_LSHLREV_B16_e64:
4037 case AMDGPU::V_LSHLREV_B16_e32_vi:
4038 case AMDGPU::V_LSHLREV_B16_e64_vi:
4039 case AMDGPU::V_LSHLREV_B16_gfx10:
4040
4041 case AMDGPU::V_LSHRREV_B16_e32:
4042 case AMDGPU::V_LSHRREV_B16_e64:
4043 case AMDGPU::V_LSHRREV_B16_e32_vi:
4044 case AMDGPU::V_LSHRREV_B16_e64_vi:
4045 case AMDGPU::V_LSHRREV_B16_gfx10:
4046
4047 case AMDGPU::V_ASHRREV_I16_e32:
4048 case AMDGPU::V_ASHRREV_I16_e64:
4049 case AMDGPU::V_ASHRREV_I16_e32_vi:
4050 case AMDGPU::V_ASHRREV_I16_e64_vi:
4051 case AMDGPU::V_ASHRREV_I16_gfx10:
4052
4053 case AMDGPU::V_LSHLREV_B64_e64:
4054 case AMDGPU::V_LSHLREV_B64_gfx10:
4055 case AMDGPU::V_LSHLREV_B64_vi:
4056
4057 case AMDGPU::V_LSHRREV_B64_e64:
4058 case AMDGPU::V_LSHRREV_B64_gfx10:
4059 case AMDGPU::V_LSHRREV_B64_vi:
4060
4061 case AMDGPU::V_ASHRREV_I64_e64:
4062 case AMDGPU::V_ASHRREV_I64_gfx10:
4063 case AMDGPU::V_ASHRREV_I64_vi:
4064
4065 case AMDGPU::V_PK_LSHLREV_B16:
4066 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4067 case AMDGPU::V_PK_LSHLREV_B16_vi:
4068
4069 case AMDGPU::V_PK_LSHRREV_B16:
4070 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4071 case AMDGPU::V_PK_LSHRREV_B16_vi:
4072 case AMDGPU::V_PK_ASHRREV_I16:
4073 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4074 case AMDGPU::V_PK_ASHRREV_I16_vi:
4075 return true;
4076 default:
4077 return false;
4078 }
4079}
4080
4081std::optional<StringRef>
4082AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4083
4084 using namespace SIInstrFlags;
4085 const unsigned Opcode = Inst.getOpcode();
4086 const MCInstrDesc &Desc = MII.get(Opcode);
4087
4088 // lds_direct register is defined so that it can be used
4089 // with 9-bit operands only. Ignore encodings which do not accept these.
4090 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4091 if ((Desc.TSFlags & Enc) == 0)
4092 return std::nullopt;
4093
4094 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4095 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4096 if (SrcIdx == -1)
4097 break;
4098 const auto &Src = Inst.getOperand(SrcIdx);
4099 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4100
4101 if (isGFX90A() || isGFX11Plus())
4102 return StringRef("lds_direct is not supported on this GPU");
4103
4104 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4105 return StringRef("lds_direct cannot be used with this instruction");
4106
4107 if (SrcName != OpName::src0)
4108 return StringRef("lds_direct may be used as src0 only");
4109 }
4110 }
4111
4112 return std::nullopt;
4113}
4114
4115SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4116 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4117 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4118 if (Op.isFlatOffset())
4119 return Op.getStartLoc();
4120 }
4121 return getLoc();
4122}
4123
4124bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4125 const OperandVector &Operands) {
4126 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4127 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4128 return true;
4129
4130 auto Opcode = Inst.getOpcode();
4131 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4132 assert(OpNum != -1);
4133
4134 const auto &Op = Inst.getOperand(OpNum);
4135 if (!hasFlatOffsets() && Op.getImm() != 0) {
4136 Error(getFlatOffsetLoc(Operands),
4137 "flat offset modifier is not supported on this GPU");
4138 return false;
4139 }
4140
4141 // For FLAT segment the offset must be positive;
4142 // MSB is ignored and forced to zero.
4143 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4144 bool AllowNegative =
4145 TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
4146 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4147 Error(getFlatOffsetLoc(Operands),
4148 Twine("expected a ") +
4149 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4150 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4151 return false;
4152 }
4153
4154 return true;
4155}
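// Illustrative sketch, assuming a target with flat offsets: a segment-less
// access like "flat_load_dword v0, v[2:3] offset:-8" fails the check above
// because plain FLAT offsets must be non-negative, whereas the GLOBAL/SCRATCH
// segment forms accept signed offsets within OffsetSize bits.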
4156
4157SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4158 // Start with second operand because SMEM Offset cannot be dst or src0.
4159 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4160 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4161 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4162 return Op.getStartLoc();
4163 }
4164 return getLoc();
4165}
4166
4167bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4168 const OperandVector &Operands) {
4169 if (isCI() || isSI())
4170 return true;
4171
4172 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4173 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4174 return true;
4175
4176 auto Opcode = Inst.getOpcode();
4177 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4178 if (OpNum == -1)
4179 return true;
4180
4181 const auto &Op = Inst.getOperand(OpNum);
4182 if (!Op.isImm())
4183 return true;
4184
4185 uint64_t Offset = Op.getImm();
4186 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4187 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4188 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4189 return true;
4190
4191 Error(getSMEMOffsetLoc(Operands),
4192 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4193 "expected a 21-bit signed offset");
4194
4195 return false;
4196}
4197
4198bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4199 unsigned Opcode = Inst.getOpcode();
4200 const MCInstrDesc &Desc = MII.get(Opcode);
4201 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4202 return true;
4203
4204 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4205 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4206
4207 const int OpIndices[] = { Src0Idx, Src1Idx };
4208
4209 unsigned NumExprs = 0;
4210 unsigned NumLiterals = 0;
4211 uint32_t LiteralValue;
4212
4213 for (int OpIdx : OpIndices) {
4214 if (OpIdx == -1) break;
4215
4216 const MCOperand &MO = Inst.getOperand(OpIdx);
4217 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4218 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4219 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4220 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4221 if (NumLiterals == 0 || LiteralValue != Value) {
4222 LiteralValue = Value;
4223 ++NumLiterals;
4224 }
4225 } else if (MO.isExpr()) {
4226 ++NumExprs;
4227 }
4228 }
4229 }
4230
4231 return NumLiterals + NumExprs <= 1;
4232}
4233
4234bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4235 const unsigned Opc = Inst.getOpcode();
4236 if (isPermlane16(Opc)) {
4237 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4238 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4239
4240 if (OpSel & ~3)
4241 return false;
4242 }
4243
4244 uint64_t TSFlags = MII.get(Opc).TSFlags;
4245
4246 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4247 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4248 if (OpSelIdx != -1) {
4249 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4250 return false;
4251 }
4252 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4253 if (OpSelHiIdx != -1) {
4254 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4255 return false;
4256 }
4257 }
4258
4259 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4260 if ((TSFlags & SIInstrFlags::IsDOT) && (TSFlags & SIInstrFlags::VOP3) &&
4261 !(TSFlags & SIInstrFlags::VOP3P)) {
4262 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4263 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4264 if (OpSel & 3)
4265 return false;
4266 }
4267
4268 return true;
4269}
4270
4271bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4272 const OperandVector &Operands) {
4273 const unsigned Opc = Inst.getOpcode();
4274 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4275 if (DppCtrlIdx < 0)
4276 return true;
4277 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4278
4279 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4280 // DPP64 is supported for row_newbcast only.
4281 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4282 if (Src0Idx >= 0 &&
4283 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4284 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4285 Error(S, "64 bit dpp only supports row_newbcast");
4286 return false;
4287 }
4288 }
4289
4290 return true;
4291}
4292
4293// Check if VCC register matches wavefront size
4294bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4295 auto FB = getFeatureBits();
4296 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4297 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4298}
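// Illustrative: with wave64 enabled a carry-out such as
// "v_add_co_u32_e32 v0, vcc, v1, v2" must name vcc, while wave32 assembly
// names vcc_lo in the same position (syntax sketched; exact mnemonics vary by
// GPU generation).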
4299
4300// One unique literal can be used. VOP3 literal is only allowed in GFX10+
4301bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4302 const OperandVector &Operands) {
4303 unsigned Opcode = Inst.getOpcode();
4304 const MCInstrDesc &Desc = MII.get(Opcode);
4305 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4306 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4307 !HasMandatoryLiteral && !isVOPD(Opcode))
4308 return true;
4309
4310 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4311
4312 unsigned NumExprs = 0;
4313 unsigned NumLiterals = 0;
4314 uint32_t LiteralValue;
4315
4316 for (int OpIdx : OpIndices) {
4317 if (OpIdx == -1)
4318 continue;
4319
4320 const MCOperand &MO = Inst.getOperand(OpIdx);
4321 if (!MO.isImm() && !MO.isExpr())
4322 continue;
4323 if (!isSISrcOperand(Desc, OpIdx))
4324 continue;
4325
4326 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4327 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4328 if (NumLiterals == 0 || LiteralValue != Value) {
4329 LiteralValue = Value;
4330 ++NumLiterals;
4331 }
4332 } else if (MO.isExpr()) {
4333 ++NumExprs;
4334 }
4335 }
4336 NumLiterals += NumExprs;
4337
4338 if (!NumLiterals)
4339 return true;
4340
4341 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4342 Error(getLitLoc(Operands), "literal operands are not supported");
4343 return false;
4344 }
4345
4346 if (NumLiterals > 1) {
4347 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4348 return false;
4349 }
4350
4351 return true;
4352}
4353
4354// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4355static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4356 const MCRegisterInfo *MRI) {
4357 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4358 if (OpIdx < 0)
4359 return -1;
4360
4361 const MCOperand &Op = Inst.getOperand(OpIdx);
4362 if (!Op.isReg())
4363 return -1;
4364
4365 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4366 auto Reg = Sub ? Sub : Op.getReg();
4367 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4368 return AGPR32.contains(Reg) ? 1 : 0;
4369}
4370
4371bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4372 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4373 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4374 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4375 SIInstrFlags::DS)) == 0)
4376 return true;
4377
4378 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4379 : AMDGPU::OpName::vdata;
4380
4381 const MCRegisterInfo *MRI = getMRI();
4382 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4383 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4384
4385 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4386 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4387 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4388 return false;
4389 }
4390
4391 auto FB = getFeatureBits();
4392 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4393 if (DataAreg < 0 || DstAreg < 0)
4394 return true;
4395 return DstAreg == DataAreg;
4396 }
4397
4398 return DstAreg < 1 && DataAreg < 1;
4399}
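// Illustrative: on gfx90a a DS read-write whose destination is an AGPR but
// whose data operand is a VGPR fails the DstAreg == DataAreg check above,
// while keeping both operands in the same register file passes (register
// classes sketched; see the FeatureGFX90AInsts handling above).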
4400
4401bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4402 auto FB = getFeatureBits();
4403 if (!FB[AMDGPU::FeatureGFX90AInsts])
4404 return true;
4405
4406 const MCRegisterInfo *MRI = getMRI();
4407 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4408 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4409 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4410 const MCOperand &Op = Inst.getOperand(I);
4411 if (!Op.isReg())
4412 continue;
4413
4414 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4415 if (!Sub)
4416 continue;
4417
4418 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4419 return false;
4420 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4421 return false;
4422 }
4423
4424 return true;
4425}
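// Illustrative: on gfx90a a 64-bit register tuple such as v[2:3] starts on an
// even VGPR and satisfies this rule, while v[3:4] starts on an odd VGPR and
// is rejected (example register tuples only).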
4426
4427SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4428 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4429 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4430 if (Op.isBLGP())
4431 return Op.getStartLoc();
4432 }
4433 return SMLoc();
4434}
4435
4436bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4437 const OperandVector &Operands) {
4438 unsigned Opc = Inst.getOpcode();
4439 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4440 if (BlgpIdx == -1)
4441 return true;
4442 SMLoc BLGPLoc = getBLGPLoc(Operands);
4443 if (!BLGPLoc.isValid())
4444 return true;
4445 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4446 auto FB = getFeatureBits();
4447 bool UsesNeg = false;
4448 if (FB[AMDGPU::FeatureGFX940Insts]) {
4449 switch (Opc) {
4450 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4451 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4452 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4453 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4454 UsesNeg = true;
4455 }
4456 }
4457
4458 if (IsNeg == UsesNeg)
4459 return true;
4460
4461 Error(BLGPLoc,
4462 UsesNeg ? "invalid modifier: blgp is not supported"
4463 : "invalid modifier: neg is not supported");
4464
4465 return false;
4466}
4467
4468bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4469 const OperandVector &Operands) {
4470 if (!isGFX11Plus())
4471 return true;
4472
4473 unsigned Opc = Inst.getOpcode();
4474 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4475 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4476 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4477 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4478 return true;
4479
4480 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4481 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4482 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4483 if (Reg == AMDGPU::SGPR_NULL)
4484 return true;
4485
4486 SMLoc RegLoc = getRegLoc(Reg, Operands);
4487 Error(RegLoc, "src0 must be null");
4488 return false;
4489}
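// Illustrative: on gfx11 "s_waitcnt_vscnt null, 0x0" is accepted, while
// "s_waitcnt_vscnt s0, 0x0" names a real SGPR and triggers the
// "src0 must be null" diagnostic above (immediate value chosen arbitrarily).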
4490
4491// gfx90a has an undocumented limitation:
4492// DS_GWS opcodes must use even aligned registers.
4493bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4494 const OperandVector &Operands) {
4495 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4496 return true;
4497
4498 int Opc = Inst.getOpcode();
4499 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4500 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4501 return true;
4502
4503 const MCRegisterInfo *MRI = getMRI();
4504 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4505 int Data0Pos =
4506 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4507 assert(Data0Pos != -1);
4508 auto Reg = Inst.getOperand(Data0Pos).getReg();
4509 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4510 if (RegIdx & 1) {
4511 SMLoc RegLoc = getRegLoc(Reg, Operands);
4512 Error(RegLoc, "vgpr must be even aligned");
4513 return false;
4514 }
4515
4516 return true;
4517}
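// Illustrative: on gfx90a a ds_gws_init whose data0 operand lives in v2 (an
// even register) passes this check, while the same instruction with v1 is
// rejected with "vgpr must be even aligned" (register choice for illustration
// only).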
4518
4519bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4520 const OperandVector &Operands,
4521 const SMLoc &IDLoc) {
4522 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4523 AMDGPU::OpName::cpol);
4524 if (CPolPos == -1)
4525 return true;
4526
4527 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4528
4529 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4530 if (TSFlags & SIInstrFlags::SMRD) {
4531 if (CPol && (isSI() || isCI())) {
4532 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4533 Error(S, "cache policy is not supported for SMRD instructions");
4534 return false;
4535 }
4536 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4537 Error(IDLoc, "invalid cache policy for SMEM instruction");
4538 return false;
4539 }
4540 }
4541
4542 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4543 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4544 StringRef CStr(S.getPointer());
4545 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4546 Error(S, "scc is not supported on this GPU");
4547 return false;
4548 }
4549
4550 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4551 return true;
4552
4553 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4554 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4555 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4556 : "instruction must use glc");
4557 return false;
4558 }
4559 } else {
4560 if (CPol & CPol::GLC) {
4561 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4562 StringRef CStr(S.getPointer());
4563 S = SMLoc::getFromPointer(
4564 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4565 Error(S, isGFX940() ? "instruction must not use sc0"
4566 : "instruction must not use glc");
4567 return false;
4568 }
4569 }
4570
4571 return true;
4572}
4573
4574bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4575 if (!isGFX11Plus())
4576 return true;
4577 for (auto &Operand : Operands) {
4578 if (!Operand->isReg())
4579 continue;
4580 unsigned Reg = Operand->getReg();
4581 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4582 Error(getRegLoc(Reg, Operands),
4583 "execz and vccz are not supported on this GPU");
4584 return false;
4585 }
4586 }
4587 return true;
4588}
4589
4590bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
4591 const OperandVector &Operands) {
4592 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4593 if (Desc.mayStore() &&
4594 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4595 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
4596 if (Loc != getInstLoc(Operands)) {
4597 Error(Loc, "TFE modifier has no meaning for store instructions");
4598 return false;
4599 }
4600 }
4601
4602 return true;
4603}
4604
4605bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4606 const SMLoc &IDLoc,
4607 const OperandVector &Operands) {
4608 if (auto ErrMsg = validateLdsDirect(Inst)) {
4609 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4610 return false;
4611 }
4612 if (!validateSOPLiteral(Inst)) {
4613 Error(getLitLoc(Operands),
4614 "only one unique literal operand is allowed");
4615 return false;
4616 }
4617 if (!validateVOPLiteral(Inst, Operands)) {
4618 return false;
4619 }
4620 if (!validateConstantBusLimitations(Inst, Operands)) {
4621 return false;
4622 }
4623 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
4624 return false;
4625 }
4626 if (!validateIntClampSupported(Inst)) {
4627 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4628 "integer clamping is not supported on this GPU");
4629 return false;
4630 }
4631 if (!validateOpSel(Inst)) {
4632 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4633 "invalid op_sel operand");
4634 return false;
4635 }
4636 if (!validateDPP(Inst, Operands)) {
4637 return false;
4638 }
4639 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4640 if (!validateMIMGD16(Inst)) {
4641 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4642 "d16 modifier is not supported on this GPU");
4643 return false;
4644 }
4645 if (!validateMIMGMSAA(Inst)) {
4646 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4647 "invalid dim; must be MSAA type");
4648 return false;
4649 }
4650 if (!validateMIMGDataSize(Inst, IDLoc)) {
4651 return false;
4652 }
4653 if (!validateMIMGAddrSize(Inst)) {
4654 Error(IDLoc,
4655 "image address size does not match dim and a16");
4656 return false;
4657 }
4658 if (!validateMIMGAtomicDMask(Inst)) {
4659 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4660 "invalid atomic image dmask");
4661 return false;
4662 }
4663 if (!validateMIMGGatherDMask(Inst)) {
4664 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4665 "invalid image_gather dmask: only one bit must be set");
4666 return false;
4667 }
4668 if (!validateMovrels(Inst, Operands)) {
4669 return false;
4670 }
4671 if (!validateFlatOffset(Inst, Operands)) {
4672 return false;
4673 }
4674 if (!validateSMEMOffset(Inst, Operands)) {
4675 return false;
4676 }
4677 if (!validateMAIAccWrite(Inst, Operands)) {
4678 return false;
4679 }
4680 if (!validateMAISrc2(Inst, Operands)) {
4681 return false;
4682 }
4683 if (!validateMFMA(Inst, Operands)) {
4684 return false;
4685 }
4686 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4687 return false;
4688 }
4689
4690 if (!validateAGPRLdSt(Inst)) {
4691 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4692 ? "invalid register class: data and dst should be all VGPR or AGPR"
4693 : "invalid register class: agpr loads and stores not supported on this GPU"
4694 );
4695 return false;
4696 }
4697 if (!validateVGPRAlign(Inst)) {
4698 Error(IDLoc,
4699 "invalid register class: vgpr tuples must be 64 bit aligned");
4700 return false;
4701 }
4702 if (!validateGWS(Inst, Operands)) {
4703 return false;
4704 }
4705
4706 if (!validateBLGP(Inst, Operands)) {
4707 return false;
4708 }
4709
4710 if (!validateDivScale(Inst)) {
4711 Error(IDLoc, "ABS not allowed in VOP3B instructions");
4712 return false;
4713 }
4714 if (!validateWaitCnt(Inst, Operands)) {
4715 return false;
4716 }
4717 if (!validateExeczVcczOperands(Operands)) {
4718 return false;
4719 }
4720 if (!validateTFE(Inst, Operands)) {
4721 return false;
4722 }
4723
4724 return true;
4725}
4726
4727static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4728 const FeatureBitset &FBS,
4729 unsigned VariantID = 0);
4730
4731static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4732 const FeatureBitset &AvailableFeatures,
4733 unsigned VariantID);
4734
4735bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4736 const FeatureBitset &FBS) {
4737 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4738}
4739
4740bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4741 const FeatureBitset &FBS,
4742 ArrayRef<unsigned> Variants) {
4743 for (auto Variant : Variants) {
4744 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4745 return true;
4746 }
4747
4748 return false;
4749}
4750
4751bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4752 const SMLoc &IDLoc) {
4753 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
4754
4755 // Check if requested instruction variant is supported.
4756 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4757 return false;
4758
4759 // This instruction is not supported.
4760 // Clear any other pending errors because they are no longer relevant.
4761 getParser().clearPendingErrors();
4762
4763 // Requested instruction variant is not supported.
4764 // Check if any other variants are supported.
4765 StringRef VariantName = getMatchedVariantName();
4766 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4767 return Error(IDLoc,
4768 Twine(VariantName,
4769 " variant of this instruction is not supported"));
4770 }
4771
4772 // Check if this instruction may be used with a different wavesize.
4773 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
4774 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
4775
4776 FeatureBitset FeaturesWS32 = getFeatureBits();
4777 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
4778 .flip(AMDGPU::FeatureWavefrontSize32);
4779 FeatureBitset AvailableFeaturesWS32 =
4780 ComputeAvailableFeatures(FeaturesWS32);
4781
4782 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
4783 return Error(IDLoc, "instruction requires wavesize=32");
4784 }
4785
4786 // Finally check if this instruction is supported on any other GPU.
4787 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4788 return Error(IDLoc, "instruction not supported on this GPU");
4789 }
4790
4791 // Instruction not supported on any GPU. Probably a typo.
4792 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4793 return Error(IDLoc, "invalid instruction" + Suggestion);
4794}
4795
4796static bool isInvalidVOPDY(const OperandVector &Operands,
4797 uint64_t InvalidOprIdx) {
4798 assert(InvalidOprIdx < Operands.size());
4799 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
4800 if (Op.isToken() && InvalidOprIdx > 1) {
4801 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
4802 return PrevOp.isToken() && PrevOp.getToken() == "::";
4803 }
4804 return false;
4805}
4806
4807bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4808 OperandVector &Operands,
4809 MCStreamer &Out,
4810 uint64_t &ErrorInfo,
4811 bool MatchingInlineAsm) {
4812 MCInst Inst;
4813 unsigned Result = Match_Success;
4814 for (auto Variant : getMatchedVariants()) {
4815 uint64_t EI;
4816 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4817 Variant);
4818 // We order match statuses from least to most specific. We use most specific
4819 // status as resulting
4820 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4821 if ((R == Match_Success) ||
4822 (R == Match_PreferE32) ||
4823 (R == Match_MissingFeature && Result != Match_PreferE32) ||
4824 (R == Match_InvalidOperand && Result != Match_MissingFeature
4825 && Result != Match_PreferE32) ||
4826 (R == Match_MnemonicFail && Result != Match_InvalidOperand
4827 && Result != Match_MissingFeature
4828 && Result != Match_PreferE32)) {
4829 Result = R;
4830 ErrorInfo = EI;
4831 }
4832 if (R == Match_Success)
4833 break;
4834 }
4835
4836 if (Result == Match_Success) {
4837 if (!validateInstruction(Inst, IDLoc, Operands)) {
4838 return true;
4839 }
4840 Inst.setLoc(IDLoc);
4841 Out.emitInstruction(Inst, getSTI());
4842 return false;
4843 }
4844
4845 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4846 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4847 return true;
4848 }
4849
4850 switch (Result) {
4851 default: break;
4852 case Match_MissingFeature:
4853 // It has been verified that the specified instruction
4854 // mnemonic is valid. A match was found but it requires
4855 // features which are not supported on this GPU.
4856 return Error(IDLoc, "operands are not valid for this GPU or mode");
4857
4858 case Match_InvalidOperand: {
4859 SMLoc ErrorLoc = IDLoc;
4860 if (ErrorInfo != ~0ULL) {
4861 if (ErrorInfo >= Operands.size()) {
4862 return Error(IDLoc, "too few operands for instruction");
4863 }
4864 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4865 if (ErrorLoc == SMLoc())
4866 ErrorLoc = IDLoc;
4867
4868 if (isInvalidVOPDY(Operands, ErrorInfo))
4869 return Error(ErrorLoc, "invalid VOPDY instruction");
4870 }
4871 return Error(ErrorLoc, "invalid operand for instruction");
4872 }
4873
4874 case Match_PreferE32:
4875 return Error(IDLoc, "internal error: instruction without _e64 suffix "
4876 "should be encoded as e32");
4877 case Match_MnemonicFail:
4878 llvm_unreachable("Invalid instructions should have been handled already");
4879 }
4880 llvm_unreachable("Implement any new match types added!");
4881}
4882
4883bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4884 int64_t Tmp = -1;
4885 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4886 return true;
4887 }
4888 if (getParser().parseAbsoluteExpression(Tmp)) {
4889 return true;
4890 }
4891 Ret = static_cast<uint32_t>(Tmp);
4892 return false;
4893}
4894
4895bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4896 uint32_t &Minor) {
4897 if (ParseAsAbsoluteExpression(Major))
4898 return TokError("invalid major version");
4899
4900 if (!trySkipToken(AsmToken::Comma))
4901 return TokError("minor version number required, comma expected");
4902
4903 if (ParseAsAbsoluteExpression(Minor))
4904 return TokError("invalid minor version");
4905
4906 return false;
4907}
4908
4909bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4910 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4911 return TokError("directive only supported for amdgcn architecture");
4912
4913 std::string TargetIDDirective;
4914 SMLoc TargetStart = getTok().getLoc();
4915 if (getParser().parseEscapedString(TargetIDDirective))
4916 return true;
4917
4918 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4919 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4920 return getParser().Error(TargetRange.Start,
4921 (Twine(".amdgcn_target directive's target id ") +
4922 Twine(TargetIDDirective) +
4923 Twine(" does not match the specified target id ") +
4924 Twine(getTargetStreamer().getTargetID()->toString())).str());
4925
4926 return false;
4927}
4928
4929bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4930 return Error(Range.Start, "value out of range", Range);
4931}
4932
4933bool AMDGPUAsmParser::calculateGPRBlocks(
4934 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4935 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
4936 unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
4937 SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4938 // TODO(scott.linder): These calculations are duplicated from
4939 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4940 IsaVersion Version = getIsaVersion(getSTI().getCPU());
4941
4942 unsigned NumVGPRs = NextFreeVGPR;
4943 unsigned NumSGPRs = NextFreeSGPR;
4944
4945 if (Version.Major >= 10)
4946 NumSGPRs = 0;
4947 else {
4948 unsigned MaxAddressableNumSGPRs =
4949 IsaInfo::getAddressableNumSGPRs(&getSTI());
4950
4951 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4952 NumSGPRs > MaxAddressableNumSGPRs)
4953 return OutOfRangeError(SGPRRange);
4954
4955 NumSGPRs +=
4956 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4957
4958 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4959 NumSGPRs > MaxAddressableNumSGPRs)
4960 return OutOfRangeError(SGPRRange);
4961
4962 if (Features.test(FeatureSGPRInitBug))
4963 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4964 }
4965
4966 VGPRBlocks =
4967 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4968 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4969
4970 return false;
4971}
4972
4973bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4974 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4975 return TokError("directive only supported for amdgcn architecture");
4976
4977 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4978 return TokError("directive only supported for amdhsa OS");
4979
4980 StringRef KernelName;
4981 if (getParser().parseIdentifier(KernelName))
4982 return true;
4983
4984 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4985
4986 StringSet<> Seen;
4987
4988 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4989
4990 SMRange VGPRRange;
4991 uint64_t NextFreeVGPR = 0;
4992 uint64_t AccumOffset = 0;
4993 uint64_t SharedVGPRCount = 0;
4994 SMRange SGPRRange;
4995 uint64_t NextFreeSGPR = 0;
4996
4997 // Count the number of user SGPRs implied from the enabled feature bits.
4998 unsigned ImpliedUserSGPRCount = 0;
4999
5000 // Track if the asm explicitly contains the directive for the user SGPR
5001 // count.
5002 std::optional<unsigned> ExplicitUserSGPRCount;
5003 bool ReserveVCC = true;
5004 bool ReserveFlatScr = true;
5005 std::optional<bool> EnableWavefrontSize32;
5006
5007 while (true) {
5008 while (trySkipToken(AsmToken::EndOfStatement));
5009
5010 StringRef ID;
5011 SMRange IDRange = getTok().getLocRange();
5012 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5013 return true;
5014
5015 if (ID == ".end_amdhsa_kernel")
5016 break;
5017
5018 if (!Seen.insert(ID).second)
5019 return TokError(".amdhsa_ directives cannot be repeated");
5020
5021 SMLoc ValStart = getLoc();
5022 int64_t IVal;
5023 if (getParser().parseAbsoluteExpression(IVal))
5024 return true;
5025 SMLoc ValEnd = getLoc();
5026 SMRange ValRange = SMRange(ValStart, ValEnd);
5027
5028 if (IVal < 0)
5029 return OutOfRangeError(ValRange);
5030
5031 uint64_t Val = IVal;
5032
5033#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5034 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
5035 return OutOfRangeError(RANGE); \
5036 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
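// The macro above range-checks VALUE against the ENTRY bit-field width and
// then packs it into FIELD; e.g. a .amdhsa_ directive value of 2 for a 1-bit
// field is reported through OutOfRangeError rather than being silently
// truncated (illustrative reading of the macro, values arbitrary).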
5037
5038 if (ID == ".amdhsa_group_segment_fixed_size") {
5039 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
5040 return OutOfRangeError(ValRange);
5041 KD.group_segment_fixed_size = Val;
5042 } else if (ID == ".amdhsa_private_segment_fixed_size") {
5043 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
5044 return OutOfRangeError(ValRange);
5045 KD.private_segment_fixed_size = Val;
5046 } else if (ID == ".amdhsa_kernarg_size") {
5047 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
5048 return OutOfRangeError(ValRange);
5049 KD.kernarg_size = Val;
5050 } else if (ID == ".amdhsa_user_sgpr_count") {
5051 ExplicitUserSGPRCount = Val;
5052 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5053 if (hasArchitectedFlatScratch())
5054 return Error(IDRange.Start,
5055 "directive is not supported with architected flat scratch",
5056 IDRange);
5057 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5058 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5059 Val, ValRange);
5060 if (Val)
5061 ImpliedUserSGPRCount += 4;
5062 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5063 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5064 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
5065 ValRange);
5066 if (Val)
5067 ImpliedUserSGPRCount += 2;
5068 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5069 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5070 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
5071 ValRange);
5072 if (Val)
5073 ImpliedUserSGPRCount += 2;
5074 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5075 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5076 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5077 Val, ValRange);
5078 if (Val)
5079 ImpliedUserSGPRCount += 2;
5080 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5081 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5082 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
5083 ValRange);
5084 if (Val)
5085 ImpliedUserSGPRCount += 2;
5086 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5087 if (hasArchitectedFlatScratch())
5088 return Error(IDRange.Start,
5089 "directive is not supported with architected flat scratch",
5090 IDRange);
5091 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5092 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
5093 ValRange);
5094 if (Val)
5095 ImpliedUserSGPRCount += 2;
5096 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5097 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5098 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5099 Val, ValRange);
5100 if (Val)
5101 ImpliedUserSGPRCount += 1;
5102 } else if (ID == ".amdhsa_wavefront_size32") {
5103 if (IVersion.Major < 10)
5104 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5105 EnableWavefrontSize32 = Val;
5106 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5107 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5108 Val, ValRange);
5109 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5110 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5111 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
5112 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5113 if (hasArchitectedFlatScratch())
5114 return Error(IDRange.Start,
5115 "directive is not supported with architected flat scratch",
5116 IDRange);
5117 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5118 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5119 } else if (ID == ".amdhsa_enable_private_segment") {
5120 if (!hasArchitectedFlatScratch())
5121 return Error(
5122 IDRange.Start,
5123 "directive is not supported without architected flat scratch",
5124 IDRange);
5125 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5126 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5127 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5128 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5129 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5130 ValRange);
5131 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5132 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5133 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5134 ValRange);
5135 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5136 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5137 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5138 ValRange);
5139 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5140 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5141 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5142 ValRange);
5143 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5144 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5145 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5146 ValRange);
5147 } else if (ID == ".amdhsa_next_free_vgpr") {
5148 VGPRRange = ValRange;
5149 NextFreeVGPR = Val;
5150 } else if (ID == ".amdhsa_next_free_sgpr") {
5151 SGPRRange = ValRange;
5152 NextFreeSGPR = Val;
5153 } else if (ID == ".amdhsa_accum_offset") {
5154 if (!isGFX90A())
5155 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5156 AccumOffset = Val;
5157 } else if (ID == ".amdhsa_reserve_vcc") {
5158 if (!isUInt<1>(Val))
5159 return OutOfRangeError(ValRange);
5160 ReserveVCC = Val;
5161 } else if (ID == ".amdhsa_reserve_flat_scratch") {
5162 if (IVersion.Major < 7)
5163 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5164 if (hasArchitectedFlatScratch())
5165 return Error(IDRange.Start,
5166 "directive is not supported with architected flat scratch",
5167 IDRange);
5168 if (!isUInt<1>(Val))
5169 return OutOfRangeError(ValRange);
5170 ReserveFlatScr = Val;
5171 } else if (ID == ".amdhsa_reserve_xnack_mask") {
5172 if (IVersion.Major < 8)
5173 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5174 if (!isUInt<1>(Val))
5175 return OutOfRangeError(ValRange);
5176 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5177 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5178 IDRange);
5179 } else if (ID == ".amdhsa_float_round_mode_32") {
5180 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5181 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5182 } else if (ID == ".amdhsa_float_round_mode_16_64") {
5183 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5184 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5185 } else if (ID == ".amdhsa_float_denorm_mode_32") {
5186 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5187 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5188 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5189 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5190 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5191 ValRange);
5192 } else if (ID == ".amdhsa_dx10_clamp") {
5193 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5194 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5195 } else if (ID == ".amdhsa_ieee_mode") {
5196 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5197 Val, ValRange);
5198 } else if (ID == ".amdhsa_fp16_overflow") {
5199 if (IVersion.Major < 9)
5200 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5201 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5202 ValRange);
5203 } else if (ID == ".amdhsa_tg_split") {
5204 if (!isGFX90A())
5205 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5206 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5207 ValRange);
5208 } else if (ID == ".amdhsa_workgroup_processor_mode") {
5209 if (IVersion.Major < 10)
5210 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5211 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5212 ValRange);
5213 } else if (ID == ".amdhsa_memory_ordered") {
5214 if (IVersion.Major < 10)
5215 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5216 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5217 ValRange);
5218 } else if (ID == ".amdhsa_forward_progress") {
5219 if (IVersion.Major < 10)
5220 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5221 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5222 ValRange);
5223 } else if (ID == ".amdhsa_shared_vgpr_count") {
5224 if (IVersion.Major < 10)
5225 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5226 SharedVGPRCount = Val;
5227 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5228 COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
5229 ValRange);
5230 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5231