LLVM 17.0.0git
AMDGPUAsmParser.cpp
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
12#include "SIDefines.h"
13#include "SIInstrInfo.h"
14#include "SIRegisterInfo.h"
19#include "llvm/ADT/APFloat.h"
21#include "llvm/ADT/StringSet.h"
22#include "llvm/ADT/Twine.h"
24#include "llvm/MC/MCAsmInfo.h"
25#include "llvm/MC/MCContext.h"
26#include "llvm/MC/MCExpr.h"
27#include "llvm/MC/MCInst.h"
28#include "llvm/MC/MCInstrDesc.h"
33#include "llvm/MC/MCSymbol.h"
41#include <optional>
42
43using namespace llvm;
44using namespace llvm::AMDGPU;
45using namespace llvm::amdhsa;
46
47namespace {
48
49class AMDGPUAsmParser;
50
51enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
52
53//===----------------------------------------------------------------------===//
54// Operand
55//===----------------------------------------------------------------------===//
56
57class AMDGPUOperand : public MCParsedAsmOperand {
58 enum KindTy {
59 Token,
60 Immediate,
61 Register,
62 Expression
63 } Kind;
64
65 SMLoc StartLoc, EndLoc;
66 const AMDGPUAsmParser *AsmParser;
67
68public:
69 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
70 : Kind(Kind_), AsmParser(AsmParser_) {}
71
72 using Ptr = std::unique_ptr<AMDGPUOperand>;
73
74 struct Modifiers {
75 bool Abs = false;
76 bool Neg = false;
77 bool Sext = false;
78
79 bool hasFPModifiers() const { return Abs || Neg; }
80 bool hasIntModifiers() const { return Sext; }
81 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
82
83 int64_t getFPModifiersOperand() const {
84 int64_t Operand = 0;
85 Operand |= Abs ? SISrcMods::ABS : 0u;
86 Operand |= Neg ? SISrcMods::NEG : 0u;
87 return Operand;
88 }
89
90 int64_t getIntModifiersOperand() const {
91 int64_t Operand = 0;
92 Operand |= Sext ? SISrcMods::SEXT : 0u;
93 return Operand;
94 }
95
96 int64_t getModifiersOperand() const {
97 assert(!(hasFPModifiers() && hasIntModifiers())
98 && "fp and int modifiers should not be used simultaneously");
99 if (hasFPModifiers()) {
100 return getFPModifiersOperand();
101 } else if (hasIntModifiers()) {
102 return getIntModifiersOperand();
103 } else {
104 return 0;
105 }
106 }
107
108 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
109 };
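// Illustrative note (editor's addition): for a source operand written as
// "-|v1|" the parser sets Abs and Neg, so getModifiersOperand() returns
// SISrcMods::ABS | SISrcMods::NEG, while "sext(v1)" sets Sext and yields
// SISrcMods::SEXT. FP and integer modifiers are never combined on one operand.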
110
111 enum ImmTy {
112 ImmTyNone,
113 ImmTyGDS,
114 ImmTyLDS,
115 ImmTyOffen,
116 ImmTyIdxen,
117 ImmTyAddr64,
118 ImmTyOffset,
119 ImmTyInstOffset,
120 ImmTyOffset0,
121 ImmTyOffset1,
122 ImmTyCPol,
123 ImmTySWZ,
124 ImmTyTFE,
125 ImmTyD16,
126 ImmTyClampSI,
127 ImmTyOModSI,
128 ImmTySdwaDstSel,
129 ImmTySdwaSrc0Sel,
130 ImmTySdwaSrc1Sel,
131 ImmTySdwaDstUnused,
132 ImmTyDMask,
133 ImmTyDim,
134 ImmTyUNorm,
135 ImmTyDA,
136 ImmTyR128A16,
137 ImmTyA16,
138 ImmTyLWE,
139 ImmTyExpTgt,
140 ImmTyExpCompr,
141 ImmTyExpVM,
142 ImmTyFORMAT,
143 ImmTyHwreg,
144 ImmTyOff,
145 ImmTySendMsg,
146 ImmTyInterpSlot,
147 ImmTyInterpAttr,
148 ImmTyAttrChan,
149 ImmTyOpSel,
150 ImmTyOpSelHi,
151 ImmTyNegLo,
152 ImmTyNegHi,
153 ImmTyDPP8,
154 ImmTyDppCtrl,
155 ImmTyDppRowMask,
156 ImmTyDppBankMask,
157 ImmTyDppBoundCtrl,
158 ImmTyDppFi,
159 ImmTySwizzle,
160 ImmTyGprIdxMode,
161 ImmTyHigh,
162 ImmTyBLGP,
163 ImmTyCBSZ,
164 ImmTyABID,
165 ImmTyEndpgm,
166 ImmTyWaitVDST,
167 ImmTyWaitEXP,
168 };
169
170 // Immediate operand kind.
171 // It helps to identify the location of an offending operand after an error.
172 // Note that regular literals and mandatory literals (KImm) must be handled
173 // differently. When looking for an offending operand, we should usually
174 // ignore mandatory literals because they are part of the instruction and
175 // cannot be changed. Report location of mandatory operands only for VOPD,
176 // when both OpX and OpY have a KImm and there are no other literals.
177 enum ImmKindTy {
178 ImmKindTyNone,
179 ImmKindTyLiteral,
180 ImmKindTyMandatoryLiteral,
181 ImmKindTyConst,
182 };
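// Illustration (editor's addition, not from the original source): in
// "v_add_f32 v0, 0x42280000, v1" the constant is a regular literal
// (ImmKindTyLiteral), whereas the trailing constant of
// "v_fmaak_f32 v0, v1, v2, 0x42280000" is a mandatory literal (KImm) and is
// recorded as ImmKindTyMandatoryLiteral.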
183
184private:
185 struct TokOp {
186 const char *Data;
187 unsigned Length;
188 };
189
190 struct ImmOp {
191 int64_t Val;
192 ImmTy Type;
193 bool IsFPImm;
194 mutable ImmKindTy Kind;
195 Modifiers Mods;
196 };
197
198 struct RegOp {
199 unsigned RegNo;
200 Modifiers Mods;
201 };
202
203 union {
204 TokOp Tok;
205 ImmOp Imm;
206 RegOp Reg;
207 const MCExpr *Expr;
208 };
209
210public:
211 bool isToken() const override { return Kind == Token; }
212
213 bool isSymbolRefExpr() const {
214 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
215 }
216
217 bool isImm() const override {
218 return Kind == Immediate;
219 }
220
221 void setImmKindNone() const {
222 assert(isImm());
223 Imm.Kind = ImmKindTyNone;
224 }
225
226 void setImmKindLiteral() const {
227 assert(isImm());
228 Imm.Kind = ImmKindTyLiteral;
229 }
230
231 void setImmKindMandatoryLiteral() const {
232 assert(isImm());
233 Imm.Kind = ImmKindTyMandatoryLiteral;
234 }
235
236 void setImmKindConst() const {
237 assert(isImm());
238 Imm.Kind = ImmKindTyConst;
239 }
240
241 bool IsImmKindLiteral() const {
242 return isImm() && Imm.Kind == ImmKindTyLiteral;
243 }
244
245 bool IsImmKindMandatoryLiteral() const {
246 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
247 }
248
249 bool isImmKindConst() const {
250 return isImm() && Imm.Kind == ImmKindTyConst;
251 }
252
253 bool isInlinableImm(MVT type) const;
254 bool isLiteralImm(MVT type) const;
255
256 bool isRegKind() const {
257 return Kind == Register;
258 }
259
260 bool isReg() const override {
261 return isRegKind() && !hasModifiers();
262 }
263
264 bool isRegOrInline(unsigned RCID, MVT type) const {
265 return isRegClass(RCID) || isInlinableImm(type);
266 }
267
268 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
269 return isRegOrInline(RCID, type) || isLiteralImm(type);
270 }
271
272 bool isRegOrImmWithInt16InputMods() const {
273 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
274 }
275
276 bool isRegOrImmWithInt32InputMods() const {
277 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
278 }
279
280 bool isRegOrInlineImmWithInt16InputMods() const {
281 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
282 }
283
284 bool isRegOrInlineImmWithInt32InputMods() const {
285 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
286 }
287
288 bool isRegOrImmWithInt64InputMods() const {
289 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
290 }
291
292 bool isRegOrImmWithFP16InputMods() const {
293 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
294 }
295
296 bool isRegOrImmWithFP32InputMods() const {
297 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
298 }
299
300 bool isRegOrImmWithFP64InputMods() const {
301 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
302 }
303
304 bool isRegOrInlineImmWithFP16InputMods() const {
305 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
306 }
307
308 bool isRegOrInlineImmWithFP32InputMods() const {
309 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
310 }
311
312
313 bool isVReg() const {
314 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
315 isRegClass(AMDGPU::VReg_64RegClassID) ||
316 isRegClass(AMDGPU::VReg_96RegClassID) ||
317 isRegClass(AMDGPU::VReg_128RegClassID) ||
318 isRegClass(AMDGPU::VReg_160RegClassID) ||
319 isRegClass(AMDGPU::VReg_192RegClassID) ||
320 isRegClass(AMDGPU::VReg_256RegClassID) ||
321 isRegClass(AMDGPU::VReg_512RegClassID) ||
322 isRegClass(AMDGPU::VReg_1024RegClassID);
323 }
324
325 bool isVReg32() const {
326 return isRegClass(AMDGPU::VGPR_32RegClassID);
327 }
328
329 bool isVReg32OrOff() const {
330 return isOff() || isVReg32();
331 }
332
333 bool isNull() const {
334 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
335 }
336
337 bool isVRegWithInputMods() const;
338 bool isT16VRegWithInputMods() const;
339
340 bool isSDWAOperand(MVT type) const;
341 bool isSDWAFP16Operand() const;
342 bool isSDWAFP32Operand() const;
343 bool isSDWAInt16Operand() const;
344 bool isSDWAInt32Operand() const;
345
346 bool isImmTy(ImmTy ImmT) const {
347 return isImm() && Imm.Type == ImmT;
348 }
349
350 bool isImmModifier() const {
351 return isImm() && Imm.Type != ImmTyNone;
352 }
353
354 bool isClampSI() const { return isImmTy(ImmTyClampSI); }
355 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
356 bool isDMask() const { return isImmTy(ImmTyDMask); }
357 bool isDim() const { return isImmTy(ImmTyDim); }
358 bool isUNorm() const { return isImmTy(ImmTyUNorm); }
359 bool isDA() const { return isImmTy(ImmTyDA); }
360 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
361 bool isA16() const { return isImmTy(ImmTyA16); }
362 bool isLWE() const { return isImmTy(ImmTyLWE); }
363 bool isOff() const { return isImmTy(ImmTyOff); }
364 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
365 bool isExpVM() const { return isImmTy(ImmTyExpVM); }
366 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
367 bool isOffen() const { return isImmTy(ImmTyOffen); }
368 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
369 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
370 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
371 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
372 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
373
374 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
375 bool isGDS() const { return isImmTy(ImmTyGDS); }
376 bool isLDS() const { return isImmTy(ImmTyLDS); }
377 bool isCPol() const { return isImmTy(ImmTyCPol); }
378 bool isSWZ() const { return isImmTy(ImmTySWZ); }
379 bool isTFE() const { return isImmTy(ImmTyTFE); }
380 bool isD16() const { return isImmTy(ImmTyD16); }
381 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
382 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
383 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
384 bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
385 bool isFI() const { return isImmTy(ImmTyDppFi); }
386 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
387 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
388 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
389 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
390 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
391 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
392 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
393 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
394 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
395 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
396 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
397 bool isHigh() const { return isImmTy(ImmTyHigh); }
398
399 bool isRegOrImm() const {
400 return isReg() || isImm();
401 }
402
403 bool isRegClass(unsigned RCID) const;
404
405 bool isInlineValue() const;
406
407 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
408 return isRegOrInline(RCID, type) && !hasModifiers();
409 }
410
411 bool isSCSrcB16() const {
412 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
413 }
414
415 bool isSCSrcV2B16() const {
416 return isSCSrcB16();
417 }
418
419 bool isSCSrcB32() const {
420 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
421 }
422
423 bool isSCSrcB64() const {
424 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
425 }
426
427 bool isBoolReg() const;
428
429 bool isSCSrcF16() const {
430 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
431 }
432
433 bool isSCSrcV2F16() const {
434 return isSCSrcF16();
435 }
436
437 bool isSCSrcF32() const {
438 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
439 }
440
441 bool isSCSrcF64() const {
442 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
443 }
444
445 bool isSSrcB32() const {
446 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
447 }
448
449 bool isSSrcB16() const {
450 return isSCSrcB16() || isLiteralImm(MVT::i16);
451 }
452
453 bool isSSrcV2B16() const {
454 llvm_unreachable("cannot happen");
455 return isSSrcB16();
456 }
457
458 bool isSSrcB64() const {
459 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
460 // See isVSrc64().
461 return isSCSrcB64() || isLiteralImm(MVT::i64);
462 }
463
464 bool isSSrcF32() const {
465 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
466 }
467
468 bool isSSrcF64() const {
469 return isSCSrcB64() || isLiteralImm(MVT::f64);
470 }
471
472 bool isSSrcF16() const {
473 return isSCSrcB16() || isLiteralImm(MVT::f16);
474 }
475
476 bool isSSrcV2F16() const {
477 llvm_unreachable("cannot happen");
478 return isSSrcF16();
479 }
480
481 bool isSSrcV2FP32() const {
482 llvm_unreachable("cannot happen");
483 return isSSrcF32();
484 }
485
486 bool isSCSrcV2FP32() const {
487 llvm_unreachable("cannot happen");
488 return isSCSrcF32();
489 }
490
491 bool isSSrcV2INT32() const {
492 llvm_unreachable("cannot happen");
493 return isSSrcB32();
494 }
495
496 bool isSCSrcV2INT32() const {
497 llvm_unreachable("cannot happen");
498 return isSCSrcB32();
499 }
500
501 bool isSSrcOrLdsB32() const {
502 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
503 isLiteralImm(MVT::i32) || isExpr();
504 }
505
506 bool isVCSrcB32() const {
507 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
508 }
509
510 bool isVCSrcB64() const {
511 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
512 }
513
514 bool isVCSrcTB16_Lo128() const {
515 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
516 }
517
518 bool isVCSrcB16() const {
519 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
520 }
521
522 bool isVCSrcV2B16() const {
523 return isVCSrcB16();
524 }
525
526 bool isVCSrcF32() const {
527 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
528 }
529
530 bool isVCSrcF64() const {
531 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
532 }
533
534 bool isVCSrcTF16_Lo128() const {
535 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
536 }
537
538 bool isVCSrcF16() const {
539 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
540 }
541
542 bool isVCSrcV2F16() const {
543 return isVCSrcF16();
544 }
545
546 bool isVSrcB32() const {
547 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
548 }
549
550 bool isVSrcB64() const {
551 return isVCSrcF64() || isLiteralImm(MVT::i64);
552 }
553
554 bool isVSrcTB16_Lo128() const {
555 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
556 }
557
558 bool isVSrcB16() const {
559 return isVCSrcB16() || isLiteralImm(MVT::i16);
560 }
561
562 bool isVSrcV2B16() const {
563 return isVSrcB16() || isLiteralImm(MVT::v2i16);
564 }
565
566 bool isVCSrcV2FP32() const {
567 return isVCSrcF64();
568 }
569
570 bool isVSrcV2FP32() const {
571 return isVSrcF64() || isLiteralImm(MVT::v2f32);
572 }
573
574 bool isVCSrcV2INT32() const {
575 return isVCSrcB64();
576 }
577
578 bool isVSrcV2INT32() const {
579 return isVSrcB64() || isLiteralImm(MVT::v2i32);
580 }
581
582 bool isVSrcF32() const {
583 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
584 }
585
586 bool isVSrcF64() const {
587 return isVCSrcF64() || isLiteralImm(MVT::f64);
588 }
589
590 bool isVSrcTF16_Lo128() const {
591 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
592 }
593
594 bool isVSrcF16() const {
595 return isVCSrcF16() || isLiteralImm(MVT::f16);
596 }
597
598 bool isVSrcV2F16() const {
599 return isVSrcF16() || isLiteralImm(MVT::v2f16);
600 }
601
602 bool isVISrcB32() const {
603 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
604 }
605
606 bool isVISrcB16() const {
607 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
608 }
609
610 bool isVISrcV2B16() const {
611 return isVISrcB16();
612 }
613
614 bool isVISrcF32() const {
615 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
616 }
617
618 bool isVISrcF16() const {
619 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
620 }
621
622 bool isVISrcV2F16() const {
623 return isVISrcF16() || isVISrcB32();
624 }
625
626 bool isVISrc_64B64() const {
627 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
628 }
629
630 bool isVISrc_64F64() const {
631 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
632 }
633
634 bool isVISrc_64V2FP32() const {
635 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
636 }
637
638 bool isVISrc_64V2INT32() const {
639 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
640 }
641
642 bool isVISrc_256B64() const {
643 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
644 }
645
646 bool isVISrc_256F64() const {
647 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
648 }
649
650 bool isVISrc_128B16() const {
651 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
652 }
653
654 bool isVISrc_128V2B16() const {
655 return isVISrc_128B16();
656 }
657
658 bool isVISrc_128B32() const {
659 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
660 }
661
662 bool isVISrc_128F32() const {
663 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
664 }
665
666 bool isVISrc_256V2FP32() const {
667 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
668 }
669
670 bool isVISrc_256V2INT32() const {
671 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
672 }
673
674 bool isVISrc_512B32() const {
675 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
676 }
677
678 bool isVISrc_512B16() const {
679 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
680 }
681
682 bool isVISrc_512V2B16() const {
683 return isVISrc_512B16();
684 }
685
686 bool isVISrc_512F32() const {
687 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
688 }
689
690 bool isVISrc_512F16() const {
691 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
692 }
693
694 bool isVISrc_512V2F16() const {
695 return isVISrc_512F16() || isVISrc_512B32();
696 }
697
698 bool isVISrc_1024B32() const {
699 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
700 }
701
702 bool isVISrc_1024B16() const {
703 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
704 }
705
706 bool isVISrc_1024V2B16() const {
707 return isVISrc_1024B16();
708 }
709
710 bool isVISrc_1024F32() const {
711 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
712 }
713
714 bool isVISrc_1024F16() const {
715 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
716 }
717
718 bool isVISrc_1024V2F16() const {
719 return isVISrc_1024F16() || isVISrc_1024B32();
720 }
721
722 bool isAISrcB32() const {
723 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
724 }
725
726 bool isAISrcB16() const {
727 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
728 }
729
730 bool isAISrcV2B16() const {
731 return isAISrcB16();
732 }
733
734 bool isAISrcF32() const {
735 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
736 }
737
738 bool isAISrcF16() const {
739 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
740 }
741
742 bool isAISrcV2F16() const {
743 return isAISrcF16() || isAISrcB32();
744 }
745
746 bool isAISrc_64B64() const {
747 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
748 }
749
750 bool isAISrc_64F64() const {
751 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
752 }
753
754 bool isAISrc_128B32() const {
755 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
756 }
757
758 bool isAISrc_128B16() const {
759 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
760 }
761
762 bool isAISrc_128V2B16() const {
763 return isAISrc_128B16();
764 }
765
766 bool isAISrc_128F32() const {
767 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
768 }
769
770 bool isAISrc_128F16() const {
771 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
772 }
773
774 bool isAISrc_128V2F16() const {
775 return isAISrc_128F16() || isAISrc_128B32();
776 }
777
778 bool isVISrc_128F16() const {
779 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
780 }
781
782 bool isVISrc_128V2F16() const {
783 return isVISrc_128F16() || isVISrc_128B32();
784 }
785
786 bool isAISrc_256B64() const {
787 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
788 }
789
790 bool isAISrc_256F64() const {
791 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
792 }
793
794 bool isAISrc_512B32() const {
795 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
796 }
797
798 bool isAISrc_512B16() const {
799 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
800 }
801
802 bool isAISrc_512V2B16() const {
803 return isAISrc_512B16();
804 }
805
806 bool isAISrc_512F32() const {
807 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
808 }
809
810 bool isAISrc_512F16() const {
811 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
812 }
813
814 bool isAISrc_512V2F16() const {
815 return isAISrc_512F16() || isAISrc_512B32();
816 }
817
818 bool isAISrc_1024B32() const {
819 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
820 }
821
822 bool isAISrc_1024B16() const {
823 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
824 }
825
826 bool isAISrc_1024V2B16() const {
827 return isAISrc_1024B16();
828 }
829
830 bool isAISrc_1024F32() const {
831 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
832 }
833
834 bool isAISrc_1024F16() const {
835 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
836 }
837
838 bool isAISrc_1024V2F16() const {
839 return isAISrc_1024F16() || isAISrc_1024B32();
840 }
841
842 bool isKImmFP32() const {
843 return isLiteralImm(MVT::f32);
844 }
845
846 bool isKImmFP16() const {
847 return isLiteralImm(MVT::f16);
848 }
849
850 bool isMem() const override {
851 return false;
852 }
853
854 bool isExpr() const {
855 return Kind == Expression;
856 }
857
858 bool isSoppBrTarget() const {
859 return isExpr() || isImm();
860 }
861
862 bool isSWaitCnt() const;
863 bool isDepCtr() const;
864 bool isSDelayAlu() const;
865 bool isHwreg() const;
866 bool isSendMsg() const;
867 bool isSwizzle() const;
868 bool isSMRDOffset8() const;
869 bool isSMEMOffset() const;
870 bool isSMRDLiteralOffset() const;
871 bool isDPP8() const;
872 bool isDPPCtrl() const;
873 bool isBLGP() const;
874 bool isCBSZ() const;
875 bool isABID() const;
876 bool isGPRIdxMode() const;
877 bool isS16Imm() const;
878 bool isU16Imm() const;
879 bool isEndpgm() const;
880 bool isWaitVDST() const;
881 bool isWaitEXP() const;
882
883 StringRef getToken() const {
884 assert(isToken());
885 return StringRef(Tok.Data, Tok.Length);
886 }
887
888 int64_t getImm() const {
889 assert(isImm());
890 return Imm.Val;
891 }
892
893 void setImm(int64_t Val) {
894 assert(isImm());
895 Imm.Val = Val;
896 }
897
898 ImmTy getImmTy() const {
899 assert(isImm());
900 return Imm.Type;
901 }
902
903 unsigned getReg() const override {
904 assert(isRegKind());
905 return Reg.RegNo;
906 }
907
908 SMLoc getStartLoc() const override {
909 return StartLoc;
910 }
911
912 SMLoc getEndLoc() const override {
913 return EndLoc;
914 }
915
916 SMRange getLocRange() const {
917 return SMRange(StartLoc, EndLoc);
918 }
919
920 Modifiers getModifiers() const {
921 assert(isRegKind() || isImmTy(ImmTyNone));
922 return isRegKind() ? Reg.Mods : Imm.Mods;
923 }
924
925 void setModifiers(Modifiers Mods) {
926 assert(isRegKind() || isImmTy(ImmTyNone));
927 if (isRegKind())
928 Reg.Mods = Mods;
929 else
930 Imm.Mods = Mods;
931 }
932
933 bool hasModifiers() const {
934 return getModifiers().hasModifiers();
935 }
936
937 bool hasFPModifiers() const {
938 return getModifiers().hasFPModifiers();
939 }
940
941 bool hasIntModifiers() const {
942 return getModifiers().hasIntModifiers();
943 }
944
945 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
946
947 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
948
949 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
950
951 template <unsigned Bitwidth>
952 void addKImmFPOperands(MCInst &Inst, unsigned N) const;
953
954 void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
955 addKImmFPOperands<16>(Inst, N);
956 }
957
958 void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
959 addKImmFPOperands<32>(Inst, N);
960 }
961
962 void addRegOperands(MCInst &Inst, unsigned N) const;
963
964 void addBoolRegOperands(MCInst &Inst, unsigned N) const {
965 addRegOperands(Inst, N);
966 }
967
968 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
969 if (isRegKind())
970 addRegOperands(Inst, N);
971 else if (isExpr())
972 Inst.addOperand(MCOperand::createExpr(Expr));
973 else
974 addImmOperands(Inst, N);
975 }
976
977 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
978 Modifiers Mods = getModifiers();
979 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
980 if (isRegKind()) {
981 addRegOperands(Inst, N);
982 } else {
983 addImmOperands(Inst, N, false);
984 }
985 }
986
987 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
988 assert(!hasIntModifiers());
989 addRegOrImmWithInputModsOperands(Inst, N);
990 }
991
992 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
993 assert(!hasFPModifiers());
994 addRegOrImmWithInputModsOperands(Inst, N);
995 }
996
997 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
998 Modifiers Mods = getModifiers();
999 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1000 assert(isRegKind());
1001 addRegOperands(Inst, N);
1002 }
1003
1004 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1005 assert(!hasIntModifiers());
1006 addRegWithInputModsOperands(Inst, N);
1007 }
1008
1009 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1010 assert(!hasFPModifiers());
1011 addRegWithInputModsOperands(Inst, N);
1012 }
1013
1014 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
1015 if (isImm())
1016 addImmOperands(Inst, N);
1017 else {
1018 assert(isExpr());
1019 Inst.addOperand(MCOperand::createExpr(Expr));
1020 }
1021 }
1022
1023 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1024 switch (Type) {
1025 case ImmTyNone: OS << "None"; break;
1026 case ImmTyGDS: OS << "GDS"; break;
1027 case ImmTyLDS: OS << "LDS"; break;
1028 case ImmTyOffen: OS << "Offen"; break;
1029 case ImmTyIdxen: OS << "Idxen"; break;
1030 case ImmTyAddr64: OS << "Addr64"; break;
1031 case ImmTyOffset: OS << "Offset"; break;
1032 case ImmTyInstOffset: OS << "InstOffset"; break;
1033 case ImmTyOffset0: OS << "Offset0"; break;
1034 case ImmTyOffset1: OS << "Offset1"; break;
1035 case ImmTyCPol: OS << "CPol"; break;
1036 case ImmTySWZ: OS << "SWZ"; break;
1037 case ImmTyTFE: OS << "TFE"; break;
1038 case ImmTyD16: OS << "D16"; break;
1039 case ImmTyFORMAT: OS << "FORMAT"; break;
1040 case ImmTyClampSI: OS << "ClampSI"; break;
1041 case ImmTyOModSI: OS << "OModSI"; break;
1042 case ImmTyDPP8: OS << "DPP8"; break;
1043 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1044 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1045 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1046 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1047 case ImmTyDppFi: OS << "FI"; break;
1048 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1049 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1050 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1051 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1052 case ImmTyDMask: OS << "DMask"; break;
1053 case ImmTyDim: OS << "Dim"; break;
1054 case ImmTyUNorm: OS << "UNorm"; break;
1055 case ImmTyDA: OS << "DA"; break;
1056 case ImmTyR128A16: OS << "R128A16"; break;
1057 case ImmTyA16: OS << "A16"; break;
1058 case ImmTyLWE: OS << "LWE"; break;
1059 case ImmTyOff: OS << "Off"; break;
1060 case ImmTyExpTgt: OS << "ExpTgt"; break;
1061 case ImmTyExpCompr: OS << "ExpCompr"; break;
1062 case ImmTyExpVM: OS << "ExpVM"; break;
1063 case ImmTyHwreg: OS << "Hwreg"; break;
1064 case ImmTySendMsg: OS << "SendMsg"; break;
1065 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1066 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1067 case ImmTyAttrChan: OS << "AttrChan"; break;
1068 case ImmTyOpSel: OS << "OpSel"; break;
1069 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1070 case ImmTyNegLo: OS << "NegLo"; break;
1071 case ImmTyNegHi: OS << "NegHi"; break;
1072 case ImmTySwizzle: OS << "Swizzle"; break;
1073 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1074 case ImmTyHigh: OS << "High"; break;
1075 case ImmTyBLGP: OS << "BLGP"; break;
1076 case ImmTyCBSZ: OS << "CBSZ"; break;
1077 case ImmTyABID: OS << "ABID"; break;
1078 case ImmTyEndpgm: OS << "Endpgm"; break;
1079 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1080 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1081 }
1082 }
1083
1084 void print(raw_ostream &OS) const override {
1085 switch (Kind) {
1086 case Register:
1087 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1088 break;
1089 case Immediate:
1090 OS << '<' << getImm();
1091 if (getImmTy() != ImmTyNone) {
1092 OS << " type: "; printImmTy(OS, getImmTy());
1093 }
1094 OS << " mods: " << Imm.Mods << '>';
1095 break;
1096 case Token:
1097 OS << '\'' << getToken() << '\'';
1098 break;
1099 case Expression:
1100 OS << "<expr " << *Expr << '>';
1101 break;
1102 }
1103 }
1104
1105 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1106 int64_t Val, SMLoc Loc,
1107 ImmTy Type = ImmTyNone,
1108 bool IsFPImm = false) {
1109 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1110 Op->Imm.Val = Val;
1111 Op->Imm.IsFPImm = IsFPImm;
1112 Op->Imm.Kind = ImmKindTyNone;
1113 Op->Imm.Type = Type;
1114 Op->Imm.Mods = Modifiers();
1115 Op->StartLoc = Loc;
1116 Op->EndLoc = Loc;
1117 return Op;
1118 }
1119
1120 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1121 StringRef Str, SMLoc Loc,
1122 bool HasExplicitEncodingSize = true) {
1123 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1124 Res->Tok.Data = Str.data();
1125 Res->Tok.Length = Str.size();
1126 Res->StartLoc = Loc;
1127 Res->EndLoc = Loc;
1128 return Res;
1129 }
1130
1131 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1132 unsigned RegNo, SMLoc S,
1133 SMLoc E) {
1134 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1135 Op->Reg.RegNo = RegNo;
1136 Op->Reg.Mods = Modifiers();
1137 Op->StartLoc = S;
1138 Op->EndLoc = E;
1139 return Op;
1140 }
1141
1142 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1143 const class MCExpr *Expr, SMLoc S) {
1144 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1145 Op->Expr = Expr;
1146 Op->StartLoc = S;
1147 Op->EndLoc = S;
1148 return Op;
1149 }
1150};
1151
1152raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1153 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1154 return OS;
1155}
1156
1157//===----------------------------------------------------------------------===//
1158// AsmParser
1159//===----------------------------------------------------------------------===//
1160
1161// Holds info related to the current kernel, e.g. the count of SGPRs used.
1162// A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
1163// next .amdgpu_hsa_kernel directive or at EOF.
1164class KernelScopeInfo {
1165 int SgprIndexUnusedMin = -1;
1166 int VgprIndexUnusedMin = -1;
1167 int AgprIndexUnusedMin = -1;
1168 MCContext *Ctx = nullptr;
1169 MCSubtargetInfo const *MSTI = nullptr;
1170
1171 void usesSgprAt(int i) {
1172 if (i >= SgprIndexUnusedMin) {
1173 SgprIndexUnusedMin = ++i;
1174 if (Ctx) {
1175 MCSymbol* const Sym =
1176 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1177 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1178 }
1179 }
1180 }
1181
1182 void usesVgprAt(int i) {
1183 if (i >= VgprIndexUnusedMin) {
1184 VgprIndexUnusedMin = ++i;
1185 if (Ctx) {
1186 MCSymbol* const Sym =
1187 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1188 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1189 VgprIndexUnusedMin);
1190 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1191 }
1192 }
1193 }
1194
1195 void usesAgprAt(int i) {
1196 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1197 if (!hasMAIInsts(*MSTI))
1198 return;
1199
1200 if (i >= AgprIndexUnusedMin) {
1201 AgprIndexUnusedMin = ++i;
1202 if (Ctx) {
1203 MCSymbol* const Sym =
1204 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1205 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1206
1207 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1208 MCSymbol* const vSym =
1209 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1210 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1211 VgprIndexUnusedMin);
1212 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1213 }
1214 }
1215 }
1216
1217public:
1218 KernelScopeInfo() = default;
1219
1220 void initialize(MCContext &Context) {
1221 Ctx = &Context;
1222 MSTI = Ctx->getSubtargetInfo();
1223
1224 usesSgprAt(SgprIndexUnusedMin = -1);
1225 usesVgprAt(VgprIndexUnusedMin = -1);
1226 if (hasMAIInsts(*MSTI)) {
1227 usesAgprAt(AgprIndexUnusedMin = -1);
1228 }
1229 }
1230
1231 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1232 unsigned RegWidth) {
1233 switch (RegKind) {
1234 case IS_SGPR:
1235 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1236 break;
1237 case IS_AGPR:
1238 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1239 break;
1240 case IS_VGPR:
1241 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1242 break;
1243 default:
1244 break;
1245 }
1246 }
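 // Worked example (editor's note): usesRegister(IS_VGPR, /*DwordRegIndex=*/3,
 // /*RegWidth=*/64) covers v[3:4], so usesVgprAt(4) raises VgprIndexUnusedMin
 // to 5 and the .kernel.vgpr_count symbol is updated accordingly.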
1247};
1248
1249class AMDGPUAsmParser : public MCTargetAsmParser {
1250 MCAsmParser &Parser;
1251
1252 unsigned ForcedEncodingSize = 0;
1253 bool ForcedDPP = false;
1254 bool ForcedSDWA = false;
1255 KernelScopeInfo KernelScope;
1256
1257 /// @name Auto-generated Match Functions
1258 /// {
1259
1260#define GET_ASSEMBLER_HEADER
1261#include "AMDGPUGenAsmMatcher.inc"
1262
1263 /// }
1264
1265private:
1266 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1267 bool OutOfRangeError(SMRange Range);
1268 /// Calculate VGPR/SGPR blocks required for given target, reserved
1269 /// registers, and user-specified NextFreeXGPR values.
1270 ///
1271 /// \param Features [in] Target features, used for bug corrections.
1272 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1273 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1274 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1275 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1276 /// descriptor field, if valid.
1277 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1278 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1279 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1280 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1281 /// \param VGPRBlocks [out] Result VGPR block count.
1282 /// \param SGPRBlocks [out] Result SGPR block count.
1283 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1284 bool FlatScrUsed, bool XNACKUsed,
1285 std::optional<bool> EnableWavefrontSize32,
1286 unsigned NextFreeVGPR, SMRange VGPRRange,
1287 unsigned NextFreeSGPR, SMRange SGPRRange,
1288 unsigned &VGPRBlocks, unsigned &SGPRBlocks);
1289 bool ParseDirectiveAMDGCNTarget();
1290 bool ParseDirectiveAMDHSAKernel();
1291 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1292 bool ParseDirectiveHSACodeObjectVersion();
1293 bool ParseDirectiveHSACodeObjectISA();
1294 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1295 bool ParseDirectiveAMDKernelCodeT();
1296 // TODO: Possibly make subtargetHasRegister const.
1297 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1298 bool ParseDirectiveAMDGPUHsaKernel();
1299
1300 bool ParseDirectiveISAVersion();
1301 bool ParseDirectiveHSAMetadata();
1302 bool ParseDirectivePALMetadataBegin();
1303 bool ParseDirectivePALMetadata();
1304 bool ParseDirectiveAMDGPULDS();
1305
1306 /// Common code to parse out a block of text (typically YAML) between start and
1307 /// end directives.
1308 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1309 const char *AssemblerDirectiveEnd,
1310 std::string &CollectString);
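 // For example (editor's note), this is how the YAML between
 // .amd_amdgpu_hsa_metadata and .end_amd_amdgpu_hsa_metadata is collected
 // before being forwarded to the target streamer.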
1311
1312 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1313 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1314 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1315 unsigned &RegNum, unsigned &RegWidth,
1316 bool RestoreOnFailure = false);
1317 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1318 unsigned &RegNum, unsigned &RegWidth,
1319 SmallVectorImpl<AsmToken> &Tokens);
1320 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1321 unsigned &RegWidth,
1322 SmallVectorImpl<AsmToken> &Tokens);
1323 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1324 unsigned &RegWidth,
1325 SmallVectorImpl<AsmToken> &Tokens);
1326 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1327 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1328 bool ParseRegRange(unsigned& Num, unsigned& Width);
1329 unsigned getRegularReg(RegisterKind RegKind,
1330 unsigned RegNum,
1331 unsigned RegWidth,
1332 SMLoc Loc);
1333
1334 bool isRegister();
1335 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1336 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1337 void initializeGprCountSymbol(RegisterKind RegKind);
1338 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1339 unsigned RegWidth);
1340 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1341 bool IsAtomic);
1342 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1343 bool IsGdsHardcoded);
1344
1345public:
1346 enum AMDGPUMatchResultTy {
1347 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1348 };
1349 enum OperandMode {
1350 OperandMode_Default,
1351 OperandMode_NSA,
1352 };
1353
1354 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1355
1356 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1357 const MCInstrInfo &MII,
1358 const MCTargetOptions &Options)
1359 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1361
1362 if (getFeatureBits().none()) {
1363 // Set default features.
1364 copySTI().ToggleFeature("southern-islands");
1365 }
1366
1367 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1368
1369 {
1370 // TODO: make those pre-defined variables read-only.
1371 // Currently there is no suitable machinery in core llvm-mc for this.
1372 // MCSymbol::isRedefinable is intended for another purpose, and
1373 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1374 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1375 MCContext &Ctx = getContext();
1376 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1377 MCSymbol *Sym =
1378 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1379 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1380 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1381 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1382 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1383 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1384 } else {
1385 MCSymbol *Sym =
1386 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1387 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1388 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1389 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1390 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1391 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1392 }
1393 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1394 initializeGprCountSymbol(IS_VGPR);
1395 initializeGprCountSymbol(IS_SGPR);
1396 } else
1397 KernelScope.initialize(getContext());
1398 }
1399 }
1400
1401 bool hasMIMG_R128() const {
1402 return AMDGPU::hasMIMG_R128(getSTI());
1403 }
1404
1405 bool hasPackedD16() const {
1406 return AMDGPU::hasPackedD16(getSTI());
1407 }
1408
1409 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1410
1411 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1412
1413 bool isSI() const {
1414 return AMDGPU::isSI(getSTI());
1415 }
1416
1417 bool isCI() const {
1418 return AMDGPU::isCI(getSTI());
1419 }
1420
1421 bool isVI() const {
1422 return AMDGPU::isVI(getSTI());
1423 }
1424
1425 bool isGFX9() const {
1426 return AMDGPU::isGFX9(getSTI());
1427 }
1428
1429 // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1430 bool isGFX90A() const {
1431 return AMDGPU::isGFX90A(getSTI());
1432 }
1433
1434 bool isGFX940() const {
1435 return AMDGPU::isGFX940(getSTI());
1436 }
1437
1438 bool isGFX9Plus() const {
1439 return AMDGPU::isGFX9Plus(getSTI());
1440 }
1441
1442 bool isGFX10() const {
1443 return AMDGPU::isGFX10(getSTI());
1444 }
1445
1446 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1447
1448 bool isGFX11() const {
1449 return AMDGPU::isGFX11(getSTI());
1450 }
1451
1452 bool isGFX11Plus() const {
1453 return AMDGPU::isGFX11Plus(getSTI());
1454 }
1455
1456 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1457
1458 bool isGFX10_BEncoding() const {
1459 return AMDGPU::isGFX10_BEncoding(getSTI());
1460 }
1461
1462 bool hasInv2PiInlineImm() const {
1463 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1464 }
1465
1466 bool hasFlatOffsets() const {
1467 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1468 }
1469
1470 bool hasArchitectedFlatScratch() const {
1471 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1472 }
1473
1474 bool hasSGPR102_SGPR103() const {
1475 return !isVI() && !isGFX9();
1476 }
1477
1478 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1479
1480 bool hasIntClamp() const {
1481 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1482 }
1483
1484 AMDGPUTargetStreamer &getTargetStreamer() {
1485 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1486 return static_cast<AMDGPUTargetStreamer &>(TS);
1487 }
1488
1489 const MCRegisterInfo *getMRI() const {
1490 // We need this const_cast because for some reason getContext() is not const
1491 // in MCAsmParser.
1492 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1493 }
1494
1495 const MCInstrInfo *getMII() const {
1496 return &MII;
1497 }
1498
1499 const FeatureBitset &getFeatureBits() const {
1500 return getSTI().getFeatureBits();
1501 }
1502
1503 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1504 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1505 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1506
1507 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1508 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1509 bool isForcedDPP() const { return ForcedDPP; }
1510 bool isForcedSDWA() const { return ForcedSDWA; }
1511 ArrayRef<unsigned> getMatchedVariants() const;
1512 StringRef getMatchedVariantName() const;
1513
1514 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1515 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1516 bool RestoreOnFailure);
1517 bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
1518 SMLoc &EndLoc) override;
1519 OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
1520 SMLoc &EndLoc) override;
1521 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1522 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1523 unsigned Kind) override;
1524 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1525 OperandVector &Operands, MCStreamer &Out,
1526 uint64_t &ErrorInfo,
1527 bool MatchingInlineAsm) override;
1528 bool ParseDirective(AsmToken DirectiveID) override;
1529 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1530 OperandMode Mode = OperandMode_Default);
1531 StringRef parseMnemonicSuffix(StringRef Name);
1532 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1533 SMLoc NameLoc, OperandVector &Operands) override;
1534 //bool ProcessInstruction(MCInst &Inst);
1535
1537
1538 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1539
1540 OperandMatchResultTy
1541 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1542 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1543 bool (*ConvertResult)(int64_t &) = nullptr);
1544
1545 OperandMatchResultTy
1546 parseOperandArrayWithPrefix(const char *Prefix,
1547 OperandVector &Operands,
1548 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1549 bool (*ConvertResult)(int64_t&) = nullptr);
1550
1551 OperandMatchResultTy
1552 parseNamedBit(StringRef Name, OperandVector &Operands,
1553 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1554 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1556 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1557 StringRef &Value,
1558 SMLoc &StringLoc);
1559
1560 bool isModifier();
1561 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1562 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1563 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1564 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1565 bool parseSP3NegModifier();
1566 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1568 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1569 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1570 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1571 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1572 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1573 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1574 OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1575 OperandMatchResultTy parseUfmt(int64_t &Format);
1576 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1577 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1579 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1580 OperandMatchResultTy parseNumericFormat(int64_t &Format);
1583 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1584 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1585
1586 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1587 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1588 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1589 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1590
1591 bool parseCnt(int64_t &IntVal);
1592 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1593
1594 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1595 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1596 OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
1597
1598 bool parseDelay(int64_t &Delay);
1599 OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);
1600
1602
1603private:
1604 struct OperandInfoTy {
1605 SMLoc Loc;
1606 int64_t Id;
1607 bool IsSymbolic = false;
1608 bool IsDefined = false;
1609
1610 OperandInfoTy(int64_t Id_) : Id(Id_) {}
1611 };
1612
1613 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1614 bool validateSendMsg(const OperandInfoTy &Msg,
1615 const OperandInfoTy &Op,
1616 const OperandInfoTy &Stream);
1617
1618 bool parseHwregBody(OperandInfoTy &HwReg,
1619 OperandInfoTy &Offset,
1620 OperandInfoTy &Width);
1621 bool validateHwreg(const OperandInfoTy &HwReg,
1622 const OperandInfoTy &Offset,
1623 const OperandInfoTy &Width);
1624
1625 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1626 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1627 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1628
1629 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1630 const OperandVector &Operands) const;
1631 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1632 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1633 SMLoc getLitLoc(const OperandVector &Operands,
1634 bool SearchMandatoryLiterals = false) const;
1635 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1636 SMLoc getConstLoc(const OperandVector &Operands) const;
1637 SMLoc getInstLoc(const OperandVector &Operands) const;
1638
1639 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1640 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1641 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1642 bool validateSOPLiteral(const MCInst &Inst) const;
1643 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1644 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1645 const OperandVector &Operands);
1646 bool validateIntClampSupported(const MCInst &Inst);
1647 bool validateMIMGAtomicDMask(const MCInst &Inst);
1648 bool validateMIMGGatherDMask(const MCInst &Inst);
1649 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1650 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1651 bool validateMIMGAddrSize(const MCInst &Inst);
1652 bool validateMIMGD16(const MCInst &Inst);
1653 bool validateMIMGMSAA(const MCInst &Inst);
1654 bool validateOpSel(const MCInst &Inst);
1655 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1656 bool validateVccOperand(unsigned Reg) const;
1657 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1658 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1659 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1660 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1661 bool validateAGPRLdSt(const MCInst &Inst) const;
1662 bool validateVGPRAlign(const MCInst &Inst) const;
1663 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1664 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1665 bool validateDivScale(const MCInst &Inst);
1666 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1667 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1668 const SMLoc &IDLoc);
1669 bool validateExeczVcczOperands(const OperandVector &Operands);
1670 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1671 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1672 unsigned getConstantBusLimit(unsigned Opcode) const;
1673 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1674 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1675 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1676
1677 bool isSupportedMnemo(StringRef Mnemo,
1678 const FeatureBitset &FBS);
1679 bool isSupportedMnemo(StringRef Mnemo,
1680 const FeatureBitset &FBS,
1681 ArrayRef<unsigned> Variants);
1682 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1683
1684 bool isId(const StringRef Id) const;
1685 bool isId(const AsmToken &Token, const StringRef Id) const;
1686 bool isToken(const AsmToken::TokenKind Kind) const;
1687 StringRef getId() const;
1688 bool trySkipId(const StringRef Id);
1689 bool trySkipId(const StringRef Pref, const StringRef Id);
1690 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1691 bool trySkipToken(const AsmToken::TokenKind Kind);
1692 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1693 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1694 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1695
1696 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1697 AsmToken::TokenKind getTokenKind() const;
1698 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1699 bool parseExpr(OperandVector &Operands);
1700 StringRef getTokenStr() const;
1701 AsmToken peekToken(bool ShouldSkipSpace = true);
1702 AsmToken getToken() const;
1703 SMLoc getLoc() const;
1704 void lex();
1705
1706public:
1707 void onBeginOfFile() override;
1708
1709 OperandMatchResultTy parseCustomOperand(OperandVector &Operands,
1710 unsigned MCK);
1711
1716 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1718
1719 bool parseSwizzleOperand(int64_t &Op,
1720 const unsigned MinVal,
1721 const unsigned MaxVal,
1722 const StringRef ErrMsg,
1723 SMLoc &Loc);
1724 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1725 const unsigned MinVal,
1726 const unsigned MaxVal,
1727 const StringRef ErrMsg);
1729 bool parseSwizzleOffset(int64_t &Imm);
1730 bool parseSwizzleMacro(int64_t &Imm);
1731 bool parseSwizzleQuadPerm(int64_t &Imm);
1732 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1733 bool parseSwizzleBroadcast(int64_t &Imm);
1734 bool parseSwizzleSwap(int64_t &Imm);
1735 bool parseSwizzleReverse(int64_t &Imm);
1736
1738 int64_t parseGPRIdxMacro();
1739
1740 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1741 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1742 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1743
1744 AMDGPUOperand::Ptr defaultCPol() const;
1745
1746 AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1747 AMDGPUOperand::Ptr defaultSMEMOffset() const;
1748 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1749 AMDGPUOperand::Ptr defaultFlatOffset() const;
1750
1751 OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1752
1753 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1754 OptionalImmIndexMap &OptionalIdx);
1755 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1756 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1757 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1758 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1759 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1760 OptionalImmIndexMap &OptionalIdx);
1761 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1762 OptionalImmIndexMap &OptionalIdx);
1763
1764 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1765 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1766
1767 void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1768 bool IsAtomic = false);
1769 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1770 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1771
1772 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1773
1774 bool parseDimId(unsigned &Encoding);
1778 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1779 int64_t parseDPPCtrlSel(StringRef Ctrl);
1780 int64_t parseDPPCtrlPerm();
1781 AMDGPUOperand::Ptr defaultRowMask() const;
1782 AMDGPUOperand::Ptr defaultBankMask() const;
1783 AMDGPUOperand::Ptr defaultDppBoundCtrl() const;
1784 AMDGPUOperand::Ptr defaultFI() const;
1785 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1786 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1787 cvtDPP(Inst, Operands, true);
1788 }
1789 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1790 bool IsDPP8 = false);
1791 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1792 cvtVOP3DPP(Inst, Operands, true);
1793 }
1794
1795 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1796 AMDGPUOperand::ImmTy Type);
1797 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1798 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1799 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1800 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1801 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1802 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1803 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1804 uint64_t BasicInstType,
1805 bool SkipDstVcc = false,
1806 bool SkipSrcVcc = false);
1807
1808 AMDGPUOperand::Ptr defaultBLGP() const;
1809 AMDGPUOperand::Ptr defaultCBSZ() const;
1810 AMDGPUOperand::Ptr defaultABID() const;
1811
1813 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1814
1815 AMDGPUOperand::Ptr defaultWaitVDST() const;
1816 AMDGPUOperand::Ptr defaultWaitEXP() const;
1818};
1819
1820} // end anonymous namespace
1821
1822// May be called with integer type with equivalent bitwidth.
1823static const fltSemantics *getFltSemantics(unsigned Size) {
1824 switch (Size) {
1825 case 4:
1826 return &APFloat::IEEEsingle();
1827 case 8:
1828 return &APFloat::IEEEdouble();
1829 case 2:
1830 return &APFloat::IEEEhalf();
1831 default:
1832 llvm_unreachable("unsupported fp type");
1833 }
1834}
1835
1836 static const fltSemantics *getFltSemantics(MVT VT) {
1837 return getFltSemantics(VT.getSizeInBits() / 8);
1838}
1839
1840 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1841 switch (OperandType) {
1842 case AMDGPU::OPERAND_REG_IMM_INT32:
1843 case AMDGPU::OPERAND_REG_IMM_FP32:
1844 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1845 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1846 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1847 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1848 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1849 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1850 case AMDGPU::OPERAND_REG_IMM_V2FP32:
1851 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1852 case AMDGPU::OPERAND_REG_IMM_V2INT32:
1853 case AMDGPU::OPERAND_KIMM32:
1854 return &APFloat::IEEEsingle();
1855 case AMDGPU::OPERAND_REG_IMM_INT64:
1856 case AMDGPU::OPERAND_REG_IMM_FP64:
1857 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1858 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1859 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1860 return &APFloat::IEEEdouble();
1861 case AMDGPU::OPERAND_REG_IMM_INT16:
1862 case AMDGPU::OPERAND_REG_IMM_FP16:
1863 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1864 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1865 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1866 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1867 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1868 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1869 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1870 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1871 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1872 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1873 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1874 case AMDGPU::OPERAND_KIMM16:
1875 return &APFloat::IEEEhalf();
1876 default:
1877 llvm_unreachable("unsupported fp type");
1878 }
1879}
1880
1881//===----------------------------------------------------------------------===//
1882// Operand
1883//===----------------------------------------------------------------------===//
1884
1885static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1886 bool Lost;
1887
1888 // Convert the literal to the semantics of the target type.
1889 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1890 APFloat::rmNearestTiesToEven,
1891 &Lost);
1892 // We allow precision loss but not overflow or underflow
1893 if (Status != APFloat::opOK &&
1894 Lost &&
1895 ((Status & APFloat::opOverflow) != 0 ||
1896 (Status & APFloat::opUnderflow) != 0)) {
1897 return false;
1898 }
1899
1900 return true;
1901}
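// Illustrative note (not in the original source): with VT == MVT::f16, a
// literal such as 0.1 converts with some precision loss and is accepted,
// while 1.0e10 overflows f16 (max ~65504) and is rejected, as is a value
// small enough to underflow.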
1902
1903static bool isSafeTruncation(int64_t Val, unsigned Size) {
1904 return isUIntN(Size, Val) || isIntN(Size, Val);
1905}
1906
1907static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1908 if (VT.getScalarType() == MVT::i16) {
1909 // FP immediate values are broken.
1910 return isInlinableIntLiteral(Val);
1911 }
1912
1913 // f16/v2f16 operands work correctly for all values.
1914 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1915}
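// Added reference note: the 16-bit inline-constant set mirrors the 32-bit
// one: small integers (roughly -16..64) plus a few FP values such as +-0.5,
// +-1.0, +-2.0, +-4.0, and 1/(2*pi) on targets with the Inv2Pi inline
// constant; see AMDGPU::isInlinableLiteral16 for the exact rules.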
1916
1917bool AMDGPUOperand::isInlinableImm(MVT type) const {
1918
1919 // This is a hack to enable named inline values like
1920 // shared_base with both 32-bit and 64-bit operands.
1921 // Note that these values are defined as
1922 // 32-bit operands only.
1923 if (isInlineValue()) {
1924 return true;
1925 }
1926
1927 if (!isImmTy(ImmTyNone)) {
1928 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1929 return false;
1930 }
1931 // TODO: We should avoid using host float here. It would be better to
1932 // check the float bit values which is what a few other places do.
1933 // We've had bot failures before due to weird NaN support on mips hosts.
1934
1935 APInt Literal(64, Imm.Val);
1936
1937 if (Imm.IsFPImm) { // We got fp literal token
1938 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1939 return AMDGPU::isInlinableLiteral64(Imm.Val,
1940 AsmParser->hasInv2PiInlineImm());
1941 }
1942
1943 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1944 if (!canLosslesslyConvertToFPType(FPLiteral, type))
1945 return false;
1946
1947 if (type.getScalarSizeInBits() == 16) {
1948 return isInlineableLiteralOp16(
1949 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1950 type, AsmParser->hasInv2PiInlineImm());
1951 }
1952
1953 // Check if single precision literal is inlinable
1954 return AMDGPU::isInlinableLiteral32(
1955 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1956 AsmParser->hasInv2PiInlineImm());
1957 }
1958
1959 // We got int literal token.
1960 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1961 return AMDGPU::isInlinableLiteral64(Imm.Val,
1962 AsmParser->hasInv2PiInlineImm());
1963 }
1964
1965 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1966 return false;
1967 }
1968
1969 if (type.getScalarSizeInBits() == 16) {
1970 return isInlineableLiteralOp16(
1971 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1972 type, AsmParser->hasInv2PiInlineImm());
1973 }
1974
1975 return AMDGPU::isInlinableLiteral32(
1976 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1977 AsmParser->hasInv2PiInlineImm());
1978}
1979
1980bool AMDGPUOperand::isLiteralImm(MVT type) const {
1981 // Check that this immediate can be added as a literal
1982 if (!isImmTy(ImmTyNone)) {
1983 return false;
1984 }
1985
1986 if (!Imm.IsFPImm) {
1987 // We got int literal token.
1988
1989 if (type == MVT::f64 && hasFPModifiers()) {
1990 // FP modifiers cannot be applied to int literals while preserving the same
1991 // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
1992 // ambiguity, these cases are disabled.
1993 return false;
1994 }
1995
1996 unsigned Size = type.getSizeInBits();
1997 if (Size == 64)
1998 Size = 32;
1999
2000 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2001 // types.
2002 return isSafeTruncation(Imm.Val, Size);
2003 }
2004
2005 // We got fp literal token
2006 if (type == MVT::f64) { // Expected 64-bit fp operand
2007 // The low 32 bits of such a literal are encoded as zeroes, but we accept these literals
2008 return true;
2009 }
2010
2011 if (type == MVT::i64) { // Expected 64-bit int operand
2012 // We don't allow fp literals in 64-bit integer instructions. It is
2013 // unclear how we should encode them.
2014 return false;
2015 }
2016
2017 // We allow fp literals with f16x2 operands assuming that the specified
2018 // literal goes into the lower half and the upper half is zero. We also
2019 // require that the literal may be losslessly converted to f16.
2020 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
2021 (type == MVT::v2i16)? MVT::i16 :
2022 (type == MVT::v2f32)? MVT::f32 : type;
2023
2024 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2025 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2026}
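// Example (illustrative): for a v2f16 operand, the literal "1.5" is accepted
// because it converts losslessly to f16 (0x3E00); as described above, it is
// assumed to occupy the low half of the packed value with the high half zero.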
2027
2028bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2029 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2030}
2031
2032bool AMDGPUOperand::isVRegWithInputMods() const {
2033 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2034 // GFX90A allows DPP on 64-bit operands.
2035 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2036 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
2037}
2038
2039bool AMDGPUOperand::isT16VRegWithInputMods() const {
2040 return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID);
2041}
2042
2043bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2044 if (AsmParser->isVI())
2045 return isVReg32();
2046 else if (AsmParser->isGFX9Plus())
2047 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2048 else
2049 return false;
2050}
2051
2052bool AMDGPUOperand::isSDWAFP16Operand() const {
2053 return isSDWAOperand(MVT::f16);
2054}
2055
2056bool AMDGPUOperand::isSDWAFP32Operand() const {
2057 return isSDWAOperand(MVT::f32);
2058}
2059
2060bool AMDGPUOperand::isSDWAInt16Operand() const {
2061 return isSDWAOperand(MVT::i16);
2062}
2063
2064bool AMDGPUOperand::isSDWAInt32Operand() const {
2065 return isSDWAOperand(MVT::i32);
2066}
2067
2068bool AMDGPUOperand::isBoolReg() const {
2069 auto FB = AsmParser->getFeatureBits();
2070 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2071 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2072}
2073
2074uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2075{
2076 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2077 assert(Size == 2 || Size == 4 || Size == 8);
2078
2079 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2080
2081 if (Imm.Mods.Abs) {
2082 Val &= ~FpSignMask;
2083 }
2084 if (Imm.Mods.Neg) {
2085 Val ^= FpSignMask;
2086 }
2087
2088 return Val;
2089}
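// Example (illustrative): for a 32-bit value, FpSignMask == 0x80000000.
// With abs, 0xBF800000 (-1.0f) becomes 0x3F800000 (1.0f); with neg, the
// sign bit is flipped instead.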
2090
2091void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2092 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2093 Inst.getNumOperands())) {
2094 addLiteralImmOperand(Inst, Imm.Val,
2095 ApplyModifiers &
2096 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2097 } else {
2098 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2099 Inst.addOperand(MCOperand::createImm(Imm.Val));
2100 setImmKindNone();
2101 }
2102}
2103
2104void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2105 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2106 auto OpNum = Inst.getNumOperands();
2107 // Check that this operand accepts literals
2108 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2109
2110 if (ApplyModifiers) {
2111 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2112 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2113 Val = applyInputFPModifiers(Val, Size);
2114 }
2115
2116 APInt Literal(64, Val);
2117 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2118
2119 if (Imm.IsFPImm) { // We got fp literal token
2120 switch (OpTy) {
2121 case AMDGPU::OPERAND_REG_IMM_INT64:
2122 case AMDGPU::OPERAND_REG_IMM_FP64:
2123 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2124 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2125 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2126 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2127 AsmParser->hasInv2PiInlineImm())) {
2128 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2129 setImmKindConst();
2130 return;
2131 }
2132
2133 // Non-inlineable
2134 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2135 // For fp operands we check if low 32 bits are zeros
2136 if (Literal.getLoBits(32) != 0) {
2137 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2138 "Can't encode literal as exact 64-bit floating-point operand. "
2139 "Low 32-bits will be set to zero");
2140 }
2141
2142 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2143 setImmKindLiteral();
2144 return;
2145 }
2146
2147 // We don't allow fp literals in 64-bit integer instructions. It is
2148 // unclear how we should encode them. This case should be checked earlier
2149 // in predicate methods (isLiteralImm())
2150 llvm_unreachable("fp literal in 64-bit integer instruction.");
2151
2152 case AMDGPU::OPERAND_REG_IMM_INT32:
2153 case AMDGPU::OPERAND_REG_IMM_FP32:
2154 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2155 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2156 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2157 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2158 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2159 case AMDGPU::OPERAND_REG_IMM_INT16:
2160 case AMDGPU::OPERAND_REG_IMM_FP16:
2161 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2162 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2163 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2164 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2165 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2166 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2167 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2168 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2169 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2170 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2171 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2172 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2173 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2174 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2175 case AMDGPU::OPERAND_REG_IMM_V2INT32:
2176 case AMDGPU::OPERAND_KIMM32:
2177 case AMDGPU::OPERAND_KIMM16: {
2178 bool lost;
2179 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2180 // Convert the literal to the operand's floating-point semantics
2181 FPLiteral.convert(*getOpFltSemantics(OpTy),
2182 APFloat::rmNearestTiesToEven, &lost);
2183 // We allow precision loss but not overflow or underflow. This should be
2184 // checked earlier in isLiteralImm()
2185
2186 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2187 Inst.addOperand(MCOperand::createImm(ImmVal));
2188 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2189 setImmKindMandatoryLiteral();
2190 } else {
2191 setImmKindLiteral();
2192 }
2193 return;
2194 }
2195 default:
2196 llvm_unreachable("invalid operand size");
2197 }
2198
2199 return;
2200 }
2201
2202 // We got int literal token.
2203 // Only sign extend inline immediates.
2204 switch (OpTy) {
2205 case AMDGPU::OPERAND_REG_IMM_INT32:
2206 case AMDGPU::OPERAND_REG_IMM_FP32:
2207 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2208 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2209 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2210 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2211 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2212 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2213 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2214 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2215 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2216 case AMDGPU::OPERAND_REG_IMM_V2INT32:
2217 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2218 if (isSafeTruncation(Val, 32) &&
2219 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2220 AsmParser->hasInv2PiInlineImm())) {
2221 Inst.addOperand(MCOperand::createImm(Val));
2222 setImmKindConst();
2223 return;
2224 }
2225
2226 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2227 setImmKindLiteral();
2228 return;
2229
2230 case AMDGPU::OPERAND_REG_IMM_INT64:
2231 case AMDGPU::OPERAND_REG_IMM_FP64:
2232 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2233 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2234 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2235 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2236 Inst.addOperand(MCOperand::createImm(Val));
2237 setImmKindConst();
2238 return;
2239 }
2240
2241 Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2242 setImmKindLiteral();
2243 return;
2244
2245 case AMDGPU::OPERAND_REG_IMM_INT16:
2246 case AMDGPU::OPERAND_REG_IMM_FP16:
2247 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2248 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2249 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2250 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2251 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2252 if (isSafeTruncation(Val, 16) &&
2253 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2254 AsmParser->hasInv2PiInlineImm())) {
2256 setImmKindConst();
2257 return;
2258 }
2259
2260 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2261 setImmKindLiteral();
2262 return;
2263
2264 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2265 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2266 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2267 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2268 assert(isSafeTruncation(Val, 16));
2269 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2270 AsmParser->hasInv2PiInlineImm()));
2271
2272 Inst.addOperand(MCOperand::createImm(Val));
2273 return;
2274 }
2275 case AMDGPU::OPERAND_KIMM32:
2276 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2277 setImmKindMandatoryLiteral();
2278 return;
2279 case AMDGPU::OPERAND_KIMM16:
2280 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2281 setImmKindMandatoryLiteral();
2282 return;
2283 default:
2284 llvm_unreachable("invalid operand size");
2285 }
2286}
2287
2288template <unsigned Bitwidth>
2289void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2290 APInt Literal(64, Imm.Val);
2291 setImmKindMandatoryLiteral();
2292
2293 if (!Imm.IsFPImm) {
2294 // We got int literal token.
2295 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2296 return;
2297 }
2298
2299 bool Lost;
2300 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2301 FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2302 APFloat::rmNearestTiesToEven, &Lost);
2303 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2304}
2305
2306void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2307 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2308}
2309
2310bool AMDGPUOperand::isInlineValue() const {
2311 return isRegKind() && ::isInlineValue(getReg());
2312}
2313
2314//===----------------------------------------------------------------------===//
2315// AsmParser
2316//===----------------------------------------------------------------------===//
2317
2318static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2319 if (Is == IS_VGPR) {
2320 switch (RegWidth) {
2321 default: return -1;
2322 case 32:
2323 return AMDGPU::VGPR_32RegClassID;
2324 case 64:
2325 return AMDGPU::VReg_64RegClassID;
2326 case 96:
2327 return AMDGPU::VReg_96RegClassID;
2328 case 128:
2329 return AMDGPU::VReg_128RegClassID;
2330 case 160:
2331 return AMDGPU::VReg_160RegClassID;
2332 case 192:
2333 return AMDGPU::VReg_192RegClassID;
2334 case 224:
2335 return AMDGPU::VReg_224RegClassID;
2336 case 256:
2337 return AMDGPU::VReg_256RegClassID;
2338 case 288:
2339 return AMDGPU::VReg_288RegClassID;
2340 case 320:
2341 return AMDGPU::VReg_320RegClassID;
2342 case 352:
2343 return AMDGPU::VReg_352RegClassID;
2344 case 384:
2345 return AMDGPU::VReg_384RegClassID;
2346 case 512:
2347 return AMDGPU::VReg_512RegClassID;
2348 case 1024:
2349 return AMDGPU::VReg_1024RegClassID;
2350 }
2351 } else if (Is == IS_TTMP) {
2352 switch (RegWidth) {
2353 default: return -1;
2354 case 32:
2355 return AMDGPU::TTMP_32RegClassID;
2356 case 64:
2357 return AMDGPU::TTMP_64RegClassID;
2358 case 128:
2359 return AMDGPU::TTMP_128RegClassID;
2360 case 256:
2361 return AMDGPU::TTMP_256RegClassID;
2362 case 512:
2363 return AMDGPU::TTMP_512RegClassID;
2364 }
2365 } else if (Is == IS_SGPR) {
2366 switch (RegWidth) {
2367 default: return -1;
2368 case 32:
2369 return AMDGPU::SGPR_32RegClassID;
2370 case 64:
2371 return AMDGPU::SGPR_64RegClassID;
2372 case 96:
2373 return AMDGPU::SGPR_96RegClassID;
2374 case 128:
2375 return AMDGPU::SGPR_128RegClassID;
2376 case 160:
2377 return AMDGPU::SGPR_160RegClassID;
2378 case 192:
2379 return AMDGPU::SGPR_192RegClassID;
2380 case 224:
2381 return AMDGPU::SGPR_224RegClassID;
2382 case 256:
2383 return AMDGPU::SGPR_256RegClassID;
2384 case 288:
2385 return AMDGPU::SGPR_288RegClassID;
2386 case 320:
2387 return AMDGPU::SGPR_320RegClassID;
2388 case 352:
2389 return AMDGPU::SGPR_352RegClassID;
2390 case 384:
2391 return AMDGPU::SGPR_384RegClassID;
2392 case 512:
2393 return AMDGPU::SGPR_512RegClassID;
2394 }
2395 } else if (Is == IS_AGPR) {
2396 switch (RegWidth) {
2397 default: return -1;
2398 case 32:
2399 return AMDGPU::AGPR_32RegClassID;
2400 case 64:
2401 return AMDGPU::AReg_64RegClassID;
2402 case 96:
2403 return AMDGPU::AReg_96RegClassID;
2404 case 128:
2405 return AMDGPU::AReg_128RegClassID;
2406 case 160:
2407 return AMDGPU::AReg_160RegClassID;
2408 case 192:
2409 return AMDGPU::AReg_192RegClassID;
2410 case 224:
2411 return AMDGPU::AReg_224RegClassID;
2412 case 256:
2413 return AMDGPU::AReg_256RegClassID;
2414 case 288:
2415 return AMDGPU::AReg_288RegClassID;
2416 case 320:
2417 return AMDGPU::AReg_320RegClassID;
2418 case 352:
2419 return AMDGPU::AReg_352RegClassID;
2420 case 384:
2421 return AMDGPU::AReg_384RegClassID;
2422 case 512:
2423 return AMDGPU::AReg_512RegClassID;
2424 case 1024:
2425 return AMDGPU::AReg_1024RegClassID;
2426 }
2427 }
2428 return -1;
2429}
2430
2431 static unsigned getSpecialRegForName(StringRef RegName) {
2432 return StringSwitch<unsigned>(RegName)
2433 .Case("exec", AMDGPU::EXEC)
2434 .Case("vcc", AMDGPU::VCC)
2435 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2436 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2437 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2438 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2439 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2440 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2441 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2442 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2443 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2444 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2445 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2446 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2447 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2448 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2449 .Case("m0", AMDGPU::M0)
2450 .Case("vccz", AMDGPU::SRC_VCCZ)
2451 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2452 .Case("execz", AMDGPU::SRC_EXECZ)
2453 .Case("src_execz", AMDGPU::SRC_EXECZ)
2454 .Case("scc", AMDGPU::SRC_SCC)
2455 .Case("src_scc", AMDGPU::SRC_SCC)
2456 .Case("tba", AMDGPU::TBA)
2457 .Case("tma", AMDGPU::TMA)
2458 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2459 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2460 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2461 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2462 .Case("vcc_lo", AMDGPU::VCC_LO)
2463 .Case("vcc_hi", AMDGPU::VCC_HI)
2464 .Case("exec_lo", AMDGPU::EXEC_LO)
2465 .Case("exec_hi", AMDGPU::EXEC_HI)
2466 .Case("tma_lo", AMDGPU::TMA_LO)
2467 .Case("tma_hi", AMDGPU::TMA_HI)
2468 .Case("tba_lo", AMDGPU::TBA_LO)
2469 .Case("tba_hi", AMDGPU::TBA_HI)
2470 .Case("pc", AMDGPU::PC_REG)
2471 .Case("null", AMDGPU::SGPR_NULL)
2472 .Default(AMDGPU::NoRegister);
2473}
2474
2475bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2476 SMLoc &EndLoc, bool RestoreOnFailure) {
2477 auto R = parseRegister();
2478 if (!R) return true;
2479 assert(R->isReg());
2480 RegNo = R->getReg();
2481 StartLoc = R->getStartLoc();
2482 EndLoc = R->getEndLoc();
2483 return false;
2484}
2485
2486bool AMDGPUAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2487 SMLoc &EndLoc) {
2488 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2489}
2490
2491OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(MCRegister &RegNo,
2492 SMLoc &StartLoc,
2493 SMLoc &EndLoc) {
2494 bool Result =
2495 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2496 bool PendingErrors = getParser().hasPendingError();
2497 getParser().clearPendingErrors();
2498 if (PendingErrors)
2499 return MatchOperand_ParseFail;
2500 if (Result)
2501 return MatchOperand_NoMatch;
2502 return MatchOperand_Success;
2503}
2504
2505bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2506 RegisterKind RegKind, unsigned Reg1,
2507 SMLoc Loc) {
2508 switch (RegKind) {
2509 case IS_SPECIAL:
2510 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2511 Reg = AMDGPU::EXEC;
2512 RegWidth = 64;
2513 return true;
2514 }
2515 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2516 Reg = AMDGPU::FLAT_SCR;
2517 RegWidth = 64;
2518 return true;
2519 }
2520 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2521 Reg = AMDGPU::XNACK_MASK;
2522 RegWidth = 64;
2523 return true;
2524 }
2525 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2526 Reg = AMDGPU::VCC;
2527 RegWidth = 64;
2528 return true;
2529 }
2530 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2531 Reg = AMDGPU::TBA;
2532 RegWidth = 64;
2533 return true;
2534 }
2535 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2536 Reg = AMDGPU::TMA;
2537 RegWidth = 64;
2538 return true;
2539 }
2540 Error(Loc, "register does not fit in the list");
2541 return false;
2542 case IS_VGPR:
2543 case IS_SGPR:
2544 case IS_AGPR:
2545 case IS_TTMP:
2546 if (Reg1 != Reg + RegWidth / 32) {
2547 Error(Loc, "registers in a list must have consecutive indices");
2548 return false;
2549 }
2550 RegWidth += 32;
2551 return true;
2552 default:
2553 llvm_unreachable("unexpected register kind");
2554 }
2555}
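// Example (illustrative): while parsing [s0,s1,s2,s3], each call checks that
// the next register index is consecutive and grows RegWidth by 32, ending at
// 128 bits; the special pair [exec_lo,exec_hi] folds into 'exec' (64 bits).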
2556
2557struct RegInfo {
2558 StringLiteral Name;
2559 RegisterKind Kind;
2560};
2561
2562static constexpr RegInfo RegularRegisters[] = {
2563 {{"v"}, IS_VGPR},
2564 {{"s"}, IS_SGPR},
2565 {{"ttmp"}, IS_TTMP},
2566 {{"acc"}, IS_AGPR},
2567 {{"a"}, IS_AGPR},
2568};
2569
2570static bool isRegularReg(RegisterKind Kind) {
2571 return Kind == IS_VGPR ||
2572 Kind == IS_SGPR ||
2573 Kind == IS_TTMP ||
2574 Kind == IS_AGPR;
2575}
2576
2577 static const RegInfo* getRegularRegInfo(StringRef Str) {
2578 for (const RegInfo &Reg : RegularRegisters)
2579 if (Str.startswith(Reg.Name))
2580 return &Reg;
2581 return nullptr;
2582}
2583
2584static bool getRegNum(StringRef Str, unsigned& Num) {
2585 return !Str.getAsInteger(10, Num);
2586}
2587
2588bool
2589AMDGPUAsmParser::isRegister(const AsmToken &Token,
2590 const AsmToken &NextToken) const {
2591
2592 // A list of consecutive registers: [s0,s1,s2,s3]
2593 if (Token.is(AsmToken::LBrac))
2594 return true;
2595
2596 if (!Token.is(AsmToken::Identifier))
2597 return false;
2598
2599 // A single register like s0 or a range of registers like s[0:1]
2600
2601 StringRef Str = Token.getString();
2602 const RegInfo *Reg = getRegularRegInfo(Str);
2603 if (Reg) {
2604 StringRef RegName = Reg->Name;
2605 StringRef RegSuffix = Str.substr(RegName.size());
2606 if (!RegSuffix.empty()) {
2607 unsigned Num;
2608 // A single register with an index: rXX
2609 if (getRegNum(RegSuffix, Num))
2610 return true;
2611 } else {
2612 // A range of registers: r[XX:YY].
2613 if (NextToken.is(AsmToken::LBrac))
2614 return true;
2615 }
2616 }
2617
2618 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2619}
2620
2621bool
2622AMDGPUAsmParser::isRegister()
2623{
2624 return isRegister(getToken(), peekToken());
2625}
2626
2627unsigned
2628AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2629 unsigned RegNum,
2630 unsigned RegWidth,
2631 SMLoc Loc) {
2632
2633 assert(isRegularReg(RegKind));
2634
2635 unsigned AlignSize = 1;
2636 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2637 // SGPR and TTMP registers must be aligned.
2638 // Max required alignment is 4 dwords.
2639 AlignSize = std::min(RegWidth / 32, 4u);
2640 }
2641
2642 if (RegNum % AlignSize != 0) {
2643 Error(Loc, "invalid register alignment");
2644 return AMDGPU::NoRegister;
2645 }
2646
2647 unsigned RegIdx = RegNum / AlignSize;
2648 int RCID = getRegClass(RegKind, RegWidth);
2649 if (RCID == -1) {
2650 Error(Loc, "invalid or unsupported register size");
2651 return AMDGPU::NoRegister;
2652 }
2653
2654 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2655 const MCRegisterClass RC = TRI->getRegClass(RCID);
2656 if (RegIdx >= RC.getNumRegs()) {
2657 Error(Loc, "register index is out of range");
2658 return AMDGPU::NoRegister;
2659 }
2660
2661 return RC.getRegister(RegIdx);
2662}
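// Example (illustrative): s[2:5] has RegWidth 128, so AlignSize is 4 and the
// base index 2 fails the alignment check; s[4:7] is accepted and maps to the
// SGPR_128 register with RegIdx 1. VGPRs use AlignSize 1 and never fail here.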
2663
2664bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2665 int64_t RegLo, RegHi;
2666 if (!skipToken(AsmToken::LBrac, "missing register index"))
2667 return false;
2668
2669 SMLoc FirstIdxLoc = getLoc();
2670 SMLoc SecondIdxLoc;
2671
2672 if (!parseExpr(RegLo))
2673 return false;
2674
2675 if (trySkipToken(AsmToken::Colon)) {
2676 SecondIdxLoc = getLoc();
2677 if (!parseExpr(RegHi))
2678 return false;
2679 } else {
2680 RegHi = RegLo;
2681 }
2682
2683 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2684 return false;
2685
2686 if (!isUInt<32>(RegLo)) {
2687 Error(FirstIdxLoc, "invalid register index");
2688 return false;
2689 }
2690
2691 if (!isUInt<32>(RegHi)) {
2692 Error(SecondIdxLoc, "invalid register index");
2693 return false;
2694 }
2695
2696 if (RegLo > RegHi) {
2697 Error(FirstIdxLoc, "first register index should not exceed second index");
2698 return false;
2699 }
2700
2701 Num = static_cast<unsigned>(RegLo);
2702 RegWidth = 32 * ((RegHi - RegLo) + 1);
2703 return true;
2704}
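// Example (illustrative): after the 's' or 'v' prefix, "[0:3]" yields Num=0
// and RegWidth=128, while "[5]" (no colon) yields Num=5 and RegWidth=32.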
2705
2706unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2707 unsigned &RegNum, unsigned &RegWidth,
2708 SmallVectorImpl<AsmToken> &Tokens) {
2709 assert(isToken(AsmToken::Identifier));
2710 unsigned Reg = getSpecialRegForName(getTokenStr());
2711 if (Reg) {
2712 RegNum = 0;
2713 RegWidth = 32;
2714 RegKind = IS_SPECIAL;
2715 Tokens.push_back(getToken());
2716 lex(); // skip register name
2717 }
2718 return Reg;
2719}
2720
2721unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2722 unsigned &RegNum, unsigned &RegWidth,
2723 SmallVectorImpl<AsmToken> &Tokens) {
2724 assert(isToken(AsmToken::Identifier));
2725 StringRef RegName = getTokenStr();
2726 auto Loc = getLoc();
2727
2728 const RegInfo *RI = getRegularRegInfo(RegName);
2729 if (!RI) {
2730 Error(Loc, "invalid register name");
2731 return AMDGPU::NoRegister;
2732 }
2733
2734 Tokens.push_back(getToken());
2735 lex(); // skip register name
2736
2737 RegKind = RI->Kind;
2738 StringRef RegSuffix = RegName.substr(RI->Name.size());
2739 if (!RegSuffix.empty()) {
2740 // Single 32-bit register: vXX.
2741 if (!getRegNum(RegSuffix, RegNum)) {
2742 Error(Loc, "invalid register index");
2743 return AMDGPU::NoRegister;
2744 }
2745 RegWidth = 32;
2746 } else {
2747 // Range of registers: v[XX:YY]. ":YY" is optional.
2748 if (!ParseRegRange(RegNum, RegWidth))
2749 return AMDGPU::NoRegister;
2750 }
2751
2752 return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2753}
2754
2755unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2756 unsigned &RegWidth,
2757 SmallVectorImpl<AsmToken> &Tokens) {
2758 unsigned Reg = AMDGPU::NoRegister;
2759 auto ListLoc = getLoc();
2760
2761 if (!skipToken(AsmToken::LBrac,
2762 "expected a register or a list of registers")) {
2763 return AMDGPU::NoRegister;
2764 }
2765
2766 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2767
2768 auto Loc = getLoc();
2769 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2770 return AMDGPU::NoRegister;
2771 if (RegWidth != 32) {
2772 Error(Loc, "expected a single 32-bit register");
2773 return AMDGPU::NoRegister;
2774 }
2775
2776 for (; trySkipToken(AsmToken::Comma); ) {
2777 RegisterKind NextRegKind;
2778 unsigned NextReg, NextRegNum, NextRegWidth;
2779 Loc = getLoc();
2780
2781 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2782 NextRegNum, NextRegWidth,
2783 Tokens)) {
2784 return AMDGPU::NoRegister;
2785 }
2786 if (NextRegWidth != 32) {
2787 Error(Loc, "expected a single 32-bit register");
2788 return AMDGPU::NoRegister;
2789 }
2790 if (NextRegKind != RegKind) {
2791 Error(Loc, "registers in a list must be of the same kind");
2792 return AMDGPU::NoRegister;
2793 }
2794 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2795 return AMDGPU::NoRegister;
2796 }
2797
2798 if (!skipToken(AsmToken::RBrac,
2799 "expected a comma or a closing square bracket")) {
2800 return AMDGPU::NoRegister;
2801 }
2802
2803 if (isRegularReg(RegKind))
2804 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2805
2806 return Reg;
2807}
2808
2809bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2810 unsigned &RegNum, unsigned &RegWidth,
2811 SmallVectorImpl<AsmToken> &Tokens) {
2812 auto Loc = getLoc();
2813 Reg = AMDGPU::NoRegister;
2814
2815 if (isToken(AsmToken::Identifier)) {
2816 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2817 if (Reg == AMDGPU::NoRegister)
2818 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2819 } else {
2820 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2821 }
2822
2823 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2824 if (Reg == AMDGPU::NoRegister) {
2825 assert(Parser.hasPendingError());
2826 return false;
2827 }
2828
2829 if (!subtargetHasRegister(*TRI, Reg)) {
2830 if (Reg == AMDGPU::SGPR_NULL) {
2831 Error(Loc, "'null' operand is not supported on this GPU");
2832 } else {
2833 Error(Loc, "register not available on this GPU");
2834 }
2835 return false;
2836 }
2837
2838 return true;
2839}
2840
2841bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2842 unsigned &RegNum, unsigned &RegWidth,
2843 bool RestoreOnFailure /*=false*/) {
2844 Reg = AMDGPU::NoRegister;
2845
2847 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2848 if (RestoreOnFailure) {
2849 while (!Tokens.empty()) {
2850 getLexer().UnLex(Tokens.pop_back_val());
2851 }
2852 }
2853 return true;
2854 }
2855 return false;
2856}
2857
2858std::optional<StringRef>
2859AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2860 switch (RegKind) {
2861 case IS_VGPR:
2862 return StringRef(".amdgcn.next_free_vgpr");
2863 case IS_SGPR:
2864 return StringRef(".amdgcn.next_free_sgpr");
2865 default:
2866 return std::nullopt;
2867 }
2868}
2869
2870void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2871 auto SymbolName = getGprCountSymbolName(RegKind);
2872 assert(SymbolName && "initializing invalid register kind");
2873 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2874 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2875}
2876
2877bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2878 unsigned DwordRegIndex,
2879 unsigned RegWidth) {
2880 // Symbols are only defined for GCN targets
2881 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2882 return true;
2883
2884 auto SymbolName = getGprCountSymbolName(RegKind);
2885 if (!SymbolName)
2886 return true;
2887 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2888
2889 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2890 int64_t OldCount;
2891
2892 if (!Sym->isVariable())
2893 return !Error(getLoc(),
2894 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2895 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2896 return !Error(
2897 getLoc(),
2898 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2899
2900 if (OldCount <= NewMax)
2901 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2902
2903 return true;
2904}
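// Example (illustrative): a use of v[0:3] gives DwordRegIndex=0 and
// RegWidth=128, so NewMax=3 and .amdgcn.next_free_vgpr is raised to 4 unless
// it is already larger.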
2905
2906std::unique_ptr<AMDGPUOperand>
2907AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2908 const auto &Tok = getToken();
2909 SMLoc StartLoc = Tok.getLoc();
2910 SMLoc EndLoc = Tok.getEndLoc();
2911 RegisterKind RegKind;
2912 unsigned Reg, RegNum, RegWidth;
2913
2914 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2915 return nullptr;
2916 }
2917 if (isHsaAbiVersion3AndAbove(&getSTI())) {
2918 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2919 return nullptr;
2920 } else
2921 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2922 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2923}
2924
2925 OperandMatchResultTy
2926 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2927 // TODO: add syntactic sugar for 1/(2*PI)
2928
2929 if (isRegister())
2930 return MatchOperand_NoMatch;
2931 assert(!isModifier());
2932
2933 const auto& Tok = getToken();
2934 const auto& NextTok = peekToken();
2935 bool IsReal = Tok.is(AsmToken::Real);
2936 SMLoc S = getLoc();
2937 bool Negate = false;
2938
2939 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2940 lex();
2941 IsReal = true;
2942 Negate = true;
2943 }
2944
2945 if (IsReal) {
2946 // Floating-point expressions are not supported.
2947 // Can only allow floating-point literals with an
2948 // optional sign.
2949
2950 StringRef Num = getTokenStr();
2951 lex();
2952
2953 APFloat RealVal(APFloat::IEEEdouble());
2954 auto roundMode = APFloat::rmNearestTiesToEven;
2955 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2956 return MatchOperand_ParseFail;
2957 }
2958 if (Negate)
2959 RealVal.changeSign();
2960
2961 Operands.push_back(
2962 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2963 AMDGPUOperand::ImmTyNone, true));
2964
2965 return MatchOperand_Success;
2966
2967 } else {
2968 int64_t IntVal;
2969 const MCExpr *Expr;
2970 SMLoc S = getLoc();
2971
2972 if (HasSP3AbsModifier) {
2973 // This is a workaround for handling expressions
2974 // as arguments of SP3 'abs' modifier, for example:
2975 // |1.0|
2976 // |-1|
2977 // |1+x|
2978 // This syntax is not compatible with syntax of standard
2979 // MC expressions (due to the trailing '|').
2980 SMLoc EndLoc;
2981 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2982 return MatchOperand_ParseFail;
2983 } else {
2984 if (Parser.parseExpression(Expr))
2985 return MatchOperand_ParseFail;
2986 }
2987
2988 if (Expr->evaluateAsAbsolute(IntVal)) {
2989 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2990 } else {
2991 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2992 }
2993
2994 return MatchOperand_Success;
2995 }
2996
2997 return MatchOperand_NoMatch;
2998}
2999
3000 OperandMatchResultTy
3001 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3002 if (!isRegister())
3003 return MatchOperand_NoMatch;
3004
3005 if (auto R = parseRegister()) {
3006 assert(R->isReg());
3007 Operands.push_back(std::move(R));
3008 return MatchOperand_Success;
3009 }
3011}
3012
3013 OperandMatchResultTy
3014 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
3015 auto res = parseReg(Operands);
3016 if (res != MatchOperand_NoMatch) {
3017 return res;
3018 } else if (isModifier()) {
3019 return MatchOperand_NoMatch;
3020 } else {
3021 return parseImm(Operands, HasSP3AbsMod);
3022 }
3023}
3024
3025bool
3026AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3027 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3028 const auto &str = Token.getString();
3029 return str == "abs" || str == "neg" || str == "sext";
3030 }
3031 return false;
3032}
3033
3034bool
3035AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3036 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3037}
3038
3039bool
3040AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3041 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3042}
3043
3044bool
3045AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3046 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3047}
3048
3049// Check if this is an operand modifier or an opcode modifier
3050// which may look like an expression but it is not. We should
3051// avoid parsing these modifiers as expressions. Currently
3052// recognized sequences are:
3053// |...|
3054// abs(...)
3055// neg(...)
3056// sext(...)
3057// -reg
3058// -|...|
3059// -abs(...)
3060// name:...
3061//
3062bool
3063AMDGPUAsmParser::isModifier() {
3064
3065 AsmToken Tok = getToken();
3066 AsmToken NextToken[2];
3067 peekTokens(NextToken);
3068
3069 return isOperandModifier(Tok, NextToken[0]) ||
3070 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3071 isOpcodeModifierWithVal(Tok, NextToken[0]);
3072}
3073
3074// Check if the current token is an SP3 'neg' modifier.
3075// Currently this modifier is allowed in the following context:
3076//
3077// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3078// 2. Before an 'abs' modifier: -abs(...)
3079// 3. Before an SP3 'abs' modifier: -|...|
3080//
3081// In all other cases "-" is handled as a part
3082// of an expression that follows the sign.
3083//
3084// Note: When "-" is followed by an integer literal,
3085// this is interpreted as integer negation rather
3086 // than a floating-point NEG modifier applied to the literal.
3087 // Besides being counter-intuitive, such use of the floating-point
3088 // NEG modifier would have resulted in different meanings
3089// of integer literals used with VOP1/2/C and VOP3,
3090// for example:
3091// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3092// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3093// Negative fp literals with preceding "-" are
3094// handled likewise for uniformity
3095//
3096bool
3097AMDGPUAsmParser::parseSP3NegModifier() {
3098
3099 AsmToken NextToken[2];
3100 peekTokens(NextToken);
3101
3102 if (isToken(AsmToken::Minus) &&
3103 (isRegister(NextToken[0], NextToken[1]) ||
3104 NextToken[0].is(AsmToken::Pipe) ||
3105 isId(NextToken[0], "abs"))) {
3106 lex();
3107 return true;
3108 }
3109
3110 return false;
3111}
3112
3113 OperandMatchResultTy
3114 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3115 bool AllowImm) {
3116 bool Neg, SP3Neg;
3117 bool Abs, SP3Abs;
3118 SMLoc Loc;
3119
3120 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3121 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3122 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3123 return MatchOperand_ParseFail;
3124 }
3125
3126 SP3Neg = parseSP3NegModifier();
3127
3128 Loc = getLoc();
3129 Neg = trySkipId("neg");
3130 if (Neg && SP3Neg) {
3131 Error(Loc, "expected register or immediate");
3132 return MatchOperand_ParseFail;
3133 }
3134 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3135 return MatchOperand_ParseFail;
3136
3137 Abs = trySkipId("abs");
3138 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3139 return MatchOperand_ParseFail;
3140
3141 Loc = getLoc();
3142 SP3Abs = trySkipToken(AsmToken::Pipe);
3143 if (Abs && SP3Abs) {
3144 Error(Loc, "expected register or immediate");
3145 return MatchOperand_ParseFail;
3146 }
3147
3148 OperandMatchResultTy Res;
3149 if (AllowImm) {
3150 Res = parseRegOrImm(Operands, SP3Abs);
3151 } else {
3152 Res = parseReg(Operands);
3153 }
3154 if (Res != MatchOperand_Success) {
3155 return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3156 }
3157
3158 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3159 return MatchOperand_ParseFail;
3160 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3161 return MatchOperand_ParseFail;
3162 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3163 return MatchOperand_ParseFail;
3164
3165 AMDGPUOperand::Modifiers Mods;
3166 Mods.Abs = Abs || SP3Abs;
3167 Mods.Neg = Neg || SP3Neg;
3168
3169 if (Mods.hasFPModifiers()) {
3170 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3171 if (Op.isExpr()) {
3172 Error(Op.getStartLoc(), "expected an absolute expression");
3173 return MatchOperand_ParseFail;
3174 }
3175 Op.setModifiers(Mods);
3176 }
3177 return MatchOperand_Success;
3178}
3179
3180 OperandMatchResultTy
3181 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3182 bool AllowImm) {
3183 bool Sext = trySkipId("sext");
3184 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3185 return MatchOperand_ParseFail;
3186
3187 OperandMatchResultTy Res;
3188 if (AllowImm) {
3189 Res = parseRegOrImm(Operands);
3190 } else {
3191 Res = parseReg(Operands);
3192 }
3193 if (Res != MatchOperand_Success) {
3194 return Sext? MatchOperand_ParseFail : Res;
3195 }
3196
3197 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3198 return MatchOperand_ParseFail;
3199
3200 AMDGPUOperand::Modifiers Mods;
3201 Mods.Sext = Sext;
3202
3203 if (Mods.hasIntModifiers()) {
3204 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3205 if (Op.isExpr()) {
3206 Error(Op.getStartLoc(), "expected an absolute expression");
3207 return MatchOperand_ParseFail;
3208 }
3209 Op.setModifiers(Mods);
3210 }
3211
3212 return MatchOperand_Success;
3213}
3214
3215 OperandMatchResultTy
3216 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3217 return parseRegOrImmWithFPInputMods(Operands, false);
3218}
3219
3220 OperandMatchResultTy
3221 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3222 return parseRegOrImmWithIntInputMods(Operands, false);
3223}
3224
3225OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3226 auto Loc = getLoc();
3227 if (trySkipId("off")) {
3228 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3229 AMDGPUOperand::ImmTyOff, false));
3230 return MatchOperand_Success;
3231 }
3232
3233 if (!isRegister())
3234 return MatchOperand_NoMatch;
3235
3236 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3237 if (Reg) {
3238 Operands.push_back(std::move(Reg));
3239 return MatchOperand_Success;
3240 }
3241
3242 return MatchOperand_ParseFail;
3243
3244}
3245
3246unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3247 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3248
3249 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3250 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3251 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3252 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3253 return Match_InvalidOperand;
3254
3255 if ((TSFlags & SIInstrFlags::VOP3) &&
3256 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3257 getForcedEncodingSize() != 64)
3258 return Match_PreferE32;
3259
3260 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3261 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3262 // v_mac_f32/16 allow only dst_sel == DWORD;
3263 auto OpNum =
3264 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3265 const auto &Op = Inst.getOperand(OpNum);
3266 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3267 return Match_InvalidOperand;
3268 }
3269 }
3270
3271 return Match_Success;
3272}
3273
3274 static ArrayRef<unsigned> getAllVariants() {
3275 static const unsigned Variants[] = {
3276 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3277 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3278 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3279 };
3280
3281 return ArrayRef(Variants);
3282}
3283
3284// What asm variants we should check
3285ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3286 if (isForcedDPP() && isForcedVOP3()) {
3287 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3288 return ArrayRef(Variants);
3289 }
3290 if (getForcedEncodingSize() == 32) {
3291 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3292 return ArrayRef(Variants);
3293 }
3294
3295 if (isForcedVOP3()) {
3296 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3297 return ArrayRef(Variants);
3298 }
3299
3300 if (isForcedSDWA()) {
3301 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3302 AMDGPUAsmVariants::SDWA9};
3303 return ArrayRef(Variants);
3304 }
3305
3306 if (isForcedDPP()) {
3307 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3308 return ArrayRef(Variants);
3309 }
3310
3311 return getAllVariants();
3312}
3313
3314StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3315 if (isForcedDPP() && isForcedVOP3())
3316 return "e64_dpp";
3317
3318 if (getForcedEncodingSize() == 32)
3319 return "e32";
3320
3321 if (isForcedVOP3())
3322 return "e64";
3323
3324 if (isForcedSDWA())
3325 return "sdwa";
3326
3327 if (isForcedDPP())
3328 return "dpp";
3329
3330 return "";
3331}
3332
3333unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3334 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3335 for (MCPhysReg Reg : Desc.implicit_uses()) {
3336 switch (Reg) {
3337 case AMDGPU::FLAT_SCR:
3338 case AMDGPU::VCC:
3339 case AMDGPU::VCC_LO:
3340 case AMDGPU::VCC_HI:
3341 case AMDGPU::M0:
3342 return Reg;
3343 default:
3344 break;
3345 }
3346 }
3347 return AMDGPU::NoRegister;
3348}
3349
3350// NB: This code is correct only when used to check constant
3351 // bus limitations because GFX7 supports no f16 inline constants.
3352// Note that there are no cases when a GFX7 opcode violates
3353// constant bus limitations due to the use of an f16 constant.
3354bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3355 unsigned OpIdx) const {
3356 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3357
3358 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3359 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3360 return false;
3361 }
3362
3363 const MCOperand &MO = Inst.getOperand(OpIdx);
3364
3365 int64_t Val = MO.getImm();
3366 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3367
3368 switch (OpSize) { // expected operand size
3369 case 8:
3370 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3371 case 4:
3372 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3373 case 2: {
3374 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3375 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3376 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3377 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3378 return AMDGPU::isInlinableIntLiteral(Val);
3379
3380 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3381 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3382 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3383 return AMDGPU::isInlinableIntLiteralV216(Val);
3384
3385 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3386 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3387 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3388 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3389
3390 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3391 }
3392 default:
3393 llvm_unreachable("invalid operand size");
3394 }
3395}
3396
3397unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3398 if (!isGFX10Plus())
3399 return 1;
3400
3401 switch (Opcode) {
3402 // 64-bit shift instructions can use only one scalar value input
3403 case AMDGPU::V_LSHLREV_B64_e64:
3404 case AMDGPU::V_LSHLREV_B64_gfx10:
3405 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3406 case AMDGPU::V_LSHRREV_B64_e64:
3407 case AMDGPU::V_LSHRREV_B64_gfx10:
3408 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3409 case AMDGPU::V_ASHRREV_I64_e64:
3410 case AMDGPU::V_ASHRREV_I64_gfx10:
3411 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3412 case AMDGPU::V_LSHL_B64_e64:
3413 case AMDGPU::V_LSHR_B64_e64:
3414 case AMDGPU::V_ASHR_I64_e64:
3415 return 1;
3416 default:
3417 return 2;
3418 }
3419}
3420
3421constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3422 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3423
3424// Get regular operand indices in the same order as specified
3425// in the instruction (but append mandatory literals to the end).
3426 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3427 bool AddMandatoryLiterals = false) {
3428
3429 int16_t ImmIdx =
3430 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3431
3432 if (isVOPD(Opcode)) {
3433 int16_t ImmDeferredIdx =
3434 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3435 : -1;
3436
3437 return {getNamedOperandIdx(Opcode, OpName::src0X),
3438 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3439 getNamedOperandIdx(Opcode, OpName::src0Y),
3440 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3441 ImmDeferredIdx,
3442 ImmIdx};
3443 }
3444
3445 return {getNamedOperandIdx(Opcode, OpName::src0),
3446 getNamedOperandIdx(Opcode, OpName::src1),
3447 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3448}
3449
3450bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3451 const MCOperand &MO = Inst.getOperand(OpIdx);
3452 if (MO.isImm()) {
3453 return !isInlineConstant(Inst, OpIdx);
3454 } else if (MO.isReg()) {
3455 auto Reg = MO.getReg();
3456 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3457 auto PReg = mc2PseudoReg(Reg);
3458 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3459 } else {
3460 return true;
3461 }
3462}
3463
3464bool AMDGPUAsmParser::validateConstantBusLimitations(
3465 const MCInst &Inst, const OperandVector &Operands) {
3466 const unsigned Opcode = Inst.getOpcode();
3467 const MCInstrDesc &Desc = MII.get(Opcode);
3468 unsigned LastSGPR = AMDGPU::NoRegister;
3469 unsigned ConstantBusUseCount = 0;
3470 unsigned NumLiterals = 0;
3471 unsigned LiteralSize;
3472
3473 if (!(Desc.TSFlags &
3474 (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3475 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3476 !isVOPD(Opcode))
3477 return true;
3478
3479 // Check special imm operands (used by madmk, etc)
3480 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3481 ++NumLiterals;
3482 LiteralSize = 4;
3483 }
3484
3485 SmallDenseSet<unsigned> SGPRsUsed;
3486 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3487 if (SGPRUsed != AMDGPU::NoRegister) {
3488 SGPRsUsed.insert(SGPRUsed);
3489 ++ConstantBusUseCount;
3490 }
3491
3492 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3493
3494 for (int OpIdx : OpIndices) {
3495 if (OpIdx == -1)
3496 continue;
3497
3498 const MCOperand &MO = Inst.getOperand(OpIdx);
3499 if (usesConstantBus(Inst, OpIdx)) {
3500 if (MO.isReg()) {
3501 LastSGPR = mc2PseudoReg(MO.getReg());
3502 // Pairs of registers with a partial intersection like these
3503 // s0, s[0:1]
3504 // flat_scratch_lo, flat_scratch
3505 // flat_scratch_lo, flat_scratch_hi
3506 // are theoretically valid but they are disabled anyway.
3507 // Note that this code mimics SIInstrInfo::verifyInstruction
3508 if (SGPRsUsed.insert(LastSGPR).second) {
3509 ++ConstantBusUseCount;
3510 }
3511 } else { // Expression or a literal
3512
3513 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3514 continue; // special operand like VINTERP attr_chan
3515
3516 // An instruction may use only one literal.
3517 // This has been validated on the previous step.
3518 // See validateVOPLiteral.
3519 // This literal may be used as more than one operand.
3520 // If all these operands are of the same size,
3521 // this literal counts as one scalar value.
3522 // Otherwise it counts as 2 scalar values.
3523 // See "GFX10 Shader Programming", section 3.6.2.3.
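// Illustrative example (added note): a 32-bit literal shared by two 32-bit
// source operands counts as a single scalar value; if the same literal also
// feeds a 64-bit operand, the sizes differ and it counts as two.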
3524
3525 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3526 if (Size < 4)
3527 Size = 4;
3528
3529 if (NumLiterals == 0) {
3530 NumLiterals = 1;
3531 LiteralSize = Size;
3532 } else if (LiteralSize != Size) {
3533 NumLiterals = 2;
3534 }
3535 }
3536 }
3537 }
3538 ConstantBusUseCount += NumLiterals;
3539
3540 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3541 return true;
3542
3543 SMLoc LitLoc = getLitLoc(Operands);
3544 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3545 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3546 Error(Loc, "invalid operand (violates constant bus restrictions)");
3547 return false;
3548}
3549
3550bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3551 const MCInst &Inst, const OperandVector &Operands) {
3552
3553 const unsigned Opcode = Inst.getOpcode();
3554 if (!isVOPD(Opcode))
3555 return true;
3556
3557 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3558
3559 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3560 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3561 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3562 ? Opr.getReg()
3563 : MCRegister::NoRegister;
3564 };
3565
3566 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3567 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(getVRegIdx);
3568 if (!InvalidCompOprIdx)
3569 return true;
3570
3571 auto CompOprIdx = *InvalidCompOprIdx;
3572 auto ParsedIdx =
3573 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3574 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3575 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3576
3577 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3578 if (CompOprIdx == VOPD::Component::DST) {
3579 Error(Loc, "one dst register must be even and the other odd");
3580 } else {
3581 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3582 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3583 " operands must use different VGPR banks");
3584 }
3585
3586 return false;
3587}
3588
3589bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3590
3591 const unsigned Opc = Inst.getOpcode();
3592 const MCInstrDesc &Desc = MII.get(Opc);
3593
3594 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3595 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3596 assert(ClampIdx != -1);
3597 return Inst.getOperand(ClampIdx).getImm() == 0;
3598 }
3599
3600 return true;
3601}
3602
3603bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3604 const SMLoc &IDLoc) {
3605
3606 const unsigned Opc = Inst.getOpcode();
3607 const MCInstrDesc &Desc = MII.get(Opc);
3608
3609 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3610 return true;
3611
3612 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3613 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3614 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3615
3616 assert(VDataIdx != -1);
3617
3618 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3619 return true;
3620
3621 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3622 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3623 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3624 if (DMask == 0)
3625 DMask = 1;
3626
3627 bool IsPackedD16 = false;
3628 unsigned DataSize =
3629 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3630 if (hasPackedD16()) {
3631 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3632 IsPackedD16 = D16Idx >= 0;
3633 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3634 DataSize = (DataSize + 1) / 2;
3635 }
3636
3637 if ((VDataSize / 4) == DataSize + TFESize)
3638 return true;
3639
3640 StringRef Modifiers;
3641 if (isGFX90A())
3642 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3643 else
3644 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3645
3646 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3647 return false;
3648}
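// Example (illustrative): with dmask=0x7 the image returns 3 dwords of data;
// with tfe set, one more dword is required, so vdata must be a 4-dword
// (128-bit) register tuple. When d16 is also set on a packed-d16 target,
// the 3 components pack into 2 dwords instead.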
3649
3650bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3651 const unsigned Opc = Inst.getOpcode();
3652 const MCInstrDesc &Desc = MII.get(Opc);
3653
3654 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3655 return true;
3656
3657 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3658
3659 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3660 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3661 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3662 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3663 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3664 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3665
3666 assert(VAddr0Idx != -1);
3667 assert(SrsrcIdx != -1);
3668 assert(SrsrcIdx > VAddr0Idx);
3669
3670 if (DimIdx == -1)
3671 return true; // intersect_ray
3672
3673 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3674 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3675 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3676 unsigned ActualAddrSize =
3677 IsNSA ? SrsrcIdx - VAddr0Idx
3678 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3679 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3680
3681 unsigned ExpectedAddrSize =
3682 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3683
3684 if (!IsNSA) {
3685 if (ExpectedAddrSize > 12)
3686 ExpectedAddrSize = 16;
3687
3688 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3689 // This provides backward compatibility for assembly created
3690 // before 160b/192b/224b types were directly supported.
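 // For example (illustrative): a non-NSA image op that needs only 6 address
 // VGPRs may still be written with an 8-register tuple such as v[0:7].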
3691 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3692 return true;
3693 }
3694
3695 return ActualAddrSize == ExpectedAddrSize;
3696}
3697
3698bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3699
3700 const unsigned Opc = Inst.getOpcode();
3701 const MCInstrDesc &Desc = MII.get(Opc);
3702
3703 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3704 return true;
3705 if (!Desc.mayLoad() || !Desc.mayStore())
3706 return true; // Not atomic
3707
3708 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3709 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3710
3711 // This is an incomplete check because image_atomic_cmpswap
3712 // may only use 0x3 and 0xf while other atomic operations
3713 // may use 0x1 and 0x3. However these limitations are
3714 // verified when we check that dmask matches dst size.
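 // For example (illustrative): image_atomic_swap normally uses dmask:0x1
 // (32-bit data) or dmask:0x3 (64-bit data), while image_atomic_cmpswap uses
 // dmask:0x3 or dmask:0xf; all of these values pass the check below.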
3715 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3716}
3717
3718bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3719
3720 const unsigned Opc = Inst.getOpcode();
3721 const MCInstrDesc &Desc = MII.get(Opc);
3722
3723 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3724 return true;
3725
3726 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3727 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3728
3729 // GATHER4 instructions use dmask in a different fashion compared to
3730 // other MIMG instructions. The only useful DMASK values are
3731 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3732 // (red,red,red,red) etc.) The ISA document doesn't mention
3733 // this.
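 // For example (illustrative): image_gather4 with dmask:0x2 gathers the green
 // channel, whereas a multi-bit value such as dmask:0x3 is rejected below.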
3734 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3735}
3736
3737bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3738 const unsigned Opc = Inst.getOpcode();
3739 const MCInstrDesc &Desc = MII.get(Opc);
3740
3741 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3742 return true;
3743
3744 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3745 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3746 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3747
3748 if (!BaseOpcode->MSAA)
3749 return true;
3750
3751 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3752 assert(DimIdx != -1);
3753
3754 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3755 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3756
3757 return DimInfo->MSAA;
3758}
3759
3760static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3761{
3762 switch (Opcode) {
3763 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3764 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3765 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3766 return true;
3767 default:
3768 return false;
3769 }
3770}
3771
3772// movrels* opcodes should only allow VGPRS as src0.
3773// This is specified in .td description for vop1/vop3,
3774// but sdwa is handled differently. See isSDWAOperand.
3775bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3776 const OperandVector &Operands) {
3777
3778 const unsigned Opc = Inst.getOpcode();
3779 const MCInstrDesc &Desc = MII.get(Opc);
3780
3781 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3782 return true;
3783
3784 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3785 assert(Src0Idx != -1);
3786
3787 SMLoc ErrLoc;
3788 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3789 if (Src0.isReg()) {
3790 auto Reg = mc2PseudoReg(Src0.getReg());
3791 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3792 if (!isSGPR(Reg, TRI))
3793 return true;
3794 ErrLoc = getRegLoc(Reg, Operands);
3795 } else {
3796 ErrLoc = getConstLoc(Operands);
3797 }
3798
3799 Error(ErrLoc, "source operand must be a VGPR");
3800 return false;
3801}
3802
3803bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3804 const OperandVector &Operands) {
3805
3806 const unsigned Opc = Inst.getOpcode();
3807
3808 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3809 return true;
3810
3811 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3812 assert(Src0Idx != -1);
3813
3814 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3815 if (!Src0.isReg())
3816 return true;
3817
3818 auto Reg = mc2PseudoReg(Src0.getReg());
3819 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3820 if (!isGFX90A() && isSGPR(Reg, TRI)) {
3821 Error(getRegLoc(Reg, Operands),
3822 "source operand must be either a VGPR or an inline constant");
3823 return false;
3824 }
3825
3826 return true;
3827}
3828
3829bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
3830 const OperandVector &Operands) {
3831 unsigned Opcode = Inst.getOpcode();
3832 const MCInstrDesc &Desc = MII.get(Opcode);
3833
3834 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
3835 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
3836 return true;
3837
3838 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
3839 if (Src2Idx == -1)
3840 return true;
3841
3842 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
3843 Error(getConstLoc(Operands),
3844 "inline constants are not allowed for this operand");
3845 return false;
3846 }
3847
3848 return true;
3849}
3850
3851bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3852 const OperandVector &Operands) {
3853 const unsigned Opc = Inst.getOpcode();
3854 const MCInstrDesc &Desc = MII.get(Opc);
3855
3856 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3857 return true;
3858
3859 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3860 if (Src2Idx == -1)
3861 return true;
3862
3863 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3864 if (!Src2.isReg())
3865 return true;
3866
3867 MCRegister Src2Reg = Src2.getReg();
3868 MCRegister DstReg = Inst.getOperand(0).getReg();
3869 if (Src2Reg == DstReg)
3870 return true;
3871
3872 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3873 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
3874 return true;
3875
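 // For example (illustrative): a src2 tuple such as a[2:33] that only
 // partially overlaps a dst tuple a[0:31] is rejected here; an exact match
 // was already accepted above.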
3876 if (TRI->regsOverlap(Src2Reg, DstReg)) {
3877 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3878 "source 2 operand must not partially overlap with dst");
3879 return false;
3880 }
3881
3882 return true;
3883}
3884
3885bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3886 switch (Inst.getOpcode()) {
3887 default:
3888 return true;
3889 case V_DIV_SCALE_F32_gfx6_gfx7:
3890 case V_DIV_SCALE_F32_vi:
3891 case V_DIV_SCALE_F32_gfx10:
3892 case V_DIV_SCALE_F64_gfx6_gfx7:
3893 case V_DIV_SCALE_F64_vi:
3894 case V_DIV_SCALE_F64_gfx10:
3895 break;
3896 }
3897
3898 // TODO: Check that src0 = src1 or src2.
3899
3900 for (auto Name : {AMDGPU::OpName::src0_modifiers,
3901 AMDGPU::OpName::src2_modifiers,
3902 AMDGPU::OpName::src2_modifiers}) {
3903 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3904 .getImm() &
3905 SISrcMods::ABS) {
3906 return false;
3907 }
3908 }
3909
3910 return true;
3911}
3912
3913bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3914
3915 const unsigned Opc = Inst.getOpcode();
3916 const MCInstrDesc &Desc = MII.get(Opc);
3917
3918 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3919 return true;
3920
3921 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3922 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3923 if (isCI() || isSI())
3924 return false;
3925 }
3926
3927 return true;
3928}
3929
3930static bool IsRevOpcode(const unsigned Opcode)
3931{
3932 switch (Opcode) {
3933 case AMDGPU::V_SUBREV_F32_e32:
3934 case AMDGPU::V_SUBREV_F32_e64:
3935 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3936 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3937 case AMDGPU::V_SUBREV_F32_e32_vi:
3938 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3939 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3940 case AMDGPU::V_SUBREV_F32_e64_vi:
3941
3942 case AMDGPU::V_SUBREV_CO_U32_e32:
3943 case AMDGPU::V_SUBREV_CO_U32_e64:
3944 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3945 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3946
3947 case AMDGPU::V_SUBBREV_U32_e32:
3948 case AMDGPU::V_SUBBREV_U32_e64:
3949 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3950 case AMDGPU::V_SUBBREV_U32_e32_vi:
3951 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3952 case AMDGPU::V_SUBBREV_U32_e64_vi:
3953
3954 case AMDGPU::V_SUBREV_U32_e32:
3955 case AMDGPU::V_SUBREV_U32_e64:
3956 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3957 case AMDGPU::V_SUBREV_U32_e32_vi:
3958 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3959 case AMDGPU::V_SUBREV_U32_e64_vi:
3960
3961 case AMDGPU::V_SUBREV_F16_e32:
3962 case AMDGPU::V_SUBREV_F16_e64:
3963 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3964 case AMDGPU::V_SUBREV_F16_e32_vi:
3965 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3966 case AMDGPU::V_SUBREV_F16_e64_vi:
3967
3968 case AMDGPU::V_SUBREV_U16_e32:
3969 case AMDGPU::V_SUBREV_U16_e64:
3970 case AMDGPU::V_SUBREV_U16_e32_vi:
3971 case AMDGPU::V_SUBREV_U16_e64_vi:
3972
3973 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3974 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3975 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3976
3977 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3978 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3979
3980 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3981 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3982
3983 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3984 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3985
3986 case AMDGPU::V_LSHRREV_B32_e32:
3987 case AMDGPU::V_LSHRREV_B32_e64:
3988 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3989 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3990 case AMDGPU::V_LSHRREV_B32_e32_vi:
3991 case AMDGPU::V_LSHRREV_B32_e64_vi:
3992 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3993 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3994
3995 case AMDGPU::V_ASHRREV_I32_e32:
3996 case AMDGPU::V_ASHRREV_I32_e64:
3997 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3998 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3999 case AMDGPU::V_ASHRREV_I32_e32_vi:
4000 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4001 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4002 case AMDGPU::V_ASHRREV_I32_e64_vi:
4003
4004 case AMDGPU::V_LSHLREV_B32_e32:
4005 case AMDGPU::V_LSHLREV_B32_e64:
4006 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4007 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4008 case AMDGPU::V_LSHLREV_B32_e32_vi:
4009 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4010 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4011 case AMDGPU::V_LSHLREV_B32_e64_vi:
4012
4013 case AMDGPU::V_LSHLREV_B16_e32:
4014 case AMDGPU::V_LSHLREV_B16_e64:
4015 case AMDGPU::V_LSHLREV_B16_e32_vi:
4016 case AMDGPU::V_LSHLREV_B16_e64_vi:
4017 case AMDGPU::V_LSHLREV_B16_gfx10:
4018
4019 case AMDGPU::V_LSHRREV_B16_e32:
4020 case AMDGPU::V_LSHRREV_B16_e64:
4021 case AMDGPU::V_LSHRREV_B16_e32_vi:
4022 case AMDGPU::V_LSHRREV_B16_e64_vi:
4023 case AMDGPU::V_LSHRREV_B16_gfx10:
4024
4025 case AMDGPU::V_ASHRREV_I16_e32:
4026 case AMDGPU::V_ASHRREV_I16_e64:
4027 case AMDGPU::V_ASHRREV_I16_e32_vi:
4028 case AMDGPU::V_ASHRREV_I16_e64_vi:
4029 case AMDGPU::V_ASHRREV_I16_gfx10:
4030
4031 case AMDGPU::V_LSHLREV_B64_e64:
4032 case AMDGPU::V_LSHLREV_B64_gfx10:
4033 case AMDGPU::V_LSHLREV_B64_vi:
4034
4035 case AMDGPU::V_LSHRREV_B64_e64:
4036 case AMDGPU::V_LSHRREV_B64_gfx10:
4037 case AMDGPU::V_LSHRREV_B64_vi:
4038
4039 case AMDGPU::V_ASHRREV_I64_e64:
4040 case AMDGPU::V_ASHRREV_I64_gfx10:
4041 case AMDGPU::V_ASHRREV_I64_vi:
4042
4043 case AMDGPU::V_PK_LSHLREV_B16:
4044 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4045 case AMDGPU::V_PK_LSHLREV_B16_vi:
4046
4047 case AMDGPU::V_PK_LSHRREV_B16:
4048 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4049 case AMDGPU::V_PK_LSHRREV_B16_vi:
4050 case AMDGPU::V_PK_ASHRREV_I16:
4051 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4052 case AMDGPU::V_PK_ASHRREV_I16_vi:
4053 return true;
4054 default:
4055 return false;
4056 }
4057}
4058
4059std::optional<StringRef>
4060AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4061
4062 using namespace SIInstrFlags;
4063 const unsigned Opcode = Inst.getOpcode();
4064 const MCInstrDesc &Desc = MII.get(Opcode);
4065
4066 // lds_direct register is defined so that it can be used
4067 // with 9-bit operands only. Ignore encodings which do not accept these.
4068 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4069 if ((Desc.TSFlags & Enc) == 0)
4070 return std::nullopt;
4071
4072 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4073 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4074 if (SrcIdx == -1)
4075 break;
4076 const auto &Src = Inst.getOperand(SrcIdx);
4077 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4078
4079 if (isGFX90A() || isGFX11Plus())
4080 return StringRef("lds_direct is not supported on this GPU");
4081
4082 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4083 return StringRef("lds_direct cannot be used with this instruction");
4084
4085 if (SrcName != OpName::src0)
4086 return StringRef("lds_direct may be used as src0 only");
4087 }
4088 }
4089
4090 return std::nullopt;
4091}
4092
4093SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4094 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4095 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4096 if (Op.isFlatOffset())
4097 return Op.getStartLoc();
4098 }
4099 return getLoc();
4100}
4101
4102bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4103 const OperandVector &Operands) {
4104 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4105 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4106 return true;
4107
4108 auto Opcode = Inst.getOpcode();
4109 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4110 assert(OpNum != -1);
4111
4112 const auto &Op = Inst.getOperand(OpNum);
4113 if (!hasFlatOffsets() && Op.getImm() != 0) {
4114 Error(getFlatOffsetLoc(Operands),
4115 "flat offset modifier is not supported on this GPU");
4116 return false;
4117 }
4118
4119 // For FLAT segment the offset must be positive;
4120 // MSB is ignored and forced to zero.
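 // For example (illustrative): with a 13-bit offset field, encodings that
 // allow negative offsets accept [-4096, 4095]; otherwise only [0, 4095].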
4121 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4122 bool AllowNegative =
4123 TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
4124 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4125 Error(getFlatOffsetLoc(Operands),
4126 Twine("expected a ") +
4127 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4128 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4129 return false;
4130 }
4131
4132 return true;
4133}
4134
4135SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4136 // Start with second operand because SMEM Offset cannot be dst or src0.
4137 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4138 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4139 if (Op.isSMEMOffset())
4140 return Op.getStartLoc();
4141 }
4142 return getLoc();
4143}
4144
4145bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4146 const OperandVector &Operands) {
4147 if (isCI() || isSI())
4148 return true;
4149
4150 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4151 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4152 return true;
4153
4154 auto Opcode = Inst.getOpcode();
4155 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4156 if (OpNum == -1)
4157 return true;
4158
4159 const auto &Op = Inst.getOperand(OpNum);
4160 if (!Op.isImm())
4161 return true;
4162
4163 uint64_t Offset = Op.getImm();
4164 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4165 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4166 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4167 return true;
4168
4169 Error(getSMEMOffsetLoc(Operands),
4170 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4171 "expected a 21-bit signed offset");
4172
4173 return false;
4174}
4175
4176bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4177 unsigned Opcode = Inst.getOpcode();
4178 const MCInstrDesc &Desc = MII.get(Opcode);
4179 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4180 return true;
4181
4182 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4183 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4184
4185 const int OpIndices[] = { Src0Idx, Src1Idx };
4186
4187 unsigned NumExprs = 0;
4188 unsigned NumLiterals = 0;
4189 uint32_t LiteralValue;
4190
4191 for (int OpIdx : OpIndices) {
4192 if (OpIdx == -1) break;
4193
4194 const MCOperand &MO = Inst.getOperand(OpIdx);
4195 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4196 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4197 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4198 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4199 if (NumLiterals == 0 || LiteralValue != Value) {
4200 LiteralValue = Value;
4201 ++NumLiterals;
4202 }
4203 } else if (MO.isExpr()) {
4204 ++NumExprs;
4205 }
4206 }
4207 }
4208
4209 return NumLiterals + NumExprs <= 1;
4210}
4211
4212bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4213 const unsigned Opc = Inst.getOpcode();
4214 if (isPermlane16(Opc)) {
4215 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4216 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4217
4218 if (OpSel & ~3)
4219 return false;
4220 }
4221
4222 uint64_t TSFlags = MII.get(Opc).TSFlags;
4223
4224 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4225 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4226 if (OpSelIdx != -1) {
4227 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4228 return false;
4229 }
4230 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4231 if (OpSelHiIdx != -1) {
4232 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4233 return false;
4234 }
4235 }
4236
4237 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4238 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4239 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4240 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4241 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4242 if (OpSel & 3)
4243 return false;
4244 }
4245
4246 return true;
4247}
4248
4249bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4250 const OperandVector &Operands) {
4251 const unsigned Opc = Inst.getOpcode();
4252 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4253 if (DppCtrlIdx < 0)
4254 return true;
4255 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4256
4257 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4258 // DPP64 is supported for row_newbcast only.
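 // For example (illustrative): when src0 is a 64-bit register pair, a
 // row_newbcast control is accepted, but controls such as quad_perm or
 // row_shl are rejected below.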
4259 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4260 if (Src0Idx >= 0 &&
4261 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4262 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4263 Error(S, "64 bit dpp only supports row_newbcast");
4264 return false;
4265 }
4266 }
4267
4268 return true;
4269}
4270
4271// Check if VCC register matches wavefront size
4272bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4273 auto FB = getFeatureBits();
4274 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4275 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4276}
4277
4278// One unique literal can be used. VOP3 literal is only allowed in GFX10+
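 // For example (illustrative): v_fma_f32 v0, v1, v2, 0x1234 uses a single
 // 32-bit literal and is accepted on targets with VOP3 literals (gfx10+);
 // two different literal values in one instruction are rejected below.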
4279bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4280 const OperandVector &Operands) {
4281 unsigned Opcode = Inst.getOpcode();
4282 const MCInstrDesc &Desc = MII.get(Opcode);
4283 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4284 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4285 !HasMandatoryLiteral && !isVOPD(Opcode))
4286 return true;
4287
4288 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4289
4290 unsigned NumExprs = 0;
4291 unsigned NumLiterals = 0;
4292 uint32_t LiteralValue;
4293
4294 for (int OpIdx : OpIndices) {
4295 if (OpIdx == -1)
4296 continue;
4297
4298 const MCOperand &MO = Inst.getOperand(OpIdx);
4299 if (!MO.isImm() && !MO.isExpr())
4300 continue;
4301 if (!isSISrcOperand(Desc, OpIdx))
4302 continue;
4303
4304 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4305 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4306 if (NumLiterals == 0 || LiteralValue != Value) {
4307 LiteralValue = Value;
4308 ++NumLiterals;
4309 }
4310 } else if (MO.isExpr()) {
4311 ++NumExprs;
4312 }
4313 }
4314 NumLiterals += NumExprs;
4315
4316 if (!NumLiterals)
4317 return true;
4318
4319 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4320 Error(getLitLoc(Operands), "literal operands are not supported");
4321 return false;
4322 }
4323
4324 if (NumLiterals > 1) {
4325 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4326 return false;
4327 }
4328
4329 return true;
4330}
4331
4332// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4333static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4334 const MCRegisterInfo *MRI) {
4335 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4336 if (OpIdx < 0)
4337 return -1;
4338
4339 const MCOperand &Op = Inst.getOperand(OpIdx);
4340 if (!Op.isReg())
4341 return -1;
4342
4343 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4344 auto Reg = Sub ? Sub : Op.getReg();
4345 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4346 return AGPR32.contains(Reg) ? 1 : 0;
4347}
4348
4349bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4350 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4351 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4352 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4353 SIInstrFlags::DS)) == 0)
4354 return true;
4355
4356 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4357 : AMDGPU::OpName::vdata;
4358
4359 const MCRegisterInfo *MRI = getMRI();
4360 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4361 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4362
4363 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4364 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4365 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4366 return false;
4367 }
4368
4369 auto FB = getFeatureBits();
4370 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4371 if (DataAreg < 0 || DstAreg < 0)
4372 return true;
4373 return DstAreg == DataAreg;
4374 }
4375
4376 return DstAreg < 1 && DataAreg < 1;
4377}
4378
4379bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4380 auto FB = getFeatureBits();
4381 if (!FB[AMDGPU::FeatureGFX90AInsts])
4382 return true;
4383
4384 const MCRegisterInfo *MRI = getMRI();
4385 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4386 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
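 // For example (illustrative): a 64-bit tuple such as v[0:1] is accepted on
 // gfx90a, while v[1:2] starts on an odd register and is rejected.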
4387 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4388 const MCOperand &Op = Inst.getOperand(I);
4389 if (!Op.isReg())
4390 continue;
4391
4392 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4393 if (!Sub)
4394 continue;
4395
4396 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4397 return false;
4398 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4399 return false;
4400 }
4401
4402 return true;
4403}
4404
4405SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4406 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4407 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4408 if (Op.isBLGP())
4409 return Op.getStartLoc();
4410 }
4411 return SMLoc();
4412}
4413
4414bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4415 const OperandVector &Operands) {
4416 unsigned Opc = Inst.getOpcode();
4417 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4418 if (BlgpIdx == -1)
4419 return true;
4420 SMLoc BLGPLoc = getBLGPLoc(Operands);
4421 if (!BLGPLoc.isValid())
4422 return true;
4423 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4424 auto FB = getFeatureBits();
4425 bool UsesNeg = false;
4426 if (FB[AMDGPU::FeatureGFX940Insts]) {
4427 switch (Opc) {
4428 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4429 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4430 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4431 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4432 UsesNeg = true;
4433 }
4434 }
4435
4436 if (IsNeg == UsesNeg)
4437 return true;
4438
4439 Error(BLGPLoc,
4440 UsesNeg ? "invalid modifier: blgp is not supported"
4441 : "invalid modifier: neg is not supported");
4442
4443 return false;
4444}
4445
4446bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4447 const OperandVector &Operands) {
4448 if (!isGFX11Plus())
4449 return true;
4450
4451 unsigned Opc = Inst.getOpcode();
4452 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4453 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4454 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4455 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4456 return true;
4457
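 // For example (illustrative): on gfx11, "s_waitcnt_vscnt null, 0x0" is
 // accepted; naming any other SGPR as the first operand is rejected below.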
4458 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4459 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4460 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4461 if (Reg == AMDGPU::SGPR_NULL)
4462 return true;
4463
4464 SMLoc RegLoc = getRegLoc(Reg, Operands);
4465 Error(RegLoc, "src0 must be null");
4466 return false;
4467}
4468
4469// gfx90a has an undocumented limitation:
4470// DS_GWS opcodes must use even aligned registers.
4471bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4472 const OperandVector &Operands) {
4473 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4474 return true;
4475
4476 int Opc = Inst.getOpcode();
4477 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4478 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4479 return true;
4480
4481 const MCRegisterInfo *MRI = getMRI();
4482 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4483 int Data0Pos =
4484 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4485 assert(Data0Pos != -1);
4486 auto Reg = Inst.getOperand(Data0Pos).getReg();
4487 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4488 if (RegIdx & 1) {
4489 SMLoc RegLoc = getRegLoc(Reg, Operands);
4490 Error(RegLoc, "vgpr must be even aligned");
4491 return false;
4492 }
4493
4494 return true;
4495}
4496
4497bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4498 const OperandVector &Operands,
4499 const SMLoc &IDLoc) {
4500 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4501 AMDGPU::OpName::cpol);
4502 if (CPolPos == -1)
4503 return true;
4504
4505 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4506
4507 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4508 if (TSFlags & SIInstrFlags::SMRD) {
4509 if (CPol && (isSI() || isCI())) {
4510 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4511 Error(S, "cache policy is not supported for SMRD instructions");
4512 return false;
4513 }
4514 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4515 Error(IDLoc, "invalid cache policy for SMEM instruction");
4516 return false;
4517 }
4518 }
4519
4520 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4521 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4522 StringRef CStr(S.getPointer());
4523 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4524 Error(S, "scc is not supported on this GPU");
4525 return false;
4526 }
4527
4528 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
4529 return true;
4530
4531 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4532 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4533 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4534 : "instruction must use glc");
4535 return false;
4536 }
4537 } else {
4538 if (CPol & CPol::GLC) {
4539 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4540 StringRef CStr(S.getPointer());
4541 S = SMLoc::getFromPointer(
4542 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4543 Error(S, isGFX940() ? "instruction must not use sc0"
4544 : "instruction must not use glc");
4545 return false;
4546 }
4547 }
4548
4549 return true;
4550}
4551
4552bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4553 if (!isGFX11Plus())
4554 return true;
4555 for (auto &Operand : Operands) {
4556 if (!Operand->isReg())
4557 continue;
4558 unsigned Reg = Operand->getReg();
4559 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4560 Error(getRegLoc(Reg, Operands),
4561 "execz and vccz are not supported on this GPU");
4562 return false;
4563 }
4564 }
4565 return true;
4566}
4567
4568bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
4569 const OperandVector &Operands) {
4570 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4571 if (Desc.mayStore() &&
4572 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4573 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
4574 if (Loc != getInstLoc(Operands)) {
4575 Error(Loc, "TFE modifier has no meaning for store instructions");
4576 return false;
4577 }
4578 }
4579
4580 return true;
4581}
4582
4583bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4584 const SMLoc &IDLoc,
4585 const OperandVector &Operands) {
4586 if (auto ErrMsg = validateLdsDirect(Inst)) {
4587 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4588 return false;
4589 }
4590 if (!validateSOPLiteral(Inst)) {
4591 Error(getLitLoc(Operands),
4592 "only one unique literal operand is allowed");
4593 return false;
4594 }
4595 if (!validateVOPLiteral(Inst, Operands)) {
4596 return false;
4597 }
4598 if (!validateConstantBusLimitations(Inst, Operands)) {
4599 return false;
4600 }
4601 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
4602 return false;
4603 }
4604 if (!validateIntClampSupported(Inst)) {
4605 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4606 "integer clamping is not supported on this GPU");
4607 return false;
4608 }
4609 if (!validateOpSel(Inst)) {
4610 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4611 "invalid op_sel operand");
4612 return false;
4613 }
4614 if (!validateDPP(Inst, Operands)) {
4615 return false;
4616 }
4617 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4618 if (!validateMIMGD16(Inst)) {
4619 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4620 "d16 modifier is not supported on this GPU");
4621 return false;
4622 }
4623 if (!validateMIMGMSAA(Inst)) {
4624 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4625 "invalid dim; must be MSAA type");
4626 return false;
4627 }
4628 if (!validateMIMGDataSize(Inst, IDLoc)) {
4629 return false;
4630 }
4631 if (!validateMIMGAddrSize(Inst)) {
4632 Error(IDLoc,
4633 "image address size does not match dim and a16");
4634 return false;
4635 }
4636 if (!validateMIMGAtomicDMask(Inst)) {
4637 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4638 "invalid atomic image dmask");
4639 return false;
4640 }
4641 if (!validateMIMGGatherDMask(Inst)) {
4642 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4643 "invalid image_gather dmask: only one bit must be set");
4644 return false;
4645 }
4646 if (!validateMovrels(Inst, Operands)) {
4647 return false;
4648 }
4649 if (!validateFlatOffset(Inst, Operands)) {
4650 return false;
4651 }
4652 if (!validateSMEMOffset(Inst, Operands)) {
4653 return false;
4654 }
4655 if (!validateMAIAccWrite(Inst, Operands)) {
4656 return false;
4657 }
4658 if (!validateMAISrc2(Inst, Operands)) {
4659 return false;
4660 }
4661 if (!validateMFMA(Inst, Operands)) {
4662 return false;
4663 }
4664 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4665 return false;
4666 }
4667
4668 if (!validateAGPRLdSt(Inst)) {
4669 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4670 ? "invalid register class: data and dst should be all VGPR or AGPR"
4671 : "invalid register class: agpr loads and stores not supported on this GPU"
4672 );
4673 return false;
4674 }
4675 if (!validateVGPRAlign(Inst)) {
4676 Error(IDLoc,
4677 "invalid register class: vgpr tuples must be 64 bit aligned");
4678 return false;
4679 }
4680 if (!validateGWS(Inst, Operands)) {
4681 return false;
4682 }
4683
4684 if (!validateBLGP(Inst, Operands)) {
4685 return false;
4686 }
4687
4688 if (!validateDivScale(Inst)) {
4689 Error(IDLoc, "ABS not allowed in VOP3B instructions");
4690 return false;
4691 }
4692 if (!validateWaitCnt(Inst, Operands)) {
4693 return false;
4694 }
4695 if (!validateExeczVcczOperands(Operands)) {
4696 return false;
4697 }
4698 if (!validateTFE(Inst, Operands)) {
4699 return false;
4700 }
4701
4702 return true;
4703}
4704
4705 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4706 const FeatureBitset &FBS,
4707 unsigned VariantID = 0);
4708
4709static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4710 const FeatureBitset &AvailableFeatures,
4711 unsigned VariantID);
4712
4713bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4714 const FeatureBitset &FBS) {
4715 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4716}
4717
4718bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4719 const FeatureBitset &FBS,
4720 ArrayRef<unsigned> Variants) {
4721 for (auto Variant : Variants) {
4722 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4723 return true;
4724 }
4725
4726 return false;
4727}
4728
4729bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4730 const SMLoc &IDLoc) {
4731 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
4732
4733 // Check if requested instruction variant is supported.
4734 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4735 return false;
4736
4737 // This instruction is not supported.
4738 // Clear any other pending errors because they are no longer relevant.
4739 getParser().clearPendingErrors();
4740
4741 // Requested instruction variant is not supported.
4742 // Check if any other variants are supported.
4743 StringRef VariantName = getMatchedVariantName();
4744 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4745 return Error(IDLoc,
4746 Twine(VariantName,
4747 " variant of this instruction is not supported"));
4748 }
4749
4750 // Check if this instruction may be used with a different wavesize.
4751 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
4752 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
4753
4754 FeatureBitset FeaturesWS32 = getFeatureBits();
4755 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
4756 .flip(AMDGPU::FeatureWavefrontSize32);
4757 FeatureBitset AvailableFeaturesWS32 =
4758 ComputeAvailableFeatures(FeaturesWS32);
4759
4760 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
4761 return Error(IDLoc, "instruction requires wavesize=32");
4762 }
4763
4764 // Finally check if this instruction is supported on any other GPU.
4765 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4766 return Error(IDLoc, "instruction not supported on this GPU");
4767 }
4768
4769 // Instruction not supported on any GPU. Probably a typo.
4770 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4771 return Error(IDLoc, "invalid instruction" + Suggestion);
4772}
4773
4774 static bool isInvalidVOPDY(const OperandVector &Operands,
4775 uint64_t InvalidOprIdx) {
4776 assert(InvalidOprIdx < Operands.size());
4777 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
4778 if (Op.isToken() && InvalidOprIdx > 1) {
4779 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
4780 return PrevOp.isToken() && PrevOp.getToken() == "::";
4781 }
4782 return false;
4783}
4784
4785 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4786 OperandVector &Operands,
4787 MCStreamer &Out,
4788 uint64_t &ErrorInfo,
4789 bool MatchingInlineAsm) {
4790 MCInst Inst;
4791 unsigned Result = Match_Success;
4792 for (auto Variant : getMatchedVariants()) {
4793 uint64_t EI;
4794 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4795 Variant);
4796 // We order match statuses from least to most specific. We use most specific
4797 // status as resulting
4798 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4799 if ((R == Match_Success) ||
4800 (R == Match_PreferE32) ||
4801 (R == Match_MissingFeature && Result != Match_PreferE32) ||
4802 (R == Match_InvalidOperand && Result != Match_MissingFeature
4803 && Result != Match_PreferE32) ||
4804 (R == Match_MnemonicFail && Result != Match_InvalidOperand
4805 && Result != Match_MissingFeature
4806 && Result != Match_PreferE32)) {
4807 Result = R;
4808 ErrorInfo = EI;
4809 }
4810 if (R == Match_Success)
4811 break;
4812 }
4813
4814 if (Result == Match_Success) {
4815 if (!validateInstruction(Inst, IDLoc, Operands)) {
4816 return true;
4817 }
4818 Inst.setLoc(IDLoc);
4819 Out.emitInstruction(Inst, getSTI());
4820 return false;
4821 }
4822
4823 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4824 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4825 return true;
4826 }
4827
4828 switch (Result) {
4829 default: break;
4830 case Match_MissingFeature:
4831 // It has been verified that the specified instruction
4832 // mnemonic is valid. A match was found but it requires
4833 // features which are not supported on this GPU.
4834 return Error(IDLoc, "operands are not valid for this GPU or mode");
4835
4836 case Match_InvalidOperand: {
4837 SMLoc ErrorLoc = IDLoc;
4838 if (ErrorInfo != ~0ULL) {
4839 if (ErrorInfo >= Operands.size()) {
4840 return Error(IDLoc, "too few operands for instruction");
4841 }
4842 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4843 if (ErrorLoc == SMLoc())
4844 ErrorLoc = IDLoc;
4845
4846 if (isInvalidVOPDY(Operands, ErrorInfo)) {
4847 return Error(ErrorLoc, "invalid VOPDY instruction");
4848 }
4849 return Error(ErrorLoc, "invalid operand for instruction");
4850 }
4851
4852 case Match_PreferE32:
4853 return Error(IDLoc, "internal error: instruction without _e64 suffix "
4854 "should be encoded as e32");
4855 case Match_MnemonicFail:
4856 llvm_unreachable("Invalid instructions should have been handled already");
4857 }
4858 llvm_unreachable("Implement any new match types added!");
4859}
4860
4861bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4862 int64_t Tmp = -1;
4863 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4864 return true;
4865 }
4866 if (getParser().parseAbsoluteExpression(Tmp)) {
4867 return true;
4868 }
4869 Ret = static_cast<uint32_t>(Tmp);
4870 return false;
4871}
4872
4873bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4874 uint32_t &Minor) {
4875 if (ParseAsAbsoluteExpression(Major))
4876 return TokError("invalid major version");
4877
4878 if (!trySkipToken(AsmToken::Comma))
4879 return TokError("minor version number required, comma expected");
4880
4881 if (ParseAsAbsoluteExpression(Minor))
4882 return TokError("invalid minor version");
4883
4884 return false;
4885}
4886
4887bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4888 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4889 return TokError("directive only supported for amdgcn architecture");
4890
4891 std::string TargetIDDirective;
4892 SMLoc TargetStart = getTok().getLoc();
4893 if (getParser().parseEscapedString(TargetIDDirective))
4894 return true;
4895
4896 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4897 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4898 return getParser().Error(TargetRange.Start,
4899 (Twine(".amdgcn_target directive's target id ") +
4900 Twine(TargetIDDirective) +
4901 Twine(" does not match the specified target id ") +
4902 Twine(getTargetStreamer().getTargetID()->toString())).str());
4903
4904 return false;
4905}
4906
4907bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4908 return Error(Range.Start, "value out of range", Range);
4909}
4910
4911bool AMDGPUAsmParser::calculateGPRBlocks(
4912 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4913 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
4914 unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
4915 SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4916 // TODO(scott.linder): These calculations are duplicated from
4917 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4918 IsaVersion Version = getIsaVersion(getSTI().getCPU());
4919
4920 unsigned NumVGPRs = NextFreeVGPR;
4921 unsigned NumSGPRs = NextFreeSGPR;
4922
4923 if (Version.Major >= 10)
4924 NumSGPRs = 0;
4925 else {
4926 unsigned MaxAddressableNumSGPRs =
4927 IsaInfo::getAddressableNumSGPRs(&getSTI());
4928
4929 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4930 NumSGPRs > MaxAddressableNumSGPRs)
4931 return OutOfRangeError(SGPRRange);
4932
4933 NumSGPRs +=
4934 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4935
4936 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4937 NumSGPRs > MaxAddressableNumSGPRs)
4938 return OutOfRangeError(SGPRRange);
4939
4940 if (Features.test(FeatureSGPRInitBug))
4941 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4942 }
4943
4944 VGPRBlocks =
4945 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4946 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4947
4948 return false;
4949}
4950
4951bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4952 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4953 return TokError("directive only supported for amdgcn architecture");
4954
4955 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4956 return TokError("directive only supported for amdhsa OS");
4957
4958 StringRef KernelName;
4959 if (getParser().parseIdentifier(KernelName))
4960 return true;
4961
4962 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4963
4964 StringSet<> Seen;
4965
4966 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4967
4968 SMRange VGPRRange;
4969 uint64_t NextFreeVGPR = 0;
4970 uint64_t AccumOffset = 0;
4971 uint64_t SharedVGPRCount = 0;
4972 SMRange SGPRRange;
4973 uint64_t NextFreeSGPR = 0;
4974
4975 // Count the number of user SGPRs implied from the enabled feature bits.
4976 unsigned ImpliedUserSGPRCount = 0;
4977
4978 // Track if the asm explicitly contains the directive for the user SGPR
4979 // count.
4980 std::optional<unsigned> ExplicitUserSGPRCount;
4981 bool ReserveVCC = true;
4982 bool ReserveFlatScr = true;
4983 std::optional<bool> EnableWavefrontSize32;
4984
4985 while (true) {
4986 while (trySkipToken(AsmToken::EndOfStatement));
4987
4988 StringRef ID;
4989 SMRange IDRange = getTok().getLocRange();
4990 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4991 return true;
4992
4993 if (ID == ".end_amdhsa_kernel")
4994 break;
4995
4996 if (!Seen.insert(ID).second)
4997 return TokError(".amdhsa_ directives cannot be repeated");
4998
4999 SMLoc ValStart = getLoc();
5000 int64_t IVal;
5001 if (getParser().parseAbsoluteExpression(IVal))
5002 return true;
5003 SMLoc ValEnd = getLoc();
5004 SMRange ValRange = SMRange(ValStart, ValEnd);
5005
5006 if (IVal < 0)
5007 return OutOfRangeError(ValRange);
5008
5009 uint64_t Val = IVal;
5010
5011#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5012 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
5013 return OutOfRangeError(RANGE); \
5014 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
5015
5016 if (ID == ".amdhsa_group_segment_fixed_size") {
5017 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
5018 return OutOfRangeError(ValRange);
5019 KD.group_segment_fixed_size = Val;
5020 } else if (ID == ".amdhsa_private_segment_fixed_size") {
5021 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
5022 return OutOfRangeError(ValRange);
5023 KD.private_segment_fixed_size = Val;
5024 } else if (ID == ".amdhsa_kernarg_size") {
5025 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
5026 return OutOfRangeError(ValRange);
5027 KD.kernarg_size = Val;
5028 } else if (ID == ".amdhsa_user_sgpr_count") {
5029 ExplicitUserSGPRCount = Val;
5030 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5031 if (hasArchitectedFlatScratch())
5032 return Error(IDRange.Start,
5033 "directive is not supported with architected flat scratch",
5034 IDRange);
5035 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5036 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5037 Val, ValRange);
5038 if (Val)
5039 ImpliedUserSGPRCount += 4;
5040 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5041 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5042 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
5043 ValRange);
5044 if (Val)
5045 ImpliedUserSGPRCount += 2;
5046 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5047 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5048 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
5049 ValRange);
5050 if (Val)
5051 ImpliedUserSGPRCount += 2;
5052 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5053 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5054 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5055 Val, ValRange);
5056 if (Val)
5057 ImpliedUserSGPRCount += 2;
5058 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5059 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5060 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
5061 ValRange);
5062 if (Val)
5063 ImpliedUserSGPRCount += 2;
5064 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5065 if (hasArchitectedFlatScratch())
5066 return Error(IDRange.Start,
5067 "directive is not supported with architected flat scratch",
5068 IDRange);
5069 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5070 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
5071 ValRange);
5072 if (Val)
5073 ImpliedUserSGPRCount += 2;
5074 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5075 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5076 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5077 Val, ValRange);
5078 if (Val)
5079 ImpliedUserSGPRCount += 1;
5080 } else if (ID == ".amdhsa_wavefront_size32") {
5081 if (IVersion.Major < 10)
5082 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5083 EnableWavefrontSize32 = Val;
5084 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5085 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5086 Val, ValRange);
5087 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5088 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5089 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
5090 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5091 if (hasArchitectedFlatScratch())
5092 return Error(IDRange.Start,
5093 "directive is not supported with architected flat scratch",
5094 IDRange);
5095 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5096 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5097 } else if (ID == ".amdhsa_enable_private_segment") {
5098 if (!hasArchitectedFlatScratch())
5099 return Error(
5100 IDRange.Start,
5101 "directive is not supported without architected flat scratch",
5102 IDRange);
5103 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5104 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5105 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5106 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5107 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5108 ValRange);
5109 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5110 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5111 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5112 ValRange);
5113 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5114 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5115 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5116 ValRange);
5117 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5118 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5119 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5120 ValRange);
5121 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5122 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5123 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5124 ValRange);
5125 } else if (ID == ".amdhsa_next_free_vgpr") {
5126 VGPRRange = ValRange;
5127 NextFreeVGPR = Val;
5128 } else if (ID == ".amdhsa_next_free_sgpr") {
5129 SGPRRange = ValRange;
5130 NextFreeSGPR = Val;
5131 } else if (ID == ".amdhsa_accum_offset") {
5132 if (!isGFX90A())
5133 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5134 AccumOffset = Val;
5135 } else if (ID == ".amdhsa_reserve_vcc") {
5136 if (!isUInt<1>(Val))
5137 return OutOfRangeError(ValRange);
5138 ReserveVCC = Val;
5139 } else if (ID == ".amdhsa_reserve_flat_scratch") {
5140 if (IVersion.Major < 7)
5141 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5142 if (hasArchitectedFlatScratch())
5143 return Error(IDRange.Start,
5144 "directive is not supported with architected flat scratch",
5145 IDRange);
5146 if (!isUInt<1>(Val))
5147 return OutOfRangeError(ValRange);
5148 ReserveFlatScr = Val;
5149 } else if (ID == ".amdhsa_reserve_xnack_mask") {
5150 if (IVersion.Major < 8)
5151 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5152 if (!isUInt<1>(Val))
5153 return OutOfRangeError(ValRange);
5154 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5155 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5156 IDRange);
5157 } else if (ID == ".amdhsa_float_round_mode_32") {
5158 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5159 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5160 } else if (ID == ".amdhsa_float_round_mode_16_64") {
5161 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5162 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5163 } else if (ID == ".amdhsa_float_denorm_mode_32") {
5164 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5165 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5166 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5167 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5168 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5169 ValRange);
5170 } else if (ID == ".amdhsa_dx10_clamp") {
5171 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5172 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5173 } else if (ID == ".amdhsa_ieee_mode") {
5174 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5175 Val, ValRange);
5176 } else if (ID == ".amdhsa_fp16_overflow") {
5177 if (IVersion.Major < 9)
5178 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5179 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5180 ValRange);
5181 } else if (ID == ".amdhsa_tg_split") {
5182 if (!isGFX90A())
5183 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5184 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5185 ValRange);
5186 } else if (ID == ".amdhsa_workgroup_processor_mode") {
5187 if (IVersion.Major < 10)
5188 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5189 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5190 ValRange);
5191 } else if (ID == ".amdhsa_memory_ordered") {
5192 if (IVersion.Major < 10)
5193 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5194 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5195 ValRange);
5196 } else if (ID == ".amdhsa_forward_progress") {
5197 if (IVersion.Major < 10)
5198 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5199 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5200 ValRange);
5201 } else if (ID == ".amdhsa_shared_vgpr_count") {
5202 if (IVersion.Major < 10)
5203 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5204 SharedVGPRCount = Val;
5205 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5206 COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
5207 ValRange);
5208 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5209 PARSE_BITS_ENTRY(
5210 KD.compute_pgm_rsrc2,
5211 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5212 ValRange);
5213 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5214 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5215 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5216 Val, ValRange);
5217 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5218 PARSE_BITS_ENTRY(
5219 KD.compute_pgm_rsrc2,
5220 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5221 ValRange);
5222 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5223 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5224 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5225 Val, ValRange);
5226 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5227 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5228 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5229 Val, ValRange);
5230 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5231 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5232 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5233 Val, ValRange);
5234 } else if (ID == ".amdhsa_exception_int_div_zero") {
5235 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5236 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5237 Val, ValRange);
5238 } else {
5239 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5240 }
5241
5242#undef PARSE_BITS_ENTRY
5243 }
5244
5245 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5246 return TokError(".amdhsa_next_free_vgpr directive is required");
5247
5248 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5249 return TokError(".amdhsa_next_free_sgpr directive is required");
5250
5251 unsigned VGPRBlocks;
5252 unsigned SGPRBlocks;
5253 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5254 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5255 EnableWavefrontSize32, NextFreeVGPR,
5256 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5257 SGPRBlocks))
5258 return true;
5259
5260 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5261 VGPRBlocks))
5262 return OutOfRangeError(VGPRRange);
5263 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5264 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5265
5266 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5267 SGPRBlocks))
5268 return OutOfRangeError(SGPRRange);
5269 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5270 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5271 SGPRBlocks);
5272
5273 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5274 return TokError("amdgpu_user_sgpr_count smaller than implied by "
5275 "enabled user SGPRs");
5276
5277 unsigned UserSGPRCount =
5278 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5279
5280 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5281 return TokError("too many user SGPRs enabled");
5282 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5283 UserSGPRCount);
5284
5285 if (isGFX90A()) {
5286 if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5287 return TokError(".amdhsa_accum_offset directive is required");
5288 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5289 return TokError("accum_offset should be in range [4..256] in "
5290 "increments of 4");
5291 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5292 return TokError("accum_offset exceeds total VGPR allocation");
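 // For example (illustrative): .amdhsa_accum_offset 8 is stored in the
 // descriptor field below as 8 / 4 - 1 == 1.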
5293 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5294 (AccumOffset / 4 - 1));
5295 }
5296
5297 if (IVersion.Major == 10) {
5298 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
5299 if (SharedVGPRCount && EnableWavefrontSize32) {
5300 return TokError("shared_vgpr_count directive not valid on "
5301 "wavefront size 32");
5302 }
5303 if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5304 return TokError("shared_vgpr_count*2 + "
5305 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5306 "exceed 63\n");
5307 }
5308 }
5309
5310 getTargetStreamer().EmitAmdhsaKernelDescriptor(
5311 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5312 ReserveFlatScr);
5313 return false;
5314}