AMDGPUAsmParser.cpp
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
14#include "SIDefines.h"
15#include "SIInstrInfo.h"
16#include "SIRegisterInfo.h"
21#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/StringSet.h"
24#include "llvm/ADT/Twine.h"
27#include "llvm/MC/MCAsmInfo.h"
28#include "llvm/MC/MCContext.h"
29#include "llvm/MC/MCExpr.h"
30#include "llvm/MC/MCInst.h"
31#include "llvm/MC/MCInstrDesc.h"
36#include "llvm/MC/MCSymbol.h"
43#include <optional>
44
45using namespace llvm;
46using namespace llvm::AMDGPU;
47using namespace llvm::amdhsa;
48
49namespace {
50
51class AMDGPUAsmParser;
52
53enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
54
55//===----------------------------------------------------------------------===//
56// Operand
57//===----------------------------------------------------------------------===//
58
59class AMDGPUOperand : public MCParsedAsmOperand {
60 enum KindTy {
61 Token,
62 Immediate,
 63 Register,
 64 Expression
 65 } Kind;
66
67 SMLoc StartLoc, EndLoc;
68 const AMDGPUAsmParser *AsmParser;
69
70public:
71 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
72 : Kind(Kind_), AsmParser(AsmParser_) {}
73
74 using Ptr = std::unique_ptr<AMDGPUOperand>;
75
76 struct Modifiers {
77 bool Abs = false;
78 bool Neg = false;
79 bool Sext = false;
80 bool Lit = false;
81
82 bool hasFPModifiers() const { return Abs || Neg; }
83 bool hasIntModifiers() const { return Sext; }
84 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
85
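 // The helpers below pack the parsed modifiers into the immediate carried by
 // the corresponding src*_modifiers operand, using the SISrcMods bit flags
 // (ABS/NEG for FP sources, SEXT for integer sources).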
86 int64_t getFPModifiersOperand() const {
87 int64_t Operand = 0;
88 Operand |= Abs ? SISrcMods::ABS : 0u;
89 Operand |= Neg ? SISrcMods::NEG : 0u;
90 return Operand;
91 }
92
93 int64_t getIntModifiersOperand() const {
94 int64_t Operand = 0;
95 Operand |= Sext ? SISrcMods::SEXT : 0u;
96 return Operand;
97 }
98
99 int64_t getModifiersOperand() const {
100 assert(!(hasFPModifiers() && hasIntModifiers())
101 && "fp and int modifiers should not be used simultaneously");
102 if (hasFPModifiers()) {
103 return getFPModifiersOperand();
104 } else if (hasIntModifiers()) {
105 return getIntModifiersOperand();
106 } else {
107 return 0;
108 }
109 }
110
111 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
112 };
113
114 enum ImmTy {
115 ImmTyNone,
116 ImmTyGDS,
117 ImmTyLDS,
118 ImmTyOffen,
119 ImmTyIdxen,
120 ImmTyAddr64,
121 ImmTyOffset,
122 ImmTyInstOffset,
123 ImmTyOffset0,
124 ImmTyOffset1,
125 ImmTySMEMOffsetMod,
126 ImmTyCPol,
127 ImmTyTFE,
128 ImmTyD16,
129 ImmTyClampSI,
130 ImmTyOModSI,
131 ImmTySDWADstSel,
132 ImmTySDWASrc0Sel,
133 ImmTySDWASrc1Sel,
134 ImmTySDWADstUnused,
135 ImmTyDMask,
136 ImmTyDim,
137 ImmTyUNorm,
138 ImmTyDA,
139 ImmTyR128A16,
140 ImmTyA16,
141 ImmTyLWE,
142 ImmTyExpTgt,
143 ImmTyExpCompr,
144 ImmTyExpVM,
145 ImmTyFORMAT,
146 ImmTyHwreg,
147 ImmTyOff,
148 ImmTySendMsg,
149 ImmTyInterpSlot,
150 ImmTyInterpAttr,
151 ImmTyInterpAttrChan,
152 ImmTyOpSel,
153 ImmTyOpSelHi,
154 ImmTyNegLo,
155 ImmTyNegHi,
156 ImmTyIndexKey8bit,
157 ImmTyIndexKey16bit,
158 ImmTyDPP8,
159 ImmTyDppCtrl,
160 ImmTyDppRowMask,
161 ImmTyDppBankMask,
162 ImmTyDppBoundCtrl,
163 ImmTyDppFI,
164 ImmTySwizzle,
165 ImmTyGprIdxMode,
166 ImmTyHigh,
167 ImmTyBLGP,
168 ImmTyCBSZ,
169 ImmTyABID,
170 ImmTyEndpgm,
171 ImmTyWaitVDST,
172 ImmTyWaitEXP,
173 ImmTyWaitVAVDst,
174 ImmTyWaitVMVSrc,
175 ImmTyByteSel,
176 };
177
178 // Immediate operand kind.
179 // It helps to identify the location of an offending operand after an error.
180 // Note that regular literals and mandatory literals (KImm) must be handled
181 // differently. When looking for an offending operand, we should usually
182 // ignore mandatory literals because they are part of the instruction and
183 // cannot be changed. Report location of mandatory operands only for VOPD,
184 // when both OpX and OpY have a KImm and there are no other literals.
185 enum ImmKindTy {
186 ImmKindTyNone,
187 ImmKindTyLiteral,
188 ImmKindTyMandatoryLiteral,
189 ImmKindTyConst,
190 };
191
192private:
193 struct TokOp {
194 const char *Data;
195 unsigned Length;
196 };
197
198 struct ImmOp {
199 int64_t Val;
200 ImmTy Type;
201 bool IsFPImm;
202 mutable ImmKindTy Kind;
203 Modifiers Mods;
204 };
205
206 struct RegOp {
207 unsigned RegNo;
208 Modifiers Mods;
209 };
210
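 // Operand payload; exactly one member is active at a time, as selected by Kind.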
211 union {
212 TokOp Tok;
213 ImmOp Imm;
214 RegOp Reg;
215 const MCExpr *Expr;
216 };
217
218public:
219 bool isToken() const override { return Kind == Token; }
220
221 bool isSymbolRefExpr() const {
222 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
223 }
224
225 bool isImm() const override {
226 return Kind == Immediate;
227 }
228
229 void setImmKindNone() const {
230 assert(isImm());
231 Imm.Kind = ImmKindTyNone;
232 }
233
234 void setImmKindLiteral() const {
235 assert(isImm());
236 Imm.Kind = ImmKindTyLiteral;
237 }
238
239 void setImmKindMandatoryLiteral() const {
240 assert(isImm());
241 Imm.Kind = ImmKindTyMandatoryLiteral;
242 }
243
244 void setImmKindConst() const {
245 assert(isImm());
246 Imm.Kind = ImmKindTyConst;
247 }
248
249 bool IsImmKindLiteral() const {
250 return isImm() && Imm.Kind == ImmKindTyLiteral;
251 }
252
253 bool IsImmKindMandatoryLiteral() const {
254 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
255 }
256
257 bool isImmKindConst() const {
258 return isImm() && Imm.Kind == ImmKindTyConst;
259 }
260
261 bool isInlinableImm(MVT type) const;
262 bool isLiteralImm(MVT type) const;
263
264 bool isRegKind() const {
265 return Kind == Register;
266 }
267
268 bool isReg() const override {
269 return isRegKind() && !hasModifiers();
270 }
271
272 bool isRegOrInline(unsigned RCID, MVT type) const {
273 return isRegClass(RCID) || isInlinableImm(type);
274 }
275
276 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
277 return isRegOrInline(RCID, type) || isLiteralImm(type);
278 }
279
280 bool isRegOrImmWithInt16InputMods() const {
281 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
282 }
283
284 bool isRegOrImmWithIntT16InputMods() const {
285 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
286 }
287
288 bool isRegOrImmWithInt32InputMods() const {
289 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
290 }
291
292 bool isRegOrInlineImmWithInt16InputMods() const {
293 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
294 }
295
296 bool isRegOrInlineImmWithInt32InputMods() const {
297 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
298 }
299
300 bool isRegOrImmWithInt64InputMods() const {
301 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
302 }
303
304 bool isRegOrImmWithFP16InputMods() const {
305 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
306 }
307
308 bool isRegOrImmWithFPT16InputMods() const {
309 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
310 }
311
312 bool isRegOrImmWithFP32InputMods() const {
313 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
314 }
315
316 bool isRegOrImmWithFP64InputMods() const {
317 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
318 }
319
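 // With fake16 (no true 16-bit register support), 16-bit operands are held in
 // 32-bit VGPRs, so the 32-bit register class is checked instead of VS_16.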
320 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
321 return isRegOrInline(
322 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
323 }
324
325 bool isRegOrInlineImmWithFP32InputMods() const {
326 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
327 }
328
329 bool isPackedFP16InputMods() const {
330 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
331 }
332
333 bool isVReg() const {
334 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
335 isRegClass(AMDGPU::VReg_64RegClassID) ||
336 isRegClass(AMDGPU::VReg_96RegClassID) ||
337 isRegClass(AMDGPU::VReg_128RegClassID) ||
338 isRegClass(AMDGPU::VReg_160RegClassID) ||
339 isRegClass(AMDGPU::VReg_192RegClassID) ||
340 isRegClass(AMDGPU::VReg_256RegClassID) ||
341 isRegClass(AMDGPU::VReg_512RegClassID) ||
342 isRegClass(AMDGPU::VReg_1024RegClassID);
343 }
344
345 bool isVReg32() const {
346 return isRegClass(AMDGPU::VGPR_32RegClassID);
347 }
348
349 bool isVReg32OrOff() const {
350 return isOff() || isVReg32();
351 }
352
353 bool isNull() const {
354 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
355 }
356
357 bool isVRegWithInputMods() const;
358 template <bool IsFake16> bool isT16VRegWithInputMods() const;
359
360 bool isSDWAOperand(MVT type) const;
361 bool isSDWAFP16Operand() const;
362 bool isSDWAFP32Operand() const;
363 bool isSDWAInt16Operand() const;
364 bool isSDWAInt32Operand() const;
365
366 bool isImmTy(ImmTy ImmT) const {
367 return isImm() && Imm.Type == ImmT;
368 }
369
370 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
371
372 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
373
374 bool isImmModifier() const {
375 return isImm() && Imm.Type != ImmTyNone;
376 }
377
378 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
379 bool isDMask() const { return isImmTy(ImmTyDMask); }
380 bool isDim() const { return isImmTy(ImmTyDim); }
381 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
382 bool isOff() const { return isImmTy(ImmTyOff); }
383 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
384 bool isOffen() const { return isImmTy(ImmTyOffen); }
385 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
386 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
387 bool isOffset() const { return isImmTy(ImmTyOffset); }
388 bool isOffset0() const { return isImmTy(ImmTyOffset0); }
389 bool isOffset1() const { return isImmTy(ImmTyOffset1); }
390 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
391 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
392 bool isGDS() const { return isImmTy(ImmTyGDS); }
393 bool isLDS() const { return isImmTy(ImmTyLDS); }
394 bool isCPol() const { return isImmTy(ImmTyCPol); }
395 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
396 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
397 bool isTFE() const { return isImmTy(ImmTyTFE); }
398 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
399 bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
400 bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
401 bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
402 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
403 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
404 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
405 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
406 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
407 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
408 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
409 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
410 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
411 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
412 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
413 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
414 bool isByteSel() const { return isImmTy(ImmTyByteSel); }
415
416 bool isRegOrImm() const {
417 return isReg() || isImm();
418 }
419
420 bool isRegClass(unsigned RCID) const;
421
422 bool isInlineValue() const;
423
424 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
425 return isRegOrInline(RCID, type) && !hasModifiers();
426 }
427
428 bool isSCSrcB16() const {
429 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
430 }
431
432 bool isSCSrcV2B16() const {
433 return isSCSrcB16();
434 }
435
436 bool isSCSrc_b32() const {
437 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
438 }
439
440 bool isSCSrc_b64() const {
441 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
442 }
443
444 bool isBoolReg() const;
445
446 bool isSCSrcF16() const {
447 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
448 }
449
450 bool isSCSrcV2F16() const {
451 return isSCSrcF16();
452 }
453
454 bool isSCSrcF32() const {
455 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
456 }
457
458 bool isSCSrcF64() const {
459 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
460 }
461
462 bool isSSrc_b32() const {
463 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
464 }
465
466 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
467
468 bool isSSrcV2B16() const {
469 llvm_unreachable("cannot happen");
470 return isSSrc_b16();
471 }
472
473 bool isSSrc_b64() const {
474 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
475 // See isVSrc64().
476 return isSCSrc_b64() || isLiteralImm(MVT::i64);
477 }
478
479 bool isSSrc_f32() const {
480 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
481 }
482
483 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
484
485 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
486
487 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
488
489 bool isSSrcV2F16() const {
490 llvm_unreachable("cannot happen");
491 return isSSrc_f16();
492 }
493
494 bool isSSrcV2FP32() const {
495 llvm_unreachable("cannot happen");
496 return isSSrc_f32();
497 }
498
499 bool isSCSrcV2FP32() const {
500 llvm_unreachable("cannot happen");
501 return isSCSrcF32();
502 }
503
504 bool isSSrcV2INT32() const {
505 llvm_unreachable("cannot happen");
506 return isSSrc_b32();
507 }
508
509 bool isSCSrcV2INT32() const {
510 llvm_unreachable("cannot happen");
511 return isSCSrc_b32();
512 }
513
514 bool isSSrcOrLds_b32() const {
515 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
516 isLiteralImm(MVT::i32) || isExpr();
517 }
518
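 // VCSrc* predicates accept a VGPR/SGPR or an inline constant of the given
 // type; the corresponding VSrc* predicates below additionally accept literals.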
519 bool isVCSrc_b32() const {
520 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
521 }
522
523 bool isVCSrcB64() const {
524 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
525 }
526
527 bool isVCSrcTB16() const {
528 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
529 }
530
531 bool isVCSrcTB16_Lo128() const {
532 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
533 }
534
535 bool isVCSrcFake16B16_Lo128() const {
536 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
537 }
538
539 bool isVCSrc_b16() const {
540 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
541 }
542
543 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
544
545 bool isVCSrc_f32() const {
546 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
547 }
548
549 bool isVCSrcF64() const {
550 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
551 }
552
553 bool isVCSrcTBF16() const {
554 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
555 }
556
557 bool isVCSrcTF16() const {
558 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
559 }
560
561 bool isVCSrcTBF16_Lo128() const {
562 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
563 }
564
565 bool isVCSrcTF16_Lo128() const {
566 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
567 }
568
569 bool isVCSrcFake16BF16_Lo128() const {
570 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
571 }
572
573 bool isVCSrcFake16F16_Lo128() const {
574 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
575 }
576
577 bool isVCSrc_bf16() const {
578 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
579 }
580
581 bool isVCSrc_f16() const {
582 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
583 }
584
585 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
586
587 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
588
589 bool isVSrc_b32() const {
590 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
591 }
592
593 bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
594
595 bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
596
597 bool isVSrcT_b16_Lo128() const {
598 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
599 }
600
601 bool isVSrcFake16_b16_Lo128() const {
602 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
603 }
604
605 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
606
607 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
608
609 bool isVCSrcV2FP32() const {
610 return isVCSrcF64();
611 }
612
613 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
614
615 bool isVCSrcV2INT32() const {
616 return isVCSrcB64();
617 }
618
619 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
620
621 bool isVSrc_f32() const {
622 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
623 }
624
625 bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
626
627 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
628
629 bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
630
631 bool isVSrcT_bf16_Lo128() const {
632 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
633 }
634
635 bool isVSrcT_f16_Lo128() const {
636 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
637 }
638
639 bool isVSrcFake16_bf16_Lo128() const {
640 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
641 }
642
643 bool isVSrcFake16_f16_Lo128() const {
644 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
645 }
646
647 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
648
649 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
650
651 bool isVSrc_v2bf16() const {
652 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
653 }
654
655 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
656
657 bool isVISrcB32() const {
658 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
659 }
660
661 bool isVISrcB16() const {
662 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
663 }
664
665 bool isVISrcV2B16() const {
666 return isVISrcB16();
667 }
668
669 bool isVISrcF32() const {
670 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
671 }
672
673 bool isVISrcF16() const {
674 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
675 }
676
677 bool isVISrcV2F16() const {
678 return isVISrcF16() || isVISrcB32();
679 }
680
681 bool isVISrc_64_bf16() const {
682 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
683 }
684
685 bool isVISrc_64_f16() const {
686 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
687 }
688
689 bool isVISrc_64_b32() const {
690 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
691 }
692
693 bool isVISrc_64B64() const {
694 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
695 }
696
697 bool isVISrc_64_f64() const {
698 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
699 }
700
701 bool isVISrc_64V2FP32() const {
702 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
703 }
704
705 bool isVISrc_64V2INT32() const {
706 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
707 }
708
709 bool isVISrc_256_b32() const {
710 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
711 }
712
713 bool isVISrc_256_f32() const {
714 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
715 }
716
717 bool isVISrc_256B64() const {
718 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
719 }
720
721 bool isVISrc_256_f64() const {
722 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
723 }
724
725 bool isVISrc_128B16() const {
726 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
727 }
728
729 bool isVISrc_128V2B16() const {
730 return isVISrc_128B16();
731 }
732
733 bool isVISrc_128_b32() const {
734 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
735 }
736
737 bool isVISrc_128_f32() const {
738 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
739 }
740
741 bool isVISrc_256V2FP32() const {
742 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
743 }
744
745 bool isVISrc_256V2INT32() const {
746 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
747 }
748
749 bool isVISrc_512_b32() const {
750 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
751 }
752
753 bool isVISrc_512B16() const {
754 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
755 }
756
757 bool isVISrc_512V2B16() const {
758 return isVISrc_512B16();
759 }
760
761 bool isVISrc_512_f32() const {
762 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
763 }
764
765 bool isVISrc_512F16() const {
766 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
767 }
768
769 bool isVISrc_512V2F16() const {
770 return isVISrc_512F16() || isVISrc_512_b32();
771 }
772
773 bool isVISrc_1024_b32() const {
774 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
775 }
776
777 bool isVISrc_1024B16() const {
778 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
779 }
780
781 bool isVISrc_1024V2B16() const {
782 return isVISrc_1024B16();
783 }
784
785 bool isVISrc_1024_f32() const {
786 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
787 }
788
789 bool isVISrc_1024F16() const {
790 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
791 }
792
793 bool isVISrc_1024V2F16() const {
794 return isVISrc_1024F16() || isVISrc_1024_b32();
795 }
796
797 bool isAISrcB32() const {
798 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
799 }
800
801 bool isAISrcB16() const {
802 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
803 }
804
805 bool isAISrcV2B16() const {
806 return isAISrcB16();
807 }
808
809 bool isAISrcF32() const {
810 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
811 }
812
813 bool isAISrcF16() const {
814 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
815 }
816
817 bool isAISrcV2F16() const {
818 return isAISrcF16() || isAISrcB32();
819 }
820
821 bool isAISrc_64B64() const {
822 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
823 }
824
825 bool isAISrc_64_f64() const {
826 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
827 }
828
829 bool isAISrc_128_b32() const {
830 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
831 }
832
833 bool isAISrc_128B16() const {
834 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
835 }
836
837 bool isAISrc_128V2B16() const {
838 return isAISrc_128B16();
839 }
840
841 bool isAISrc_128_f32() const {
842 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
843 }
844
845 bool isAISrc_128F16() const {
846 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
847 }
848
849 bool isAISrc_128V2F16() const {
850 return isAISrc_128F16() || isAISrc_128_b32();
851 }
852
853 bool isVISrc_128_bf16() const {
854 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
855 }
856
857 bool isVISrc_128_f16() const {
858 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
859 }
860
861 bool isVISrc_128V2F16() const {
862 return isVISrc_128_f16() || isVISrc_128_b32();
863 }
864
865 bool isAISrc_256B64() const {
866 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
867 }
868
869 bool isAISrc_256_f64() const {
870 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
871 }
872
873 bool isAISrc_512_b32() const {
874 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
875 }
876
877 bool isAISrc_512B16() const {
878 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
879 }
880
881 bool isAISrc_512V2B16() const {
882 return isAISrc_512B16();
883 }
884
885 bool isAISrc_512_f32() const {
886 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
887 }
888
889 bool isAISrc_512F16() const {
890 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
891 }
892
893 bool isAISrc_512V2F16() const {
894 return isAISrc_512F16() || isAISrc_512_b32();
895 }
896
897 bool isAISrc_1024_b32() const {
898 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
899 }
900
901 bool isAISrc_1024B16() const {
902 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
903 }
904
905 bool isAISrc_1024V2B16() const {
906 return isAISrc_1024B16();
907 }
908
909 bool isAISrc_1024_f32() const {
910 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
911 }
912
913 bool isAISrc_1024F16() const {
914 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
915 }
916
917 bool isAISrc_1024V2F16() const {
918 return isAISrc_1024F16() || isAISrc_1024_b32();
919 }
920
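 // KImm operands are mandatory literals (see ImmKindTyMandatoryLiteral above);
 // they are always encoded as literal constants, never as inline constants.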
921 bool isKImmFP32() const {
922 return isLiteralImm(MVT::f32);
923 }
924
925 bool isKImmFP16() const {
926 return isLiteralImm(MVT::f16);
927 }
928
929 bool isMem() const override {
930 return false;
931 }
932
933 bool isExpr() const {
934 return Kind == Expression;
935 }
936
937 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
938
939 bool isSWaitCnt() const;
940 bool isDepCtr() const;
941 bool isSDelayALU() const;
942 bool isHwreg() const;
943 bool isSendMsg() const;
944 bool isSplitBarrier() const;
945 bool isSwizzle() const;
946 bool isSMRDOffset8() const;
947 bool isSMEMOffset() const;
948 bool isSMRDLiteralOffset() const;
949 bool isDPP8() const;
950 bool isDPPCtrl() const;
951 bool isBLGP() const;
952 bool isCBSZ() const;
953 bool isABID() const;
954 bool isGPRIdxMode() const;
955 bool isS16Imm() const;
956 bool isU16Imm() const;
957 bool isEndpgm() const;
958 bool isWaitVDST() const;
959 bool isWaitEXP() const;
960 bool isWaitVAVDst() const;
961 bool isWaitVMVSrc() const;
962
963 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
964 return std::bind(P, *this);
965 }
966
967 StringRef getToken() const {
968 assert(isToken());
969 return StringRef(Tok.Data, Tok.Length);
970 }
971
972 int64_t getImm() const {
973 assert(isImm());
974 return Imm.Val;
975 }
976
977 void setImm(int64_t Val) {
978 assert(isImm());
979 Imm.Val = Val;
980 }
981
982 ImmTy getImmTy() const {
983 assert(isImm());
984 return Imm.Type;
985 }
986
987 MCRegister getReg() const override {
988 assert(isRegKind());
989 return Reg.RegNo;
990 }
991
992 SMLoc getStartLoc() const override {
993 return StartLoc;
994 }
995
996 SMLoc getEndLoc() const override {
997 return EndLoc;
998 }
999
1000 SMRange getLocRange() const {
1001 return SMRange(StartLoc, EndLoc);
1002 }
1003
1004 Modifiers getModifiers() const {
1005 assert(isRegKind() || isImmTy(ImmTyNone));
1006 return isRegKind() ? Reg.Mods : Imm.Mods;
1007 }
1008
1009 void setModifiers(Modifiers Mods) {
1010 assert(isRegKind() || isImmTy(ImmTyNone));
1011 if (isRegKind())
1012 Reg.Mods = Mods;
1013 else
1014 Imm.Mods = Mods;
1015 }
1016
1017 bool hasModifiers() const {
1018 return getModifiers().hasModifiers();
1019 }
1020
1021 bool hasFPModifiers() const {
1022 return getModifiers().hasFPModifiers();
1023 }
1024
1025 bool hasIntModifiers() const {
1026 return getModifiers().hasIntModifiers();
1027 }
1028
1029 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1030
1031 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1032
1033 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1034
1035 void addRegOperands(MCInst &Inst, unsigned N) const;
1036
1037 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1038 if (isRegKind())
1039 addRegOperands(Inst, N);
1040 else
1041 addImmOperands(Inst, N);
1042 }
1043
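 // For operands with input modifiers, the modifier bits are emitted as a
 // separate immediate first, followed by the register or unmodified immediate.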
1044 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1045 Modifiers Mods = getModifiers();
1046 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1047 if (isRegKind()) {
1048 addRegOperands(Inst, N);
1049 } else {
1050 addImmOperands(Inst, N, false);
1051 }
1052 }
1053
1054 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1055 assert(!hasIntModifiers());
1056 addRegOrImmWithInputModsOperands(Inst, N);
1057 }
1058
1059 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1060 assert(!hasFPModifiers());
1061 addRegOrImmWithInputModsOperands(Inst, N);
1062 }
1063
1064 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1065 Modifiers Mods = getModifiers();
1066 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1067 assert(isRegKind());
1068 addRegOperands(Inst, N);
1069 }
1070
1071 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1072 assert(!hasIntModifiers());
1073 addRegWithInputModsOperands(Inst, N);
1074 }
1075
1076 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1077 assert(!hasFPModifiers());
1078 addRegWithInputModsOperands(Inst, N);
1079 }
1080
1081 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1082 // clang-format off
1083 switch (Type) {
1084 case ImmTyNone: OS << "None"; break;
1085 case ImmTyGDS: OS << "GDS"; break;
1086 case ImmTyLDS: OS << "LDS"; break;
1087 case ImmTyOffen: OS << "Offen"; break;
1088 case ImmTyIdxen: OS << "Idxen"; break;
1089 case ImmTyAddr64: OS << "Addr64"; break;
1090 case ImmTyOffset: OS << "Offset"; break;
1091 case ImmTyInstOffset: OS << "InstOffset"; break;
1092 case ImmTyOffset0: OS << "Offset0"; break;
1093 case ImmTyOffset1: OS << "Offset1"; break;
1094 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1095 case ImmTyCPol: OS << "CPol"; break;
1096 case ImmTyIndexKey8bit: OS << "index_key"; break;
1097 case ImmTyIndexKey16bit: OS << "index_key"; break;
1098 case ImmTyTFE: OS << "TFE"; break;
1099 case ImmTyD16: OS << "D16"; break;
1100 case ImmTyFORMAT: OS << "FORMAT"; break;
1101 case ImmTyClampSI: OS << "ClampSI"; break;
1102 case ImmTyOModSI: OS << "OModSI"; break;
1103 case ImmTyDPP8: OS << "DPP8"; break;
1104 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1105 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1106 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1107 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1108 case ImmTyDppFI: OS << "DppFI"; break;
1109 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1110 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1111 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1112 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1113 case ImmTyDMask: OS << "DMask"; break;
1114 case ImmTyDim: OS << "Dim"; break;
1115 case ImmTyUNorm: OS << "UNorm"; break;
1116 case ImmTyDA: OS << "DA"; break;
1117 case ImmTyR128A16: OS << "R128A16"; break;
1118 case ImmTyA16: OS << "A16"; break;
1119 case ImmTyLWE: OS << "LWE"; break;
1120 case ImmTyOff: OS << "Off"; break;
1121 case ImmTyExpTgt: OS << "ExpTgt"; break;
1122 case ImmTyExpCompr: OS << "ExpCompr"; break;
1123 case ImmTyExpVM: OS << "ExpVM"; break;
1124 case ImmTyHwreg: OS << "Hwreg"; break;
1125 case ImmTySendMsg: OS << "SendMsg"; break;
1126 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1127 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1128 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1129 case ImmTyOpSel: OS << "OpSel"; break;
1130 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1131 case ImmTyNegLo: OS << "NegLo"; break;
1132 case ImmTyNegHi: OS << "NegHi"; break;
1133 case ImmTySwizzle: OS << "Swizzle"; break;
1134 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1135 case ImmTyHigh: OS << "High"; break;
1136 case ImmTyBLGP: OS << "BLGP"; break;
1137 case ImmTyCBSZ: OS << "CBSZ"; break;
1138 case ImmTyABID: OS << "ABID"; break;
1139 case ImmTyEndpgm: OS << "Endpgm"; break;
1140 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1141 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1142 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1143 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1144 case ImmTyByteSel: OS << "ByteSel"; break;
1145 }
1146 // clang-format on
1147 }
1148
1149 void print(raw_ostream &OS) const override {
1150 switch (Kind) {
1151 case Register:
1152 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1153 break;
1154 case Immediate:
1155 OS << '<' << getImm();
1156 if (getImmTy() != ImmTyNone) {
1157 OS << " type: "; printImmTy(OS, getImmTy());
1158 }
1159 OS << " mods: " << Imm.Mods << '>';
1160 break;
1161 case Token:
1162 OS << '\'' << getToken() << '\'';
1163 break;
1164 case Expression:
1165 OS << "<expr " << *Expr << '>';
1166 break;
1167 }
1168 }
1169
1170 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1171 int64_t Val, SMLoc Loc,
1172 ImmTy Type = ImmTyNone,
1173 bool IsFPImm = false) {
1174 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1175 Op->Imm.Val = Val;
1176 Op->Imm.IsFPImm = IsFPImm;
1177 Op->Imm.Kind = ImmKindTyNone;
1178 Op->Imm.Type = Type;
1179 Op->Imm.Mods = Modifiers();
1180 Op->StartLoc = Loc;
1181 Op->EndLoc = Loc;
1182 return Op;
1183 }
1184
1185 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1186 StringRef Str, SMLoc Loc,
1187 bool HasExplicitEncodingSize = true) {
1188 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1189 Res->Tok.Data = Str.data();
1190 Res->Tok.Length = Str.size();
1191 Res->StartLoc = Loc;
1192 Res->EndLoc = Loc;
1193 return Res;
1194 }
1195
1196 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1197 unsigned RegNo, SMLoc S,
1198 SMLoc E) {
1199 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1200 Op->Reg.RegNo = RegNo;
1201 Op->Reg.Mods = Modifiers();
1202 Op->StartLoc = S;
1203 Op->EndLoc = E;
1204 return Op;
1205 }
1206
1207 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1208 const class MCExpr *Expr, SMLoc S) {
1209 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1210 Op->Expr = Expr;
1211 Op->StartLoc = S;
1212 Op->EndLoc = S;
1213 return Op;
1214 }
1215};
1216
1217raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1218 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1219 return OS;
1220}
1221
1222//===----------------------------------------------------------------------===//
1223// AsmParser
1224//===----------------------------------------------------------------------===//
1225
1226// Holds info related to the current kernel, e.g. count of SGPRs used.
1227// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1228// .amdgpu_hsa_kernel or at EOF.
1229class KernelScopeInfo {
1230 int SgprIndexUnusedMin = -1;
1231 int VgprIndexUnusedMin = -1;
1232 int AgprIndexUnusedMin = -1;
1233 MCContext *Ctx = nullptr;
1234 MCSubtargetInfo const *MSTI = nullptr;
1235
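 // Track the highest register index referenced so far; each new maximum
 // re-publishes the matching .kernel.*_count symbol with the updated count.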
1236 void usesSgprAt(int i) {
1237 if (i >= SgprIndexUnusedMin) {
1238 SgprIndexUnusedMin = ++i;
1239 if (Ctx) {
1240 MCSymbol* const Sym =
1241 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1242 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1243 }
1244 }
1245 }
1246
1247 void usesVgprAt(int i) {
1248 if (i >= VgprIndexUnusedMin) {
1249 VgprIndexUnusedMin = ++i;
1250 if (Ctx) {
1251 MCSymbol* const Sym =
1252 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1253 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1254 VgprIndexUnusedMin);
1255 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1256 }
1257 }
1258 }
1259
1260 void usesAgprAt(int i) {
1261 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1262 if (!hasMAIInsts(*MSTI))
1263 return;
1264
1265 if (i >= AgprIndexUnusedMin) {
1266 AgprIndexUnusedMin = ++i;
1267 if (Ctx) {
1268 MCSymbol* const Sym =
1269 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1270 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1271
1272 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1273 MCSymbol* const vSym =
1274 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1275 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1276 VgprIndexUnusedMin);
1277 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1278 }
1279 }
1280 }
1281
1282public:
1283 KernelScopeInfo() = default;
1284
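 // Reset the per-kernel trackers; passing -1 publishes an initial count of 0
 // for each register file.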
1285 void initialize(MCContext &Context) {
1286 Ctx = &Context;
1287 MSTI = Ctx->getSubtargetInfo();
1288
1289 usesSgprAt(SgprIndexUnusedMin = -1);
1290 usesVgprAt(VgprIndexUnusedMin = -1);
1291 if (hasMAIInsts(*MSTI)) {
1292 usesAgprAt(AgprIndexUnusedMin = -1);
1293 }
1294 }
1295
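 // Called for each parsed register operand; the last 32-bit register touched
 // is DwordRegIndex + ceil(RegWidth / 32) - 1.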
1296 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1297 unsigned RegWidth) {
1298 switch (RegKind) {
1299 case IS_SGPR:
1300 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1301 break;
1302 case IS_AGPR:
1303 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1304 break;
1305 case IS_VGPR:
1306 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1307 break;
1308 default:
1309 break;
1310 }
1311 }
1312};
1313
1314class AMDGPUAsmParser : public MCTargetAsmParser {
1315 MCAsmParser &Parser;
1316
1317 unsigned ForcedEncodingSize = 0;
1318 bool ForcedDPP = false;
1319 bool ForcedSDWA = false;
1320 KernelScopeInfo KernelScope;
1321
1322 /// @name Auto-generated Match Functions
1323 /// {
1324
1325#define GET_ASSEMBLER_HEADER
1326#include "AMDGPUGenAsmMatcher.inc"
1327
1328 /// }
1329
1330private:
1331 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1332 bool OutOfRangeError(SMRange Range);
1333 /// Calculate VGPR/SGPR blocks required for the given target, reserved
1334 /// registers, and user-specified NextFreeXGPR values.
1335 ///
1336 /// \param Features [in] Target features, used for bug corrections.
1337 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1338 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1339 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1340 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1341 /// descriptor field, if valid.
1342 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1343 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1344 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1345 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1346 /// \param VGPRBlocks [out] Result VGPR block count.
1347 /// \param SGPRBlocks [out] Result SGPR block count.
1348 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1349 bool FlatScrUsed, bool XNACKUsed,
1350 std::optional<bool> EnableWavefrontSize32,
1351 unsigned NextFreeVGPR, SMRange VGPRRange,
1352 unsigned NextFreeSGPR, SMRange SGPRRange,
1353 unsigned &VGPRBlocks, unsigned &SGPRBlocks);
1354 bool ParseDirectiveAMDGCNTarget();
1355 bool ParseDirectiveAMDHSACodeObjectVersion();
1356 bool ParseDirectiveAMDHSAKernel();
1357 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1358 bool ParseDirectiveAMDKernelCodeT();
1359 // TODO: Possibly make subtargetHasRegister const.
1360 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1361 bool ParseDirectiveAMDGPUHsaKernel();
1362
1363 bool ParseDirectiveISAVersion();
1364 bool ParseDirectiveHSAMetadata();
1365 bool ParseDirectivePALMetadataBegin();
1366 bool ParseDirectivePALMetadata();
1367 bool ParseDirectiveAMDGPULDS();
1368
1369 /// Common code to parse out a block of text (typically YAML) between start and
1370 /// end directives.
1371 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1372 const char *AssemblerDirectiveEnd,
1373 std::string &CollectString);
1374
1375 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1376 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1377 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1378 unsigned &RegNum, unsigned &RegWidth,
1379 bool RestoreOnFailure = false);
1380 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1381 unsigned &RegNum, unsigned &RegWidth,
1382 SmallVectorImpl<AsmToken> &Tokens);
1383 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1384 unsigned &RegWidth,
1385 SmallVectorImpl<AsmToken> &Tokens);
1386 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1387 unsigned &RegWidth,
1388 SmallVectorImpl<AsmToken> &Tokens);
1389 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1390 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1391 bool ParseRegRange(unsigned& Num, unsigned& Width);
1392 unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
1393 unsigned RegWidth, SMLoc Loc);
1394
1395 bool isRegister();
1396 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1397 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1398 void initializeGprCountSymbol(RegisterKind RegKind);
1399 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1400 unsigned RegWidth);
1401 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1402 bool IsAtomic);
1403
1404public:
1405 enum AMDGPUMatchResultTy {
1406 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1407 };
1408 enum OperandMode {
1409 OperandMode_Default,
1410 OperandMode_NSA,
1411 };
1412
1413 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1414
1415 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1416 const MCInstrInfo &MII,
1417 const MCTargetOptions &Options)
1418 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1420
1421 if (getFeatureBits().none()) {
1422 // Set default features.
1423 copySTI().ToggleFeature("southern-islands");
1424 }
1425
1426 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1427
1428 {
1429 // TODO: make these pre-defined variables read-only.
1430 // Currently there is no suitable machinery in core llvm-mc for this.
1431 // MCSymbol::isRedefinable is intended for another purpose, and
1432 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1433 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1434 MCContext &Ctx = getContext();
1435 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1436 MCSymbol *Sym =
1437 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1438 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1439 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1440 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1441 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1442 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1443 } else {
1444 MCSymbol *Sym =
1445 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1446 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1447 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1448 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1449 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1450 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1451 }
1452 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1453 initializeGprCountSymbol(IS_VGPR);
1454 initializeGprCountSymbol(IS_SGPR);
1455 } else
1456 KernelScope.initialize(getContext());
1457 }
1458 }
1459
1460 bool hasMIMG_R128() const {
1461 return AMDGPU::hasMIMG_R128(getSTI());
1462 }
1463
1464 bool hasPackedD16() const {
1465 return AMDGPU::hasPackedD16(getSTI());
1466 }
1467
1468 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1469
1470 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1471
1472 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1473
1474 bool isSI() const {
1475 return AMDGPU::isSI(getSTI());
1476 }
1477
1478 bool isCI() const {
1479 return AMDGPU::isCI(getSTI());
1480 }
1481
1482 bool isVI() const {
1483 return AMDGPU::isVI(getSTI());
1484 }
1485
1486 bool isGFX9() const {
1487 return AMDGPU::isGFX9(getSTI());
1488 }
1489
1490 // TODO: isGFX90A is also true for GFX940. This needs to be cleaned up.
1491 bool isGFX90A() const {
1492 return AMDGPU::isGFX90A(getSTI());
1493 }
1494
1495 bool isGFX940() const {
1496 return AMDGPU::isGFX940(getSTI());
1497 }
1498
1499 bool isGFX9Plus() const {
1500 return AMDGPU::isGFX9Plus(getSTI());
1501 }
1502
1503 bool isGFX10() const {
1504 return AMDGPU::isGFX10(getSTI());
1505 }
1506
1507 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1508
1509 bool isGFX11() const {
1510 return AMDGPU::isGFX11(getSTI());
1511 }
1512
1513 bool isGFX11Plus() const {
1514 return AMDGPU::isGFX11Plus(getSTI());
1515 }
1516
1517 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1518
1519 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1520
1521 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1522
1523 bool isGFX10_BEncoding() const {
1524 return AMDGPU::isGFX10_BEncoding(getSTI());
1525 }
1526
1527 bool hasInv2PiInlineImm() const {
1528 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1529 }
1530
1531 bool hasFlatOffsets() const {
1532 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1533 }
1534
1535 bool hasArchitectedFlatScratch() const {
1536 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1537 }
1538
1539 bool hasSGPR102_SGPR103() const {
1540 return !isVI() && !isGFX9();
1541 }
1542
1543 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1544
1545 bool hasIntClamp() const {
1546 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1547 }
1548
1549 bool hasPartialNSAEncoding() const {
1550 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1551 }
1552
1553 unsigned getNSAMaxSize(bool HasSampler = false) const {
1554 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1555 }
1556
1557 unsigned getMaxNumUserSGPRs() const {
1558 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1559 }
1560
1561 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1562
1563 AMDGPUTargetStreamer &getTargetStreamer() {
1564 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1565 return static_cast<AMDGPUTargetStreamer &>(TS);
1566 }
1567
1568 const MCRegisterInfo *getMRI() const {
1569 // We need this const_cast because for some reason getContext() is not const
1570 // in MCAsmParser.
1571 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1572 }
1573
1574 const MCInstrInfo *getMII() const {
1575 return &MII;
1576 }
1577
1578 const FeatureBitset &getFeatureBits() const {
1579 return getSTI().getFeatureBits();
1580 }
1581
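 // The "forced" flags below are derived from mnemonic suffixes (e.g. _e32,
 // _e64, _dpp, _sdwa) and restrict which encoding variants the matcher tries.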
1582 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1583 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1584 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1585
1586 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1587 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1588 bool isForcedDPP() const { return ForcedDPP; }
1589 bool isForcedSDWA() const { return ForcedSDWA; }
1590 ArrayRef<unsigned> getMatchedVariants() const;
1591 StringRef getMatchedVariantName() const;
1592
1593 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1594 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1595 bool RestoreOnFailure);
1596 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1597 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1598 SMLoc &EndLoc) override;
1599 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1600 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1601 unsigned Kind) override;
1602 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1603 OperandVector &Operands, MCStreamer &Out,
1604 uint64_t &ErrorInfo,
1605 bool MatchingInlineAsm) override;
1606 bool ParseDirective(AsmToken DirectiveID) override;
1607 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1608 OperandMode Mode = OperandMode_Default);
1609 StringRef parseMnemonicSuffix(StringRef Name);
1610 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1611 SMLoc NameLoc, OperandVector &Operands) override;
1612 //bool ProcessInstruction(MCInst &Inst);
1613
1615
1616 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1617
1618 ParseStatus
1619 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1620 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1621 std::function<bool(int64_t &)> ConvertResult = nullptr);
1622
1623 ParseStatus parseOperandArrayWithPrefix(
1624 const char *Prefix, OperandVector &Operands,
1625 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1626 bool (*ConvertResult)(int64_t &) = nullptr);
1627
1628 ParseStatus
1629 parseNamedBit(StringRef Name, OperandVector &Operands,
1630 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1631 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1632 ParseStatus parseCPol(OperandVector &Operands);
1633 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1634 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1635 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1636 SMLoc &StringLoc);
1637
1638 bool isModifier();
1639 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1640 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1641 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1642 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1643 bool parseSP3NegModifier();
1644 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1645 bool HasLit = false);
1646 ParseStatus parseReg(OperandVector &Operands);
1647 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1648 bool HasLit = false);
1649 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1650 bool AllowImm = true);
1651 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1652 bool AllowImm = true);
1653 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1654 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1655 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1656 ParseStatus tryParseIndexKey(OperandVector &Operands,
1657 AMDGPUOperand::ImmTy ImmTy);
1658 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1659 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1660
1661 ParseStatus parseDfmtNfmt(int64_t &Format);
1662 ParseStatus parseUfmt(int64_t &Format);
1663 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1664 int64_t &Format);
1665 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1666 int64_t &Format);
1667 ParseStatus parseFORMAT(OperandVector &Operands);
1668 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1669 ParseStatus parseNumericFormat(int64_t &Format);
1670 ParseStatus parseFlatOffset(OperandVector &Operands);
1671 ParseStatus parseR128A16(OperandVector &Operands);
1673 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1674 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1675
1676 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1677
1678 bool parseCnt(int64_t &IntVal);
1679 ParseStatus parseSWaitCnt(OperandVector &Operands);
1680
1681 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1682 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1683 ParseStatus parseDepCtr(OperandVector &Operands);
1684
1685 bool parseDelay(int64_t &Delay);
1686 ParseStatus parseSDelayALU(OperandVector &Operands);
1687
1688 ParseStatus parseHwreg(OperandVector &Operands);
1689
1690private:
1691 struct OperandInfoTy {
1692 SMLoc Loc;
1693 int64_t Val;
1694 bool IsSymbolic = false;
1695 bool IsDefined = false;
1696
1697 OperandInfoTy(int64_t Val) : Val(Val) {}
1698 };
1699
1700 struct StructuredOpField : OperandInfoTy {
1701 StringLiteral Id;
1702 StringLiteral Desc;
1703 unsigned Width;
1704 bool IsDefined = false;
1705
1706 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1707 int64_t Default)
1708 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1709 virtual ~StructuredOpField() = default;
1710
1711 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1712 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1713 return false;
1714 }
1715
1716 virtual bool validate(AMDGPUAsmParser &Parser) const {
1717 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1718 return Error(Parser, "not supported on this GPU");
1719 if (!isUIntN(Width, Val))
1720 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1721 return true;
1722 }
1723 };
1724
1725 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1726 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1727
1728 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1729 bool validateSendMsg(const OperandInfoTy &Msg,
1730 const OperandInfoTy &Op,
1731 const OperandInfoTy &Stream);
1732
1733 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1734 OperandInfoTy &Width);
1735
1736 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1737 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1738 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1739
1740 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1741 const OperandVector &Operands) const;
1742 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1743 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1744 SMLoc getLitLoc(const OperandVector &Operands,
1745 bool SearchMandatoryLiterals = false) const;
1746 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1747 SMLoc getConstLoc(const OperandVector &Operands) const;
1748 SMLoc getInstLoc(const OperandVector &Operands) const;
1749
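 // Post-match semantic checks. validateInstruction dispatches to the specific
 // validate* helpers below and reports diagnostics at the offending operand's
 // source location.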
1750 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1751 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1752 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1753 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1754 bool validateSOPLiteral(const MCInst &Inst) const;
1755 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1756 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1757 const OperandVector &Operands);
1758 bool validateIntClampSupported(const MCInst &Inst);
1759 bool validateMIMGAtomicDMask(const MCInst &Inst);
1760 bool validateMIMGGatherDMask(const MCInst &Inst);
1761 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1762 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1763 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1764 bool validateMIMGD16(const MCInst &Inst);
1765 bool validateMIMGMSAA(const MCInst &Inst);
1766 bool validateOpSel(const MCInst &Inst);
1767 bool validateNeg(const MCInst &Inst, int OpName);
1768 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1769 bool validateVccOperand(unsigned Reg) const;
1770 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1771 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1772 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1773 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1774 bool validateAGPRLdSt(const MCInst &Inst) const;
1775 bool validateVGPRAlign(const MCInst &Inst) const;
1776 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1777 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1778 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1779 bool validateDivScale(const MCInst &Inst);
1780 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1781 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1782 const SMLoc &IDLoc);
1783 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1784 const unsigned CPol);
1785 bool validateExeczVcczOperands(const OperandVector &Operands);
1786 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1787 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1788 unsigned getConstantBusLimit(unsigned Opcode) const;
1789 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1790 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1791 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1792
1793 bool isSupportedMnemo(StringRef Mnemo,
1794 const FeatureBitset &FBS);
1795 bool isSupportedMnemo(StringRef Mnemo,
1796 const FeatureBitset &FBS,
1797 ArrayRef<unsigned> Variants);
1798 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1799
1800 bool isId(const StringRef Id) const;
1801 bool isId(const AsmToken &Token, const StringRef Id) const;
1802 bool isToken(const AsmToken::TokenKind Kind) const;
1803 StringRef getId() const;
1804 bool trySkipId(const StringRef Id);
1805 bool trySkipId(const StringRef Pref, const StringRef Id);
1806 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1807 bool trySkipToken(const AsmToken::TokenKind Kind);
1808 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1809 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1810 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1811
1812 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1813 AsmToken::TokenKind getTokenKind() const;
1814 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1815 bool parseExpr(OperandVector &Operands);
1816 StringRef getTokenStr() const;
1817 AsmToken peekToken(bool ShouldSkipSpace = true);
1818 AsmToken getToken() const;
1819 SMLoc getLoc() const;
1820 void lex();
1821
1822public:
1823 void onBeginOfFile() override;
1824 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1825
1826 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1827
1828 ParseStatus parseExpTgt(OperandVector &Operands);
1829 ParseStatus parseSendMsg(OperandVector &Operands);
1830 ParseStatus parseInterpSlot(OperandVector &Operands);
1831 ParseStatus parseInterpAttr(OperandVector &Operands);
1832 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1833 ParseStatus parseBoolReg(OperandVector &Operands);
1834
1835 bool parseSwizzleOperand(int64_t &Op,
1836 const unsigned MinVal,
1837 const unsigned MaxVal,
1838 const StringRef ErrMsg,
1839 SMLoc &Loc);
1840 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1841 const unsigned MinVal,
1842 const unsigned MaxVal,
1843 const StringRef ErrMsg);
1844 ParseStatus parseSwizzle(OperandVector &Operands);
1845 bool parseSwizzleOffset(int64_t &Imm);
1846 bool parseSwizzleMacro(int64_t &Imm);
1847 bool parseSwizzleQuadPerm(int64_t &Imm);
1848 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1849 bool parseSwizzleBroadcast(int64_t &Imm);
1850 bool parseSwizzleSwap(int64_t &Imm);
1851 bool parseSwizzleReverse(int64_t &Imm);
1852
1853 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1854 int64_t parseGPRIdxMacro();
1855
1856 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1857 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1858
1859 ParseStatus parseOModSI(OperandVector &Operands);
1860
1861 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1862 OptionalImmIndexMap &OptionalIdx);
1863 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1864 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1865 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1866 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1867
1868 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1869 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1870 OptionalImmIndexMap &OptionalIdx);
1871 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1872 OptionalImmIndexMap &OptionalIdx);
1873
1874 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1875 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1876
1877 bool parseDimId(unsigned &Encoding);
1879 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1881 ParseStatus parseDPPCtrl(OperandVector &Operands);
1882 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1883 int64_t parseDPPCtrlSel(StringRef Ctrl);
1884 int64_t parseDPPCtrlPerm();
1885 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1886 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1887 cvtDPP(Inst, Operands, true);
1888 }
1889 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1890 bool IsDPP8 = false);
1891 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1892 cvtVOP3DPP(Inst, Operands, true);
1893 }
1894
1895 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1896 AMDGPUOperand::ImmTy Type);
1897 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1898 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1899 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1900 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1901 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1902 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1903 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1904 uint64_t BasicInstType,
1905 bool SkipDstVcc = false,
1906 bool SkipSrcVcc = false);
1907
1908 ParseStatus parseEndpgm(OperandVector &Operands);
1909
1911};
1912
1913} // end anonymous namespace
1914
1915// May be called with an integer type of equivalent bit width.
1916static const fltSemantics *getFltSemantics(unsigned Size) {
1917 switch (Size) {
1918 case 4:
1919 return &APFloat::IEEEsingle();
1920 case 8:
1921 return &APFloat::IEEEdouble();
1922 case 2:
1923 return &APFloat::IEEEhalf();
1924 default:
1925 llvm_unreachable("unsupported fp type");
1926 }
1927}
1928
1929static const fltSemantics *getFltSemantics(MVT VT) {
1930 return getFltSemantics(VT.getSizeInBits() / 8);
1931}
1932
1933static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1934 switch (OperandType) {
1935 // When a floating-point immediate is used as an operand of type i16, the 32-bit
1936 // representation of the constant truncated to the 16 LSBs should be used.
1956 return &APFloat::IEEEsingle();
1962 return &APFloat::IEEEdouble();
1971 return &APFloat::IEEEhalf();
1979 return &APFloat::BFloat();
1980 default:
1981 llvm_unreachable("unsupported fp type");
1982 }
1983}
1984
1985//===----------------------------------------------------------------------===//
1986// Operand
1987//===----------------------------------------------------------------------===//
1988
1989static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1990 bool Lost;
1991
1992 // Convert literal to single precision
1993 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1994 APFloat::rmNearestTiesToEven,
1995 &Lost);
1996 // We allow precision loss but not overflow or underflow
1997 if (Status != APFloat::opOK &&
1998 Lost &&
1999 ((Status & APFloat::opOverflow) != 0 ||
2000 (Status & APFloat::opUnderflow) != 0)) {
2001 return false;
2002 }
2003
2004 return true;
2005}
2006
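// A value can be truncated to Size bits without losing information if it is
// representable as either an unsigned or a signed Size-bit integer; e.g. both
// 0xFFFF and -1 fit into 16 bits, while 0x1FFFF does not.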
2007static bool isSafeTruncation(int64_t Val, unsigned Size) {
2008 return isUIntN(Size, Val) || isIntN(Size, Val);
2009}
2010
2011static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2012 if (VT.getScalarType() == MVT::i16)
2013 return isInlinableLiteral32(Val, HasInv2Pi);
2014
2015 if (VT.getScalarType() == MVT::f16)
2016 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2017
2018 assert(VT.getScalarType() == MVT::bf16);
2019
2020 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2021}
2022
2023bool AMDGPUOperand::isInlinableImm(MVT type) const {
2024
2025 // This is a hack to enable named inline values like
2026 // shared_base with both 32-bit and 64-bit operands.
2027 // Note that these values are defined as
2028 // 32-bit operands only.
2029 if (isInlineValue()) {
2030 return true;
2031 }
2032
2033 if (!isImmTy(ImmTyNone)) {
2034 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2035 return false;
2036 }
2037 // TODO: We should avoid using host float here. It would be better to
2038 // check the float bit values which is what a few other places do.
2039 // We've had bot failures before due to weird NaN support on mips hosts.
2040
2041 APInt Literal(64, Imm.Val);
2042
2043 if (Imm.IsFPImm) { // We got fp literal token
2044 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2046 AsmParser->hasInv2PiInlineImm());
2047 }
2048
2049 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2050 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2051 return false;
2052
2053 if (type.getScalarSizeInBits() == 16) {
2054 bool Lost = false;
2055 switch (type.getScalarType().SimpleTy) {
2056 default:
2057 llvm_unreachable("unknown 16-bit type");
2058 case MVT::bf16:
2059 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2060 &Lost);
2061 break;
2062 case MVT::f16:
2063 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2064 &Lost);
2065 break;
2066 case MVT::i16:
2067 FPLiteral.convert(APFloatBase::IEEEsingle(),
2068 APFloat::rmNearestTiesToEven, &Lost);
2069 break;
2070 }
2071 // We need to use 32-bit representation here because when a floating-point
2072 // inline constant is used as an i16 operand, its 32-bit representation
2073 // will be used. We will need the 32-bit value to check whether
2074 // it is an FP inline constant.
2075 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2076 return isInlineableLiteralOp16(ImmVal, type,
2077 AsmParser->hasInv2PiInlineImm());
2078 }
2079
2080 // Check if single precision literal is inlinable
2082 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2083 AsmParser->hasInv2PiInlineImm());
2084 }
2085
2086 // We got int literal token.
2087 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2089 AsmParser->hasInv2PiInlineImm());
2090 }
2091
2092 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2093 return false;
2094 }
2095
2096 if (type.getScalarSizeInBits() == 16) {
2098 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2099 type, AsmParser->hasInv2PiInlineImm());
2100 }
2101
2103 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2104 AsmParser->hasInv2PiInlineImm());
2105}
2106
2107bool AMDGPUOperand::isLiteralImm(MVT type) const {
2108 // Check that this immediate can be added as literal
2109 if (!isImmTy(ImmTyNone)) {
2110 return false;
2111 }
2112
2113 if (!Imm.IsFPImm) {
2114 // We got int literal token.
2115
2116 if (type == MVT::f64 && hasFPModifiers()) {
2117 // Cannot apply fp modifiers to int literals while preserving the same semantics
2118 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2119 // disable these cases.
2120 return false;
2121 }
2122
2123 unsigned Size = type.getSizeInBits();
2124 if (Size == 64)
2125 Size = 32;
2126
2127 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2128 // types.
2129 return isSafeTruncation(Imm.Val, Size);
2130 }
2131
2132 // We got fp literal token
2133 if (type == MVT::f64) { // Expected 64-bit fp operand
2134 // We would set the low 64 bits of the literal to zeroes, but we accept these literals
2135 return true;
2136 }
2137
2138 if (type == MVT::i64) { // Expected 64-bit int operand
2139 // We don't allow fp literals in 64-bit integer instructions. It is
2140 // unclear how we should encode them.
2141 return false;
2142 }
2143
2144 // We allow fp literals with f16x2 operands assuming that the specified
2145 // literal goes into the lower half and the upper half is zero. We also
2146 // require that the literal may be losslessly converted to f16.
2147 //
2148 // For i16x2 operands, we assume that the specified literal is encoded as a
2149 // single-precision float. This is pretty odd, but it matches SP3 and what
2150 // happens in hardware.
2151 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2152 : (type == MVT::v2i16) ? MVT::f32
2153 : (type == MVT::v2f32) ? MVT::f32
2154 : type;
2155
2156 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2157 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2158}
2159
2160bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2161 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2162}
2163
2164bool AMDGPUOperand::isVRegWithInputMods() const {
2165 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2166 // GFX90A allows DPP on 64-bit operands.
2167 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2168 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2169}
2170
2171template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2172 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2173 : AMDGPU::VGPR_16_Lo128RegClassID);
2174}
2175
2176bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2177 if (AsmParser->isVI())
2178 return isVReg32();
2179 else if (AsmParser->isGFX9Plus())
2180 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2181 else
2182 return false;
2183}
2184
2185bool AMDGPUOperand::isSDWAFP16Operand() const {
2186 return isSDWAOperand(MVT::f16);
2187}
2188
2189bool AMDGPUOperand::isSDWAFP32Operand() const {
2190 return isSDWAOperand(MVT::f32);
2191}
2192
2193bool AMDGPUOperand::isSDWAInt16Operand() const {
2194 return isSDWAOperand(MVT::i16);
2195}
2196
2197bool AMDGPUOperand::isSDWAInt32Operand() const {
2198 return isSDWAOperand(MVT::i32);
2199}
2200
2201bool AMDGPUOperand::isBoolReg() const {
2202 auto FB = AsmParser->getFeatureBits();
2203 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2204 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2205}
2206
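// Fold parsed abs/neg modifiers into the bit pattern of a floating-point
// immediate by clearing or flipping its sign bit (the top bit of the
// Size-byte value).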
2207uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2208{
2209 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2210 assert(Size == 2 || Size == 4 || Size == 8);
2211
2212 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2213
2214 if (Imm.Mods.Abs) {
2215 Val &= ~FpSignMask;
2216 }
2217 if (Imm.Mods.Neg) {
2218 Val ^= FpSignMask;
2219 }
2220
2221 return Val;
2222}
2223
2224void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2225 if (isExpr()) {
2227 return;
2228 }
2229
2230 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2231 Inst.getNumOperands())) {
2232 addLiteralImmOperand(Inst, Imm.Val,
2233 ApplyModifiers &
2234 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2235 } else {
2236 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2238 setImmKindNone();
2239 }
2240}
2241
2242void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2243 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2244 auto OpNum = Inst.getNumOperands();
2245 // Check that this operand accepts literals
2246 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2247
2248 if (ApplyModifiers) {
2249 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2250 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2251 Val = applyInputFPModifiers(Val, Size);
2252 }
2253
2254 APInt Literal(64, Val);
2255 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2256
2257 if (Imm.IsFPImm) { // We got fp literal token
2258 switch (OpTy) {
2264 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2265 AsmParser->hasInv2PiInlineImm())) {
2266 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2267 setImmKindConst();
2268 return;
2269 }
2270
2271 // Non-inlineable
2272 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2273 // For fp operands we check if low 32 bits are zeros
2274 if (Literal.getLoBits(32) != 0) {
2275 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2276 "Can't encode literal as exact 64-bit floating-point operand. "
2277 "Low 32-bits will be set to zero");
2278 Val &= 0xffffffff00000000u;
2279 }
2280
2282 setImmKindLiteral();
2283 return;
2284 }
2285
2286 // We don't allow fp literals in 64-bit integer instructions. It is
2287 // unclear how we should encode them. This case should be checked earlier
2288 // in predicate methods (isLiteralImm())
2289 llvm_unreachable("fp literal in 64-bit integer instruction.");
2290
2298 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2299 // This is 1/(2*pi), which is going to be truncated to bf16 with a
2300 // loss of precision. The constant represents the idiomatic fp32 value of
2301 // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
2302 // bits. Prevent rounding below.
2303 Inst.addOperand(MCOperand::createImm(0x3e22));
2304 setImmKindLiteral();
2305 return;
2306 }
2307 [[fallthrough]];
2308
2336 bool lost;
2337 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2338 // Convert literal to single precision
2339 FPLiteral.convert(*getOpFltSemantics(OpTy),
2340 APFloat::rmNearestTiesToEven, &lost);
2341 // We allow precision loss but not overflow or underflow. This should be
2342 // checked earlier in isLiteralImm()
2343
2344 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2345 Inst.addOperand(MCOperand::createImm(ImmVal));
2346 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2347 setImmKindMandatoryLiteral();
2348 } else {
2349 setImmKindLiteral();
2350 }
2351 return;
2352 }
2353 default:
2354 llvm_unreachable("invalid operand size");
2355 }
2356
2357 return;
2358 }
2359
2360 // We got int literal token.
2361 // Only sign extend inline immediates.
2362 switch (OpTy) {
2378 if (isSafeTruncation(Val, 32) &&
2379 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2380 AsmParser->hasInv2PiInlineImm())) {
2382 setImmKindConst();
2383 return;
2384 }
2385
2386 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2387 setImmKindLiteral();
2388 return;
2389
2395 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2397 setImmKindConst();
2398 return;
2399 }
2400
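 // A 32-bit literal for a 64-bit fp operand supplies the high half of the
 // value; e.g. the literal 0x3ff00000 becomes 0x3ff0000000000000, i.e. 1.0.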
2401 Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2402 : Lo_32(Val);
2403
2405 setImmKindLiteral();
2406 return;
2407
2411 if (isSafeTruncation(Val, 16) &&
2412 AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2413 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2414 setImmKindConst();
2415 return;
2416 }
2417
2418 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2419 setImmKindLiteral();
2420 return;
2421
2426 if (isSafeTruncation(Val, 16) &&
2427 AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2428 AsmParser->hasInv2PiInlineImm())) {
2430 setImmKindConst();
2431 return;
2432 }
2433
2434 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2435 setImmKindLiteral();
2436 return;
2437
2442 if (isSafeTruncation(Val, 16) &&
2443 AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2444 AsmParser->hasInv2PiInlineImm())) {
2446 setImmKindConst();
2447 return;
2448 }
2449
2450 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2451 setImmKindLiteral();
2452 return;
2453
2456 assert(isSafeTruncation(Val, 16));
2457 assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2459 return;
2460 }
2463 assert(isSafeTruncation(Val, 16));
2464 assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2465 AsmParser->hasInv2PiInlineImm()));
2466
2468 return;
2469 }
2470
2473 assert(isSafeTruncation(Val, 16));
2474 assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2475 AsmParser->hasInv2PiInlineImm()));
2476
2478 return;
2479 }
2480
2482 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2483 setImmKindMandatoryLiteral();
2484 return;
2486 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2487 setImmKindMandatoryLiteral();
2488 return;
2489 default:
2490 llvm_unreachable("invalid operand size");
2491 }
2492}
2493
2494void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2495 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2496}
2497
2498bool AMDGPUOperand::isInlineValue() const {
2499 return isRegKind() && ::isInlineValue(getReg());
2500}
2501
2502//===----------------------------------------------------------------------===//
2503// AsmParser
2504//===----------------------------------------------------------------------===//
2505
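// Map a register kind and a tuple width in bits to the corresponding MC
// register class ID; returns -1 if the width is not supported for that kind.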
2506static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2507 if (Is == IS_VGPR) {
2508 switch (RegWidth) {
2509 default: return -1;
2510 case 32:
2511 return AMDGPU::VGPR_32RegClassID;
2512 case 64:
2513 return AMDGPU::VReg_64RegClassID;
2514 case 96:
2515 return AMDGPU::VReg_96RegClassID;
2516 case 128:
2517 return AMDGPU::VReg_128RegClassID;
2518 case 160:
2519 return AMDGPU::VReg_160RegClassID;
2520 case 192:
2521 return AMDGPU::VReg_192RegClassID;
2522 case 224:
2523 return AMDGPU::VReg_224RegClassID;
2524 case 256:
2525 return AMDGPU::VReg_256RegClassID;
2526 case 288:
2527 return AMDGPU::VReg_288RegClassID;
2528 case 320:
2529 return AMDGPU::VReg_320RegClassID;
2530 case 352:
2531 return AMDGPU::VReg_352RegClassID;
2532 case 384:
2533 return AMDGPU::VReg_384RegClassID;
2534 case 512:
2535 return AMDGPU::VReg_512RegClassID;
2536 case 1024:
2537 return AMDGPU::VReg_1024RegClassID;
2538 }
2539 } else if (Is == IS_TTMP) {
2540 switch (RegWidth) {
2541 default: return -1;
2542 case 32:
2543 return AMDGPU::TTMP_32RegClassID;
2544 case 64:
2545 return AMDGPU::TTMP_64RegClassID;
2546 case 128:
2547 return AMDGPU::TTMP_128RegClassID;
2548 case 256:
2549 return AMDGPU::TTMP_256RegClassID;
2550 case 512:
2551 return AMDGPU::TTMP_512RegClassID;
2552 }
2553 } else if (Is == IS_SGPR) {
2554 switch (RegWidth) {
2555 default: return -1;
2556 case 32:
2557 return AMDGPU::SGPR_32RegClassID;
2558 case 64:
2559 return AMDGPU::SGPR_64RegClassID;
2560 case 96:
2561 return AMDGPU::SGPR_96RegClassID;
2562 case 128:
2563 return AMDGPU::SGPR_128RegClassID;
2564 case 160:
2565 return AMDGPU::SGPR_160RegClassID;
2566 case 192:
2567 return AMDGPU::SGPR_192RegClassID;
2568 case 224:
2569 return AMDGPU::SGPR_224RegClassID;
2570 case 256:
2571 return AMDGPU::SGPR_256RegClassID;
2572 case 288:
2573 return AMDGPU::SGPR_288RegClassID;
2574 case 320:
2575 return AMDGPU::SGPR_320RegClassID;
2576 case 352:
2577 return AMDGPU::SGPR_352RegClassID;
2578 case 384:
2579 return AMDGPU::SGPR_384RegClassID;
2580 case 512:
2581 return AMDGPU::SGPR_512RegClassID;
2582 }
2583 } else if (Is == IS_AGPR) {
2584 switch (RegWidth) {
2585 default: return -1;
2586 case 32:
2587 return AMDGPU::AGPR_32RegClassID;
2588 case 64:
2589 return AMDGPU::AReg_64RegClassID;
2590 case 96:
2591 return AMDGPU::AReg_96RegClassID;
2592 case 128:
2593 return AMDGPU::AReg_128RegClassID;
2594 case 160:
2595 return AMDGPU::AReg_160RegClassID;
2596 case 192:
2597 return AMDGPU::AReg_192RegClassID;
2598 case 224:
2599 return AMDGPU::AReg_224RegClassID;
2600 case 256:
2601 return AMDGPU::AReg_256RegClassID;
2602 case 288:
2603 return AMDGPU::AReg_288RegClassID;
2604 case 320:
2605 return AMDGPU::AReg_320RegClassID;
2606 case 352:
2607 return AMDGPU::AReg_352RegClassID;
2608 case 384:
2609 return AMDGPU::AReg_384RegClassID;
2610 case 512:
2611 return AMDGPU::AReg_512RegClassID;
2612 case 1024:
2613 return AMDGPU::AReg_1024RegClassID;
2614 }
2615 }
2616 return -1;
2617}
2618
2621 .Case("exec", AMDGPU::EXEC)
2622 .Case("vcc", AMDGPU::VCC)
2623 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2624 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2625 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2626 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2627 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2628 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2629 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2630 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2631 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2632 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2633 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2634 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2635 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2636 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2637 .Case("m0", AMDGPU::M0)
2638 .Case("vccz", AMDGPU::SRC_VCCZ)
2639 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2640 .Case("execz", AMDGPU::SRC_EXECZ)
2641 .Case("src_execz", AMDGPU::SRC_EXECZ)
2642 .Case("scc", AMDGPU::SRC_SCC)
2643 .Case("src_scc", AMDGPU::SRC_SCC)
2644 .Case("tba", AMDGPU::TBA)
2645 .Case("tma", AMDGPU::TMA)
2646 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2647 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2648 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2649 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2650 .Case("vcc_lo", AMDGPU::VCC_LO)
2651 .Case("vcc_hi", AMDGPU::VCC_HI)
2652 .Case("exec_lo", AMDGPU::EXEC_LO)
2653 .Case("exec_hi", AMDGPU::EXEC_HI)
2654 .Case("tma_lo", AMDGPU::TMA_LO)
2655 .Case("tma_hi", AMDGPU::TMA_HI)
2656 .Case("tba_lo", AMDGPU::TBA_LO)
2657 .Case("tba_hi", AMDGPU::TBA_HI)
2658 .Case("pc", AMDGPU::PC_REG)
2659 .Case("null", AMDGPU::SGPR_NULL)
2660 .Default(AMDGPU::NoRegister);
2661}
2662
2663bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2664 SMLoc &EndLoc, bool RestoreOnFailure) {
2665 auto R = parseRegister();
2666 if (!R) return true;
2667 assert(R->isReg());
2668 RegNo = R->getReg();
2669 StartLoc = R->getStartLoc();
2670 EndLoc = R->getEndLoc();
2671 return false;
2672}
2673
2674bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2675 SMLoc &EndLoc) {
2676 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2677}
2678
2679ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2680 SMLoc &EndLoc) {
2681 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2682 bool PendingErrors = getParser().hasPendingError();
2683 getParser().clearPendingErrors();
2684 if (PendingErrors)
2685 return ParseStatus::Failure;
2686 if (Result)
2687 return ParseStatus::NoMatch;
2688 return ParseStatus::Success;
2689}
2690
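// Extend the register tuple being parsed with the next register from a list.
// Special lo/hi pairs (e.g. exec_lo followed by exec_hi) are merged into the
// 64-bit register; regular registers must have consecutive indices and grow
// RegWidth by 32 bits.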
2691bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2692 RegisterKind RegKind, unsigned Reg1,
2693 SMLoc Loc) {
2694 switch (RegKind) {
2695 case IS_SPECIAL:
2696 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2697 Reg = AMDGPU::EXEC;
2698 RegWidth = 64;
2699 return true;
2700 }
2701 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2702 Reg = AMDGPU::FLAT_SCR;
2703 RegWidth = 64;
2704 return true;
2705 }
2706 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2707 Reg = AMDGPU::XNACK_MASK;
2708 RegWidth = 64;
2709 return true;
2710 }
2711 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2712 Reg = AMDGPU::VCC;
2713 RegWidth = 64;
2714 return true;
2715 }
2716 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2717 Reg = AMDGPU::TBA;
2718 RegWidth = 64;
2719 return true;
2720 }
2721 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2722 Reg = AMDGPU::TMA;
2723 RegWidth = 64;
2724 return true;
2725 }
2726 Error(Loc, "register does not fit in the list");
2727 return false;
2728 case IS_VGPR:
2729 case IS_SGPR:
2730 case IS_AGPR:
2731 case IS_TTMP:
2732 if (Reg1 != Reg + RegWidth / 32) {
2733 Error(Loc, "registers in a list must have consecutive indices");
2734 return false;
2735 }
2736 RegWidth += 32;
2737 return true;
2738 default:
2739 llvm_unreachable("unexpected register kind");
2740 }
2741}
2742
2743struct RegInfo {
2744 StringLiteral Name;
2745 RegisterKind Kind;
2746};
2747
2748static constexpr RegInfo RegularRegisters[] = {
2749 {{"v"}, IS_VGPR},
2750 {{"s"}, IS_SGPR},
2751 {{"ttmp"}, IS_TTMP},
2752 {{"acc"}, IS_AGPR},
2753 {{"a"}, IS_AGPR},
2754};
2755
2756static bool isRegularReg(RegisterKind Kind) {
2757 return Kind == IS_VGPR ||
2758 Kind == IS_SGPR ||
2759 Kind == IS_TTMP ||
2760 Kind == IS_AGPR;
2761}
2762
2763static const RegInfo* getRegularRegInfo(StringRef Str) {
2764 for (const RegInfo &Reg : RegularRegisters)
2765 if (Str.starts_with(Reg.Name))
2766 return &Reg;
2767 return nullptr;
2768}
2769
2770static bool getRegNum(StringRef Str, unsigned& Num) {
2771 return !Str.getAsInteger(10, Num);
2772}
2773
2774bool
2775AMDGPUAsmParser::isRegister(const AsmToken &Token,
2776 const AsmToken &NextToken) const {
2777
2778 // A list of consecutive registers: [s0,s1,s2,s3]
2779 if (Token.is(AsmToken::LBrac))
2780 return true;
2781
2782 if (!Token.is(AsmToken::Identifier))
2783 return false;
2784
2785 // A single register like s0 or a range of registers like s[0:1]
2786
2787 StringRef Str = Token.getString();
2788 const RegInfo *Reg = getRegularRegInfo(Str);
2789 if (Reg) {
2790 StringRef RegName = Reg->Name;
2791 StringRef RegSuffix = Str.substr(RegName.size());
2792 if (!RegSuffix.empty()) {
2793 RegSuffix.consume_back(".l");
2794 RegSuffix.consume_back(".h");
2795 unsigned Num;
2796 // A single register with an index: rXX
2797 if (getRegNum(RegSuffix, Num))
2798 return true;
2799 } else {
2800 // A range of registers: r[XX:YY].
2801 if (NextToken.is(AsmToken::LBrac))
2802 return true;
2803 }
2804 }
2805
2806 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2807}
2808
2809bool
2810AMDGPUAsmParser::isRegister()
2811{
2812 return isRegister(getToken(), peekToken());
2813}
2814
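// Compute the MC register for a regular register operand, enforcing the
// alignment required for SGPR and TTMP tuples; e.g. a 128-bit tuple such as
// s[4:7] is accepted, while s[5:8] fails the alignment check below.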
2815unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2816 unsigned SubReg, unsigned RegWidth,
2817 SMLoc Loc) {
2818 assert(isRegularReg(RegKind));
2819
2820 unsigned AlignSize = 1;
2821 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2822 // SGPR and TTMP registers must be aligned.
2823 // Max required alignment is 4 dwords.
2824 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2825 }
2826
2827 if (RegNum % AlignSize != 0) {
2828 Error(Loc, "invalid register alignment");
2829 return AMDGPU::NoRegister;
2830 }
2831
2832 unsigned RegIdx = RegNum / AlignSize;
2833 int RCID = getRegClass(RegKind, RegWidth);
2834 if (RCID == -1) {
2835 Error(Loc, "invalid or unsupported register size");
2836 return AMDGPU::NoRegister;
2837 }
2838
2839 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2840 const MCRegisterClass RC = TRI->getRegClass(RCID);
2841 if (RegIdx >= RC.getNumRegs()) {
2842 Error(Loc, "register index is out of range");
2843 return AMDGPU::NoRegister;
2844 }
2845
2846 unsigned Reg = RC.getRegister(RegIdx);
2847
2848 if (SubReg) {
2849 Reg = TRI->getSubReg(Reg, SubReg);
2850
2851 // Currently all regular registers have their .l and .h subregisters, so
2852 // we should never need to generate an error here.
2853 assert(Reg && "Invalid subregister!");
2854 }
2855
2856 return Reg;
2857}
2858
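// Parse a register index range in square brackets; e.g. "[0:3]" yields
// Num = 0 and RegWidth = 128, while a single index such as "[2]" yields
// RegWidth = 32.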
2859bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2860 int64_t RegLo, RegHi;
2861 if (!skipToken(AsmToken::LBrac, "missing register index"))
2862 return false;
2863
2864 SMLoc FirstIdxLoc = getLoc();
2865 SMLoc SecondIdxLoc;
2866
2867 if (!parseExpr(RegLo))
2868 return false;
2869
2870 if (trySkipToken(AsmToken::Colon)) {
2871 SecondIdxLoc = getLoc();
2872 if (!parseExpr(RegHi))
2873 return false;
2874 } else {
2875 RegHi = RegLo;
2876 }
2877
2878 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2879 return false;
2880
2881 if (!isUInt<32>(RegLo)) {
2882 Error(FirstIdxLoc, "invalid register index");
2883 return false;
2884 }
2885
2886 if (!isUInt<32>(RegHi)) {
2887 Error(SecondIdxLoc, "invalid register index");
2888 return false;
2889 }
2890
2891 if (RegLo > RegHi) {
2892 Error(FirstIdxLoc, "first register index should not exceed second index");
2893 return false;
2894 }
2895
2896 Num = static_cast<unsigned>(RegLo);
2897 RegWidth = 32 * ((RegHi - RegLo) + 1);
2898 return true;
2899}
2900
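// Try to parse a special register name such as vcc or m0; on a match,
// RegKind is set to IS_SPECIAL and RegWidth to 32.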
2901unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2902 unsigned &RegNum, unsigned &RegWidth,
2903 SmallVectorImpl<AsmToken> &Tokens) {
2904 assert(isToken(AsmToken::Identifier));
2905 unsigned Reg = getSpecialRegForName(getTokenStr());
2906 if (Reg) {
2907 RegNum = 0;
2908 RegWidth = 32;
2909 RegKind = IS_SPECIAL;
2910 Tokens.push_back(getToken());
2911 lex(); // skip register name
2912 }
2913 return Reg;
2914}
2915
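// Parse a regular register operand: a single register with an optional
// .l/.h 16-bit half suffix (e.g. v1.h), or a range such as v[4:7].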
2916unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2917 unsigned &RegNum, unsigned &RegWidth,
2918 SmallVectorImpl<AsmToken> &Tokens) {
2919 assert(isToken(AsmToken::Identifier));
2920 StringRef RegName = getTokenStr();
2921 auto Loc = getLoc();
2922
2923 const RegInfo *RI = getRegularRegInfo(RegName);
2924 if (!RI) {
2925 Error(Loc, "invalid register name");
2926 return AMDGPU::NoRegister;
2927 }
2928
2929 Tokens.push_back(getToken());
2930 lex(); // skip register name
2931
2932 RegKind = RI->Kind;
2933 StringRef RegSuffix = RegName.substr(RI->Name.size());
2934 unsigned SubReg = NoSubRegister;
2935 if (!RegSuffix.empty()) {
2936 // We don't know the opcode until we are done parsing, so we don't know if
2937 // registers should be 16 or 32 bits wide. It is therefore mandatory to put .l or
2938 // .h to correctly specify 16-bit registers. We also can't determine whether the
2939 // class is VGPR_16_Lo128 or VGPR_16, so always parse them as VGPR_16.
2940 if (RegSuffix.consume_back(".l"))
2941 SubReg = AMDGPU::lo16;
2942 else if (RegSuffix.consume_back(".h"))
2943 SubReg = AMDGPU::hi16;
2944
2945 // Single 32-bit register: vXX.
2946 if (!getRegNum(RegSuffix, RegNum)) {
2947 Error(Loc, "invalid register index");
2948 return AMDGPU::NoRegister;
2949 }
2950 RegWidth = 32;
2951 } else {
2952 // Range of registers: v[XX:YY]. ":YY" is optional.
2953 if (!ParseRegRange(RegNum, RegWidth))
2954 return AMDGPU::NoRegister;
2955 }
2956
2957 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2958}
2959
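// Parse a bracketed list of 32-bit registers of the same kind, e.g.
// [s0,s1,s2,s3], and fold it into a single contiguous register tuple.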
2960unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2961 unsigned &RegWidth,
2962 SmallVectorImpl<AsmToken> &Tokens) {
2963 unsigned Reg = AMDGPU::NoRegister;
2964 auto ListLoc = getLoc();
2965
2966 if (!skipToken(AsmToken::LBrac,
2967 "expected a register or a list of registers")) {
2968 return AMDGPU::NoRegister;
2969 }
2970
2971 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2972
2973 auto Loc = getLoc();
2974 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2975 return AMDGPU::NoRegister;
2976 if (RegWidth != 32) {
2977 Error(Loc, "expected a single 32-bit register");
2978 return AMDGPU::NoRegister;
2979 }
2980
2981 for (; trySkipToken(AsmToken::Comma); ) {
2982 RegisterKind NextRegKind;
2983 unsigned NextReg, NextRegNum, NextRegWidth;
2984 Loc = getLoc();
2985
2986 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2987 NextRegNum, NextRegWidth,
2988 Tokens)) {
2989 return AMDGPU::NoRegister;
2990 }
2991 if (NextRegWidth != 32) {
2992 Error(Loc, "expected a single 32-bit register");
2993 return AMDGPU::NoRegister;
2994 }
2995 if (NextRegKind != RegKind) {
2996 Error(Loc, "registers in a list must be of the same kind");
2997 return AMDGPU::NoRegister;
2998 }
2999 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3000 return AMDGPU::NoRegister;
3001 }
3002
3003 if (!skipToken(AsmToken::RBrac,
3004 "expected a comma or a closing square bracket")) {
3005 return AMDGPU::NoRegister;
3006 }
3007
3008 if (isRegularReg(RegKind))
3009 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3010
3011 return Reg;
3012}
3013
3014bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3015 unsigned &RegNum, unsigned &RegWidth,
3016 SmallVectorImpl<AsmToken> &Tokens) {
3017 auto Loc = getLoc();
3018 Reg = AMDGPU::NoRegister;
3019
3020 if (isToken(AsmToken::Identifier)) {
3021 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3022 if (Reg == AMDGPU::NoRegister)
3023 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3024 } else {
3025 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3026 }
3027
3028 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3029 if (Reg == AMDGPU::NoRegister) {
3030 assert(Parser.hasPendingError());
3031 return false;
3032 }
3033
3034 if (!subtargetHasRegister(*TRI, Reg)) {
3035 if (Reg == AMDGPU::SGPR_NULL) {
3036 Error(Loc, "'null' operand is not supported on this GPU");
3037 } else {
3038 Error(Loc, "register not available on this GPU");
3039 }
3040 return false;
3041 }
3042
3043 return true;
3044}
3045
3046bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3047 unsigned &RegNum, unsigned &RegWidth,
3048 bool RestoreOnFailure /*=false*/) {
3049 Reg = AMDGPU::NoRegister;
3050
3051 SmallVector<AsmToken, 1> Tokens;
3052 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3053 if (RestoreOnFailure) {
3054 while (!Tokens.empty()) {
3055 getLexer().UnLex(Tokens.pop_back_val());
3056 }
3057 }
3058 return true;
3059 }
3060 return false;
3061}
3062
3063std::optional<StringRef>
3064AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3065 switch (RegKind) {
3066 case IS_VGPR:
3067 return StringRef(".amdgcn.next_free_vgpr");
3068 case IS_SGPR:
3069 return StringRef(".amdgcn.next_free_sgpr");
3070 default:
3071 return std::nullopt;
3072 }
3073}
3074
3075void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3076 auto SymbolName = getGprCountSymbolName(RegKind);
3077 assert(SymbolName && "initializing invalid register kind");
3078 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3079 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3080}
3081
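// Keep the .amdgcn.next_free_{v,s}gpr symbols up to date: the symbol value
// is raised to one past the highest register index referenced so far.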
3082bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3083 unsigned DwordRegIndex,
3084 unsigned RegWidth) {
3085 // Symbols are only defined for GCN targets
3086 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3087 return true;
3088
3089 auto SymbolName = getGprCountSymbolName(RegKind);
3090 if (!SymbolName)
3091 return true;
3092 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3093
3094 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3095 int64_t OldCount;
3096
3097 if (!Sym->isVariable())
3098 return !Error(getLoc(),
3099 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3100 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
3101 return !Error(
3102 getLoc(),
3103 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3104
3105 if (OldCount <= NewMax)
3106 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3107
3108 return true;
3109}
3110
3111std::unique_ptr<AMDGPUOperand>
3112AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3113 const auto &Tok = getToken();
3114 SMLoc StartLoc = Tok.getLoc();
3115 SMLoc EndLoc = Tok.getEndLoc();
3116 RegisterKind RegKind;
3117 unsigned Reg, RegNum, RegWidth;
3118
3119 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3120 return nullptr;
3121 }
3122 if (isHsaAbi(getSTI())) {
3123 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3124 return nullptr;
3125 } else
3126 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3127 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3128}
3129
3130ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3131 bool HasSP3AbsModifier, bool HasLit) {
3132 // TODO: add syntactic sugar for 1/(2*PI)
3133
3134 if (isRegister())
3135 return ParseStatus::NoMatch;
3136 assert(!isModifier());
3137
3138 if (!HasLit) {
3139 HasLit = trySkipId("lit");
3140 if (HasLit) {
3141 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3142 return ParseStatus::Failure;
3143 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
3144 if (S.isSuccess() &&
3145 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3146 return ParseStatus::Failure;
3147 return S;
3148 }
3149 }
3150
3151 const auto& Tok = getToken();
3152 const auto& NextTok = peekToken();
3153 bool IsReal = Tok.is(AsmToken::Real);
3154 SMLoc S = getLoc();
3155 bool Negate = false;
3156
3157 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3158 lex();
3159 IsReal = true;
3160 Negate = true;
3161 }
3162
3163 AMDGPUOperand::Modifiers Mods;
3164 Mods.Lit = HasLit;
3165
3166 if (IsReal) {
3167 // Floating-point expressions are not supported.
3168 // Can only allow floating-point literals with an
3169 // optional sign.
3170
3171 StringRef Num = getTokenStr();
3172 lex();
3173
3174 APFloat RealVal(APFloat::IEEEdouble());
3175 auto roundMode = APFloat::rmNearestTiesToEven;
3176 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3177 return ParseStatus::Failure;
3178 if (Negate)
3179 RealVal.changeSign();
3180
3181 Operands.push_back(
3182 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3183 AMDGPUOperand::ImmTyNone, true));
3184 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3185 Op.setModifiers(Mods);
3186
3187 return ParseStatus::Success;
3188
3189 } else {
3190 int64_t IntVal;
3191 const MCExpr *Expr;
3192 SMLoc S = getLoc();
3193
3194 if (HasSP3AbsModifier) {
3195 // This is a workaround for handling expressions
3196 // as arguments of SP3 'abs' modifier, for example:
3197 // |1.0|
3198 // |-1|
3199 // |1+x|
3200 // This syntax is not compatible with the syntax of standard
3201 // MC expressions (due to the trailing '|').
3202 SMLoc EndLoc;
3203 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3204 return ParseStatus::Failure;
3205 } else {
3206 if (Parser.parseExpression(Expr))
3207 return ParseStatus::Failure;
3208 }
3209
3210 if (Expr->evaluateAsAbsolute(IntVal)) {
3211 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3212 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3213 Op.setModifiers(Mods);
3214 } else {
3215 if (HasLit)
3216 return ParseStatus::NoMatch;
3217 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3218 }
3219
3220 return ParseStatus::Success;
3221 }
3222
3223 return ParseStatus::NoMatch;
3224}
3225
3226ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3227 if (!isRegister())
3228 return ParseStatus::NoMatch;
3229
3230 if (auto R = parseRegister()) {
3231 assert(R->isReg());
3232 Operands.push_back(std::move(R));
3233 return ParseStatus::Success;
3234 }
3235 return ParseStatus::Failure;
3236}
3237
3238ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3239 bool HasSP3AbsMod, bool HasLit) {
3240 ParseStatus Res = parseReg(Operands);
3241 if (!Res.isNoMatch())
3242 return Res;
3243 if (isModifier())
3244 return ParseStatus::NoMatch;
3245 return parseImm(Operands, HasSP3AbsMod, HasLit);
3246}
3247
3248bool
3249AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3250 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3251 const auto &str = Token.getString();
3252 return str == "abs" || str == "neg" || str == "sext";
3253 }
3254 return false;
3255}
3256
3257bool
3258AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3259 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3260}
3261
3262bool
3263AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3264 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3265}
3266
3267bool
3268AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3269 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3270}
3271
3272// Check if this is an operand modifier or an opcode modifier
3273// which may look like an expression but is not. We should
3274// avoid parsing these modifiers as expressions. Currently
3275// recognized sequences are:
3276// |...|
3277// abs(...)
3278// neg(...)
3279// sext(...)
3280// -reg
3281// -|...|
3282// -abs(...)
3283// name:...
3284//
3285bool
3286AMDGPUAsmParser::isModifier() {
3287
3288 AsmToken Tok = getToken();
3289 AsmToken NextToken[2];
3290 peekTokens(NextToken);
3291
3292 return isOperandModifier(Tok, NextToken[0]) ||
3293 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3294 isOpcodeModifierWithVal(Tok, NextToken[0]);
3295}
3296
3297// Check if the current token is an SP3 'neg' modifier.
3298// Currently this modifier is allowed in the following context:
3299//
3300// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3301// 2. Before an 'abs' modifier: -abs(...)
3302// 3. Before an SP3 'abs' modifier: -|...|
3303//
3304// In all other cases "-" is handled as a part
3305// of an expression that follows the sign.
3306//
3307// Note: When "-" is followed by an integer literal,
3308// this is interpreted as integer negation rather
3309// than a floating-point NEG modifier applied to the literal.
3310// Besides being counter-intuitive, such use of the floating-point
3311// NEG modifier would result in different meanings
3312// for integer literals used with VOP1/2/C and VOP3,
3313// for example:
3314// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3315// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3316// Negative fp literals with preceding "-" are
3317// handled likewise for uniformity
3318//
3319bool
3320AMDGPUAsmParser::parseSP3NegModifier() {
3321
3322 AsmToken NextToken[2];
3323 peekTokens(NextToken);
3324
3325 if (isToken(AsmToken::Minus) &&
3326 (isRegister(NextToken[0], NextToken[1]) ||
3327 NextToken[0].is(AsmToken::Pipe) ||
3328 isId(NextToken[0], "abs"))) {
3329 lex();
3330 return true;
3331 }
3332
3333 return false;
3334}
3335
3336ParseStatus
3337AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3338 bool AllowImm) {
3339 bool Neg, SP3Neg;
3340 bool Abs, SP3Abs;
3341 bool Lit;
3342 SMLoc Loc;
3343
3344 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3345 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3346 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3347
3348 SP3Neg = parseSP3NegModifier();
3349
3350 Loc = getLoc();
3351 Neg = trySkipId("neg");
3352 if (Neg && SP3Neg)
3353 return Error(Loc, "expected register or immediate");
3354 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3355 return ParseStatus::Failure;
3356
3357 Abs = trySkipId("abs");
3358 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3359 return ParseStatus::Failure;
3360
3361 Lit = trySkipId("lit");
3362 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3363 return ParseStatus::Failure;
3364
3365 Loc = getLoc();
3366 SP3Abs = trySkipToken(AsmToken::Pipe);
3367 if (Abs && SP3Abs)
3368 return Error(Loc, "expected register or immediate");
3369
3370 ParseStatus Res;
3371 if (AllowImm) {
3372 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3373 } else {
3374 Res = parseReg(Operands);
3375 }
3376 if (!Res.isSuccess())
3377 return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3378
3379 if (Lit && !Operands.back()->isImm())
3380 Error(Loc, "expected immediate with lit modifier");
3381
3382 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3383 return ParseStatus::Failure;
3384 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3385 return ParseStatus::Failure;
3386 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3387 return ParseStatus::Failure;
3388 if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3389 return ParseStatus::Failure;
3390
3391 AMDGPUOperand::Modifiers Mods;
3392 Mods.Abs = Abs || SP3Abs;
3393 Mods.Neg = Neg || SP3Neg;
3394 Mods.Lit = Lit;
3395
3396 if (Mods.hasFPModifiers() || Lit) {
3397 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3398 if (Op.isExpr())
3399 return Error(Op.getStartLoc(), "expected an absolute expression");
3400 Op.setModifiers(Mods);
3401 }
3402 return ParseStatus::Success;
3403}
3404
3405ParseStatus
3406AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3407 bool AllowImm) {
3408 bool Sext = trySkipId("sext");
3409 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3410 return ParseStatus::Failure;
3411
3412 ParseStatus Res;
3413 if (AllowImm) {
3414 Res = parseRegOrImm(Operands);
3415 } else {
3416 Res = parseReg(Operands);
3417 }
3418 if (!Res.isSuccess())
3419 return Sext ? ParseStatus::Failure : Res;
3420
3421 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3422 return ParseStatus::Failure;
3423
3424 AMDGPUOperand::Modifiers Mods;
3425 Mods.Sext = Sext;
3426
3427 if (Mods.hasIntModifiers()) {
3428 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3429 if (Op.isExpr())
3430 return Error(Op.getStartLoc(), "expected an absolute expression");
3431 Op.setModifiers(Mods);
3432 }
3433
3434 return ParseStatus::Success;
3435}
3436
3437ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3438 return parseRegOrImmWithFPInputMods(Operands, false);
3439}
3440
3441ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3442 return parseRegOrImmWithIntInputMods(Operands, false);
3443}
3444
3445ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3446 auto Loc = getLoc();
3447 if (trySkipId("off")) {
3448 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3449 AMDGPUOperand::ImmTyOff, false));
3450 return ParseStatus::Success;
3451 }
3452
3453 if (!isRegister())
3454 return ParseStatus::NoMatch;
3455
3456 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3457 if (Reg) {
3458 Operands.push_back(std::move(Reg));
3459 return ParseStatus::Success;
3460 }
3461
3462 return ParseStatus::Failure;
3463}
3464
3465unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3466 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3467
3468 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3469 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3470 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3471 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3472 return Match_InvalidOperand;
3473
3474 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3475 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3476 // v_mac_f32/16 allow only dst_sel == DWORD;
3477 auto OpNum =
3478 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3479 const auto &Op = Inst.getOperand(OpNum);
3480 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3481 return Match_InvalidOperand;
3482 }
3483 }
3484
3485 return Match_Success;
3486}
3487
3488static ArrayRef<unsigned> getAllVariants() {
3489 static const unsigned Variants[] = {
3493 };
3494
3495 return ArrayRef(Variants);
3496}
3497
3498// What asm variants we should check
3499ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3500 if (isForcedDPP() && isForcedVOP3()) {
3501 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3502 return ArrayRef(Variants);
3503 }
3504 if (getForcedEncodingSize() == 32) {
3505 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3506 return ArrayRef(Variants);
3507 }
3508
3509 if (isForcedVOP3()) {
3510 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3511 return ArrayRef(Variants);
3512 }
3513
3514 if (isForcedSDWA()) {
3515 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3517 return ArrayRef(Variants);
3518 }
3519
3520 if (isForcedDPP()) {
3521 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3522 return ArrayRef(Variants);
3523 }
3524
3525 return getAllVariants();
3526}
3527
3528StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3529 if (isForcedDPP() && isForcedVOP3())
3530 return "e64_dpp";
3531
3532 if (getForcedEncodingSize() == 32)
3533 return "e32";
3534
3535 if (isForcedVOP3())
3536 return "e64";
3537
3538 if (isForcedSDWA())
3539 return "sdwa";
3540
3541 if (isForcedDPP())
3542 return "dpp";
3543
3544 return "";
3545}
3546
3547unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3548 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3549 for (MCPhysReg Reg : Desc.implicit_uses()) {
3550 switch (Reg) {
3551 case AMDGPU::FLAT_SCR:
3552 case AMDGPU::VCC:
3553 case AMDGPU::VCC_LO:
3554 case AMDGPU::VCC_HI:
3555 case AMDGPU::M0:
3556 return Reg;
3557 default:
3558 break;
3559 }
3560 }
3561 return AMDGPU::NoRegister;
3562}
3563
3564// NB: This code is correct only when used to check constant
3565// bus limitations because GFX7 supports no f16 inline constants.
3566// Note that there are no cases in which a GFX7 opcode violates
3567// constant bus limitations due to the use of an f16 constant.
3568bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3569 unsigned OpIdx) const {
3570 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3571
3572 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3573 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3574 return false;
3575 }
3576
3577 const MCOperand &MO = Inst.getOperand(OpIdx);
3578
3579 int64_t Val = MO.getImm();
3580 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3581
3582 switch (OpSize) { // expected operand size
3583 case 8:
3584 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3585 case 4:
3586 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3587 case 2: {
3588 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3592 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3593
3598
3603
3608
3613 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3614
3619 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3620
3621 llvm_unreachable("invalid operand type");
3622 }
3623 default:
3624 llvm_unreachable("invalid operand size");
3625 }
3626}
3627
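// Number of scalar values (SGPRs and literal constants) an instruction may
// read over the constant bus: one before GFX10; on GFX10+ two for most
// opcodes, but only one for the 64-bit shifts listed below.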
3628unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3629 if (!isGFX10Plus())
3630 return 1;
3631
3632 switch (Opcode) {
3633 // 64-bit shift instructions can use only one scalar value input
3634 case AMDGPU::V_LSHLREV_B64_e64:
3635 case AMDGPU::V_LSHLREV_B64_gfx10:
3636 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3637 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3638 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3639 case AMDGPU::V_LSHRREV_B64_e64:
3640 case AMDGPU::V_LSHRREV_B64_gfx10:
3641 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3642 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3643 case AMDGPU::V_ASHRREV_I64_e64:
3644 case AMDGPU::V_ASHRREV_I64_gfx10:
3645 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3646 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3647 case AMDGPU::V_LSHL_B64_e64:
3648 case AMDGPU::V_LSHR_B64_e64:
3649 case AMDGPU::V_ASHR_I64_e64:
3650 return 1;
3651 default:
3652 return 2;
3653 }
3654}
3655
3656 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3657using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3658
3659// Get regular operand indices in the same order as specified
3660// in the instruction (but append mandatory literals to the end).
3661static OperandIndices getSrcOperandIndices(unsigned Opcode,
3662 bool AddMandatoryLiterals = false) {
3663
3664 int16_t ImmIdx =
3665 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3666
3667 if (isVOPD(Opcode)) {
3668 int16_t ImmDeferredIdx =
3669 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3670 : -1;
3671
3672 return {getNamedOperandIdx(Opcode, OpName::src0X),
3673 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3674 getNamedOperandIdx(Opcode, OpName::src0Y),
3675 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3676 ImmDeferredIdx,
3677 ImmIdx};
3678 }
3679
3680 return {getNamedOperandIdx(Opcode, OpName::src0),
3681 getNamedOperandIdx(Opcode, OpName::src1),
3682 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3683}
3684
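// An operand occupies the constant bus if it is a literal that is not an
// inline constant, an expression, or an SGPR other than null.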
3685bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3686 const MCOperand &MO = Inst.getOperand(OpIdx);
3687 if (MO.isImm()) {
3688 return !isInlineConstant(Inst, OpIdx);
3689 } else if (MO.isReg()) {
3690 auto Reg = MO.getReg();
3691 if (!Reg) {
3692 return false;
3693 }
3694 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3695 auto PReg = mc2PseudoReg(Reg);
3696 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3697 } else {
3698 return true;
3699 }
3700}
3701
3702// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3703// Writelane is special in that it can use SGPR and M0 (which would normally
3704// count as using the constant bus twice - but in this case it is allowed since
3705// the lane selector doesn't count as a use of the constant bus). However, it is
3706// still required to abide by the 1 SGPR rule.
3707static bool checkWriteLane(const MCInst &Inst) {
3708 const unsigned Opcode = Inst.getOpcode();
3709 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3710 return false;
3711 const MCOperand &LaneSelOp = Inst.getOperand(2);
3712 if (!LaneSelOp.isReg())
3713 return false;
3714 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3715 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3716}
3717
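// Check that the instruction does not read more scalar values (SGPRs and
// literal constants) over the constant bus than getConstantBusLimit() allows.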
3718bool AMDGPUAsmParser::validateConstantBusLimitations(
3719 const MCInst &Inst, const OperandVector &Operands) {
3720 const unsigned Opcode = Inst.getOpcode();
3721 const MCInstrDesc &Desc = MII.get(Opcode);
3722 unsigned LastSGPR = AMDGPU::NoRegister;
3723 unsigned ConstantBusUseCount = 0;
3724 unsigned NumLiterals = 0;
3725 unsigned LiteralSize;
3726
3727 if (!(Desc.TSFlags &
3730 !isVOPD(Opcode))
3731 return true;
3732
3733 if (checkWriteLane(Inst))
3734 return true;
3735
3736 // Check special imm operands (used by madmk, etc)
3737 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3738 ++NumLiterals;
3739 LiteralSize = 4;
3740 }
3741
3742 SmallDenseSet<unsigned> SGPRsUsed;
3743 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3744 if (SGPRUsed != AMDGPU::NoRegister) {
3745 SGPRsUsed.insert(SGPRUsed);
3746 ++ConstantBusUseCount;
3747 }
3748
3749 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3750
3751 for (int OpIdx : OpIndices) {
3752 if (OpIdx == -1)
3753 continue;
3754
3755 const MCOperand &MO = Inst.getOperand(OpIdx);
3756 if (usesConstantBus(Inst, OpIdx)) {
3757 if (MO.isReg()) {
3758 LastSGPR = mc2PseudoReg(MO.getReg());
3759 // Pairs of registers with partial intersections, such as
3760 // s0, s[0:1]
3761 // flat_scratch_lo, flat_scratch
3762 // flat_scratch_lo, flat_scratch_hi
3763 // are theoretically valid but they are disabled anyway.
3764 // Note that this code mimics SIInstrInfo::verifyInstruction
3765 if (SGPRsUsed.insert(LastSGPR).second) {
3766 ++ConstantBusUseCount;
3767 }
3768 } else { // Expression or a literal
3769
3770 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3771 continue; // special operand like VINTERP attr_chan
3772
3773 // An instruction may use only one literal.
3774 // This has been validated in the previous step.
3775 // See validateVOPLiteral.
3776 // This literal may be used as more than one operand.
3777 // If all these operands are of the same size,
3778 // this literal counts as one scalar value.
3779 // Otherwise it counts as 2 scalar values.
3780 // See "GFX10 Shader Programming", section 3.6.2.3.
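// Illustrative example of the rule above: a literal shared by two 32-bit
// source operands is charged once against the constant bus; the same
// literal feeding a 32-bit and a 64-bit operand is charged twice.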
3781
3782 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3783 if (Size < 4)
3784 Size = 4;
3785
3786 if (NumLiterals == 0) {
3787 NumLiterals = 1;
3788 LiteralSize = Size;
3789 } else if (LiteralSize != Size) {
3790 NumLiterals = 2;
3791 }
3792 }
3793 }
3794 }
3795 ConstantBusUseCount += NumLiterals;
3796
3797 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3798 return true;
3799
3800 SMLoc LitLoc = getLitLoc(Operands);
3801 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3802 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3803 Error(Loc, "invalid operand (violates constant bus restrictions)");
3804 return false;
3805}
3806
3807bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3808 const MCInst &Inst, const OperandVector &Operands) {
3809
3810 const unsigned Opcode = Inst.getOpcode();
3811 if (!isVOPD(Opcode))
3812 return true;
3813
3814 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3815
3816 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3817 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3818 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3819 ? Opr.getReg()
3820 : MCRegister::NoRegister;
3821 };
3822
3823 // On GFX12, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2 source cache.
3824 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3825
3826 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3827 auto InvalidCompOprIdx =
3828 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3829 if (!InvalidCompOprIdx)
3830 return true;
3831
3832 auto CompOprIdx = *InvalidCompOprIdx;
3833 auto ParsedIdx =
3834 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3835 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3836 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3837
3838 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3839 if (CompOprIdx == VOPD::Component::DST) {
3840 Error(Loc, "one dst register must be even and the other odd");
3841 } else {
3842 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3843 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3844 " operands must use different VGPR banks");
3845 }
3846
3847 return false;
3848}
3849
3850bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3851
3852 const unsigned Opc = Inst.getOpcode();
3853 const MCInstrDesc &Desc = MII.get(Opc);
3854
3855 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3856 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3857 assert(ClampIdx != -1);
3858 return Inst.getOperand(ClampIdx).getImm() == 0;
3859 }
3860
3861 return true;
3862}
3863
3864constexpr uint64_t MIMGFlags =
3865 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3866
3867bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3868 const SMLoc &IDLoc) {
3869
3870 const unsigned Opc = Inst.getOpcode();
3871 const MCInstrDesc &Desc = MII.get(Opc);
3872
3873 if ((Desc.TSFlags & MIMGFlags) == 0)
3874 return true;
3875
3876 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3877 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3878 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3879
3880 assert(VDataIdx != -1);
3881
3882 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3883 return true;
3884
3885 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3886 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3887 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3888 if (DMask == 0)
3889 DMask = 1;
3890
3891 bool IsPackedD16 = false;
3892 unsigned DataSize =
3893 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3894 if (hasPackedD16()) {
3895 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3896 IsPackedD16 = D16Idx >= 0;
3897 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3898 DataSize = (DataSize + 1) / 2;
3899 }
3900
3901 if ((VDataSize / 4) == DataSize + TFESize)
3902 return true;
3903
3904 StringRef Modifiers;
3905 if (isGFX90A())
3906 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3907 else
3908 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3909
3910 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3911 return false;
3912}
3913
3914bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3915 const SMLoc &IDLoc) {
3916 const unsigned Opc = Inst.getOpcode();
3917 const MCInstrDesc &Desc = MII.get(Opc);
3918
3919 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3920 return true;
3921
3922 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3923
3924 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3925 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3926 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3927 int RSrcOpName = Desc.TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
3928 : AMDGPU::OpName::rsrc;
3929 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
3930 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3931 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3932
3933 assert(VAddr0Idx != -1);
3934 assert(SrsrcIdx != -1);
3935 assert(SrsrcIdx > VAddr0Idx);
3936
3937 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3938 if (BaseOpcode->BVH) {
3939 if (IsA16 == BaseOpcode->A16)
3940 return true;
3941 Error(IDLoc, "image address size does not match a16");
3942 return false;
3943 }
3944
3945 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3946 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3947 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3948 unsigned ActualAddrSize =
3949 IsNSA ? SrsrcIdx - VAddr0Idx
3950 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3951
3952 unsigned ExpectedAddrSize =
3953 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3954
3955 if (IsNSA) {
3956 if (hasPartialNSAEncoding() &&
3957 ExpectedAddrSize >
3958 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
3959 int VAddrLastIdx = SrsrcIdx - 1;
3960 unsigned VAddrLastSize =
3961 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3962
3963 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3964 }
3965 } else {
3966 if (ExpectedAddrSize > 12)
3967 ExpectedAddrSize = 16;
3968
3969 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3970 // This provides backward compatibility for assembly created
3971 // before 160b/192b/224b types were directly supported.
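// Illustrative example: an image instruction that needs only 6 address
// VGPRs may still be written with a v[0:7] address tuple; the oversized
// operand is accepted here for compatibility.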
3972 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3973 return true;
3974 }
3975
3976 if (ActualAddrSize == ExpectedAddrSize)
3977 return true;
3978
3979 Error(IDLoc, "image address size does not match dim and a16");
3980 return false;
3981}
3982
3983bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3984
3985 const unsigned Opc = Inst.getOpcode();
3986 const MCInstrDesc &Desc = MII.get(Opc);
3987
3988 if ((Desc.TSFlags & MIMGFlags) == 0)
3989 return true;
3990 if (!Desc.mayLoad() || !Desc.mayStore())
3991 return true; // Not atomic
3992
3993 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3994 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3995
3996 // This is an incomplete check because image_atomic_cmpswap
3997 // may only use 0x3 and 0xf while other atomic operations
3998 // may use 0x1 and 0x3. However these limitations are
3999 // verified when we check that dmask matches dst size.
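// Illustrative example: a 32-bit image atomic (e.g. image_atomic_swap)
// uses dmask:0x1, while image_atomic_cmpswap carries a data pair and
// uses dmask:0x3.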
4000 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4001}
4002
4003bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4004
4005 const unsigned Opc = Inst.getOpcode();
4006 const MCInstrDesc &Desc = MII.get(Opc);
4007
4008 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4009 return true;
4010
4011 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4012 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4013
4014 // GATHER4 instructions use dmask in a different fashion compared to
4015 // other MIMG instructions. The only useful DMASK values are
4016 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4017 // (red,red,red,red) etc.) The ISA document doesn't mention
4018 // this.
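// Illustrative example: image_gather4 with dmask:0x2 gathers the green
// component from four texels; a dmask with more than one bit set (e.g.
// 0x3) is rejected here.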
4019 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4020}
4021
4022bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4023 const unsigned Opc = Inst.getOpcode();
4024 const MCInstrDesc &Desc = MII.get(Opc);
4025
4026 if ((Desc.TSFlags & MIMGFlags) == 0)
4027 return true;
4028
4029 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4030 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4031 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4032
4033 if (!BaseOpcode->MSAA)
4034 return true;
4035
4036 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4037 assert(DimIdx != -1);
4038
4039 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4040 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4041
4042 return DimInfo->MSAA;
4043}
4044
4045static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4046{
4047 switch (Opcode) {
4048 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4049 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4050 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4051 return true;
4052 default:
4053 return false;
4054 }
4055}
4056
4057// movrels* opcodes should only allow VGPRs as src0.
4058// This is specified in .td description for vop1/vop3,
4059// but sdwa is handled differently. See isSDWAOperand.
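// Illustrative (schematic) example: "v_movrels_b32_sdwa v0, v1 ..." is
// accepted, whereas an SGPR or a constant in src0 triggers the error below.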
4060bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4061 const OperandVector &Operands) {
4062
4063 const unsigned Opc = Inst.getOpcode();
4064 const MCInstrDesc &Desc = MII.get(Opc);
4065
4066 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4067 return true;
4068
4069 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4070 assert(Src0Idx != -1);
4071
4072 SMLoc ErrLoc;
4073 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4074 if (Src0.isReg()) {
4075 auto Reg = mc2PseudoReg(Src0.getReg());
4076 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4077 if (!isSGPR(Reg, TRI))
4078 return true;
4079 ErrLoc = getRegLoc(Reg, Operands);
4080 } else {
4081 ErrLoc = getConstLoc(Operands);
4082 }
4083
4084 Error(ErrLoc, "source operand must be a VGPR");
4085 return false;
4086}
4087
4088bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4089 const OperandVector &Operands) {
4090
4091 const unsigned Opc = Inst.getOpcode();
4092
4093 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4094 return true;
4095
4096 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4097 assert(Src0Idx != -1);
4098
4099 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4100 if (!Src0.isReg())
4101 return true;
4102
4103 auto Reg = mc2PseudoReg(Src0.getReg());
4104 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4105 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4106 Error(getRegLoc(Reg, Operands),
4107 "source operand must be either a VGPR or an inline constant");
4108 return false;
4109 }
4110
4111 return true;
4112}
4113
4114bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4115 const OperandVector &Operands) {
4116 unsigned Opcode = Inst.getOpcode();
4117 const MCInstrDesc &Desc = MII.get(Opcode);
4118
4119 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4120 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4121 return true;
4122
4123 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4124 if (Src2Idx == -1)
4125 return true;
4126
4127 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4128 Error(getConstLoc(Operands),
4129 "inline constants are not allowed for this operand");
4130 return false;
4131 }
4132
4133 return true;
4134}
4135
4136bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4137 const OperandVector &Operands) {
4138 const unsigned Opc = Inst.getOpcode();
4139 const MCInstrDesc &Desc = MII.get(Opc);
4140
4141 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4142 return true;
4143
4144 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4145 if (Src2Idx == -1)
4146 return true;
4147
4148 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4149 if (!Src2.isReg())
4150 return true;
4151
4152 MCRegister Src2Reg = Src2.getReg();
4153 MCRegister DstReg = Inst.getOperand(0).getReg();
4154 if (Src2Reg == DstReg)
4155 return true;
4156
4157 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4158 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4159 return true;
4160
4161 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4162 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
4163 "source 2 operand must not partially overlap with dst");
4164 return false;
4165 }
4166
4167 return true;
4168}
4169
4170bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4171 switch (Inst.getOpcode()) {
4172 default:
4173 return true;
4174 case V_DIV_SCALE_F32_gfx6_gfx7:
4175 case V_DIV_SCALE_F32_vi:
4176 case V_DIV_SCALE_F32_gfx10:
4177 case V_DIV_SCALE_F64_gfx6_gfx7:
4178 case V_DIV_SCALE_F64_vi:
4179 case V_DIV_SCALE_F64_gfx10:
4180 break;
4181 }
4182
4183 // TODO: Check that src0 = src1 or src2.
4184
4185 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4186 AMDGPU::OpName::src2_modifiers,
4187 AMDGPU::OpName::src2_modifiers}) {
4188 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4189 .getImm() &
4190 SISrcMods::ABS) {
4191 return false;
4192 }
4193 }
4194
4195 return true;
4196}
4197
4198bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4199
4200 const unsigned Opc = Inst.getOpcode();
4201 const MCInstrDesc &Desc = MII.get(Opc);
4202
4203 if ((Desc.TSFlags & MIMGFlags) == 0)
4204 return true;
4205
4206 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4207 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4208 if (isCI() || isSI())
4209 return false;
4210 }
4211
4212 return true;
4213}
4214
4215static bool IsRevOpcode(const unsigned Opcode)
4216{
4217 switch (Opcode) {
4218 case AMDGPU::V_SUBREV_F32_e32:
4219 case AMDGPU::V_SUBREV_F32_e64:
4220 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4221 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4222 case AMDGPU::V_SUBREV_F32_e32_vi:
4223 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4224 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4225 case AMDGPU::V_SUBREV_F32_e64_vi:
4226
4227 case AMDGPU::V_SUBREV_CO_U32_e32:
4228 case AMDGPU::V_SUBREV_CO_U32_e64:
4229 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4230 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4231
4232 case AMDGPU::V_SUBBREV_U32_e32:
4233 case AMDGPU::V_SUBBREV_U32_e64:
4234 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4235 case AMDGPU::V_SUBBREV_U32_e32_vi:
4236 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4237 case AMDGPU::V_SUBBREV_U32_e64_vi:
4238
4239 case AMDGPU::V_SUBREV_U32_e32:
4240 case AMDGPU::V_SUBREV_U32_e64:
4241 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4242 case AMDGPU::V_SUBREV_U32_e32_vi:
4243 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4244 case AMDGPU::V_SUBREV_U32_e64_vi:
4245
4246 case AMDGPU::V_SUBREV_F16_e32:
4247 case AMDGPU::V_SUBREV_F16_e64:
4248 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4249 case AMDGPU::V_SUBREV_F16_e32_vi:
4250 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4251 case AMDGPU::V_SUBREV_F16_e64_vi:
4252
4253 case AMDGPU::V_SUBREV_U16_e32:
4254 case AMDGPU::V_SUBREV_U16_e64:
4255 case AMDGPU::V_SUBREV_U16_e32_vi:
4256 case AMDGPU::V_SUBREV_U16_e64_vi:
4257
4258 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4259 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4260 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4261
4262 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4263 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4264
4265 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4266 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4267
4268 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4269 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4270
4271 case AMDGPU::V_LSHRREV_B32_e32:
4272 case AMDGPU::V_LSHRREV_B32_e64:
4273 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4274 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4275 case AMDGPU::V_LSHRREV_B32_e32_vi:
4276 case AMDGPU::V_LSHRREV_B32_e64_vi:
4277 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4278 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4279
4280 case AMDGPU::V_ASHRREV_I32_e32:
4281 case AMDGPU::V_ASHRREV_I32_e64:
4282 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4283 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4284 case AMDGPU::V_ASHRREV_I32_e32_vi:
4285 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4286 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4287 case AMDGPU::V_ASHRREV_I32_e64_vi:
4288
4289 case AMDGPU::V_LSHLREV_B32_e32:
4290 case AMDGPU::V_LSHLREV_B32_e64:
4291 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4292 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4293 case AMDGPU::V_LSHLREV_B32_e32_vi:
4294 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4295 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4296 case AMDGPU::V_LSHLREV_B32_e64_vi:
4297
4298 case AMDGPU::V_LSHLREV_B16_e32:
4299 case AMDGPU::V_LSHLREV_B16_e64:
4300 case AMDGPU::V_LSHLREV_B16_e32_vi:
4301 case AMDGPU::V_LSHLREV_B16_e64_vi:
4302 case AMDGPU::V_LSHLREV_B16_gfx10:
4303
4304 case AMDGPU::V_LSHRREV_B16_e32:
4305 case AMDGPU::V_LSHRREV_B16_e64:
4306 case AMDGPU::V_LSHRREV_B16_e32_vi:
4307 case AMDGPU::V_LSHRREV_B16_e64_vi:
4308 case AMDGPU::V_LSHRREV_B16_gfx10:
4309
4310 case AMDGPU::V_ASHRREV_I16_e32:
4311 case AMDGPU::V_ASHRREV_I16_e64:
4312 case AMDGPU::V_ASHRREV_I16_e32_vi:
4313 case AMDGPU::V_ASHRREV_I16_e64_vi:
4314 case AMDGPU::V_ASHRREV_I16_gfx10:
4315
4316 case AMDGPU::V_LSHLREV_B64_e64:
4317 case AMDGPU::V_LSHLREV_B64_gfx10:
4318 case AMDGPU::V_LSHLREV_B64_vi:
4319
4320 case AMDGPU::V_LSHRREV_B64_e64:
4321 case AMDGPU::V_LSHRREV_B64_gfx10:
4322 case AMDGPU::V_LSHRREV_B64_vi:
4323
4324 case AMDGPU::V_ASHRREV_I64_e64:
4325 case AMDGPU::V_ASHRREV_I64_gfx10:
4326 case AMDGPU::V_ASHRREV_I64_vi:
4327
4328 case AMDGPU::V_PK_LSHLREV_B16:
4329 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4330 case AMDGPU::V_PK_LSHLREV_B16_vi:
4331
4332 case AMDGPU::V_PK_LSHRREV_B16:
4333 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4334 case AMDGPU::V_PK_LSHRREV_B16_vi:
4335 case AMDGPU::V_PK_ASHRREV_I16:
4336 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4337 case AMDGPU::V_PK_ASHRREV_I16_vi:
4338 return true;
4339 default:
4340 return false;
4341 }
4342}
4343
4344std::optional<StringRef>
4345AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4346
4347 using namespace SIInstrFlags;
4348 const unsigned Opcode = Inst.getOpcode();
4349 const MCInstrDesc &Desc = MII.get(Opcode);
4350
4351 // lds_direct register is defined so that it can be used
4352 // with 9-bit operands only. Ignore encodings which do not accept these.
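// Illustrative summary of the checks below: lds_direct is rejected on
// GFX90A and GFX11+, in SDWA and *rev* encodings, and in any source
// position other than src0.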
4353 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4354 if ((Desc.TSFlags & Enc) == 0)
4355 return std::nullopt;
4356
4357 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4358 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4359 if (SrcIdx == -1)
4360 break;
4361 const auto &Src = Inst.getOperand(SrcIdx);
4362 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4363
4364 if (isGFX90A() || isGFX11Plus())
4365 return StringRef("lds_direct is not supported on this GPU");
4366
4367 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4368 return StringRef("lds_direct cannot be used with this instruction");
4369
4370 if (SrcName != OpName::src0)
4371 return StringRef("lds_direct may be used as src0 only");
4372 }
4373 }
4374
4375 return std::nullopt;
4376}
4377
4378SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4379 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4380 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4381 if (Op.isFlatOffset())
4382 return Op.getStartLoc();
4383 }
4384 return getLoc();
4385}
4386
4387bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4388 const OperandVector &Operands) {
4389 auto Opcode = Inst.getOpcode();
4390 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4391 if (OpNum == -1)
4392 return true;
4393
4394 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4395 if ((TSFlags & SIInstrFlags::FLAT))
4396 return validateFlatOffset(Inst, Operands);
4397
4398 if ((TSFlags & SIInstrFlags::SMRD))
4399 return validateSMEMOffset(Inst, Operands);
4400
4401 const auto &Op = Inst.getOperand(OpNum);
4402 if (isGFX12Plus() &&
4403 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4404 const unsigned OffsetSize = 24;
4405 if (!isIntN(OffsetSize, Op.getImm())) {
4406 Error(getFlatOffsetLoc(Operands),
4407 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4408 return false;
4409 }
4410 } else {
4411 const unsigned OffsetSize = 16;
4412 if (!isUIntN(OffsetSize, Op.getImm())) {
4413 Error(getFlatOffsetLoc(Operands),
4414 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4415 return false;
4416 }
4417 }
4418 return true;
4419}
4420
4421bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4422 const OperandVector &Operands) {
4423 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4424 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4425 return true;
4426
4427 auto Opcode = Inst.getOpcode();
4428 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4429 assert(OpNum != -1);
4430
4431 const auto &Op = Inst.getOperand(OpNum);
4432 if (!hasFlatOffsets() && Op.getImm() != 0) {
4433 Error(getFlatOffsetLoc(Operands),
4434 "flat offset modifier is not supported on this GPU");
4435 return false;
4436 }
4437
4438 // For pre-GFX12 FLAT instructions the offset must be positive;
4439 // MSB is ignored and forced to zero.
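// Illustrative example: if getNumFlatOffsetBits() reports N bits, a plain
// FLAT access accepts an unsigned (N-1)-bit offset here, while global and
// scratch forms (and GFX12+) accept a signed N-bit offset.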
4440 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4441 bool AllowNegative =
4442 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4443 isGFX12Plus();
4444 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4445 Error(getFlatOffsetLoc(Operands),
4446 Twine("expected a ") +
4447 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4448 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4449 return false;
4450 }
4451
4452 return true;
4453}
4454
4455SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4456 // Start with second operand because SMEM Offset cannot be dst or src0.
4457 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4458 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4459 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4460 return Op.getStartLoc();
4461 }
4462 return getLoc();
4463}
4464
4465bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4466 const OperandVector &Operands) {
4467 if (isCI() || isSI())
4468 return true;
4469
4470 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4471 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4472 return true;
4473
4474 auto Opcode = Inst.getOpcode();
4475 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4476 if (OpNum == -1)
4477 return true;
4478
4479 const auto &Op = Inst.getOperand(OpNum);
4480 if (!Op.isImm())
4481 return true;
4482
4483 uint64_t Offset = Op.getImm();
4484 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4485 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4486 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4487 return true;
4488
4489 Error(getSMEMOffsetLoc(Operands),
4490 isGFX12Plus() ? "expected a 24-bit signed offset"
4491 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4492 : "expected a 21-bit signed offset");
4493
4494 return false;
4495}
4496
4497bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4498 unsigned Opcode = Inst.getOpcode();
4499 const MCInstrDesc &Desc = MII.get(Opcode);
4500 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4501 return true;
4502
4503 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4504 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4505
4506 const int OpIndices[] = { Src0Idx, Src1Idx };
4507
4508 unsigned NumExprs = 0;
4509 unsigned NumLiterals = 0;
4510 uint32_t LiteralValue;
4511
4512 for (int OpIdx : OpIndices) {
4513 if (OpIdx == -1) break;
4514
4515 const MCOperand &MO = Inst.getOperand(OpIdx);
4516 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4517 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4518 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4519 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4520 if (NumLiterals == 0 || LiteralValue != Value) {
4521 LiteralValue = Value;
4522 ++NumLiterals;
4523 }
4524 } else if (MO.isExpr()) {
4525 ++NumExprs;
4526 }
4527 }
4528 }
4529
4530 return NumLiterals + NumExprs <= 1;
4531}
4532
4533bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4534 const unsigned Opc = Inst.getOpcode();
4535 if (isPermlane16(Opc)) {
4536 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4537 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4538
4539 if (OpSel & ~3)
4540 return false;
4541 }
4542
4543 uint64_t TSFlags = MII.get(Opc).TSFlags;
4544
4545 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4546 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4547 if (OpSelIdx != -1) {
4548 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4549 return false;
4550 }
4551 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4552 if (OpSelHiIdx != -1) {
4553 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4554 return false;
4555 }
4556 }
4557
4558 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4559 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4560 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4561 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4562 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4563 if (OpSel & 3)
4564 return false;
4565 }
4566
4567 return true;
4568}
4569
4570bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
4571 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4572
4573 const unsigned Opc = Inst.getOpcode();
4574 uint64_t TSFlags = MII.get(Opc).TSFlags;
4575
4576 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4577 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4578 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4579 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
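// Illustrative example: a v_dot4 fp8/bf8 variant may use neg_lo:[0,0,1]
// (negating src2), but neg_lo:[1,0,0] is rejected by the check below.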
4580 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4581 !(TSFlags & SIInstrFlags::IsSWMMAC))
4582 return true;
4583
4584 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4585 if (NegIdx == -1)
4586 return true;
4587
4588 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4589
4590 // Some instructions have a neg_lo or neg_hi operand, but the neg modifier is
4591 // only allowed on a subset of their src operands.
4592 // Conveniently, such instructions lack a src_modifiers operand for the src
4593 // operands that don't allow neg, because those operands don't allow opsel either.
4594
4595 int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4596 AMDGPU::OpName::src1_modifiers,
4597 AMDGPU::OpName::src2_modifiers};
4598
4599 for (unsigned i = 0; i < 3; ++i) {
4600 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4601 if (Neg & (1 << i))
4602 return false;
4603 }
4604 }
4605
4606 return true;
4607}
4608
4609bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4610 const OperandVector &Operands) {
4611 const unsigned Opc = Inst.getOpcode();
4612 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4613 if (DppCtrlIdx >= 0) {
4614 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4615
4616 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4617 AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4618 // DP ALU DPP is supported for row_newbcast only on GFX9*
4619 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4620 Error(S, "DP ALU dpp only supports row_newbcast");
4621 return false;
4622 }
4623 }
4624
4625 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4626 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4627
4628 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4629 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4630 if (Src1Idx >= 0) {
4631 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4632 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4633 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
4634 auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg());
4635 SMLoc S = getRegLoc(Reg, Operands);
4636 Error(S, "invalid operand for instruction");
4637 return false;
4638 }
4639 if (Src1.isImm()) {
4640 Error(getInstLoc(Operands),
4641 "src1 immediate operand invalid for instruction");
4642 return false;
4643 }
4644 }
4645 }
4646
4647 return true;
4648}
4649
4650// Check if VCC register matches wavefront size
4651bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4652 auto FB = getFeatureBits();
4653 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4654 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4655}
4656
4657// One unique literal can be used. VOP3 literal is only allowed in GFX10+
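// Illustrative example: several source operands may share one literal
// value; two different literal values in a single instruction are
// rejected below.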
4658bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4659 const OperandVector &Operands) {
4660 unsigned Opcode = Inst.getOpcode();
4661 const MCInstrDesc &Desc = MII.get(Opcode);
4662 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4663 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4664 !HasMandatoryLiteral && !isVOPD(Opcode))
4665 return true;
4666
4667 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4668
4669 unsigned NumExprs = 0;
4670 unsigned NumLiterals = 0;
4671 uint64_t LiteralValue;
4672
4673 for (int OpIdx : OpIndices) {
4674 if (OpIdx == -1)
4675 continue;
4676
4677 const MCOperand &MO = Inst.getOperand(OpIdx);
4678 if (!MO.isImm() && !MO.isExpr())
4679 continue;
4680 if (!isSISrcOperand(Desc, OpIdx))
4681 continue;
4682
4683 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4684 uint64_t Value = static_cast<uint64_t>(MO.getImm());
4685 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4686 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4687 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4688
4689 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4690 Error(getLitLoc(Operands), "invalid operand for instruction");
4691 return false;
4692 }
4693
4694 if (IsFP64 && IsValid32Op)
4695 Value = Hi_32(Value);
4696
4697 if (NumLiterals == 0 || LiteralValue != Value) {
4698 LiteralValue = Value;
4699 ++NumLiterals;
4700 }
4701 } else if (MO.isExpr()) {
4702 ++NumExprs;
4703 }
4704 }
4705 NumLiterals += NumExprs;
4706
4707 if (!NumLiterals)
4708 return true;
4709
4710 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4711 Error(getLitLoc(Operands), "literal operands are not supported");
4712 return false;
4713 }
4714
4715 if (NumLiterals > 1) {
4716 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4717 return false;
4718 }
4719
4720 return true;
4721}
4722
4723// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4724static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4725 const MCRegisterInfo *MRI) {
4726 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4727 if (OpIdx < 0)
4728 return -1;
4729
4730 const MCOperand &Op = Inst.getOperand(OpIdx);
4731 if (!Op.isReg())
4732 return -1;
4733
4734 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4735 auto Reg = Sub ? Sub : Op.getReg();
4736 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4737 return AGPR32.contains(Reg) ? 1 : 0;
4738}
4739
4740bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4741 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4742 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4743 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4744 SIInstrFlags::DS)) == 0)
4745 return true;
4746
4747 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4748 : AMDGPU::OpName::vdata;
4749
4750 const MCRegisterInfo *MRI = getMRI();
4751 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4752 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4753
4754 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4755 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4756 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4757 return false;
4758 }
4759
4760 auto FB = getFeatureBits();
4761 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4762 if (DataAreg < 0 || DstAreg < 0)
4763 return true;
4764 return DstAreg == DataAreg;
4765 }
4766
4767 return DstAreg < 1 && DataAreg < 1;
4768}
4769
4770bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4771 auto FB = getFeatureBits();
4772 if (!FB[AMDGPU::FeatureGFX90AInsts])
4773 return true;
4774
4775 const MCRegisterInfo *MRI = getMRI();
4776 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4777 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4778 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4779 const MCOperand &Op = Inst.getOperand(I);
4780 if (!Op.isReg())
4781 continue;
4782
4783 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4784 if (!Sub)
4785 continue;
4786
4787 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4788 return false;
4789 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4790 return false;
4791 }
4792
4793 return true;
4794}
4795
4796SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4797 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4798 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4799 if (Op.isBLGP())
4800 return Op.getStartLoc();
4801 }
4802 return SMLoc();
4803}
4804
4805bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4806 const OperandVector &Operands) {
4807 unsigned Opc = Inst.getOpcode();
4808 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4809 if (BlgpIdx == -1)
4810 return true;
4811 SMLoc BLGPLoc = getBLGPLoc(Operands);
4812 if (!BLGPLoc.isValid())
4813 return true;
4814 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4815 auto FB = getFeatureBits();
4816 bool UsesNeg = false;
4817 if (FB[AMDGPU::FeatureGFX940Insts]) {
4818 switch (Opc) {
4819 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4820 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4821 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4822 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4823 UsesNeg = true;
4824 }
4825 }
4826
4827 if (IsNeg == UsesNeg)
4828 return true;
4829
4830 Error(BLGPLoc,
4831 UsesNeg ? "invalid modifier: blgp is not supported"
4832 : "invalid modifier: neg is not supported");
4833
4834 return false;
4835}
4836
4837bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4838 const OperandVector &Operands) {
4839 if (!isGFX11Plus())
4840 return true;
4841
4842 unsigned Opc = Inst.getOpcode();
4843 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4844 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4845 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4846 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4847 return true;
4848
4849 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4850 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4851 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4852 if (Reg == AMDGPU::SGPR_NULL)
4853 return true;
4854
4855 SMLoc RegLoc = getRegLoc(Reg, Operands);
4856 Error(RegLoc, "src0 must be null");
4857 return false;
4858}
4859
4860bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4861 const OperandVector &Operands) {
4862 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4863 if ((TSFlags & SIInstrFlags::DS) == 0)
4864 return true;
4865 if (TSFlags & SIInstrFlags::GWS)
4866 return validateGWS(Inst, Operands);
4867 // Only validate GDS for non-GWS instructions.
4868 if (hasGDS())
4869 return true;
4870 int GDSIdx =
4871 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4872 if (GDSIdx < 0)
4873 return true;
4874 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4875 if (GDS) {
4876 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4877 Error(S, "gds modifier is not supported on this GPU");
4878 return false;
4879 }
4880 return true;
4881}
4882
4883// gfx90a has an undocumented limitation:
4884// DS_GWS opcodes must use even aligned registers.
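// Illustrative example: a ds_gws_init whose data operand lives in v2 is
// accepted on gfx90a, while v3 triggers the alignment error below.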
4885bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4886 const OperandVector &Operands) {
4887 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4888 return true;
4889
4890 int Opc = Inst.getOpcode();
4891 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4892 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4893 return true;
4894
4895 const MCRegisterInfo *MRI = getMRI();
4896 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4897 int Data0Pos =
4898 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4899 assert(Data0Pos != -1);
4900 auto Reg = Inst.getOperand(Data0Pos).getReg();
4901 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4902 if (RegIdx & 1) {
4903 SMLoc RegLoc = getRegLoc(Reg, Operands);
4904 Error(RegLoc, "vgpr must be even aligned");
4905 return false;
4906 }
4907
4908 return true;
4909}
4910
4911bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4912 const OperandVector &Operands,
4913 const SMLoc &IDLoc) {
4914 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4915 AMDGPU::OpName::cpol);
4916 if (CPolPos == -1)
4917 return true;
4918
4919 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4920
4921 if (isGFX12Plus())
4922 return validateTHAndScopeBits(Inst, Operands, CPol);
4923
4924 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4925 if (TSFlags & SIInstrFlags::SMRD) {
4926 if (CPol && (isSI() || isCI())) {
4927 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4928 Error(S, "cache policy is not supported for SMRD instructions");
4929 return false;
4930 }
4931 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4932 Error(IDLoc, "invalid cache policy for SMEM instruction");
4933 return false;
4934 }
4935 }
4936
4937 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4938 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4940 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | SIInstrFlags::FLAT;
4941 if (!(TSFlags & AllowSCCModifier)) {
4942 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4943 StringRef CStr(S.getPointer());
4944 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4945 Error(S,
4946 "scc modifier is not supported for this instruction on this GPU");
4947 return false;
4948 }
4949 }
4950
4951 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
4952 return true;
4953
4954 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4955 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4956 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4957 : "instruction must use glc");
4958 return false;
4959 }
4960 } else {
4961 if (CPol & CPol::GLC) {
4962 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4963 StringRef CStr(S.getPointer());
4964 S = SMLoc::getFromPointer(
4965 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4966 Error(S, isGFX940() ? "instruction must not use sc0"
4967 : "instruction must not use glc");
4968 return false;
4969 }
4970 }
4971
4972 return true;
4973}
4974
4975bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
4976 const OperandVector &Operands,
4977 const unsigned CPol) {
4978 const unsigned TH = CPol & AMDGPU::CPol::TH;
4979 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
4980
4981 const unsigned Opcode = Inst.getOpcode();
4982 const MCInstrDesc &TID = MII.get(Opcode);
4983
4984 auto PrintError = [&](StringRef Msg) {
4985 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4986 Error(S, Msg);
4987 return false;
4988 };
4989
4990 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
4991 (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
4992 (!(CPol & AMDGPU::CPol::TH_ATOMIC_RETURN)))
4993 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
4994
4995 if (TH == 0)
4996 return true;
4997
4998 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
4999 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5000 (TH == AMDGPU::CPol::TH_NT_HT)))
5001 return PrintError("invalid th value for SMEM instruction");
5002
5003 if (TH == AMDGPU::CPol::TH_BYPASS) {
5004 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5005 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
5006 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5007 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5008 return PrintError("scope and th combination is not valid");
5009 }
5010
5011 bool IsStore = TID.mayStore();
5012 bool IsAtomic =
5013 TID.TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet);
5014
5015 if (IsAtomic) {
5016 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5017 return PrintError("invalid th value for atomic instructions");
5018 } else if (IsStore) {
5019 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5020 return PrintError("invalid th value for store instructions");
5021 } else {
5022 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5023 return PrintError("invalid th value for load instructions");
5024 }
5025
5026 return true;
5027}
5028
5029bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
5030 if (!isGFX11Plus())
5031 return true;
5032 for (auto &Operand : Operands) {
5033 if (!Operand->isReg())
5034 continue;
5035 unsigned Reg = Operand->getReg();
5036 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
5037 Error(getRegLoc(Reg, Operands),
5038 "execz and vccz are not supported on this GPU");
5039 return false;
5040 }
5041 }
5042 return true;
5043}
5044
5045bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5046 const OperandVector &Operands) {
5047 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5048 if (Desc.mayStore() &&
5049 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5050 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5051 if (Loc != getInstLoc(Operands)) {
5052 Error(Loc, "TFE modifier has no meaning for store instructions");
5053 return false;
5054 }
5055 }
5056
5057 return true;
5058}
5059
5060bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5061 const SMLoc &IDLoc,
5062 const OperandVector &Operands) {
5063 if (auto ErrMsg = validateLdsDirect(Inst)) {
5064 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
5065 return false;
5066 }
5067 if (!validateSOPLiteral(Inst)) {
5068 Error(getLitLoc(Operands),
5069 "only one unique literal operand is allowed");
5070 return false;
5071 }
5072 if (!validateVOPLiteral(Inst, Operands)) {
5073 return false;
5074 }
5075 if (!validateConstantBusLimitations(Inst, Operands)) {
5076 return false;
5077 }
5078 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
5079 return false;
5080 }
5081 if (!validateIntClampSupported(Inst)) {
5082 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
5083 "integer clamping is not supported on this GPU");
5084 return false;
5085 }
5086 if (!validateOpSel(Inst)) {
5087 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5088 "invalid op_sel operand");
5089 return false;
5090 }
5091 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5092 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5093 "invalid neg_lo operand");
5094 return false;
5095 }
5096 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5097 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5098 "invalid neg_hi operand");
5099 return false;
5100 }
5101 if (!validateDPP(Inst, Operands)) {
5102 return false;
5103 }
5104 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5105 if (!validateMIMGD16(Inst)) {
5106 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5107 "d16 modifier is not supported on this GPU");
5108 return false;
5109 }
5110 if (!validateMIMGMSAA(Inst)) {
5111 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5112 "invalid dim; must be MSAA type");
5113 return false;
5114 }
5115 if (!validateMIMGDataSize(Inst, IDLoc)) {
5116 return false;
5117 }
5118 if (!validateMIMGAddrSize(Inst, IDLoc))
5119 return false;
5120 if (!validateMIMGAtomicDMask(Inst)) {
5121 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5122 "invalid atomic image dmask");
5123 return false;
5124 }
5125 if (!validateMIMGGatherDMask(Inst)) {
5126 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5127 "invalid image_gather dmask: only one bit must be set");
5128 return false;
5129 }
5130 if (!validateMovrels(Inst, Operands)) {
5131 return false;
5132 }
5133 if (!validateOffset(Inst, Operands)) {
5134 return false;
5135 }
5136 if (!validateMAIAccWrite(Inst, Operands)) {
5137 return false;
5138 }
5139 if (!validateMAISrc2(Inst, Operands)) {
5140 return false;
5141 }
5142 if (!validateMFMA(Inst, Operands)) {
5143 return false;
5144 }
5145 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5146 return false;
5147 }
5148
5149 if (!validateAGPRLdSt(Inst)) {
5150 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5151 ? "invalid register class: data and dst should be all VGPR or AGPR"
5152 : "invalid register class: agpr loads and stores not supported on this GPU"
5153 );
5154 return false;
5155 }
5156 if (!validateVGPRAlign(Inst)) {
5157 Error(IDLoc,
5158 "invalid register class: vgpr tuples must be 64 bit aligned");
5159 return false;
5160 }
5161 if (!validateDS(Inst, Operands)) {
5162 return false;
5163 }
5164
5165 if (!validateBLGP(Inst, Operands)) {
5166 return false;
5167 }
5168
5169 if (!validateDivScale(Inst)) {
5170 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5171 return false;
5172 }
5173 if (!validateWaitCnt(Inst, Operands)) {
5174 return false;
5175 }
5176 if (!validateExeczVcczOperands(Operands)) {
5177 return false;
5178 }
5179 if (!validateTFE(Inst, Operands)) {
5180 return false;
5181 }
5182
5183 return true;
5184}
5185
5186static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5187 const FeatureBitset &FBS,
5188 unsigned VariantID = 0);
5189
5190static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5191 const FeatureBitset &AvailableFeatures,
5192 unsigned VariantID);
5193
5194bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5195 const FeatureBitset &FBS) {
5196 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5197}
5198
5199bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5200 const FeatureBitset &FBS,
5201 ArrayRef<unsigned> Variants) {
5202 for (auto Variant : Variants) {
5203 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5204 return true;
5205 }
5206
5207 return false;
5208}
5209
5210bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5211 const SMLoc &IDLoc) {
5212 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5213
5214 // Check if requested instruction variant is supported.
5215 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5216 return false;
5217
5218 // This instruction is not supported.
5219 // Clear any other pending errors because they are no longer relevant.
5220 getParser().clearPendingErrors();
5221
5222 // Requested instruction variant is not supported.
5223 // Check if any other variants are supported.
5224 StringRef VariantName = getMatchedVariantName();
5225 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5226 return Error(IDLoc,
5227 Twine(VariantName,
5228 " variant of this instruction is not supported"));
5229 }
5230
5231 // Check if this instruction may be used with a different wavesize.
5232 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5233 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5234
5235 FeatureBitset FeaturesWS32 = getFeatureBits();
5236 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5237 .flip(AMDGPU::FeatureWavefrontSize32);
5238 FeatureBitset AvailableFeaturesWS32 =
5239 ComputeAvailableFeatures(FeaturesWS32);
5240
5241 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5242 return Error(IDLoc, "instruction requires wavesize=32");
5243 }
5244
5245 // Finally check if this instruction is supported on any other GPU.
5246 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5247 return Error(IDLoc, "instruction not supported on this GPU");
5248 }
5249
5250 // Instruction not supported on any GPU. Probably a typo.
5251 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5252 return Error(IDLoc, "invalid instruction" + Suggestion);
5253}
5254
5255static bool isInvalidVOPDY(const OperandVector &Operands,
5256 uint64_t InvalidOprIdx) {
5257 assert(InvalidOprIdx < Operands.size());
5258 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5259 if (Op.isToken() && InvalidOprIdx > 1) {
5260 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5261 return PrevOp.isToken() && PrevOp.getToken() == "::";
5262 }
5263 return false;
5264}
5265
5266bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5267 OperandVector &Operands,
5268 MCStreamer &Out,
5269 uint64_t &ErrorInfo,
5270 bool MatchingInlineAsm) {
5271 MCInst Inst;
5272 unsigned Result = Match_Success;
5273 for (auto Variant : getMatchedVariants()) {
5274 uint64_t EI;
5275 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5276 Variant);
5277 // We order match statuses from least to most specific and use the most
5278 // specific status as the result:
5279 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
5280 if ((R == Match_Success) ||
5281 (R == Match_PreferE32) ||
5282 (R == Match_MissingFeature && Result != Match_PreferE32) ||
5283 (R == Match_InvalidOperand && Result != Match_MissingFeature
5284 && Result != Match_PreferE32) ||
5285 (R == Match_MnemonicFail && Result != Match_InvalidOperand
5286 && Result != Match_MissingFeature
5287 && Result != Match_PreferE32)) {
5288 Result = R;
5289 ErrorInfo = EI;
5290 }
5291 if (R == Match_Success)
5292 break;
5293 }
5294
5295 if (Result == Match_Success) {
5296 if (!validateInstruction(Inst, IDLoc, Operands)) {
5297 return true;
5298 }
5299 Inst.setLoc(IDLoc);
5300 Out.emitInstruction(Inst, getSTI());
5301 return false;
5302 }
5303
5304 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5305 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5306 return true;
5307 }
5308
5309 switch (Result) {
5310 default: break;
5311 case Match_MissingFeature:
5312 // It has been verified that the specified instruction
5313 // mnemonic is valid. A match was found but it requires
5314 // features which are not supported on this GPU.
5315 return Error(IDLoc, "operands are not valid for this GPU or mode");
5316
5317 case Match_InvalidOperand: {
5318 SMLoc ErrorLoc = IDLoc;
5319 if (ErrorInfo != ~0ULL) {
5320 if (ErrorInfo >= Operands.size()) {
5321 return Error(IDLoc, "too few operands for instruction");
5322 }
5323 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5324 if (ErrorLoc == SMLoc())
5325 ErrorLoc = IDLoc;
5326
5328 return Error(ErrorLoc, "invalid VOPDY instruction");
5329 }
5330 return Error(ErrorLoc, "invalid operand for instruction");
5331 }
5332
5333 case Match_PreferE32:
5334 return Error(IDLoc, "internal error: instruction without _e64 suffix "
5335 "should be encoded as e32");
5336 case Match_MnemonicFail:
5337 llvm_unreachable("Invalid instructions should have been handled already");
5338 }
5339 llvm_unreachable("Implement any new match types added!");
5340}
5341
5342bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5343 int64_t Tmp = -1;
5344 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5345 return true;
5346 }
5347 if (getParser().parseAbsoluteExpression(Tmp)) {
5348 return true;
5349 }
5350 Ret = static_cast<uint32_t>(Tmp);
5351 return false;
5352}
5353
5354bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5355 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5356 return TokError("directive only supported for amdgcn architecture");
5357
5358 std::string TargetIDDirective;
5359 SMLoc TargetStart = getTok().getLoc();
5360 if (getParser().parseEscapedString(TargetIDDirective))
5361 return true;
5362
5363 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5364 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5365 return getParser().Error(TargetRange.Start,
5366 (Twine(".amdgcn_target directive's target id ") +
5367 Twine(TargetIDDirective) +
5368 Twine(" does not match the specified target id ") +
5369 Twine(getTargetStreamer().getTargetID()->toString())).str());
5370
5371 return false;
5372}
5373
5374bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5375 return Error(Range.Start, "value out of range", Range);
5376}
5377
5378bool AMDGPUAsmParser::calculateGPRBlocks(
5379 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
5380 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
5381 unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
5382 SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
5383 // TODO(scott.linder): These calculations are duplicated from
5384 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5385 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5386
5387 unsigned NumVGPRs = NextFreeVGPR;
5388 unsigned NumSGPRs = NextFreeSGPR;
5389
5390 if (Version.Major >= 10)
5391 NumSGPRs = 0;
5392 else {
5393 unsigned MaxAddressableNumSGPRs =
5394 IsaInfo::getAddressableNumSGPRs(&getSTI());
5395
5396 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
5397 NumSGPRs > MaxAddressableNumSGPRs)
5398 return OutOfRangeError(SGPRRange);
5399
5400 NumSGPRs +=
5401 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
5402
5403 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5404 NumSGPRs > MaxAddressableNumSGPRs)
5405 return OutOfRangeError(SGPRRange);
5406
5407 if (Features.test(FeatureSGPRInitBug))
5408 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
5409 }
5410
5411 VGPRBlocks = IsaInfo::getEncodedNumVGPRBlocks(&getSTI(), NumVGPRs,
5412 EnableWavefrontSize32);
5413 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
5414
5415 return false;
5416}
5417
5418bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5419 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5420 return TokError("directive only supported for amdgcn architecture");
5421
5422 if (!isHsaAbi(getSTI()))
5423 return TokError("directive only supported for amdhsa OS");
5424
5425 StringRef KernelName;
5426 if (getParser().parseIdentifier(KernelName))
5427 return true;
5428
5429 AMDGPU::MCKernelDescriptor KD =
5430 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5431 &getSTI(), getContext());
5432
5433 StringSet<> Seen;
5434
5435 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5436
5437 SMRange VGPRRange;
5438 uint64_t NextFreeVGPR = 0;
5439 uint64_t AccumOffset = 0;
5440 uint64_t SharedVGPRCount = 0;
5441 uint64_t PreloadLength = 0;
5442 uint64_t PreloadOffset = 0;
5443 SMRange SGPRRange;
5444 uint64_t NextFreeSGPR = 0;
5445
5446 // Count the number of user SGPRs implied from the enabled feature bits.
5447 unsigned ImpliedUserSGPRCount = 0;
5448
5449 // Track if the asm explicitly contains the directive for the user SGPR
5450 // count.
5451 std::optional<unsigned> ExplicitUserSGPRCount;
5452 bool ReserveVCC = true;
5453 bool ReserveFlatScr = true;
5454 std::optional<bool> EnableWavefrontSize32;
5455
5456 while (true) {
5457 while (trySkipToken(AsmToken::EndOfStatement));
5458
5459 StringRef ID;
5460 SMRange IDRange = getTok().getLocRange();
5461 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5462 return true;
5463
5464 if (ID == ".end_amdhsa_kernel")
5465 break;
5466
5467 if (!Seen.insert(ID).second)
5468 return TokError(".amdhsa_ directives cannot be repeated");
5469
5470 SMLoc ValStart = getLoc();
5471 const MCExpr *ExprVal;
5472 if (getParser().parseExpression(ExprVal))
5473 return true;
5474 SMLoc ValEnd = getLoc();
5475 SMRange ValRange = SMRange(ValStart, ValEnd);
5476
5477 int64_t IVal = 0;
5478 uint64_t Val = IVal;
5479 bool EvaluatableExpr;
5480 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5481 if (IVal < 0)
5482 return OutOfRangeError(ValRange);
5483 Val = IVal;
5484 }
5485
5486#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5487 if (!isUInt<ENTRY##_WIDTH>(Val)) \
5488 return OutOfRangeError(RANGE); \
5489 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
5490 getContext());
5491
5492// Some fields use the parsed value immediately, so the expression must be
5493// resolvable at this point.
5494#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
5495 if (!(RESOLVED)) \
5496 return Error(IDRange.Start, "directive should have resolvable expression", \
5497 IDRange);
5498
5499 if (ID == ".amdhsa_group_segment_fixed_size") {
5500 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
5501 CHAR_BIT>(Val))
5502 return OutOfRangeError(ValRange);
5503 KD.group_segment_fixed_size = ExprVal;
5504 } else if (ID == ".amdhsa_private_segment_fixed_size") {
5505 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
5506 CHAR_BIT>(Val))
5507 return OutOfRangeError(ValRange);
5508 KD.private_segment_fixed_size = ExprVal;
5509 } else if (ID == ".amdhsa_kernarg_size") {
5510 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
5511 return OutOfRangeError(ValRange);
5512 KD.kernarg_size = ExprVal;
5513 } else if (ID == ".amdhsa_user_sgpr_count") {
5514 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5515 ExplicitUserSGPRCount = Val;
5516 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5517 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5518 if (hasArchitectedFlatScratch())
5519 return Error(IDRange.Start,
5520 "directive is not supported with architected flat scratch",
5521 IDRange);
5522 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5523 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5524 ExprVal, ValRange);
5525 if (Val)
5526 ImpliedUserSGPRCount += 4;
5527 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5528 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5529 if (!hasKernargPreload())
5530 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5531
5532 if (Val > getMaxNumUserSGPRs())
5533 return OutOfRangeError(ValRange);
5534 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
5535 ValRange);
5536 if (Val) {
5537 ImpliedUserSGPRCount += Val;
5538 PreloadLength = Val;
5539 }
5540 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5541 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5542 if (!hasKernargPreload())
5543 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5544
5545 if (Val >= 1024)
5546 return OutOfRangeError(ValRange);
5547 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
5548 ValRange);
5549 if (Val)
5550 PreloadOffset = Val;
5551 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5552 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5553 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5554 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
5555 ValRange);
5556 if (Val)
5557 ImpliedUserSGPRCount += 2;
5558 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5559 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5560 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5561 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
5562 ValRange);
5563 if (Val)
5564 ImpliedUserSGPRCount += 2;
5565 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5566 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5567 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5568 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5569 ExprVal, ValRange);
5570 if (Val)
5571 ImpliedUserSGPRCount += 2;
5572 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5573 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5574 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5575 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
5576 ValRange);
5577 if (Val)
5578 ImpliedUserSGPRCount += 2;
5579 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5580 if (hasArchitectedFlatScratch())
5581 return Error(IDRange.Start,
5582 "directive is not supported with architected flat scratch",
5583 IDRange);
5584 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5585 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5586 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
5587 ExprVal, ValRange);
5588 if (Val)
5589 ImpliedUserSGPRCount += 2;
5590 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5591 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5592 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5593 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5594 ExprVal, ValRange);
5595 if (Val)
5596 ImpliedUserSGPRCount += 1;
5597 } else if (ID == ".amdhsa_wavefront_size32") {
5598 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5599 if (IVersion.Major < 10)
5600 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5601 EnableWavefrontSize32 = Val;
5602 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5603 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
5604 ValRange);
5605 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5606 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5607 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
5608 ValRange);
5609 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5610 if (hasArchitectedFlatScratch())
5611 return Error(IDRange.Start,
5612 "directive is not supported with architected flat scratch",
5613 IDRange);
5614 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5615 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5616 ValRange);
5617 } else if (ID == ".amdhsa_enable_private_segment") {
5618 if (!hasArchitectedFlatScratch())
5619 return Error(
5620 IDRange.Start,
5621 "directive is not supported without architected flat scratch",
5622 IDRange);
5623 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5624 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5625 ValRange);
5626 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5627 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5628 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
5629 ValRange);
5630 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5631 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5632 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
5633 ValRange);
5634 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5635 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5636 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
5637 ValRange);
5638 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5639 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5640 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
5641 ValRange);
5642 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5643 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5644 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
5645 ValRange);
5646 } else if (ID == ".amdhsa_next_free_vgpr") {
5647 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5648 VGPRRange = ValRange;
5649 NextFreeVGPR = Val;
5650 } else if (ID == ".amdhsa_next_free_sgpr") {
5651 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5652 SGPRRange = ValRange;
5653 NextFreeSGPR = Val;
5654 } else if (ID == ".amdhsa_accum_offset") {
5655 if (!isGFX90A())
5656 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5657 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5658 AccumOffset = Val;
5659 } else if (ID == ".amdhsa_reserve_vcc") {
5660 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5661 if (!isUInt<1>(Val))
5662 return OutOfRangeError(ValRange);
5663 ReserveVCC = Val;
5664 } else if (ID == ".amdhsa_reserve_flat_scratch") {
5665 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5666 if (IVersion.Major < 7)
5667 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5668 if (hasArchitectedFlatScratch())
5669 return Error(IDRange.Start,
5670 "directive is not supported with architected flat scratch",
5671 IDRange);
5672 if (!isUInt<1>(Val))
5673 return OutOfRangeError(ValRange);
5674 ReserveFlatScr = Val;
5675 } else if (ID == ".amdhsa_reserve_xnack_mask") {
5676 if (IVersion.Major < 8)
5677 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5678 if (!isUInt<1>(Val))
5679 return OutOfRangeError(ValRange);
5680 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5681 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5682 IDRange);
5683 } else if (ID == ".amdhsa_float_round_mode_32") {
5684 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5685 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
5686 ValRange);
5687 } else if (ID == ".amdhsa_float_round_mode_16_64") {
5688 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5689 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
5690 ValRange);
5691 } else if (ID == ".amdhsa_float_denorm_mode_32") {
5692 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5693 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
5694 ValRange);
5695 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5696 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5697 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
5698 ValRange);
5699 } else if (ID == ".amdhsa_dx10_clamp") {
5700 if (IVersion.Major >= 12)
5701 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5702 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5703 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
5704 ValRange);
5705 } else if (ID == ".amdhsa_ieee_mode") {
5706 if (IVersion.Major >= 12)
5707 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5708 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5709 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
5710 ValRange);
5711 } else if (ID == ".amdhsa_fp16_overflow") {
5712 if (IVersion.Major < 9)
5713 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5714 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5715 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
5716 ValRange);
5717 } else if (ID == ".amdhsa_tg_split") {
5718 if (!isGFX90A())
5719 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5720 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
5721 ExprVal, ValRange);
5722 } else if (ID == ".amdhsa_workgroup_processor_mode") {
5723 if (IVersion.Major < 10)
5724 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5725 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5726 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
5727 ValRange);
5728 } else if (ID == ".amdhsa_memory_ordered") {
5729 if (IVersion.Major < 10)
5730 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5731 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5732 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
5733 ValRange);
5734 } else if (ID == ".amdhsa_forward_progress") {
5735 if (IVersion.Major < 10)
5736 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5737 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5738 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
5739 ValRange);
5740 } else if (ID == ".amdhsa_shared_vgpr_count") {
5741 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5742 if (IVersion.Major < 10 || IVersion.Major >= 12)
5743 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
5744 IDRange);
5745 SharedVGPRCount = Val;
5746 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5747 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
5748 ValRange);
5749 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5750 PARSE_BITS_ENTRY(
5751 KD.compute_pgm_rsrc2,
5752 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
5753 ExprVal, ValRange);
5754 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5755 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5756 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5757 ExprVal, ValRange);
5758 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5759 PARSE_BITS_ENTRY(
5760 KD.compute_pgm_rsrc2,
5761 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
5762 ExprVal, ValRange);
5763 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5764 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5765 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5766 ExprVal, ValRange);
5767 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5768 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5769 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5770 ExprVal, ValRange);
5771 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5772 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5773 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5774 ExprVal, ValRange);
5775 } else if (ID == ".amdhsa_exception_int_div_zero") {
5776 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5777 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5778 ExprVal, ValRange);
5779 } else if (ID == ".amdhsa_round_robin_scheduling") {
5780 if (IVersion.Major < 12)
5781 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
5782 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5783 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
5784 ValRange);
5785 } else {
5786 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5787 }
5788
5789#undef PARSE_BITS_ENTRY
5790 }
5791
5792 if (!Seen.contains(".amdhsa_next_free_vgpr"))
5793 return TokError(".amdhsa_next_free_vgpr directive is required");
5794
5795 if (!Seen.contains(".amdhsa_next_free_sgpr"))
5796 return TokError(".amdhsa_next_free_sgpr directive is required");
5797
5798 unsigned VGPRBlocks;
5799 unsigned SGPRBlocks;
5800 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5801 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5802 EnableWavefrontSize32, NextFreeVGPR,
5803 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5804 SGPRBlocks))
5805 return true;
5806
5807 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5808 VGPRBlocks))
5809 return OutOfRangeError(VGPRRange);
5810 AMDGPU::MCKernelDescriptor::bits_set(
5811 KD.compute_pgm_rsrc1, MCConstantExpr::create(VGPRBlocks, getContext()),
5812 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
5813 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
5814
5815 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5816 SGPRBlocks))
5817 return OutOfRangeError(SGPRRange);
5818 AMDGPU::MCKernelDescriptor::bits_set(
5819 KD.compute_pgm_rsrc1, MCConstantExpr::create(SGPRBlocks, getContext()),
5820 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
5821 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
5822
5823 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5824 return TokError("amdgpu_user_sgpr_count smaller than implied by "
5825 "enabled user SGPRs");
5826
5827 unsigned UserSGPRCount =
5828 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5829
5830 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5831 return TokError("too many user SGPRs enabled");
5832 AMDGPU::MCKernelDescriptor::bits_set(
5833 KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
5834 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
5835 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
5836
5837 int64_t IVal = 0;
5838 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
5839 return TokError("Kernarg size should be resolvable");
5840 uint64_t kernarg_size = IVal;
5841 if (PreloadLength && kernarg_size &&
5842 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
5843 return TokError("Kernarg preload length + offset is larger than the "
5844 "kernarg segment size");
5845
5846 if (isGFX90A()) {
5847 if (!Seen.contains(".amdhsa_accum_offset"))
5848 return TokError(".amdhsa_accum_offset directive is required");
5849 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5850 return TokError("accum_offset should be in range [4..256] in "
5851 "increments of 4");
5852 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5853 return TokError("accum_offset exceeds total VGPR allocation");
5854 AMDGPU::MCKernelDescriptor::bits_set(
5855 KD.compute_pgm_rsrc3,
5856 MCConstantExpr::create(AccumOffset / 4 - 1, getContext()),
5857 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
5858 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
5859 }
5860
5861 if (IVersion.Major >= 10 && IVersion.Major < 12) {
5862 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5863 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5864 return TokError("shared_vgpr_count directive not valid on "
5865 "wavefront size 32");
5866 }
5867 if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5868 return TokError("shared_vgpr_count*2 + "
5869 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5870 "exceed 63\n");
5871 }
5872 }
5873
5874 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
5875 NextFreeVGPR, NextFreeSGPR,
5876 ReserveVCC, ReserveFlatScr);
5877 return false;
5878}
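// Illustrative sketch (not from the source): a minimal .amdhsa_kernel block
// that this parser accepts. Only .amdhsa_next_free_vgpr and
// .amdhsa_next_free_sgpr are mandatory (checked above); every other .amdhsa_*
// directive falls back to the default kernel descriptor. The kernel name and
// register counts below are made up for illustration.
//
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//     .amdhsa_user_sgpr_kernarg_segment_ptr 1
//   .end_amdhsa_kernel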
5879
5880bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5881 uint32_t Version;
5882 if (ParseAsAbsoluteExpression(Version))
5883 return true;
5884
5885 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
5886 return false;
5887}
5888
5889bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5890 amd_kernel_code_t &Header) {
5891 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5892 // assembly for backwards compatibility.
5893 if (ID == "max_scratch_backing_memory_byte_size") {
5894 Parser.eatToEndOfStatement();
5895 return false;
5896 }
5897
5898 SmallString<40> ErrStr;
5899 raw_svector_ostream Err(ErrStr);
5900 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5901 return TokError(Err.str());
5902 }
5903 Lex();
5904
5905 if (ID == "enable_dx10_clamp") {
5906 if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) &&
5907 isGFX12Plus())
5908 return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
5909 }
5910
5911 if (ID == "enable_ieee_mode") {
5912 if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) &&
5913 isGFX12Plus())
5914 return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
5915 }
5916
5917 if (ID == "enable_wavefront_size32") {
5918 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5919 if (!isGFX10Plus())
5920 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5921 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5922 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5923 } else {
5924 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5925 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5926 }
5927 }
5928
5929 if (ID == "wavefront_size") {
5930 if (Header.wavefront_size == 5) {
5931 if (!isGFX10Plus())
5932 return TokError("wavefront_size=5 is only allowed on GFX10+");
5933 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5934 return TokError("wavefront_size=5 requires +WavefrontSize32");
5935 } else if (Header.wavefront_size == 6) {
5936 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5937 return TokError("wavefront_size=6 requires +WavefrontSize64");
5938 }
5939 }
5940
5941 if (ID == "enable_wgp_mode") {
5942 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5943 !isGFX10Plus())
5944 return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5945 }
5946
5947 if (ID == "enable_mem_ordered") {
5948 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5949 !isGFX10Plus())
5950 return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5951 }
5952
5953 if (ID == "enable_fwd_progress") {
5954 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5955 !isGFX10Plus())
5956 return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5957 }
5958
5959 return false;
5960}
5961
5962bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5963 amd_kernel_code_t Header;
5964 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5965
5966 while (true) {
5967 // Lex EndOfStatement. This is in a while loop, because lexing a comment
5968 // will set the current token to EndOfStatement.
5969 while(trySkipToken(AsmToken::EndOfStatement));
5970
5971 StringRef ID;
5972 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5973 return true;
5974
5975 if (ID == ".end_amd_kernel_code_t")
5976 break;
5977
5978 if (ParseAMDKernelCodeTValue(ID, Header))
5979 return true;
5980 }
5981
5982 getTargetStreamer().EmitAMDKernelCodeT(Header);
5983
5984 return false;
5985}
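// Illustrative sketch (not from the source): the directive body is a sequence
// of "key = value" assignments handled by ParseAMDKernelCodeTValue and
// terminated by .end_amd_kernel_code_t. The single field shown below is an
// assumption for illustration; any field known to parseAmdKernelCodeField may
// appear.
//
//   .amd_kernel_code_t
//     wavefront_size = 6
//   .end_amd_kernel_code_t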
5986
5987bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5988 StringRef KernelName;
5989 if (!parseId(KernelName, "expected symbol name"))
5990 return true;
5991
5992 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5993 ELF::STT_AMDGPU_HSA_KERNEL);
5994
5995 KernelScope.initialize(getContext());
5996 return false;
5997}
5998
5999bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6000 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
6001 return Error(getLoc(),
6002 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6003 "architectures");
6004 }
6005
6006 auto TargetIDDirective = getLexer().getTok().getStringContents();
6007 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6008 return Error(getParser().getTok().getLoc(), "target id must match options");
6009
6010 getTargetStreamer().EmitISAVersion();
6011 Lex();
6012
6013 return false;
6014}
6015
6016bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6017 assert(isHsaAbi(getSTI()));
6018
6019 std::string HSAMetadataString;
6020 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6021 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6022 return true;
6023
6024 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6025 return Error(getLoc(), "invalid HSA metadata");
6026
6027 return false;
6028}
6029
6030/// Common code to parse out a block of text (typically YAML) between start and
6031/// end directives.
6032bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6033 const char *AssemblerDirectiveEnd,
6034 std::string &CollectString) {
6035
6036 raw_string_ostream CollectStream(CollectString);
6037
6038 getLexer().setSkipSpace(false);
6039
6040 bool FoundEnd = false;
6041 while (!isToken(AsmToken::Eof)) {
6042 while (isToken(AsmToken::Space)) {
6043 CollectStream << getTokenStr();
6044 Lex();
6045 }
6046
6047 if (trySkipId(AssemblerDirectiveEnd)) {
6048 FoundEnd = true;
6049 break;
6050 }
6051
6052 CollectStream << Parser.parseStringToEndOfStatement()
6053 << getContext().getAsmInfo()->getSeparatorString();
6054
6055 Parser.eatToEndOfStatement();
6056 }
6057
6058 getLexer().setSkipSpace(true);
6059
6060 if (isToken(AsmToken::Eof) && !FoundEnd) {
6061 return TokError(Twine("expected directive ") +
6062 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6063 }
6064
6065 CollectStream.flush();
6066 return false;
6067}
6068
6069/// Parse the assembler directive for new MsgPack-format PAL metadata.
6070bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6071 std::string String;
6072 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6073 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6074 return true;
6075
6076 auto PALMetadata = getTargetStreamer().getPALMetadata();
6077 if (!PALMetadata->setFromString(String))
6078 return Error(getLoc(), "invalid PAL metadata");
6079 return false;
6080}
6081
6082/// Parse the assembler directive for old linear-format PAL metadata.
6083bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6084 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6085 return Error(getLoc(),
6086 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6087 "not available on non-amdpal OSes")).str());
6088 }
6089
6090 auto PALMetadata = getTargetStreamer().getPALMetadata();
6091 PALMetadata->setLegacy();
6092 for (;;) {
6093 uint32_t Key, Value;
6094 if (ParseAsAbsoluteExpression(Key)) {
6095 return TokError(Twine("invalid value in ") +
6096 Twine(PALMD::AssemblerDirective));
6097 }
6098 if (!trySkipToken(AsmToken::Comma)) {
6099 return TokError(Twine("expected an even number of values in ") +
6100 Twine(PALMD::AssemblerDirective));
6101 }
6102 if (ParseAsAbsoluteExpression(Value)) {
6103 return TokError(Twine("invalid value in ") +
6104 Twine(PALMD::AssemblerDirective));
6105 }
6106 PALMetadata->setRegister(Key, Value);
6107 if (!trySkipToken(AsmToken::Comma))
6108 break;
6109 }
6110 return false;
6111}
6112
6113/// ParseDirectiveAMDGPULDS
6114/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6115bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6116 if (getParser().checkForValidSection())
6117 return true;
6118
6119 StringRef Name;
6120 SMLoc NameLoc = getLoc();
6121 if (getParser().parseIdentifier(Name))
6122 return TokError("expected identifier in directive");
6123
6124 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6125 if (getParser().parseComma())
6126 return true;
6127
6128 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6129
6130 int64_t Size;
6131 SMLoc SizeLoc = getLoc();
6132 if (getParser().parseAbsoluteExpression(Size))
6133 return true;
6134 if (Size < 0)
6135 return Error(SizeLoc, "size must be non-negative");
6136 if (Size > LocalMemorySize)
6137 return Error(SizeLoc, "size is too large");
6138
6139 int64_t Alignment = 4;
6140 if (trySkipToken(AsmToken::Comma)) {
6141 SMLoc AlignLoc = getLoc();
6142 if (getParser().parseAbsoluteExpression(Alignment))
6143 return true;
6144 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6145 return Error(AlignLoc, "alignment must be a power of two");
6146
6147 // Alignment larger than the size of LDS is possible in theory, as long
6148 // as the linker manages to place the symbol at address 0, but we do want
6149 // to make sure the alignment fits nicely into a 32-bit integer.
6150 if (Alignment >= 1u << 31)
6151 return Error(AlignLoc, "alignment is too large");
6152 }
6153
6154 if (parseEOL())
6155 return true;
6156
6157 Symbol->redefineIfPossible();
6158 if (!Symbol->isUndefined())
6159 return Error(NameLoc, "invalid symbol redefinition");
6160
6161 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6162 return false;
6163}
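// Illustrative sketch (not from the source), following the grammar above: the
// size must be non-negative and no larger than the target's local memory size,
// and the optional alignment must be a power of two below 2^31 (default 4).
// The symbol name and values are made up.
//
//   .amdgpu_lds my_lds_var, 1024, 16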
6164
6165bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6166 StringRef IDVal = DirectiveID.getString();
6167
6168 if (isHsaAbi(getSTI())) {
6169 if (IDVal == ".amdhsa_kernel")
6170 return ParseDirectiveAMDHSAKernel();
6171
6172 if (IDVal == ".amdhsa_code_object_version")
6173 return ParseDirectiveAMDHSACodeObjectVersion();
6174
6175 // TODO: Restructure/combine with PAL metadata directive.
6176 if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6177 return ParseDirectiveHSAMetadata();
6178 } else {
6179 if (IDVal == ".amd_kernel_code_t")
6180 return ParseDirectiveAMDKernelCodeT();
6181
6182 if (IDVal == ".amdgpu_hsa_kernel")
6183 return ParseDirectiveAMDGPUHsaKernel();
6184
6185 if (IDVal == ".amd_amdgpu_isa")
6186 return ParseDirectiveISAVersion();
6187
6188 if (IDVal == HSAMD::AssemblerDirectiveBegin) {
6189 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6190 Twine(" directive is "
6191 "not available on non-amdhsa OSes"))
6192 .str());
6193 }
6194 }
6195
6196 if (IDVal == ".amdgcn_target")
6197 return ParseDirectiveAMDGCNTarget();
6198
6199 if (IDVal == ".amdgpu_lds")
6200 return ParseDirectiveAMDGPULDS();
6201
6202 if (IDVal == PALMD::AssemblerDirectiveBegin)
6203 return ParseDirectivePALMetadataBegin();
6204
6205 if (IDVal == PALMD::AssemblerDirective)
6206 return ParseDirectivePALMetadata();
6207
6208 return true;
6209}
6210
6211bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6212 unsigned RegNo) {
6213
6214 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
6215 return isGFX9Plus();
6216
6217 // GFX10+ has 2 more SGPRs 104 and 105.
6218 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
6219 return hasSGPR104_SGPR105();
6220
6221 switch (RegNo) {
6222 case AMDGPU::SRC_SHARED_BASE_LO:
6223 case AMDGPU::SRC_SHARED_BASE:
6224 case AMDGPU::SRC_SHARED_LIMIT_LO:
6225 case AMDGPU::SRC_SHARED_LIMIT:
6226 case AMDGPU::SRC_PRIVATE_BASE_LO:
6227 case AMDGPU::SRC_PRIVATE_BASE:
6228 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
6229 case AMDGPU::SRC_PRIVATE_LIMIT:
6230 return isGFX9Plus();
6231 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
6232 return isGFX9Plus() && !isGFX11Plus();
6233 case AMDGPU::TBA:
6234 case AMDGPU::TBA_LO:
6235 case AMDGPU::TBA_HI:
6236 case AMDGPU::TMA:
6237 case AMDGPU::TMA_LO:
6238 case AMDGPU::TMA_HI:
6239 return !isGFX9Plus();
6240 case AMDGPU::XNACK_MASK:
6241 case AMDGPU::XNACK_MASK_LO:
6242 case AMDGPU::XNACK_MASK_HI:
6243 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6244 case AMDGPU::SGPR_NULL:
6245 return isGFX10Plus();
6246 default:
6247 break;
6248 }
6249
6250 if (isCI())
6251 return true;
6252
6253 if (isSI() || isGFX10Plus()) {
6254 // No flat_scr on SI.
6255 // On GFX10Plus flat scratch is not a valid register operand and can only be
6256 // accessed with s_setreg/s_getreg.
6257 switch (RegNo) {
6258 case AMDGPU::FLAT_SCR:
6259 case AMDGPU::FLAT_SCR_LO:
6260 case AMDGPU::FLAT_SCR_HI:
6261 return false;
6262 default:
6263 return true;
6264 }
6265 }
6266
6267 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6268 // SI/CI have.
6269 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
6270 return hasSGPR102_SGPR103();
6271
6272 return true;
6273}
6274
6275ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6276 StringRef Mnemonic,
6277 OperandMode Mode) {
6278 ParseStatus Res = parseVOPD(Operands);
6279 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6280 return Res;
6281
6282 // Try to parse with a custom parser
6283 Res = MatchOperandParserImpl(Operands, Mnemonic);
6284
6285 // If we successfully parsed the operand or if there was an error parsing,
6286 // we are done.
6287 //
6288 // If we are parsing after we reach EndOfStatement then this means we
6289 // are appending default values to the Operands list. This is only done
6290 // by custom parser, so we shouldn't continue on to the generic parsing.
6291 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6292 return Res;
6293
6294 SMLoc RBraceLoc;
6295 SMLoc LBraceLoc = getLoc();
6296 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6297 unsigned Prefix = Operands.size();
6298
6299 for (;;) {
6300 auto Loc = getLoc();
6301 Res = parseReg(Operands);
6302 if (Res.isNoMatch())
6303 Error(Loc, "expected a register");
6304 if (!Res.isSuccess())
6305 return ParseStatus::Failure;
6306
6307 RBraceLoc = getLoc();
6308 if (trySkipToken(AsmToken::RBrac))
6309 break;
6310
6311 if (!skipToken(AsmToken::Comma,
6312 "expected a comma or a closing square bracket"))
6313 return ParseStatus::Failure;
6314 }
6315
6316 if (Operands.size() - Prefix > 1) {
6317 Operands.insert(Operands.begin() + Prefix,
6318 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6319 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6320 }
6321
6322 return ParseStatus::Success;
6323 }
6324
6325 return parseRegOrImm(Operands);
6326}
6327
6328StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6329 // Clear any forced encodings from the previous instruction.
6330 setForcedEncodingSize(0);
6331 setForcedDPP(false);
6332 setForcedSDWA(false);
6333
6334 if (Name.ends_with("_e64_dpp")) {
6335 setForcedDPP(true);
6336 setForcedEncodingSize(64);
6337 return Name.substr(0, Name.size() - 8);
6338 } else if (Name.ends_with("_e64")) {
6339 setForcedEncodingSize(64);
6340 return Name.substr(0, Name.size() - 4);
6341 } else if (Name.ends_with("_e32")) {
6342 setForcedEncodingSize(32);
6343 return Name.substr(0, Name.size() - 4);
6344 } else if (Name.ends_with("_dpp")) {
6345 setForcedDPP(true);
6346 return Name.substr(0, Name.size() - 4);
6347 } else if (Name.ends_with("_sdwa")) {
6348 setForcedSDWA(true);
6349 return Name.substr(0, Name.size() - 5);
6350 }
6351 return Name;
6352}
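// Illustrative examples (not from the source) of how the suffixes above are
// consumed: "v_add_f32_e64" forces the 64-bit encoding, "v_mov_b32_dpp" forces
// the DPP variant, and "v_add_f16_sdwa" forces SDWA; in each case the stripped
// base name is what the generated matcher subsequently sees.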
6353
6354static void applyMnemonicAliases(StringRef &Mnemonic,
6355 const FeatureBitset &Features,
6356 unsigned VariantID);
6357
6358bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
6359 StringRef Name,
6360 SMLoc NameLoc, OperandVector &Operands) {
6361 // Add the instruction mnemonic
6362 Name = parseMnemonicSuffix(Name);
6363
6364 // If the target architecture uses MnemonicAlias, call it here to parse
6365 // operands correctly.
6366 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6367
6368 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6369
6370 bool IsMIMG = Name.starts_with("image_");
6371
6372 while (!trySkipToken(AsmToken::EndOfStatement)) {
6373 OperandMode Mode = OperandMode_Default;
6374 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6375 Mode = OperandMode_NSA;
6376 ParseStatus Res = parseOperand(Operands, Name, Mode);
6377
6378 if (!Res.isSuccess()) {
6379 checkUnsupportedInstruction(Name, NameLoc);
6380 if (!Parser.hasPendingError()) {
6381 // FIXME: use real operand location rather than the current location.
6382 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6383 : "not a valid operand.";
6384 Error(getLoc(), Msg);
6385 }
6386 while (!trySkipToken(AsmToken::EndOfStatement)) {
6387 lex();
6388 }
6389 return true;
6390 }
6391
6392 // Eat the comma or space if there is one.
6393 trySkipToken(AsmToken::Comma);
6394 }
6395
6396 return false;
6397}
6398
6399//===----------------------------------------------------------------------===//
6400// Utility functions
6401//===----------------------------------------------------------------------===//
6402
6403ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6404 OperandVector &Operands) {
6405 SMLoc S = getLoc();
6406 if (!trySkipId(Name))
6407 return ParseStatus::NoMatch;
6408
6409 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6410 return ParseStatus::Success;
6411}
6412
6413ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6414 int64_t &IntVal) {
6415
6416 if (!trySkipId(Prefix, AsmToken::Colon))
6417 return ParseStatus::NoMatch;
6418
6419 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6420}
6421
6422ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6423 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6424 std::function<bool(int64_t &)> ConvertResult) {
6425 SMLoc S = getLoc();
6426 int64_t Value = 0;
6427
6428 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6429 if (!Res.isSuccess())
6430 return Res;
6431
6432 if (ConvertResult && !ConvertResult(Value)) {
6433 Error(S, "invalid " + StringRef(Prefix) + " value.");
6434 }
6435
6436 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6437 return ParseStatus::Success;
6438}
6439
6440ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6441 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6442 bool (*ConvertResult)(int64_t &)) {
6443 SMLoc S = getLoc();
6444 if (!trySkipId(Prefix, AsmToken::Colon))
6445 return ParseStatus::NoMatch;
6446
6447 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6448 return ParseStatus::Failure;
6449
6450 unsigned Val = 0;
6451 const unsigned MaxSize = 4;
6452
6453 // FIXME: How to verify the number of elements matches the number of src
6454 // operands?
6455 for (int I = 0; ; ++I) {
6456 int64_t Op;
6457 SMLoc Loc = getLoc();
6458 if (!parseExpr(Op))
6459 return ParseStatus::Failure;
6460
6461 if (Op != 0 && Op != 1)
6462 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6463
6464 Val |= (Op << I);
6465
6466 if (trySkipToken(AsmToken::RBrac))
6467 break;
6468
6469 if (I + 1 == MaxSize)
6470 return Error(getLoc(), "expected a closing square bracket");
6471
6472 if (!skipToken(AsmToken::Comma, "expected a comma"))
6473 return ParseStatus::Failure;
6474 }
6475
6476 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6477 return ParseStatus::Success;
6478}
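// Illustrative sketch (not from the source): this parses bracketed bit arrays
// such as "neg:[0,1,1,0]"; each element must be 0 or 1 and element I sets bit I
// of the resulting immediate, so the example above encodes the value 0b0110.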
6479
6480ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6481 OperandVector &Operands,
6482 AMDGPUOperand::ImmTy ImmTy) {
6483 int64_t Bit;
6484 SMLoc S = getLoc();
6485
6486 if (trySkipId(Name)) {
6487 Bit = 1;
6488 } else if (trySkipId("no", Name)) {
6489 Bit = 0;
6490 } else {
6491 return ParseStatus::NoMatch;
6492 }
6493
6494 if (Name == "r128" && !hasMIMG_R128())
6495 return Error(S, "r128 modifier is not supported on this GPU");
6496 if (Name == "a16" && !hasA16())
6497 return Error(S, "a16 modifier is not supported on this GPU");
6498
6499 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6500 ImmTy = AMDGPUOperand::ImmTyR128A16;
6501
6502 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6503 return ParseStatus::Success;
6504}
6505
6506unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6507 bool &Disabling) const {
6508 Disabling = Id.consume_front("no");
6509
6510 if (isGFX940() && !Mnemo.starts_with("s_")) {
6511 return StringSwitch<unsigned>(Id)
6512 .Case("nt", AMDGPU::CPol::NT)
6513 .Case("sc0", AMDGPU::CPol::SC0)
6514 .Case("sc1", AMDGPU::CPol::SC1)
6515 .Default(0);
6516 }
6517
6518 return StringSwitch<unsigned>(Id)
6519 .Case("dlc", AMDGPU::CPol::DLC)
6520 .Case("glc", AMDGPU::CPol::GLC)
6521 .Case("scc", AMDGPU::CPol::SCC)
6522 .Case("slc", AMDGPU::CPol::SLC)
6523 .Default(0);
6524}
6525
6526ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
6527 if (isGFX12Plus()) {
6528 SMLoc StringLoc = getLoc();
6529
6530 int64_t CPolVal = 0;
6531 ParseStatus ResTH = ParseStatus::NoMatch;
6532 ParseStatus ResScope = ParseStatus::NoMatch;
6533
6534 for (;;) {
6535 if (ResTH.isNoMatch()) {
6536 int64_t TH;
6537 ResTH = parseTH(Operands, TH);
6538 if (ResTH.isFailure())
6539 return ResTH;
6540 if (ResTH.isSuccess()) {
6541 CPolVal |= TH;
6542 continue;
6543 }
6544 }
6545
6546 if (ResScope.isNoMatch()) {
6547 int64_t Scope;
6548 ResScope = parseScope(Operands, Scope);
6549 if (ResScope.isFailure())
6550 return ResScope;
6551 if (ResScope.isSuccess()) {
6552 CPolVal |= Scope;
6553 continue;
6554 }
6555 }
6556
6557 break;
6558 }
6559
6560 if (ResTH.isNoMatch() && ResScope.isNoMatch())
6561 return ParseStatus::NoMatch;
6562
6563 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
6564 AMDGPUOperand::ImmTyCPol));
6565 return ParseStatus::Success;
6566 }
6567
6568 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
6569 SMLoc OpLoc = getLoc();
6570 unsigned Enabled = 0, Seen = 0;
6571 for (;;) {
6572 SMLoc S = getLoc();
6573 bool Disabling;
6574 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6575 if (!CPol)
6576 break;
6577
6578 lex();
6579
6580 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
6581 return Error(S, "dlc modifier is not supported on this GPU");
6582
6583 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
6584 return Error(S, "scc modifier is not supported on this GPU");
6585
6586 if (Seen & CPol)
6587 return Error(S, "duplicate cache policy modifier");
6588
6589 if (!Disabling)
6590 Enabled |= CPol;
6591
6592 Seen |= CPol;
6593 }
6594
6595 if (!Seen)
6596 return ParseStatus::NoMatch;
6597
6598 Operands.push_back(
6599 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6600 return ParseStatus::Success;
6601}
6602
6603ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6604 int64_t &Scope) {
6605 Scope = AMDGPU::CPol::SCOPE_CU; // default;
6606
6607 StringRef Value;
6608 SMLoc StringLoc;
6609 ParseStatus Res;
6610
6611 Res = parseStringWithPrefix("scope", Value, StringLoc);
6612 if (!Res.isSuccess())
6613 return Res;
6614
6615 Scope = StringSwitch<int64_t>(Value)
6616 .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU)
6617 .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE)
6618 .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV)
6619 .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS)
6620 .Default(0xffffffff);
6621
6622 if (Scope == 0xffffffff)
6623 return Error(StringLoc, "invalid scope value");
6624
6625 return ParseStatus::Success;
6626}
6627
6628ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
6629 TH = AMDGPU::CPol::TH_RT; // default
6630
6631 StringRef Value;
6632 SMLoc StringLoc;
6633 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
6634 if (!Res.isSuccess())
6635 return Res;
6636
6637 if (Value == "TH_DEFAULT")
6638 TH = AMDGPU::CPol::TH_RT;
6639 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
6640 Value == "TH_LOAD_NT_WB") {
6641 return Error(StringLoc, "invalid th value");
6642 } else if (Value.consume_front("TH_ATOMIC_")) {
6643 TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
6644 } else if (Value.consume_front("TH_LOAD_")) {
6645 TH = AMDGPU::CPol::TH_TYPE_LOAD;
6646 } else if (Value.consume_front("TH_STORE_")) {
6647 TH = AMDGPU::CPol::TH_TYPE_STORE;
6648 } else {
6649 return Error(StringLoc, "invalid th value");
6650 }
6651
6652 if (Value == "BYPASS")
6653 TH |= AMDGPU::CPol::TH_REAL_BYPASS;
6654
6655 if (TH != 0) {
6656 if (TH == AMDGPU::CPol::TH_TYPE_ATOMIC)
6657 TH |= StringSwitch<int64_t>(Value)
6658 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6659 .Case("RT", AMDGPU::CPol::TH_RT)
6660 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6661 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
6662 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
6663 AMDGPU::CPol::TH_ATOMIC_RETURN)
6664 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
6665 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
6666 AMDGPU::CPol::TH_ATOMIC_NT)
6667 .Default(0xffffffff);
6668 else
6669 TH |= StringSwitch<int64_t>(Value)
6670 .Case("RT", AMDGPU::CPol::TH_RT)
6671 .Case("NT", AMDGPU::CPol::TH_NT)
6672 .Case("HT", AMDGPU::CPol::TH_HT)
6673 .Case("LU", AMDGPU::CPol::TH_LU)
6674 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
6675 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
6676 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
6677 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
6678 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
6679 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
6680 .Default(0xffffffff);
6681 }
6682
6683 if (TH == 0xffffffff)
6684 return Error(StringLoc, "invalid th value");
6685
6686 return ParseStatus::Success;
6687}
6688
6689static void addOptionalImmOperand(
6690 MCInst& Inst, const OperandVector& Operands,
6691 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6692 AMDGPUOperand::ImmTy ImmT,
6693 int64_t Default = 0) {
6694 auto i = OptionalIdx.find(ImmT);
6695 if (i != OptionalIdx.end()) {
6696 unsigned Idx = i->second;
6697 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6698 } else {
6699 Inst.addOperand(MCOperand::createImm(Default));
6700 }
6701}
6702
6703ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6704 StringRef &Value,
6705 SMLoc &StringLoc) {
6706 if (!trySkipId(Prefix, AsmToken::Colon))
6707 return ParseStatus::NoMatch;
6708
6709 StringLoc = getLoc();
6710 return parseId(Value, "expected an identifier") ? ParseStatus::Success
6711 : ParseStatus::Failure;
6712}
6713
6714//===----------------------------------------------------------------------===//
6715// MTBUF format
6716//===----------------------------------------------------------------------===//
6717
6718bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6719 int64_t MaxVal,
6720 int64_t &Fmt) {
6721 int64_t Val;
6722 SMLoc Loc = getLoc();
6723
6724 auto Res = parseIntWithPrefix(Pref, Val);
6725 if (Res.isFailure())
6726 return false;
6727 if (Res.isNoMatch())
6728 return true;
6729
6730 if (Val < 0 || Val > MaxVal) {
6731 Error(Loc, Twine("out of range ", StringRef(Pref)));
6732 return false;
6733 }
6734
6735 Fmt = Val;
6736 return true;
6737}
6738
6739ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
6740 AMDGPUOperand::ImmTy ImmTy) {
6741 const char *Pref = "index_key";
6742 int64_t ImmVal = 0;
6743 SMLoc Loc = getLoc();
6744 auto Res = parseIntWithPrefix(Pref, ImmVal);
6745 if (!Res.isSuccess())
6746 return Res;
6747
6748 if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6749 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6750
6751 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6752 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6753
6754 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
6755 return ParseStatus::Success;
6756}
6757
6758ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
6759 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
6760}
6761
6762ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
6763 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
6764}
6765
6766// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6767// values to live in a joint format operand in the MCInst encoding.
6768ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6769 using namespace llvm::AMDGPU::MTBUFFormat;
6770
6771 int64_t Dfmt = DFMT_UNDEF;
6772 int64_t Nfmt = NFMT_UNDEF;
6773
6774 // dfmt and nfmt can appear in either order, and each is optional.
6775 for (int I = 0; I < 2; ++I) {
6776 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6777 return ParseStatus::Failure;
6778
6779 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
6780 return ParseStatus::Failure;
6781
6782 // Skip optional comma between dfmt/nfmt
6783 // but guard against 2 commas following each other.
6784 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6785 !peekToken().is(AsmToken::Comma)) {
6786 trySkipToken(AsmToken::Comma);
6787 }
6788 }
6789
6790 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6791 return ParseStatus::NoMatch;
6792
6793 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6794 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6795
6796 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6797 return ParseStatus::Success;
6798}
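// Illustrative sketch (not from the source): on pre-GFX10 targets a tbuffer
// operand such as "dfmt:4, nfmt:7" (either key may be omitted, and the two may
// appear in either order) is folded into the single encoded format immediate
// returned here; GFX10+ instead accepts a unified "format:..." value handled by
// parseUfmt. The numeric values above are assumptions for illustration.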
6799
6800ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6801 using namespace llvm::AMDGPU::MTBUFFormat;
6802
6803 int64_t Fmt = UFMT_UNDEF;
6804
6805 if (!tryParseFmt("format", UFMT_MAX, Fmt))
6806 return ParseStatus::Failure;
6807
6808 if (Fmt == UFMT_UNDEF)
6809 return ParseStatus::NoMatch;
6810
6811 Format = Fmt;
6812 return ParseStatus::Success;
6813}
6814
6815bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6816 int64_t &Nfmt,
6817 StringRef FormatStr,
6818 SMLoc Loc) {
6819 using namespace llvm::AMDGPU::MTBUFFormat;
6820 int64_t Format;
6821
6822 Format = getDfmt(FormatStr);
6823 if (Format != DFMT_UNDEF) {
6824 Dfmt = Format;
6825 return true;
6826 }
6827
6828 Format = getNfmt(FormatStr, getSTI());
6829 if (Format != NFMT_UNDEF) {
6830 Nfmt = Format;
6831 return true;
6832 }
6833
6834 Error(Loc, "unsupported format");
6835 return false;
6836}
6837
6838ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6839 SMLoc FormatLoc,
6840 int64_t &Format) {
6841 using namespace llvm::AMDGPU::MTBUFFormat;
6842
6843 int64_t Dfmt = DFMT_UNDEF;
6844 int64_t Nfmt = NFMT_UNDEF;
6845 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6846 return ParseStatus::Failure;
6847
6848 if (trySkipToken(AsmToken::Comma)) {
6849 StringRef Str;
6850 SMLoc Loc = getLoc();
6851 if (!parseId(Str, "expected a format string") ||
6852 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6853 return ParseStatus::Failure;
6854 if (Dfmt == DFMT_UNDEF)
6855 return Error(Loc, "duplicate numeric format");
6856 if (Nfmt == NFMT_UNDEF)
6857 return Error(Loc, "duplicate data format");
6858 }
6859
6860 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6861 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6862
6863 if (isGFX10Plus()) {
6864 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6865 if (Ufmt == UFMT_UNDEF)
6866 return Error(FormatLoc, "unsupported format");
6867 Format = Ufmt;
6868 } else {
6869 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6870 }
6871
6872 return ParseStatus::Success;
6873}
6874
6875ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6876 SMLoc Loc,
6877 int64_t &Format) {
6878 using namespace llvm::AMDGPU::MTBUFFormat;
6879
6880 auto Id = getUnifiedFormat(FormatStr, getSTI());
6881 if (Id == UFMT_UNDEF)
6882 return ParseStatus::NoMatch;
6883
6884 if (!isGFX10Plus())
6885 return Error(Loc, "unified format is not supported on this GPU");
6886
6887 Format = Id;
6888 return ParseStatus::Success;
6889}
6890
6891ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6892 using namespace llvm::AMDGPU::MTBUFFormat;
6893 SMLoc Loc = getLoc();
6894
6895 if (!parseExpr(Format))
6896 return ParseStatus::Failure;
6897 if (!isValidFormatEncoding(Format, getSTI()))
6898 return Error(Loc, "out of range format");
6899
6900 return ParseStatus::Success;
6901}
6902
6903ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6904 using namespace llvm::AMDGPU::MTBUFFormat;
6905
6906 if (!trySkipId("format", AsmToken::Colon))
6907 return ParseStatus::NoMatch;
6908
6909 if (trySkipToken(AsmToken::LBrac)) {
6910 StringRef FormatStr;
6911 SMLoc Loc = getLoc();
6912 if (!parseId(FormatStr, "expected a format string"))
6913 return ParseStatus::Failure;
6914
6915 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6916 if (Res.isNoMatch())
6917 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6918 if (!Res.isSuccess())
6919 return Res;
6920
6921 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6922 return ParseStatus::Failure;
6923
6924 return ParseStatus::Success;
6925 }
6926
6927 return parseNumericFormat(Format);
6928}
6929
6930ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6931 using namespace llvm::AMDGPU::MTBUFFormat;
6932
6933 int64_t Format = getDefaultFormatEncoding(getSTI());
6934 ParseStatus Res;
6935 SMLoc Loc = getLoc();
6936
6937 // Parse legacy format syntax.
6938 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6939 if (Res.isFailure())
6940 return Res;
6941
6942 bool FormatFound = Res.isSuccess();
6943
6944 Operands.push_back(
6945 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6946
6947 if (FormatFound)
6948 trySkipToken(AsmToken::Comma);
6949
6950 if (isToken(AsmToken::EndOfStatement)) {
6951 // We are expecting an soffset operand,
6952 // but let the matcher handle the error.
6953 return ParseStatus::Success;
6954 }
6955
6956 // Parse soffset.
6957 Res = parseRegOrImm(Operands);
6958 if (!Res.isSuccess())
6959 return Res;
6960
6961 trySkipToken(AsmToken::Comma);
6962
6963 if (!FormatFound) {
6964 Res = parseSymbolicOrNumericFormat(Format);
6965 if (Res.isFailure())
6966 return Res;
6967 if (Res.isSuccess()) {
6968 auto Size = Operands.size();
6969 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6970 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6971 Op.setImm(Format);
6972 }
6973 return ParseStatus::Success;
6974 }
6975
6976 if (isId("format") && peekToken().is(AsmToken::Colon))
6977 return Error(getLoc(), "duplicate format");
6978 return ParseStatus::Success;
6979}
6980
6981ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
6982 ParseStatus Res =
6983 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
6984 if (Res.isNoMatch()) {
6985 Res = parseIntWithPrefix("inst_offset", Operands,
6986 AMDGPUOperand::ImmTyInstOffset);
6987 }
6988 return Res;
6989}
6990
6991ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
6992 ParseStatus Res =
6993 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
6994 if (Res.isNoMatch())
6995 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
6996 return Res;
6997}
6998
6999ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7000 ParseStatus Res =
7001 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7002 if (Res.isNoMatch()) {
7003 Res =
7004 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7005 }
7006 return Res;
7007}
7008
7009//===----------------------------------------------------------------------===//
7010// Exp
7011//===----------------------------------------------------------------------===//
7012
7013void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7014 OptionalImmIndexMap OptionalIdx;
7015
7016 unsigned OperandIdx[4];
7017 unsigned EnMask = 0;
7018 int SrcIdx = 0;
7019
7020 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7021 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7022
7023 // Add the register arguments
7024 if (Op.isReg()) {
7025 assert(SrcIdx < 4);
7026 OperandIdx[SrcIdx] = Inst.size();
7027 Op.addRegOperands(Inst, 1);
7028 ++SrcIdx;
7029 continue;
7030 }
7031
7032 if (Op.isOff()) {
7033 assert(SrcIdx < 4);
7034 OperandIdx[SrcIdx] = Inst.size();
7035 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
7036 ++SrcIdx;
7037 continue;
7038 }
7039
7040 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7041 Op.addImmOperands(Inst, 1);
7042 continue;
7043 }
7044
7045 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7046 continue;
7047
7048 // Handle optional arguments
7049 OptionalIdx[Op.getImmTy()] = i;
7050 }
7051
7052 assert(SrcIdx == 4);
7053
7054 bool Compr = false;
7055 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7056 Compr = true;
7057 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7058 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
7059 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
7060 }
7061
7062 for (auto i = 0; i < SrcIdx; ++i) {
7063 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
7064 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7065 }
7066 }
7067
7068 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7069 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7070
7071 Inst.addOperand(MCOperand::createImm(EnMask));
7072}
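// Illustrative sketch (not from the source): for an export such as
//
//   exp mrt0 v0, v1, off, off done
//
// the two "off" sources become NoRegister operands, so the enable mask (EnMask)
// computed above would be 0b0011; with "compr" each live source instead enables
// a pair of channels. The target name "mrt0" is an assumption for illustration.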
7073
7074//===----------------------------------------------------------------------===//
7075// s_waitcnt
7076//===----------------------------------------------------------------------===//
7077
7078static bool
7079encodeCnt(
7080 const AMDGPU::IsaVersion ISA,
7081 int64_t &IntVal,
7082 int64_t CntVal,
7083 bool Saturate,
7084 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7085 unsigned (*decode)(const IsaVersion &Version, unsigned))
7086{
7087 bool Failed = false;
7088
7089 IntVal = encode(ISA, IntVal, CntVal);
7090 if (CntVal != decode(ISA, IntVal)) {
7091 if (Saturate) {
7092 IntVal = encode(ISA, IntVal, -1);
7093 } else {
7094 Failed = true;
7095 }
7096 }
7097 return Failed;
7098}
7099
7100bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7101
7102 SMLoc CntLoc = getLoc();
7103 StringRef CntName = getTokenStr();
7104
7105 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7106 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7107 return false;
7108
7109 int64_t CntVal;
7110 SMLoc ValLoc = getLoc();
7111 if (!parseExpr(CntVal))
7112 return false;
7113
7114 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7115
7116 bool Failed = true;
7117 bool Sat = CntName.ends_with("_sat");
7118
7119 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7120 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7121 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7122 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7123 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7124 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7125 } else {
7126 Error(CntLoc, "invalid counter name " + CntName);
7127 return false;
7128 }
7129
7130 if (Failed) {
7131 Error(ValLoc, "too large value for " + CntName);
7132 return false;
7133 }
7134
7135 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7136 return false;
7137
7138 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7139 if (isToken(AsmToken::EndOfStatement)) {
7140 Error(getLoc(), "expected a counter name");
7141 return false;
7142 }
7143 }
7144
7145 return true;
7146}
7147
7148ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7149 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7150 int64_t Waitcnt = getWaitcntBitMask(ISA);
7151 SMLoc S = getLoc();
7152
7153 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7154 while (!isToken(AsmToken::EndOfStatement)) {
7155 if (!parseCnt(Waitcnt))
7156 return ParseStatus::Failure;
7157 }
7158 } else {
7159 if (!parseExpr(Waitcnt))
7160 return ParseStatus::Failure;
7161 }
7162
7163 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7164 return ParseStatus::Success;
7165}
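// Illustrative sketch (not from the source): both spellings below are accepted
// by the logic above; named counters are combined into one bitmask, while a
// bare expression is used verbatim.
//
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
//   s_waitcnt 0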
7166
7167bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7168 SMLoc FieldLoc = getLoc();
7169 StringRef FieldName = getTokenStr();
7170 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7171 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7172 return false;
7173
7174 SMLoc ValueLoc = getLoc();
7175 StringRef ValueName = getTokenStr();
7176 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7177 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7178 return false;
7179
7180 unsigned Shift;
7181 if (FieldName == "instid0") {
7182 Shift = 0;
7183 } else if (FieldName == "instskip") {
7184 Shift = 4;
7185 } else if (FieldName == "instid1") {
7186 Shift = 7;
7187 } else {
7188 Error(FieldLoc, "invalid field name " + FieldName);
7189 return false;
7190 }
7191
7192 int Value;
7193 if (Shift == 4) {
7194 // Parse values for instskip.
7195 Value = StringSwitch<int>(ValueName)
7196 .Case("SAME", 0)
7197 .Case("NEXT", 1)
7198 .Case("SKIP_1", 2)
7199 .Case("SKIP_2", 3)
7200 .Case("SKIP_3", 4)
7201 .Case("SKIP_4", 5)
7202 .Default(-1);
7203 } else {
7204 // Parse values for instid0 and instid1.
7205 Value = StringSwitch<int>(ValueName)
7206 .Case("NO_DEP", 0)
7207 .Case("VALU_DEP_1", 1)
7208 .Case("VALU_DEP_2", 2)
7209 .Case("VALU_DEP_3", 3)
7210 .Case("VALU_DEP_4", 4)
7211 .Case("TRANS32_DEP_1", 5)
7212 .Case("TRANS32_DEP_2", 6)
7213 .Case("TRANS32_DEP_3", 7)
7214 .Case("FMA_ACCUM_CYCLE_1", 8)
7215 .Case("SALU_CYCLE_1", 9)
7216 .Case("SALU_CYCLE_2", 10)
7217 .Case("SALU_CYCLE_3", 11)
7218 .Default(-1);
7219 }
7220 if (Value < 0) {
7221 Error(ValueLoc, "invalid value name " + ValueName);
7222 return false;
7223 }
7224
7225 Delay |= Value << Shift;
7226 return true;
7227}
7228
7229ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7230 int64_t Delay = 0;
7231 SMLoc S = getLoc();
7232
7233 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7234 do {
7235 if (!parseDelay(Delay))
7236 return ParseStatus::Failure;
7237 } while (trySkipToken(AsmToken::Pipe));
7238 } else {
7239 if (!parseExpr(Delay))
7240 return ParseStatus::Failure;
7241 }
7242
7243 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7244 return ParseStatus::Success;
7245}
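// Illustrative sketch (not from the source): fields are separated by '|', e.g.
//
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
//
// which ORs the three field values together after shifting them by 0, 4 and 7
// bits respectively, per parseDelay above.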
7246
7247bool
7248AMDGPUOperand::isSWaitCnt() const {
7249 return isImm();
7250}
7251
7252bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7253
7254//===----------------------------------------------------------------------===//
7255// DepCtr
7256//===----------------------------------------------------------------------===//
7257
7258void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7259 StringRef DepCtrName) {
7260 switch (ErrorId) {
7261 case OPR_ID_UNKNOWN:
7262 Error(Loc, Twine("invalid counter name ", DepCtrName));
7263 return;
7264 case OPR_ID_UNSUPPORTED:
7265 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7266 return;
7267 case OPR_ID_DUPLICATE:
7268 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7269 return;
7270 case OPR_VAL_INVALID:
7271 Error(Loc, Twine("invalid value for ", DepCtrName));
7272 return;
7273 default:
7274 assert(false);
7275 }
7276}
7277
7278bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7279
7280 using namespace llvm::AMDGPU::DepCtr;
7281
7282 SMLoc DepCtrLoc = getLoc();
7283 StringRef DepCtrName = getTokenStr();
7284
7285 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7286 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7287 return false;
7288
7289 int64_t ExprVal;
7290 if (!parseExpr(ExprVal))
7291 return false;
7292
7293 unsigned PrevOprMask = UsedOprMask;
7294 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7295
7296 if (CntVal < 0) {
7297 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7298 return false;
7299 }
7300
7301 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7302 return false;
7303
7304 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7305 if (isToken(AsmToken::EndOfStatement)) {
7306 Error(getLoc(), "expected a counter name");
7307 return false;
7308 }
7309 }
7310
7311 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7312 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7313 return true;
7314}
7315
7316ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7317 using namespace llvm::AMDGPU::DepCtr;
7318
7319 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7320 SMLoc Loc = getLoc();
7321
7322 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7323 unsigned UsedOprMask = 0;
7324 while (!isToken(AsmToken::EndOfStatement)) {
7325 if (!parseDepCtr(DepCtr, UsedOprMask))
7326 return ParseStatus::Failure;
7327 }
7328 } else {
7329 if (!parseExpr(DepCtr))
7330 return ParseStatus::Failure;
7331 }
7332
7333 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7334 return ParseStatus::Success;
7335}
7336
7337bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7338
7339//===----------------------------------------------------------------------===//
7340// hwreg
7341//===----------------------------------------------------------------------===//
7342
7343ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7344 OperandInfoTy &Offset,
7345 OperandInfoTy &Width) {
7346 using namespace llvm::AMDGPU::Hwreg;
7347
7348 if (!trySkipId("hwreg", AsmToken::LParen))
7349 return ParseStatus::NoMatch;
7350
7351 // The register may be specified by name or using a numeric code
7352 HwReg.Loc = getLoc();
7353 if (isToken(AsmToken::Identifier) &&
7354 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7355 HwReg.IsSymbolic = true;
7356 lex(); // skip register name
7357 } else if (!parseExpr(HwReg.Val, "a register name")) {
7358 return ParseStatus::Failure;
7359 }
7360
7361 if (trySkipToken(AsmToken::RParen))
7362 return ParseStatus::Success;
7363
7364 // parse optional params
7365 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
7366 return ParseStatus::Failure;
7367
7368 Offset.Loc = getLoc();
7369 if (!parseExpr(Offset.Val))
7370 return ParseStatus::Failure;
7371
7372 if (!skipToken(AsmToken::Comma, "expected a comma"))
7373 return ParseStatus::Failure;
7374
7375 Width.Loc = getLoc();
7376 if (!parseExpr(Width.Val) ||
7377 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7378 return ParseStatus::Failure;
7379
7380 return ParseStatus::Success;
7381}
7382
7383ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
7384 using namespace llvm::AMDGPU::Hwreg;
7385
7386 int64_t ImmVal = 0;
7387 SMLoc Loc = getLoc();
7388
7389 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
7390 HwregId::Default);
7391 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
7392 HwregOffset::Default);
7393 struct : StructuredOpField {
7394 using StructuredOpField::StructuredOpField;
7395 bool validate(AMDGPUAsmParser &Parser) const override {
7396 if (!isUIntN(Width, Val - 1))
7397 return Error(Parser, "only values from 1 to 32 are legal");
7398 return true;
7399 }
7400 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
7401 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
7402
7403 if (Res.isNoMatch())
7404 Res = parseHwregFunc(HwReg, Offset, Width);
7405
7406 if (Res.isSuccess()) {
7407 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
7408 return ParseStatus::Failure;
7409 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
7410 }
7411
7412 if (Res.isNoMatch() &&
7413 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
7414 Res = ParseStatus::Success;
7415
7416 if (!Res.isSuccess())
7417 return ParseStatus::Failure;
7418
7419 if (!isUInt<16>(ImmVal))
7420 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7421 Operands.push_back(
7422 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7423 return ParseStatus::Success;
7424}
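// Editor's note (not in the original source): illustrative operand forms
// accepted by parseHwreg above -- the hwreg() macro, the structured
// immediate handled by parseStructuredOpFields, or a raw 16-bit value.
// HW_REG_MODE is an assumed symbolic register name.
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)
//   s_getreg_b32 s0, {id: 1, offset: 0, size: 32}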
7425
7426bool AMDGPUOperand::isHwreg() const {
7427 return isImmTy(ImmTyHwreg);
7428}
7429
7430//===----------------------------------------------------------------------===//
7431// sendmsg
7432//===----------------------------------------------------------------------===//
7433
7434bool
7435AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7436 OperandInfoTy &Op,
7437 OperandInfoTy &Stream) {
7438 using namespace llvm::AMDGPU::SendMsg;
7439
7440 Msg.Loc = getLoc();
7441 if (isToken(AsmToken::Identifier) &&
7442 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7443 Msg.IsSymbolic = true;
7444 lex(); // skip message name
7445 } else if (!parseExpr(Msg.Val, "a message name")) {
7446 return false;
7447 }
7448
7449 if (trySkipToken(AsmToken::Comma)) {
7450 Op.IsDefined = true;
7451 Op.Loc = getLoc();
7452 if (isToken(AsmToken::Identifier) &&
7453 (Op.Val = getMsgOpId(Msg.Val, getTokenStr())) >= 0) {
7454 lex(); // skip operation name
7455 } else if (!parseExpr(Op.Val, "an operation name")) {
7456 return false;
7457 }
7458
7459 if (trySkipToken(AsmToken::Comma)) {
7460 Stream.IsDefined = true;
7461 Stream.Loc = getLoc();
7462 if (!parseExpr(Stream.Val))
7463 return false;
7464 }
7465 }
7466
7467 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
7468}
7469
7470bool
7471AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
7472 const OperandInfoTy &Op,
7473 const OperandInfoTy &Stream) {
7474 using namespace llvm::AMDGPU::SendMsg;
7475
7476 // Validation strictness depends on whether the message is specified
7477 // in symbolic or in numeric form. In the latter case,
7478 // only whether the value can be encoded is checked.
7479 bool Strict = Msg.IsSymbolic;
7480
7481 if (Strict) {
7482 if (Msg.Val == OPR_ID_UNSUPPORTED) {
7483 Error(Msg.Loc, "specified message id is not supported on this GPU");
7484 return false;
7485 }
7486 } else {
7487 if (!isValidMsgId(Msg.Val, getSTI())) {
7488 Error(Msg.Loc, "invalid message id");
7489 return false;
7490 }
7491 }
7492 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
7493 if (Op.IsDefined) {
7494 Error(Op.Loc, "message does not support operations");
7495 } else {
7496 Error(Msg.Loc, "missing message operation");
7497 }
7498 return false;
7499 }
7500 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
7501 Error(Op.Loc, "invalid operation id");
7502 return false;
7503 }
7504 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
7505 Stream.IsDefined) {
7506 Error(Stream.Loc, "message operation does not support streams");
7507 return false;
7508 }
7509 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
7510 Error(Stream.Loc, "invalid message stream id");
7511 return false;
7512 }
7513 return true;
7514}
7515
7516ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
7517 using namespace llvm::AMDGPU::SendMsg;
7518
7519 int64_t ImmVal = 0;
7520 SMLoc Loc = getLoc();
7521
7522 if (trySkipId("sendmsg", AsmToken::LParen)) {
7523 OperandInfoTy Msg(OPR_ID_UNKNOWN);
7524 OperandInfoTy Op(OP_NONE_);
7525 OperandInfoTy Stream(STREAM_ID_NONE_);
7526 if (parseSendMsgBody(Msg, Op, Stream) &&
7527 validateSendMsg(Msg, Op, Stream)) {
7528 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
7529 } else {
7530 return ParseStatus::Failure;
7531 }
7532 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
7533 if (ImmVal < 0 || !isUInt<16>(ImmVal))
7534 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7535 } else {
7536 return ParseStatus::Failure;
7537 }
7538
7539 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7540 return ParseStatus::Success;
7541}
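// Editor's note (not in the original source): illustrative operand forms for
// parseSendMsg above. MSG_GS and GS_OP_EMIT are assumed symbolic names; a
// raw 16-bit immediate is also accepted.
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
//   s_sendmsg 0x22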
7542
7543bool AMDGPUOperand::isSendMsg() const {
7544 return isImmTy(ImmTySendMsg);
7545}
7546
7547//===----------------------------------------------------------------------===//
7548// v_interp
7549//===----------------------------------------------------------------------===//
7550
7551ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7552 StringRef Str;
7553 SMLoc S = getLoc();
7554
7555 if (!parseId(Str))
7556 return ParseStatus::NoMatch;
7557
7558 int Slot = StringSwitch<int>(Str)
7559 .Case("p10", 0)
7560 .Case("p20", 1)
7561 .Case("p0", 2)
7562 .Default(-1);
7563
7564 if (Slot == -1)
7565 return Error(S, "invalid interpolation slot");
7566
7567 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
7568 AMDGPUOperand::ImmTyInterpSlot));
7569 return ParseStatus::Success;
7570}
7571
7572ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
7573 StringRef Str;
7574 SMLoc S = getLoc();
7575
7576 if (!parseId(Str))
7577 return ParseStatus::NoMatch;
7578
7579 if (!Str.starts_with("attr"))
7580 return Error(S, "invalid interpolation attribute");
7581
7582 StringRef Chan = Str.take_back(2);
7583 int AttrChan = StringSwitch<int>(Chan)
7584 .Case(".x", 0)
7585 .Case(".y", 1)
7586 .Case(".z", 2)
7587 .Case(".w", 3)
7588 .Default(-1);
7589 if (AttrChan == -1)
7590 return Error(S, "invalid or missing interpolation attribute channel");
7591
7592 Str = Str.drop_back(2).drop_front(4);
7593
7594 uint8_t Attr;
7595 if (Str.getAsInteger(10, Attr))
7596 return Error(S, "invalid or missing interpolation attribute number");
7597
7598 if (Attr > 32)
7599 return Error(S, "out of bounds interpolation attribute number");
7600
7601 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
7602
7603 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
7604 AMDGPUOperand::ImmTyInterpAttr));
7605 Operands.push_back(AMDGPUOperand::CreateImm(
7606 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7607 return ParseStatus::Success;
7608}
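// Editor's note (not in the original source): illustrative v_interp syntax
// for the two parsers above -- the slot is one of p10/p20/p0 and the
// attribute is written attrN.{x,y,z,w}:
//   v_interp_mov_f32 v0, p10, attr0.x
//   v_interp_p1_f32  v0, v1, attr0.y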
7609
7610//===----------------------------------------------------------------------===//
7611// exp
7612//===----------------------------------------------------------------------===//
7613
7614ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
7615 using namespace llvm::AMDGPU::Exp;
7616
7617 StringRef Str;
7618 SMLoc S = getLoc();
7619
7620 if (!parseId(Str))
7621 return ParseStatus::NoMatch;
7622
7623 unsigned Id = getTgtId(Str);
7624 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
7625 return Error(S, (Id == ET_INVALID)
7626 ? "invalid exp target"
7627 : "exp target is not supported on this GPU");
7628
7629 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
7630 AMDGPUOperand::ImmTyExpTgt));
7631 return ParseStatus::Success;
7632}
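// Editor's note (not in the original source): illustrative export targets
// accepted by parseExpTgt above (names resolved via getTgtId), e.g.
//   exp mrt0 v0, v0, v0, v0 done vm
//   exp pos0 v1, v2, v3, v4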
7633
7634//===----------------------------------------------------------------------===//
7635// parser helpers
7636//===----------------------------------------------------------------------===//
7637
7638bool
7639AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
7640 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
7641}
7642
7643bool
7644AMDGPUAsmParser::isId(const StringRef Id) const {
7645 return isId(getToken(), Id);
7646}
7647
7648bool
7649AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7650 return getTokenKind() == Kind;
7651}
7652
7653StringRef AMDGPUAsmParser::getId() const {
7654 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7655}
7656
7657bool
7658AMDGPUAsmParser::trySkipId(const StringRef Id) {
7659 if (isId(Id)) {
7660 lex();
7661 return true;
7662 }
7663 return false;
7664}
7665
7666bool
7667AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7668 if (isToken(AsmToken::Identifier)) {
7669 StringRef Tok = getTokenStr();
7670 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
7671 lex();
7672 return true;
7673 }
7674 }
7675 return false;
7676}
7677
7678bool
7679AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7680 if (isId(Id) && peekToken().is(Kind)) {
7681 lex();
7682 lex();
7683 return true;
7684 }
7685 return false;
7686}
7687
7688bool
7689AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7690 if (isToken(Kind)) {
7691 lex();
7692 return true;
7693 }
7694 return false;
7695}
7696
7697bool
7698AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7699 const StringRef ErrMsg) {
7700 if (!trySkipToken(Kind)) {
7701 Error(getLoc(), ErrMsg);
7702 return false;
7703 }
7704 return true;
7705}
7706
7707bool
7708AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7709 SMLoc S = getLoc();
7710
7711 const MCExpr *Expr;
7712 if (Parser.parseExpression(Expr))
7713 return false;
7714
7715 if (Expr->evaluateAsAbsolute(Imm))
7716 return true;
7717
7718 if (Expected.empty()) {
7719 Error(S, "expected absolute expression");
7720 } else {
7721 Error(S, Twine("expected ", Expected) +
7722 Twine(" or an absolute expression"));
7723 }
7724 return false;
7725}
7726
7727bool
7728AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7729 SMLoc S = getLoc();
7730
7731 const MCExpr *Expr;
7732 if (Parser.parseExpression(Expr))
7733 return false;
7734
7735 int64_t IntVal;
7736 if (Expr->evaluateAsAbsolute(IntVal)) {
7737 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7738 } else {
7739 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7740 }
7741 return true;
7742}
7743
7744bool
7745AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7746 if (isToken(AsmToken::String)) {
7747 Val = getToken().getStringContents();
7748 lex();
7749 return true;
7750 } else {
7751 Error(getLoc(), ErrMsg);
7752 return false;
7753 }
7754}
7755
7756bool
7757AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7758 if (isToken(AsmToken::Identifier)) {
7759 Val = getTokenStr();
7760 lex();
7761 return true;
7762 } else {
7763 if (!ErrMsg.empty())
7764 Error(getLoc(), ErrMsg);
7765 return false;
7766 }
7767}
7768
7769AsmToken
7770AMDGPUAsmParser::getToken() const {
7771 return Parser.getTok();
7772}
7773
7774AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7775 return isToken(AsmToken::EndOfStatement)
7776 ? getToken()
7777 : getLexer().peekTok(ShouldSkipSpace);
7778}
7779
7780void
7781AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7782 auto TokCount = getLexer().peekTokens(Tokens);
7783
7784 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7785 Tokens[Idx] = AsmToken(AsmToken::Error, "");
7786}
7787
7788AsmToken::TokenKind
7789AMDGPUAsmParser::getTokenKind() const {
7790 return getLexer().getKind();
7791}
7792
7793SMLoc
7794AMDGPUAsmParser::getLoc() const {
7795 return getToken().getLoc();
7796}
7797
7798StringRef
7799AMDGPUAsmParser::getTokenStr() const {
7800 return getToken().getString();
7801}
7802
7803void
7804AMDGPUAsmParser::lex() {
7805 Parser.Lex();
7806}
7807
7808SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7809 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7810}
7811
7812SMLoc
7813AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7814 const OperandVector &Operands) const {
7815 for (unsigned i = Operands.size() - 1; i > 0; --i) {
7816 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7817 if (Test(Op))
7818 return Op.getStartLoc();
7819 }
7820 return getInstLoc(Operands);
7821}
7822
7823SMLoc
7824AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7825 const OperandVector &Operands) const {
7826 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7827 return getOperandLoc(Test, Operands);
7828}
7829
7830SMLoc
7831AMDGPUAsmParser::getRegLoc(unsigned Reg,
7832 const OperandVector &Operands) const {
7833 auto Test = [=](const AMDGPUOperand& Op) {
7834 return Op.isRegKind() && Op.getReg() == Reg;
7835 };
7836 return getOperandLoc(Test, Operands);
7837}
7838
7839SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7840 bool SearchMandatoryLiterals) const {
7841 auto Test = [](const AMDGPUOperand& Op) {
7842 return Op.IsImmKindLiteral() || Op.isExpr();
7843 };
7844 SMLoc Loc = getOperandLoc(Test, Operands);
7845 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7846 Loc = getMandatoryLitLoc(Operands);
7847 return Loc;
7848}
7849
7850SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7851 auto Test = [](const AMDGPUOperand &Op) {
7852 return Op.IsImmKindMandatoryLiteral();
7853 };
7854 return getOperandLoc(Test, Operands);
7855}
7856
7857SMLoc
7858AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7859 auto Test = [](const AMDGPUOperand& Op) {
7860 return Op.isImmKindConst();
7861 };
7862 return getOperandLoc(Test, Operands);
7863}
7864
7865ParseStatus
7866AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
7867 if (!trySkipToken(AsmToken::LCurly))
7868 return ParseStatus::NoMatch;
7869
7870 bool First = true;
7871 while (!trySkipToken(AsmToken::RCurly)) {
7872 if (!First &&
7873 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
7874 return ParseStatus::Failure;
7875
7876 StringRef Id = getTokenStr();
7877 SMLoc IdLoc = getLoc();
7878 if (!skipToken(AsmToken::Identifier, "field name expected") ||
7879 !skipToken(AsmToken::Colon, "colon expected"))
7880 return ParseStatus::Failure;
7881
7882 auto I =
7883 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
7884 if (I == Fields.end())
7885 return Error(IdLoc, "unknown field");
7886 if ((*I)->IsDefined)
7887 return Error(IdLoc, "duplicate field");
7888
7889 // TODO: Support symbolic values.
7890 (*I)->Loc = getLoc();
7891 if (!parseExpr((*I)->Val))
7892 return ParseStatus::Failure;
7893 (*I)->IsDefined = true;
7894
7895 First = false;
7896 }
7897 return ParseStatus::Success;
7898}
7899
7900bool AMDGPUAsmParser::validateStructuredOpFields(
7901 ArrayRef<const StructuredOpField *> Fields) {
7902 return all_of(Fields, [this](const StructuredOpField *F) {
7903 return F->validate(*this);
7904 });
7905}
7906
7907//===----------------------------------------------------------------------===//
7908// swizzle
7909//===----------------------------------------------------------------------===//
7910
7911LLVM_READNONE
7912static unsigned
7913encodeBitmaskPerm(const unsigned AndMask,
7914 const unsigned OrMask,
7915 const unsigned XorMask) {
7916 using namespace llvm::AMDGPU::Swizzle;
7917
7918 return BITMASK_PERM_ENC |
7919 (AndMask << BITMASK_AND_SHIFT) |
7920 (OrMask << BITMASK_OR_SHIFT) |
7921 (XorMask << BITMASK_XOR_SHIFT);
7922}
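// Editor's note (not in the original source): a worked example of the
// encoding above, assuming BITMASK_MAX == 0x1f. parseSwizzleReverse below
// uses encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); for GroupSize == 8
// that gives and_mask = 0x1f, or_mask = 0, xor_mask = 0x7, i.e. each lane is
// swapped with its mirror inside every group of 8 lanes.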
7923
7924bool
7925AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7926 const unsigned MinVal,
7927 const unsigned MaxVal,
7928 const StringRef ErrMsg,
7929 SMLoc &Loc) {
7930 if (!skipToken(AsmToken::Comma, "expected a comma")) {
7931 return false;
7932 }
7933 Loc = getLoc();
7934 if (!parseExpr(Op)) {
7935 return false;
7936 }
7937 if (Op < MinVal || Op > MaxVal) {
7938 Error(Loc, ErrMsg);
7939 return false;
7940 }
7941
7942 return true;
7943}
7944
7945bool
7946AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7947 const unsigned MinVal,
7948 const unsigned MaxVal,
7949 const StringRef ErrMsg) {
7950 SMLoc Loc;
7951 for (unsigned i = 0; i < OpNum; ++i) {
7952 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7953 return false;
7954 }
7955
7956 return true;
7957}
7958
7959bool
7960AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7961 using namespace llvm::AMDGPU::Swizzle;
7962
7963 int64_t Lane[LANE_NUM];
7964 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7965 "expected a 2-bit lane id")) {
7966 Imm = QUAD_PERM_ENC;
7967 for (unsigned I = 0; I < LANE_NUM; ++I) {
7968 Imm |= Lane[I] << (LANE_SHIFT * I);
7969 }
7970 return true;
7971 }
7972 return false;
7973}
7974
7975bool
7976AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7977 using namespace llvm::AMDGPU::Swizzle;
7978
7979 SMLoc Loc;
7980 int64_t GroupSize;
7981 int64_t LaneIdx;
7982
7983 if (!parseSwizzleOperand(GroupSize,
7984 2, 32,
7985 "group size must be in the interval [2,32]",
7986 Loc)) {
7987 return false;
7988 }
7989 if (!isPowerOf2_64(GroupSize)) {
7990 Error(Loc, "group size must be a power of two");
7991 return false;
7992 }
7993 if (parseSwizzleOperand(LaneIdx,
7994 0, GroupSize - 1,
7995 "lane id must be in the interval [0,group size - 1]",
7996 Loc)) {
7997 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7998 return true;
7999 }
8000 return false;
8001}
8002
8003bool
8004AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8005 using namespace llvm::AMDGPU::Swizzle;
8006
8007 SMLoc Loc;
8008 int64_t GroupSize;
8009
8010 if (!parseSwizzleOperand(GroupSize,
8011 2, 32,
8012 "group size must be in the interval [2,32]",
8013 Loc)) {
8014 return false;
8015 }
8016 if (!isPowerOf2_64(GroupSize)) {
8017 Error(Loc, "group size must be a power of two");
8018 return false;
8019 }
8020
8021 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8022 return true;
8023}
8024
8025bool
8026AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8027 using namespace llvm::AMDGPU::Swizzle;
8028
8029 SMLoc Loc;
8030 int64_t GroupSize;
8031
8032 if (!parseSwizzleOperand(GroupSize,
8033 1, 16,
8034 "group size must be in the interval [1,16]",
8035 Loc)) {
8036 return false;
8037 }
8038 if (!isPowerOf2_64(GroupSize)) {
8039 Error(Loc, "group size must be a power of two");
8040 return false;
8041 }
8042
8043 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8044 return true;
8045}
8046
8047bool
8048AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8049 using namespace llvm::AMDGPU::Swizzle;
8050
8051 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8052 return false;
8053 }
8054
8055 StringRef Ctl;
8056 SMLoc StrLoc = getLoc();
8057 if (!parseString(Ctl)) {
8058 return false;
8059 }
8060 if (Ctl.size() != BITMASK_WIDTH) {
8061 Error(StrLoc, "expected a 5-character mask");
8062 return false;
8063 }
8064
8065 unsigned AndMask = 0;
8066 unsigned OrMask = 0;
8067 unsigned XorMask = 0;
8068
8069 for (size_t i = 0; i < Ctl.size(); ++i) {
8070 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8071 switch(Ctl[i]) {
8072 default:
8073 Error(StrLoc, "invalid mask");
8074 return false;
8075 case '0':
8076 break;
8077 case '1':
8078 OrMask |= Mask;
8079 break;
8080 case 'p':
8081 AndMask |= Mask;
8082 break;
8083 case 'i':
8084 AndMask |= Mask;
8085 XorMask |= Mask;
8086 break;
8087 }
8088 }
8089
8090 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8091 return true;
8092}
8093
8094bool
8095AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8096
8097 SMLoc OffsetLoc = getLoc();
8098
8099 if (!parseExpr(Imm, "a swizzle macro")) {
8100 return false;
8101 }
8102 if (!isUInt<16>(Imm)) {
8103 Error(OffsetLoc, "expected a 16-bit offset");
8104 return false;
8105 }
8106 return true;
8107}
8108
8109bool
8110AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8111 using namespace llvm::AMDGPU::Swizzle;
8112
8113 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
8114
8115 SMLoc ModeLoc = getLoc();
8116 bool Ok = false;
8117
8118 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8119 Ok = parseSwizzleQuadPerm(Imm);
8120 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8121 Ok = parseSwizzleBitmaskPerm(Imm);
8122 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8123 Ok = parseSwizzleBroadcast(Imm);
8124 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8125 Ok = parseSwizzleSwap(Imm);
8126 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8127 Ok = parseSwizzleReverse(Imm);
8128 } else {
8129 Error(ModeLoc, "expected a swizzle mode");
8130 }
8131
8132 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
8133 }
8134
8135 return false;
8136}
8137
8138ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8139 SMLoc S = getLoc();
8140 int64_t Imm = 0;
8141
8142 if (trySkipId("offset")) {
8143
8144 bool Ok = false;
8145 if (skipToken(AsmToken::Colon, "expected a colon")) {
8146 if (trySkipId("swizzle")) {
8147 Ok = parseSwizzleMacro(Imm);
8148 } else {
8149 Ok = parseSwizzleOffset(Imm);
8150 }
8151 }
8152
8153 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8154
8155 return Ok ? ParseStatus::Success : ParseStatus::Failure;
8156 }
8157 return ParseStatus::NoMatch;
8158}
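// Editor's note (not in the original source): illustrative ds_swizzle_b32
// operands accepted by parseSwizzle above. The macro mode names
// (QUAD_PERM, BITMASK_PERM, BROADCAST, SWAP, REVERSE) are assumptions based
// on the IdSymbolic table referenced in parseSwizzleMacro.
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pi0")
//   ds_swizzle_b32 v0, v1 offset:0x8000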
8159
8160bool
8161AMDGPUOperand::isSwizzle() const {
8162 return isImmTy(ImmTySwizzle);
8163}
8164
8165//===----------------------------------------------------------------------===//
8166// VGPR Index Mode
8167//===----------------------------------------------------------------------===//
8168
8169int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8170
8171 using namespace llvm::AMDGPU::VGPRIndexMode;
8172
8173 if (trySkipToken(AsmToken::RParen)) {
8174 return OFF;
8175 }
8176
8177 int64_t Imm = 0;
8178
8179 while (true) {
8180 unsigned Mode = 0;
8181 SMLoc S = getLoc();
8182
8183 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8184 if (trySkipId(IdSymbolic[ModeId])) {
8185 Mode = 1 << ModeId;
8186 break;
8187 }
8188 }
8189
8190 if (Mode == 0) {
8191 Error(S, (Imm == 0)?
8192 "expected a VGPR index mode or a closing parenthesis" :
8193 "expected a VGPR index mode");
8194 return UNDEF;
8195 }
8196
8197 if (Imm & Mode) {
8198 Error(S, "duplicate VGPR index mode");
8199 return UNDEF;
8200 }
8201 Imm |= Mode;
8202
8203 if (trySkipToken(AsmToken::RParen))
8204 break;
8205 if (!skipToken(AsmToken::Comma,
8206 "expected a comma or a closing parenthesis"))
8207 return UNDEF;
8208 }
8209
8210 return Imm;
8211}
8212
8213ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8214
8215 using namespace llvm::AMDGPU::VGPRIndexMode;
8216
8217 int64_t Imm = 0;
8218 SMLoc S = getLoc();
8219
8220 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8221 Imm = parseGPRIdxMacro();
8222 if (Imm == UNDEF)
8223 return ParseStatus::Failure;
8224 } else {
8225 if (getParser().parseAbsoluteExpression(Imm))
8226 return ParseStatus::Failure;
8227 if (Imm < 0 || !isUInt<4>(Imm))
8228 return Error(S, "invalid immediate: only 4-bit values are legal");
8229 }
8230
8231 Operands.push_back(
8232 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8233 return ParseStatus::Success;
8234}
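// Editor's note (not in the original source): illustrative operands for
// parseGPRIdxMode above. The mode names are assumptions based on the
// VGPRIndexMode IdSymbolic table; a raw 4-bit immediate is also accepted.
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
//   s_set_gpr_idx_on s0, 3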
8235
8236bool AMDGPUOperand::isGPRIdxMode() const {
8237 return isImmTy(ImmTyGprIdxMode);
8238}
8239
8240//===----------------------------------------------------------------------===//
8241// sopp branch targets
8242//===----------------------------------------------------------------------===//
8243
8244ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8245
8246 // Make sure we are not parsing something
8247 // that looks like a label or an expression but is not.
8248 // This will improve error messages.
8249 if (isRegister() || isModifier())
8250 return ParseStatus::NoMatch;
8251
8252 if (!parseExpr(Operands))
8253 return ParseStatus::Failure;
8254
8255 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8256 assert(Opr.isImm() || Opr.isExpr());
8257 SMLoc Loc = Opr.getStartLoc();
8258
8259 // Currently we do not support arbitrary expressions as branch targets.
8260 // Only labels and absolute expressions are accepted.
8261 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8262 Error(Loc, "expected an absolute expression or a label");
8263 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8264 Error(Loc, "expected a 16-bit signed jump offset");
8265 }
8266
8267 return ParseStatus::Success;
8268}
8269
8270//===----------------------------------------------------------------------===//
8271// Boolean holding registers
8272//===----------------------------------------------------------------------===//
8273
8274ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8275 return parseReg(Operands);
8276}
8277
8278//===----------------------------------------------------------------------===//
8279// mubuf
8280//===----------------------------------------------------------------------===//
8281
8282void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8283 const OperandVector &Operands,
8284 bool IsAtomic) {
8285 OptionalImmIndexMap OptionalIdx;
8286 unsigned FirstOperandIdx = 1;
8287 bool IsAtomicReturn = false;
8288
8289 if (IsAtomic) {
8290 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
8291 SIInstrFlags::IsAtomicRet;
8292 }
8293
8294 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8295 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8296
8297 // Add the register arguments
8298 if (Op.isReg()) {
8299 Op.addRegOperands(Inst, 1);
8300 // Insert a tied src for atomic return dst.
8301 // This cannot be postponed as subsequent calls to
8302 // addImmOperands rely on correct number of MC operands.
8303 if (IsAtomicReturn && i == FirstOperandIdx)
8304 Op.addRegOperands(Inst, 1);
8305 continue;
8306 }
8307
8308 // Handle the case where soffset is an immediate
8309 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8310 Op.addImmOperands(Inst, 1);
8311 continue;
8312 }
8313
8314 // Handle tokens like 'offen' which are sometimes hard-coded into the
8315 // asm string. There are no MCInst operands for these.
8316 if (Op.isToken()) {
8317 continue;
8318 }
8319 assert(Op.isImm());
8320
8321 // Handle optional arguments
8322 OptionalIdx[Op.getImmTy()] = i;
8323 }
8324
8325 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
8326 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
8327}
8328
8329//===----------------------------------------------------------------------===//
8330// smrd
8331//===----------------------------------------------------------------------===//
8332
8333bool AMDGPUOperand::isSMRDOffset8() const {
8334 return isImmLiteral() && isUInt<8>(getImm());
8335}
8336
8337bool AMDGPUOperand::isSMEMOffset() const {
8338 // Offset range is checked later by validator.
8339 return isImmLiteral();
8340}
8341
8342bool AMDGPUOperand::isSMRDLiteralOffset() const {
8343 // 32-bit literals are only supported on CI and we only want to use them
8344 // when the offset is > 8 bits.
8345 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8346}
8347
8348//===----------------------------------------------------------------------===//
8349// vop3
8350//===----------------------------------------------------------------------===//
8351
8352static bool ConvertOmodMul(int64_t &Mul) {
8353 if (Mul != 1 && Mul != 2 && Mul != 4)
8354 return false;
8355
8356 Mul >>= 1;
8357 return true;
8358}
8359
8360static bool ConvertOmodDiv(int64_t &Div) {
8361 if (Div == 1) {
8362 Div = 0;
8363 return true;
8364 }
8365
8366 if (Div == 2) {
8367 Div = 3;
8368 return true;
8369 }
8370
8371 return false;
8372}
8373
8374// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8375// This is intentional and ensures compatibility with sp3.
8376// See bug 35397 for details.
8377bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8378 if (BoundCtrl == 0 || BoundCtrl == 1) {
8379 if (!isGFX11Plus())
8380 BoundCtrl = 1;
8381 return true;
8382 }
8383 return false;
8384}
8385
8386void AMDGPUAsmParser::onBeginOfFile() {
8387 if (!getParser().getStreamer().getTargetStreamer() ||
8388 getSTI().getTargetTriple().getArch() == Triple::r600)
8389 return;
8390
8391 if (!getTargetStreamer().getTargetID())
8392 getTargetStreamer().initializeTargetID(getSTI(),
8393 getSTI().getFeatureString());
8394
8395 if (isHsaAbi(getSTI()))
8396 getTargetStreamer().EmitDirectiveAMDGCNTarget();
8397}
8398
8399/// Parse AMDGPU specific expressions.
8400///
8401/// expr ::= or(expr, ...) |
8402/// max(expr, ...)
8403///
8404bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
8405 using AGVK = AMDGPUVariadicMCExpr::VariadicKind;
8406
8407 if (isToken(AsmToken::Identifier)) {
8408 StringRef TokenId = getTokenStr();
8409 AGVK VK = StringSwitch<AGVK>(TokenId)
8410 .Case("max", AGVK::AGVK_Max)
8411 .Case("or", AGVK::AGVK_Or)
8412 .Default(AGVK::AGVK_None);
8413
8414 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
8415 SmallVector<const MCExpr *, 4> Exprs;
8416 uint64_t CommaCount = 0;
8417 lex(); // Eat 'max'/'or'
8418 lex(); // Eat '('
8419 while (true) {
8420 if (trySkipToken(AsmToken::RParen)) {
8421 if (Exprs.empty()) {
8422 Error(getToken().getLoc(),
8423 "empty " + Twine(TokenId) + " expression");
8424 return true;
8425 }
8426 if (CommaCount + 1 != Exprs.size()) {
8427 Error(getToken().getLoc(),
8428 "mismatch of commas in " + Twine(TokenId) + " expression");
8429 return true;
8430 }
8431 Res = AMDGPUVariadicMCExpr::create(VK, Exprs, getContext());
8432 return false;
8433 }
8434 const MCExpr *Expr;
8435 if (getParser().parseExpression(Expr, EndLoc))
8436 return true;
8437 Exprs.push_back(Expr);
8438 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
8439 if (LastTokenWasComma)
8440 CommaCount++;
8441 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
8442 Error(getToken().getLoc(),
8443 "unexpected token in " + Twine(TokenId) + " expression");
8444 return true;
8445 }
8446 }
8447 }
8448 }
8449 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
8450}
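// Editor's note (not in the original source): illustrative use of the
// variadic expressions handled above inside an assembler directive
// (symbol names are hypothetical):
//   .set total_sgpr, max(kernel_a.num_sgpr, kernel_b.num_sgpr)
//   .set any_uses_vcc, or(kernel_a.uses_vcc, kernel_b.uses_vcc)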
8451
8452ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8453 StringRef Name = getTokenStr();
8454 if (Name == "mul") {
8455 return parseIntWithPrefix("mul", Operands,
8456 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8457 }
8458
8459 if (Name == "div") {
8460 return parseIntWithPrefix("div", Operands,
8461 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8462 }
8463
8464 return ParseStatus::NoMatch;
8465}
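// Editor's note (not in the original source): illustrative output-modifier
// syntax handled by parseOModSI/ConvertOmodMul/ConvertOmodDiv above --
// mul:2 and mul:4 encode omod = 1 and 2, div:2 encodes omod = 3.
//   v_add_f32 v0, v1, v2 mul:2
//   v_add_f32 v0, v1, v2 div:2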
8466
8467// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8468// the number of src operands present, then copies that bit into src0_modifiers.
8469static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
8470 int Opc = Inst.getOpcode();
8471 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8472 if (OpSelIdx == -1)
8473 return;
8474
8475 int SrcNum;
8476 const int Ops[] = { AMDGPU::OpName::src0,
8477 AMDGPU::OpName::src1,
8478 AMDGPU::OpName::src2 };
8479 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
8480 ++SrcNum)
8481 ;
8482 assert(SrcNum > 0);
8483
8484 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8485
8486 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
8487 if (DstIdx == -1)
8488 return;
8489
8490 const MCOperand &DstOp = Inst.getOperand(DstIdx);
8491 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8492 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8493 if (DstOp.isReg() &&
8494 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
8495 if (AMDGPU::isHi(DstOp.getReg(), MRI))
8496 ModVal |= SISrcMods::DST_OP_SEL;
8497 } else {
8498 if ((OpSel & (1 << SrcNum)) != 0)
8499 ModVal |= SISrcMods::DST_OP_SEL;
8500 }
8501 Inst.getOperand(ModIdx).setImm(ModVal);
8502}
8503
8504void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8505 const OperandVector &Operands) {
8506 cvtVOP3P(Inst, Operands);
8507 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8508}
8509
8510void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8511 OptionalImmIndexMap &OptionalIdx) {
8512 cvtVOP3P(Inst, Operands, OptionalIdx);
8513 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8514}
8515
8516static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8517 return
8518 // 1. This operand is input modifiers
8519 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8520 // 2. This is not last operand
8521 && Desc.NumOperands > (OpNum + 1)
8522 // 3. Next operand is register class
8523 && Desc.operands()[OpNum + 1].RegClass != -1
8524 // 4. Next register is not tied to any other operand
8525 && Desc.getOperandConstraint(OpNum + 1,
8526 MCOI::OperandConstraint::TIED_TO) == -1;
8527}
8528
8529void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8530{
8531 OptionalImmIndexMap OptionalIdx;
8532 unsigned Opc = Inst.getOpcode();
8533
8534 unsigned I = 1;
8535 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8536 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8537 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8538 }
8539
8540 for (unsigned E = Operands.size(); I != E; ++I) {
8541 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8542 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8543 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8544 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
8545 Op.isInterpAttrChan()) {
8546 Inst.addOperand(MCOperand::createImm(Op.getImm()));
8547 } else if (Op.isImmModifier()) {
8548 OptionalIdx[Op.getImmTy()] = I;
8549 } else {
8550 llvm_unreachable("unhandled operand type");
8551 }
8552 }
8553
8554 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
8555 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8556 AMDGPUOperand::ImmTyHigh);
8557
8558 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8559 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8560 AMDGPUOperand::ImmTyClampSI);
8561
8562 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8563 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8564 AMDGPUOperand::ImmTyOModSI);
8565}
8566
8567void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8568{
8569 OptionalImmIndexMap OptionalIdx;
8570 unsigned Opc = Inst.getOpcode();
8571
8572 unsigned I = 1;
8573 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8574 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8575 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8576 }
8577
8578 for (unsigned E = Operands.size(); I != E; ++I) {
8579 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8580 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8581 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8582 } else if (Op.isImmModifier()) {
8583 OptionalIdx[Op.getImmTy()] = I;
8584 } else {
8585 llvm_unreachable("unhandled operand type");
8586 }
8587 }
8588
8589 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8590
8591 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8592 if (OpSelIdx != -1)
8593 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8594
8595 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8596
8597 if (OpSelIdx == -1)
8598 return;
8599
8600 const int Ops[] = { AMDGPU::OpName::src0,
8601 AMDGPU::OpName::src1,
8602 AMDGPU::OpName::src2 };
8603 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8604 AMDGPU::OpName::src1_modifiers,
8605 AMDGPU::OpName::src2_modifiers };
8606
8607 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8608
8609 for (int J = 0; J < 3; ++J) {
8610 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8611 if (OpIdx == -1)
8612 break;
8613
8614 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8615 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8616
8617 if ((OpSel & (1 << J)) != 0)
8618 ModVal |= SISrcMods::OP_SEL_0;
8619 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8620 (OpSel & (1 << 3)) != 0)
8621 ModVal |= SISrcMods::DST_OP_SEL;
8622
8623 Inst.getOperand(ModIdx).setImm(ModVal);
8624 }
8625}
8626
8627void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8628 OptionalImmIndexMap &OptionalIdx) {
8629 unsigned Opc = Inst.getOpcode();
8630
8631 unsigned I = 1;
8632 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8633 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8634 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8635 }
8636
8637 for (unsigned E = Operands.size(); I != E; ++I) {
8638 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8639 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8640 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8641 } else if (Op.isImmModifier()) {
8642 OptionalIdx[Op.getImmTy()] = I;
8643 } else if (Op.isRegOrImm()) {
8644 Op.addRegOrImmOperands(Inst, 1);
8645 } else {
8646 llvm_unreachable("unhandled operand type");
8647 }
8648 }
8649
8650 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
8651 assert(AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in));
8652 Inst.addOperand(Inst.getOperand(0));
8653 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8654 AMDGPUOperand::ImmTyByteSel);
8655 }
8656
8657 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8658 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8659 AMDGPUOperand::ImmTyClampSI);
8660
8661 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8662 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8663 AMDGPUOperand::ImmTyOModSI);
8664
8665 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8666 // they have a src2 register operand that is tied to the dst operand;
8667 // modifiers are not allowed for this operand in the assembler, so
8668 // src2_modifiers must be 0.
8669 if (isMAC(Opc)) {
8670 auto it = Inst.begin();
8671 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8672 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8673 ++it;
8674 // Copy the operand to ensure it's not invalidated when Inst grows.
8675 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8676 }
8677}
8678
8679void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8680 OptionalImmIndexMap OptionalIdx;
8681 cvtVOP3(Inst, Operands, OptionalIdx);
8682}
8683
8684void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8685 OptionalImmIndexMap &OptIdx) {
8686 const int Opc = Inst.getOpcode();
8687 const MCInstrDesc &Desc = MII.get(Opc);
8688
8689 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8690
8691 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8692 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
8693 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
8694 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
8695 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8696 Inst.addOperand(Inst.getOperand(0));
8697 }
8698
8699 // Adding vdst_in operand is already covered for these DPP instructions in
8700 // cvtVOP3DPP.
8701 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
8702 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
8703 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
8704 Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
8705 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 ||
8706 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
8707 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
8708 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
8709 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {
8710 assert(!IsPacked);
8711 Inst.addOperand(Inst.getOperand(0));
8712 }
8713
8714 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8715 // instruction, and then figure out where to actually put the modifiers
8716
8717 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8718 if (OpSelIdx != -1) {
8719 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8720 }
8721
8722 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8723 if (OpSelHiIdx != -1) {
8724 int DefaultVal = IsPacked ? -1 : 0;
8725 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8726 DefaultVal);
8727 }
8728
8729 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8730 if (NegLoIdx != -1)
8731 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8732
8733 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8734 if (NegHiIdx != -1)
8735 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8736
8737 const int Ops[] = { AMDGPU::OpName::src0,
8738 AMDGPU::OpName::src1,
8739 AMDGPU::OpName::src2 };
8740 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8741 AMDGPU::OpName::src1_modifiers,
8742 AMDGPU::OpName::src2_modifiers };
8743
8744 unsigned OpSel = 0;
8745 unsigned OpSelHi = 0;
8746 unsigned NegLo = 0;
8747 unsigned NegHi = 0;
8748
8749 if (OpSelIdx != -1)
8750 OpSel = Inst.getOperand(OpSelIdx).getImm();
8751
8752 if (OpSelHiIdx != -1)
8753 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8754
8755 if (NegLoIdx != -1)
8756 NegLo = Inst.getOperand(NegLoIdx).getImm();
8757
8758 if (NegHiIdx != -1)
8759 NegHi = Inst.getOperand(NegHiIdx).getImm();
8760
8761 for (int J = 0; J < 3; ++J) {
8762 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8763 if (OpIdx == -1)
8764 break;
8765
8766 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8767
8768 if (ModIdx == -1)
8769 continue;
8770
8771 uint32_t ModVal = 0;
8772
8773 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
8774 if (SrcOp.isReg() && getMRI()
8775 ->getRegClass(AMDGPU::VGPR_16RegClassID)
8776 .contains(SrcOp.getReg())) {
8777 bool VGPRSuffixIsHi = AMDGPU::isHi(SrcOp.getReg(), *getMRI());
8778 if (VGPRSuffixIsHi)
8779 ModVal |= SISrcMods::OP_SEL_0;
8780 } else {
8781 if ((OpSel & (1 << J)) != 0)
8782 ModVal |= SISrcMods::OP_SEL_0;
8783 }
8784
8785 if ((OpSelHi & (1 << J)) != 0)
8786 ModVal |= SISrcMods::OP_SEL_1;
8787
8788 if ((NegLo & (1 << J)) != 0)
8789 ModVal |= SISrcMods::NEG;
8790
8791 if ((NegHi & (1 << J)) != 0)
8792 ModVal |= SISrcMods::NEG_HI;
8793
8794 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8795 }
8796}
8797
8798void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8799 OptionalImmIndexMap OptIdx;
8800 cvtVOP3(Inst, Operands, OptIdx);
8801 cvtVOP3P(Inst, Operands, OptIdx);
8802}
8803
8804static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
8805 unsigned i, unsigned Opc, unsigned OpName) {
8806 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
8807 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
8808 else
8809 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
8810}
8811
8812void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
8813 unsigned Opc = Inst.getOpcode();
8814
8815 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
8816 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
8817 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
8818 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
8819 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
8820
8821 OptionalImmIndexMap OptIdx;
8822 for (unsigned i = 5; i < Operands.size(); ++i) {
8823 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8824 OptIdx[Op.getImmTy()] = i;
8825 }
8826
8827 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
8828 addOptionalImmOperand(Inst, Operands, OptIdx,
8829 AMDGPUOperand::ImmTyIndexKey8bit);
8830
8831 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
8832 addOptionalImmOperand(Inst, Operands, OptIdx,
8833 AMDGPUOperand::ImmTyIndexKey16bit);
8834
8835 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8836 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClampSI);
8837
8838 cvtVOP3P(Inst, Operands, OptIdx);
8839}
8840
8841//===----------------------------------------------------------------------===//
8842// VOPD
8843//===----------------------------------------------------------------------===//
8844
8845ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8846 if (!hasVOPD(getSTI()))
8847 return ParseStatus::NoMatch;
8848
8849 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8850 SMLoc S = getLoc();
8851 lex();
8852 lex();
8853 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8854 SMLoc OpYLoc = getLoc();
8855 StringRef OpYName;
8856 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
8857 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8858 return ParseStatus::Success;
8859 }
8860 return Error(OpYLoc, "expected a VOPDY instruction after ::");
8861 }
8862 return ParseStatus::NoMatch;
8863}
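// Editor's note (not in the original source): illustrative GFX11+ VOPD
// dual-issue syntax recognized by parseVOPD above, with the two component
// instructions joined by '::' (instruction spellings are assumptions):
//   v_dual_mov_b32 v0, v1 :: v_dual_add_f32 v2, v3, v4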
8864
8865// Create VOPD MCInst operands using parsed assembler operands.
8866void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8867 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
8868 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
8869 if (Op.isReg()) {
8870 Op.addRegOperands(Inst, 1);
8871 return;
8872 }
8873 if (Op.isImm()) {
8874 Op.addImmOperands(Inst, 1);
8875 return;
8876 }
8877 llvm_unreachable("Unhandled operand type in cvtVOPD");
8878 };
8879
8880 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8881
8882 // MCInst operands are ordered as follows:
8883 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8884
8885 for (auto CompIdx : VOPD::COMPONENTS) {
8886 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8887 }
8888
8889 for (auto CompIdx : VOPD::COMPONENTS) {
8890 const auto &CInfo = InstInfo[CompIdx];
8891 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8892 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8893 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8894 if (CInfo.hasSrc2Acc())
8895 addOp(CInfo.getIndexOfDstInParsedOperands());
8896 }
8897}
8898
8899//===----------------------------------------------------------------------===//
8900// dpp
8901//===----------------------------------------------------------------------===//
8902
8903bool AMDGPUOperand::isDPP8() const {
8904 return isImmTy(ImmTyDPP8);
8905}
8906
8907bool AMDGPUOperand::isDPPCtrl() const {
8908 using namespace AMDGPU::DPP;
8909
8910 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8911 if (result) {
8912 int64_t Imm = getImm();
8913 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8914 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8915 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8916 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8917 (Imm == DppCtrl::WAVE_SHL1) ||
8918 (Imm == DppCtrl::WAVE_ROL1) ||
8919 (Imm == DppCtrl::WAVE_SHR1) ||
8920 (Imm == DppCtrl::WAVE_ROR1) ||
8921 (Imm == DppCtrl::ROW_MIRROR) ||
8922 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8923 (Imm == DppCtrl::BCAST15) ||
8924 (Imm == DppCtrl::BCAST31) ||
8925 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8926 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8927 }
8928 return false;
8929}
8930
8931//===----------------------------------------------------------------------===//
8932// mAI
8933//===----------------------------------------------------------------------===//
8934
8935bool AMDGPUOperand::isBLGP() const {
8936 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8937}
8938
8939bool AMDGPUOperand::isCBSZ() const {
8940 return isImm() && getImmTy() == ImmTyCBSZ;
8941}
8942
8943bool AMDGPUOperand::isABID() const {
8944 return isImm() && getImmTy() == ImmTyABID;
8945}
8946
8947bool AMDGPUOperand::isS16Imm() const {
8948 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8949}
8950
8951bool AMDGPUOperand::isU16Imm() const {
8952 return isImmLiteral() && isUInt<16>(getImm());
8953}
8954
8955//===----------------------------------------------------------------------===//
8956// dim
8957//===----------------------------------------------------------------------===//
8958
8959bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8960 // We want to allow "dim:1D" etc.,
8961 // but the initial 1 is tokenized as an integer.
8962 std::string Token;
8963 if (isToken(AsmToken::Integer)) {
8964 SMLoc Loc = getToken().getEndLoc();
8965 Token = std::string(getTokenStr());
8966 lex();
8967 if (getLoc() != Loc)
8968 return false;
8969 }
8970
8971 StringRef Suffix;
8972 if (!parseId(Suffix))
8973 return false;
8974 Token += Suffix;
8975
8976 StringRef DimId = Token;
8977 if (DimId.starts_with("SQ_RSRC_IMG_"))
8978 DimId = DimId.drop_front(12);
8979
8980 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8981 if (!DimInfo)
8982 return false;
8983
8984 Encoding = DimInfo->Encoding;
8985 return true;
8986}
8987
8988ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8989 if (!isGFX10Plus())
8990 return ParseStatus::NoMatch;
8991
8992 SMLoc S = getLoc();
8993
8994 if (!trySkipId("dim", AsmToken::Colon))
8995 return ParseStatus::NoMatch;
8996
8997 unsigned Encoding;
8998 SMLoc Loc = getLoc();
8999 if (!parseDimId(Encoding))
9000 return Error(Loc, "invalid dim value");
9001
9002 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9003 AMDGPUOperand::ImmTyDim));
9004 return ParseStatus::Success;
9005}
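// Editor's note (not in the original source): illustrative dim values
// accepted by parseDim/parseDimId above; the SQ_RSRC_IMG_ prefix is
// optional. The surrounding image_load operands are illustrative only.
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:2D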
9006
9007//===----------------------------------------------------------------------===//
9008// dpp
9009//===----------------------------------------------------------------------===//
9010
9011ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9012 SMLoc S = getLoc();
9013
9014 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9015 return ParseStatus::NoMatch;
9016
9017 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9018
9019 int64_t Sels[8];
9020
9021 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9022 return ParseStatus::Failure;
9023
9024 for (size_t i = 0; i < 8; ++i) {
9025 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9026 return ParseStatus::Failure;
9027
9028 SMLoc Loc = getLoc();
9029 if (getParser().parseAbsoluteExpression(Sels[i]))
9030 return ParseStatus::Failure;
9031 if (0 > Sels[i] || 7 < Sels[i])
9032 return Error(Loc, "expected a 3-bit value");
9033 }
9034
9035 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9036 return ParseStatus::Failure;
9037
9038 unsigned DPP8 = 0;
9039 for (size_t i = 0; i < 8; ++i)
9040 DPP8 |= (Sels[i] << (i * 3));
9041
9042 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9043 return ParseStatus::Success;
9044}
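// Editor's note (not in the original source): illustrative dpp8 selector
// syntax accepted by parseDPP8 above -- eight 3-bit lane selects
// (instruction spelling is illustrative):
//   v_mov_b32 v0, v1 dpp8:[7,6,5,4,3,2,1,0]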
9045
9046bool
9047AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9048 const OperandVector &Operands) {
9049 if (Ctrl == "row_newbcast")
9050 return isGFX90A();
9051
9052 if (Ctrl == "row_share" ||
9053 Ctrl == "row_xmask")
9054 return isGFX10Plus();
9055
9056 if (Ctrl == "wave_shl" ||
9057 Ctrl == "wave_shr" ||
9058 Ctrl == "wave_rol" ||
9059 Ctrl == "wave_ror" ||
9060 Ctrl == "row_bcast")
9061 return isVI() || isGFX9();
9062
9063 return Ctrl == "row_mirror" ||
9064 Ctrl == "row_half_mirror" ||
9065 Ctrl == "quad_perm" ||
9066 Ctrl == "row_shl" ||
9067 Ctrl == "row_shr" ||
9068 Ctrl == "row_ror";
9069}
9070
9071int64_t
9072AMDGPUAsmParser::parseDPPCtrlPerm() {
9073 // quad_perm:[%d,%d,%d,%d]
9074
9075 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9076 return -1;
9077
9078 int64_t Val = 0;
9079 for (int i = 0; i < 4; ++i) {
9080 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9081 return -1;
9082
9083 int64_t Temp;
9084 SMLoc Loc = getLoc();
9085 if (getParser().parseAbsoluteExpression(Temp))
9086 return -1;
9087 if (Temp < 0 || Temp > 3) {
9088 Error(Loc, "expected a 2-bit value");
9089 return -1;
9090 }
9091
9092 Val += (Temp << i * 2);
9093 }
9094
9095 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9096 return -1;
9097
9098 return Val;
9099}
9100
9101int64_t
9102AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
9103 using namespace AMDGPU::DPP;
9104
9105 // sel:%d
9106
9107 int64_t Val;
9108 SMLoc Loc = getLoc();
9109
9110 if (getParser().parseAbsoluteExpression(Val))
9111 return -1;
9112
9113 struct DppCtrlCheck {
9114 int64_t Ctrl;
9115 int Lo;
9116 int Hi;
9117 };
9118
9119 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
9120 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
9121 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
9122 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
9123 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
9124 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
9125 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
9126 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
9127 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
9128 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
9129 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
9130 .Default({-1, 0, 0});
9131
9132 bool Valid;
9133 if (Check.Ctrl == -1) {
9134 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
9135 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
9136 } else {
9137 Valid = Check.Lo <= Val && Val <= Check.Hi;
9138 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
9139 }
9140
9141 if (!Valid) {
9142 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
9143 return -1;
9144 }
9145
9146 return Val;
9147}
9148
9149ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
9150 using namespace AMDGPU::DPP;
9151
9152 if (!isToken(AsmToken::Identifier) ||
9153 !isSupportedDPPCtrl(getTokenStr(), Operands))
9154 return ParseStatus::NoMatch;
9155
9156 SMLoc S = getLoc();
9157 int64_t Val = -1;
9158 StringRef Ctrl;
9159
9160 parseId(Ctrl);
9161
9162 if (Ctrl == "row_mirror") {
9163 Val = DppCtrl::ROW_MIRROR;
9164 } else if (Ctrl == "row_half_mirror") {
9165 Val = DppCtrl::ROW_HALF_MIRROR;
9166 } else {
9167 if (skipToken(AsmToken::Colon, "expected a colon")) {
9168 if (Ctrl == "quad_perm") {
9169 Val = parseDPPCtrlPerm();
9170 } else {
9171 Val = parseDPPCtrlSel(Ctrl);
9172 }
9173 }
9174 }
9175
9176 if (Val == -1)
9177 return ParseStatus::Failure;
9178
9179 Operands.push_back(
9180 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
9181 return ParseStatus::Success;
9182}
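// Editor's note (not in the original source): illustrative dpp_ctrl forms
// accepted by parseDPPCtrl above (row_mask/bank_mask are parsed elsewhere):
//   v_mov_b32 v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
//   v_mov_b32 v0, v1 row_shl:1
//   v_mov_b32 v0, v1 row_mirror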
9183
9184void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
9185 bool IsDPP8) {
9186 OptionalImmIndexMap OptionalIdx;
9187 unsigned Opc = Inst.getOpcode();
9188 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9189
9190 // MAC instructions are special because they have an 'old'
9191 // operand, which is not tied to dst (but is assumed to be).
9192 // They also have a dummy, unused src2_modifiers operand.
9193 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
9194 int Src2ModIdx =
9195 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
9196 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
9197 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
9198
9199 unsigned I = 1;
9200 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9201 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9202 }
9203
9204 int Fi = 0;
9205 for (unsigned E = Operands.size(); I != E; ++I) {
9206
9207 if (IsMAC) {
9208 int NumOperands = Inst.getNumOperands();
9209 if (OldIdx == NumOperands) {
9210 // Handle old operand
9211 constexpr int DST_IDX = 0;
9212 Inst.addOperand(Inst.getOperand(DST_IDX));
9213 } else if (Src2ModIdx == NumOperands) {
9214 // Add unused dummy src2_modifiers
9215 Inst.addOperand(MCOperand::createImm(0));
9216 }
9217 }
9218
9219 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
9220 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
9221 Inst.addOperand(Inst.getOperand(0));
9222 }
9223
9224 bool IsVOP3CvtSrDpp =
9225 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9226 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9227 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9228 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
9229 if (IsVOP3CvtSrDpp) {
9230 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
9231 Inst.addOperand(MCOperand::createImm(0));
9232 Inst.addOperand(MCOperand::createReg(0));
9233 }
9234 }
9235
9236 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9237 MCOI::TIED_TO);
9238 if (TiedTo != -1) {
9239 assert((unsigned)TiedTo < Inst.getNumOperands());
9240 // handle tied old or src2 for MAC instructions
9241 Inst.addOperand(Inst.getOperand(TiedTo));
9242 }
9243 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9244 // Add the register arguments
9245 if (IsDPP8 && Op.isDppFI()) {
9246 Fi = Op.getImm();
9247 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9248 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9249 } else if (Op.isReg()) {
9250 Op.addRegOperands(Inst, 1);
9251 } else if (Op.isImm() &&
9252 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
9253 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
9254 Op.addImmOperands(Inst, 1);
9255 } else if (Op.isImm()) {
9256 OptionalIdx[Op.getImmTy()] = I;
9257 } else {
9258 llvm_unreachable("unhandled operand type");
9259 }
9260 }
9261
9262 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel))
9263 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9264 AMDGPUOperand::ImmTyByteSel);
9265
9266 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9267 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
9268
9269 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9270 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
9271
9272 if (Desc.TSFlags & SIInstrFlags::VOP3P)
9273 cvtVOP3P(Inst, Operands, OptionalIdx);
9274 else if (Desc.TSFlags & SIInstrFlags::VOP3)
9275 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
9276 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9277 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9278 }
9279
9280 if (IsDPP8) {
9281 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
9282 using namespace llvm::AMDGPU::DPP;
9283 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9284 } else {
9285 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
9286 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9287 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9288 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9289
9290 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
9291 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9292 AMDGPUOperand::ImmTyDppFI);
9293 }
9294}
9295
9296void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
9297 OptionalImmIndexMap OptionalIdx;
9298
9299 unsigned I = 1;
9300 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9301 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9302 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9303 }
9304
9305 int Fi = 0;
9306 for (unsigned E = Operands.size(); I != E; ++I) {
9307 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9308 MCOI::TIED_TO);
9309 if (TiedTo != -1) {
9310 assert((unsigned)TiedTo < Inst.getNumOperands());
9311 // handle tied old or src2 for MAC instructions
9312 Inst.addOperand(Inst.getOperand(TiedTo));
9313 }
9314 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9315 // Add the register arguments
9316 if (Op.isReg() && validateVccOperand(Op.getReg())) {
9317 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
9318 // Skip it.
9319 continue;
9320 }
9321
9322 if (IsDPP8) {
9323 if (Op.isDPP8()) {
9324 Op.addImmOperands(Inst, 1);
9325 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9326 Op.addRegWithFPInputModsOperands(Inst, 2);
9327 } else if (Op.isDppFI()) {
9328 Fi = Op.getImm();
9329 } else if (Op.isReg()) {
9330 Op.addRegOperands(Inst, 1);
9331 } else {
9332 llvm_unreachable("Invalid operand type");
9333 }
9334 } else {
9335 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9336 Op.addRegWithFPInputModsOperands(Inst, 2);
9337 } else if (Op.isReg()) {
9338 Op.addRegOperands(Inst, 1);
9339 } else if (Op.isDPPCtrl()) {
9340 Op.addImmOperands(Inst, 1);
9341 } else if (Op.isImm()) {
9342 // Handle optional arguments
9343 OptionalIdx[Op.getImmTy()] = I;
9344 } else {
9345 llvm_unreachable("Invalid operand type");
9346 }
9347 }
9348 }
9349
9350 if (IsDPP8) {
9351 using namespace llvm::AMDGPU::DPP;
9352 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9353 } else {
9354 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9355 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9356 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9357 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
9358 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9359 AMDGPUOperand::ImmTyDppFI);
9360 }
9361 }
9362}
9363
9364//===----------------------------------------------------------------------===//
9365// sdwa
9366//===----------------------------------------------------------------------===//
9367
9368ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
9369 StringRef Prefix,
9370 AMDGPUOperand::ImmTy Type) {
9371 using namespace llvm::AMDGPU::SDWA;
9372
9373 SMLoc S = getLoc();
9374 StringRef Value;
9375
9376 SMLoc StringLoc;
9377 ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
9378 if (!Res.isSuccess())
9379 return Res;
9380
9381 int64_t Int;
9383 .Case("BYTE_0", SdwaSel::BYTE_0)
9384 .Case("BYTE_1", SdwaSel::BYTE_1)
9385 .Case("BYTE_2", SdwaSel::BYTE_2)
9386 .Case("BYTE_3", SdwaSel::BYTE_3)
9387 .Case("WORD_0", SdwaSel::WORD_0)
9388 .Case("WORD_1", SdwaSel::WORD_1)
9389 .Case("DWORD", SdwaSel::DWORD)
9390 .Default(0xffffffff);
9391
9392 if (Int == 0xffffffff)
9393 return Error(StringLoc, "invalid " + Twine(Prefix) + " value");
9394
9395 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
9396 return ParseStatus::Success;
9397}
9398
9399ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
9400 using namespace llvm::AMDGPU::SDWA;
9401
9402 SMLoc S = getLoc();
9403 StringRef Value;
9404
9405 SMLoc StringLoc;
9406 ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
9407 if (!Res.isSuccess())
9408 return Res;
9409
9410 int64_t Int;
9412 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
9413 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
9414 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
9415 .Default(0xffffffff);
9416
9417 if (Int == 0xffffffff)
9418 return Error(StringLoc, "invalid dst_unused value");
9419
9420 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
9421 return ParseStatus::Success;
9422}
9423
9424void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
9425 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
9426}
9427
9428void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
9429 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
9430}
9431
9432void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
9433 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
9434}
9435
9436void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
9437 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
9438}
9439
9440void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
9441 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
9442}
9443
9444void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
9445 uint64_t BasicInstType,
9446 bool SkipDstVcc,
9447 bool SkipSrcVcc) {
9448 using namespace llvm::AMDGPU::SDWA;
9449
9450 OptionalImmIndexMap OptionalIdx;
9451 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9452 bool SkippedVcc = false;
9453
9454 unsigned I = 1;
9455 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9456 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9457 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9458 }
9459
9460 for (unsigned E = Operands.size(); I != E; ++I) {
9461 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9462 if (SkipVcc && !SkippedVcc && Op.isReg() &&
9463 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
9464 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
9465 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
9466 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
9467 // Skip VCC only if we didn't skip it on previous iteration.
9468 // Note that src0 and src1 occupy 2 slots each because of modifiers.
9469 if (BasicInstType == SIInstrFlags::VOP2 &&
9470 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9471 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9472 SkippedVcc = true;
9473 continue;
9474 } else if (BasicInstType == SIInstrFlags::VOPC &&
9475 Inst.getNumOperands() == 0) {
9476 SkippedVcc = true;
9477 continue;
9478 }
9479 }
9480 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9481 Op.addRegOrImmWithInputModsOperands(Inst, 2);
9482 } else if (Op.isImm()) {
9483 // Handle optional arguments
9484 OptionalIdx[Op.getImmTy()] = I;
9485 } else {
9486 llvm_unreachable("Invalid operand type");
9487 }
9488 SkippedVcc = false;
9489 }
9490
9491 const unsigned Opc = Inst.getOpcode();
9492 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9493 Opc != AMDGPU::V_NOP_sdwa_vi) {
9494 // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
9495 switch (BasicInstType) {
9496 case SIInstrFlags::VOP1:
9497 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9498 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9499 AMDGPUOperand::ImmTyClampSI, 0);
9500
9501 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9502 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9503 AMDGPUOperand::ImmTyOModSI, 0);
9504
9505 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
9506 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9507 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9508
9509 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
9510 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9511 AMDGPUOperand::ImmTySDWADstUnused,
9512 DstUnused::UNUSED_PRESERVE);
9513
9514 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9515 break;
9516
9517 case SIInstrFlags::VOP2:
9518 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9519
9520 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
9521 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9522
9523 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9524 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
9525 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9526 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9527 break;
9528
9529 case SIInstrFlags::VOPC:
9530 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
9531 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9532 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9533 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9534 break;
9535
9536 default:
9537 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9538 }
9539 }
9540
9541 // special case v_mac_{f16, f32}:
9542 // it has src2 register operand that is tied to dst operand
9543 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9544 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
9545 auto it = Inst.begin();
9546 std::advance(
9547 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9548 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9549 }
9550}
9551
9552/// Force static initialization.
9556}
9557
9558#define GET_REGISTER_MATCHER
9559#define GET_MATCHER_IMPLEMENTATION
9560#define GET_MNEMONIC_SPELL_CHECKER
9561#define GET_MNEMONIC_CHECKER
9562#include "AMDGPUGenAsmMatcher.inc"
9563
9564ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
9565 unsigned MCK) {
9566 switch (MCK) {
9567 case MCK_addr64:
9568 return parseTokenOp("addr64", Operands);
9569 case MCK_done:
9570 return parseTokenOp("done", Operands);
9571 case MCK_idxen:
9572 return parseTokenOp("idxen", Operands);
9573 case MCK_lds:
9574 return parseTokenOp("lds", Operands);
9575 case MCK_offen:
9576 return parseTokenOp("offen", Operands);
9577 case MCK_off:
9578 return parseTokenOp("off", Operands);
9579 case MCK_row_95_en:
9580 return parseTokenOp("row_en", Operands);
9581 case MCK_gds:
9582 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
9583 case MCK_tfe:
9584 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9585 }
9586 return tryCustomParseOperand(Operands, MCK);
9587}
9588
9589// This function should be defined after auto-generated include so that we have
9590// MatchClassKind enum defined
9591unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9592 unsigned Kind) {
9593 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
9594 // But MatchInstructionImpl() expects to meet token and fails to validate
9595 // operand. This method checks if we are given immediate operand but expect to
9596 // get corresponding token.
9597 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9598 switch (Kind) {
9599 case MCK_addr64:
9600 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9601 case MCK_gds:
9602 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9603 case MCK_lds:
9604 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9605 case MCK_idxen:
9606 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9607 case MCK_offen:
9608 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9609 case MCK_tfe:
9610 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9611 case MCK_SSrc_b32:
9612 // When operands have expression values, they will return true for isToken,
9613 // because it is not possible to distinguish between a token and an
9614 // expression at parse time. MatchInstructionImpl() will always try to
9615 // match an operand as a token, when isToken returns true, and when the
9616 // name of the expression is not a valid token, the match will fail,
9617 // so we need to handle it here.
9618 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
9619 case MCK_SSrc_f32:
9620 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
9621 case MCK_SOPPBrTarget:
9622 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9623 case MCK_VReg32OrOff:
9624 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9625 case MCK_InterpSlot:
9626 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9627 case MCK_InterpAttr:
9628 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9629 case MCK_InterpAttrChan:
9630 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9631 case MCK_SReg_64:
9632 case MCK_SReg_64_XEXEC:
9633 // Null is defined as a 32-bit register but
9634 // it should also be enabled with 64-bit operands.
9635 // The following code enables it for SReg_64 operands
9636 // used as source and destination. Remaining source
9637 // operands are handled in isInlinableImm.
9638 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9639 default:
9640 return Match_InvalidOperand;
9641 }
9642}
9643
9644//===----------------------------------------------------------------------===//
9645// endpgm
9646//===----------------------------------------------------------------------===//
9647
9648ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
9649 SMLoc S = getLoc();
9650 int64_t Imm = 0;
9651
9652 if (!parseExpr(Imm)) {
9653 // The operand is optional, if not present default to 0
9654 Imm = 0;
9655 }
9656
9657 if (!isUInt<16>(Imm))
9658 return Error(S, "expected a 16-bit value");
9659
9660 Operands.push_back(
9661 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9662 return ParseStatus::Success;
9663}
9664
9665bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9666
9667//===----------------------------------------------------------------------===//
9668// LDSDIR
9669//===----------------------------------------------------------------------===//
9670
9671bool AMDGPUOperand::isWaitVDST() const { return isImmTy(ImmTyWaitVDST); }
9672
9673bool AMDGPUOperand::isWaitVAVDst() const { return isImmTy(ImmTyWaitVAVDst); }
9674
9675bool AMDGPUOperand::isWaitVMVSrc() const { return isImmTy(ImmTyWaitVMVSrc); }
9676
9677//===----------------------------------------------------------------------===//
9678// VINTERP
9679//===----------------------------------------------------------------------===//
9680
9681bool AMDGPUOperand::isWaitEXP() const { return isImmTy(ImmTyWaitEXP); }
9682
9683//===----------------------------------------------------------------------===//
9684// Split Barrier
9685//===----------------------------------------------------------------------===//
9686
9687bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }