1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
14#include "SIDefines.h"
15#include "SIInstrInfo.h"
16#include "SIRegisterInfo.h"
21#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/StringSet.h"
24#include "llvm/ADT/Twine.h"
27#include "llvm/MC/MCAsmInfo.h"
28#include "llvm/MC/MCContext.h"
29#include "llvm/MC/MCExpr.h"
30#include "llvm/MC/MCInst.h"
31#include "llvm/MC/MCInstrDesc.h"
36#include "llvm/MC/MCSymbol.h"
43#include <optional>
44
45using namespace llvm;
46using namespace llvm::AMDGPU;
47using namespace llvm::amdhsa;
48
49namespace {
50
51class AMDGPUAsmParser;
52
53enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
54
55//===----------------------------------------------------------------------===//
56// Operand
57//===----------------------------------------------------------------------===//
58
59class AMDGPUOperand : public MCParsedAsmOperand {
60 enum KindTy {
61 Token,
62 Immediate,
63 Register,
64 Expression,
65 } Kind;
66
67 SMLoc StartLoc, EndLoc;
68 const AMDGPUAsmParser *AsmParser;
69
70public:
71 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
72 : Kind(Kind_), AsmParser(AsmParser_) {}
73
74 using Ptr = std::unique_ptr<AMDGPUOperand>;
75
76 struct Modifiers {
77 bool Abs = false;
78 bool Neg = false;
79 bool Sext = false;
80 bool Lit = false;
81
82 bool hasFPModifiers() const { return Abs || Neg; }
83 bool hasIntModifiers() const { return Sext; }
84 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
85
86 int64_t getFPModifiersOperand() const {
87 int64_t Operand = 0;
88 Operand |= Abs ? SISrcMods::ABS : 0u;
89 Operand |= Neg ? SISrcMods::NEG : 0u;
90 return Operand;
91 }
92
93 int64_t getIntModifiersOperand() const {
94 int64_t Operand = 0;
95 Operand |= Sext ? SISrcMods::SEXT : 0u;
96 return Operand;
97 }
98
99 int64_t getModifiersOperand() const {
100 assert(!(hasFPModifiers() && hasIntModifiers())
101 && "fp and int modifiers should not be used simultaneously");
102 if (hasFPModifiers()) {
103 return getFPModifiersOperand();
104 } else if (hasIntModifiers()) {
105 return getIntModifiersOperand();
106 } else {
107 return 0;
108 }
109 }
110
111 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
112 };
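 // Illustrative note (not part of the original source): for a source operand
 // written as -|v0| both Abs and Neg are set, so getModifiersOperand() yields
 // SISrcMods::ABS | SISrcMods::NEG; for sext(v0) it yields SISrcMods::SEXT.
 // The assert above rejects mixing the FP and integer modifier groups on a
 // single operand.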
113
114 enum ImmTy {
115 ImmTyNone,
116 ImmTyGDS,
117 ImmTyLDS,
118 ImmTyOffen,
119 ImmTyIdxen,
120 ImmTyAddr64,
121 ImmTyOffset,
122 ImmTyInstOffset,
123 ImmTyOffset0,
124 ImmTyOffset1,
125 ImmTySMEMOffsetMod,
126 ImmTyCPol,
127 ImmTyTFE,
128 ImmTyD16,
129 ImmTyClampSI,
130 ImmTyOModSI,
131 ImmTySDWADstSel,
132 ImmTySDWASrc0Sel,
133 ImmTySDWASrc1Sel,
134 ImmTySDWADstUnused,
135 ImmTyDMask,
136 ImmTyDim,
137 ImmTyUNorm,
138 ImmTyDA,
139 ImmTyR128A16,
140 ImmTyA16,
141 ImmTyLWE,
142 ImmTyExpTgt,
143 ImmTyExpCompr,
144 ImmTyExpVM,
145 ImmTyFORMAT,
146 ImmTyHwreg,
147 ImmTyOff,
148 ImmTySendMsg,
149 ImmTyInterpSlot,
150 ImmTyInterpAttr,
151 ImmTyInterpAttrChan,
152 ImmTyOpSel,
153 ImmTyOpSelHi,
154 ImmTyNegLo,
155 ImmTyNegHi,
156 ImmTyIndexKey8bit,
157 ImmTyIndexKey16bit,
158 ImmTyDPP8,
159 ImmTyDppCtrl,
160 ImmTyDppRowMask,
161 ImmTyDppBankMask,
162 ImmTyDppBoundCtrl,
163 ImmTyDppFI,
164 ImmTySwizzle,
165 ImmTyGprIdxMode,
166 ImmTyHigh,
167 ImmTyBLGP,
168 ImmTyCBSZ,
169 ImmTyABID,
170 ImmTyEndpgm,
171 ImmTyWaitVDST,
172 ImmTyWaitEXP,
173 ImmTyWaitVAVDst,
174 ImmTyWaitVMVSrc,
175 };
176
177 // Immediate operand kind.
178 // It helps to identify the location of an offending operand after an error.
179 // Note that regular literals and mandatory literals (KImm) must be handled
180 // differently. When looking for an offending operand, we should usually
181 // ignore mandatory literals because they are part of the instruction and
182 // cannot be changed. Report location of mandatory operands only for VOPD,
183 // when both OpX and OpY have a KImm and there are no other literals.
184 enum ImmKindTy {
185 ImmKindTyNone,
186 ImmKindTyLiteral,
187 ImmKindTyMandatoryLiteral,
188 ImmKindTyConst,
189 };
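 // Illustrative note (not part of the original source): "s_mov_b32 s0, 0x12345"
 // carries a regular literal (ImmKindTyLiteral), while "v_fmaak_f32 v0, v1, v2,
 // 0x42f60000" carries a mandatory literal (KImm, ImmKindTyMandatoryLiteral)
 // that is a fixed part of the encoding and is therefore normally skipped when
 // searching for the offending operand of a diagnostic.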
190
191private:
192 struct TokOp {
193 const char *Data;
194 unsigned Length;
195 };
196
197 struct ImmOp {
198 int64_t Val;
199 ImmTy Type;
200 bool IsFPImm;
201 mutable ImmKindTy Kind;
202 Modifiers Mods;
203 };
204
205 struct RegOp {
206 unsigned RegNo;
207 Modifiers Mods;
208 };
209
210 union {
211 TokOp Tok;
212 ImmOp Imm;
213 RegOp Reg;
214 const MCExpr *Expr;
215 };
216
217public:
218 bool isToken() const override { return Kind == Token; }
219
220 bool isSymbolRefExpr() const {
221 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
222 }
223
224 bool isImm() const override {
225 return Kind == Immediate;
226 }
227
228 void setImmKindNone() const {
229 assert(isImm());
230 Imm.Kind = ImmKindTyNone;
231 }
232
233 void setImmKindLiteral() const {
234 assert(isImm());
235 Imm.Kind = ImmKindTyLiteral;
236 }
237
238 void setImmKindMandatoryLiteral() const {
239 assert(isImm());
240 Imm.Kind = ImmKindTyMandatoryLiteral;
241 }
242
243 void setImmKindConst() const {
244 assert(isImm());
245 Imm.Kind = ImmKindTyConst;
246 }
247
248 bool IsImmKindLiteral() const {
249 return isImm() && Imm.Kind == ImmKindTyLiteral;
250 }
251
252 bool IsImmKindMandatoryLiteral() const {
253 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
254 }
255
256 bool isImmKindConst() const {
257 return isImm() && Imm.Kind == ImmKindTyConst;
258 }
259
260 bool isInlinableImm(MVT type) const;
261 bool isLiteralImm(MVT type) const;
262
263 bool isRegKind() const {
264 return Kind == Register;
265 }
266
267 bool isReg() const override {
268 return isRegKind() && !hasModifiers();
269 }
270
271 bool isRegOrInline(unsigned RCID, MVT type) const {
272 return isRegClass(RCID) || isInlinableImm(type);
273 }
274
275 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
276 return isRegOrInline(RCID, type) || isLiteralImm(type);
277 }
278
279 bool isRegOrImmWithInt16InputMods() const {
280 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
281 }
282
283 bool isRegOrImmWithIntT16InputMods() const {
284 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
285 }
286
287 bool isRegOrImmWithInt32InputMods() const {
288 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
289 }
290
291 bool isRegOrInlineImmWithInt16InputMods() const {
292 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
293 }
294
295 bool isRegOrInlineImmWithInt32InputMods() const {
296 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
297 }
298
299 bool isRegOrImmWithInt64InputMods() const {
300 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
301 }
302
303 bool isRegOrImmWithFP16InputMods() const {
304 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
305 }
306
307 bool isRegOrImmWithFPT16InputMods() const {
308 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
309 }
310
311 bool isRegOrImmWithFP32InputMods() const {
312 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
313 }
314
315 bool isRegOrImmWithFP64InputMods() const {
316 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
317 }
318
319 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
320 return isRegOrInline(
321 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
322 }
323
324 bool isRegOrInlineImmWithFP32InputMods() const {
325 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
326 }
327
328 bool isPackedFP16InputMods() const {
329 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
330 }
331
332 bool isVReg() const {
333 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
334 isRegClass(AMDGPU::VReg_64RegClassID) ||
335 isRegClass(AMDGPU::VReg_96RegClassID) ||
336 isRegClass(AMDGPU::VReg_128RegClassID) ||
337 isRegClass(AMDGPU::VReg_160RegClassID) ||
338 isRegClass(AMDGPU::VReg_192RegClassID) ||
339 isRegClass(AMDGPU::VReg_256RegClassID) ||
340 isRegClass(AMDGPU::VReg_512RegClassID) ||
341 isRegClass(AMDGPU::VReg_1024RegClassID);
342 }
343
344 bool isVReg32() const {
345 return isRegClass(AMDGPU::VGPR_32RegClassID);
346 }
347
348 bool isVReg32OrOff() const {
349 return isOff() || isVReg32();
350 }
351
352 bool isNull() const {
353 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
354 }
355
356 bool isVRegWithInputMods() const;
357 template <bool IsFake16> bool isT16VRegWithInputMods() const;
358
359 bool isSDWAOperand(MVT type) const;
360 bool isSDWAFP16Operand() const;
361 bool isSDWAFP32Operand() const;
362 bool isSDWAInt16Operand() const;
363 bool isSDWAInt32Operand() const;
364
365 bool isImmTy(ImmTy ImmT) const {
366 return isImm() && Imm.Type == ImmT;
367 }
368
369 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
370
371 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
372
373 bool isImmModifier() const {
374 return isImm() && Imm.Type != ImmTyNone;
375 }
376
377 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
378 bool isDMask() const { return isImmTy(ImmTyDMask); }
379 bool isDim() const { return isImmTy(ImmTyDim); }
380 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
381 bool isOff() const { return isImmTy(ImmTyOff); }
382 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
383 bool isOffen() const { return isImmTy(ImmTyOffen); }
384 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
385 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
386 bool isOffset() const { return isImmTy(ImmTyOffset); }
387 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
388 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
389 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
390 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
391 bool isGDS() const { return isImmTy(ImmTyGDS); }
392 bool isLDS() const { return isImmTy(ImmTyLDS); }
393 bool isCPol() const { return isImmTy(ImmTyCPol); }
394 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
395 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
396 bool isTFE() const { return isImmTy(ImmTyTFE); }
397 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
398 bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
399 bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
400 bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
401 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
402 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
403 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
404 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
405 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
406 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
407 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
408 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
409 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
410 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
411 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
412 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
413
414 bool isRegOrImm() const {
415 return isReg() || isImm();
416 }
417
418 bool isRegClass(unsigned RCID) const;
419
420 bool isInlineValue() const;
421
422 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
423 return isRegOrInline(RCID, type) && !hasModifiers();
424 }
425
426 bool isSCSrcB16() const {
427 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
428 }
429
430 bool isSCSrcV2B16() const {
431 return isSCSrcB16();
432 }
433
434 bool isSCSrc_b32() const {
435 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
436 }
437
438 bool isSCSrc_b64() const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
440 }
441
442 bool isBoolReg() const;
443
444 bool isSCSrcF16() const {
445 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
446 }
447
448 bool isSCSrcV2F16() const {
449 return isSCSrcF16();
450 }
451
452 bool isSCSrcF32() const {
453 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
454 }
455
456 bool isSCSrcF64() const {
457 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
458 }
459
460 bool isSSrc_b32() const {
461 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
462 }
463
464 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
465
466 bool isSSrcV2B16() const {
467 llvm_unreachable("cannot happen");
468 return isSSrc_b16();
469 }
470
471 bool isSSrc_b64() const {
472 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
473 // See isVSrc64().
474 return isSCSrc_b64() || isLiteralImm(MVT::i64);
475 }
476
477 bool isSSrc_f32() const {
478 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
479 }
480
481 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
482
483 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
484
485 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
486
487 bool isSSrcV2F16() const {
488 llvm_unreachable("cannot happen");
489 return isSSrc_f16();
490 }
491
492 bool isSSrcV2FP32() const {
493 llvm_unreachable("cannot happen");
494 return isSSrc_f32();
495 }
496
497 bool isSCSrcV2FP32() const {
498 llvm_unreachable("cannot happen");
499 return isSCSrcF32();
500 }
501
502 bool isSSrcV2INT32() const {
503 llvm_unreachable("cannot happen");
504 return isSSrc_b32();
505 }
506
507 bool isSCSrcV2INT32() const {
508 llvm_unreachable("cannot happen");
509 return isSCSrc_b32();
510 }
511
512 bool isSSrcOrLds_b32() const {
513 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
514 isLiteralImm(MVT::i32) || isExpr();
515 }
516
517 bool isVCSrc_b32() const {
518 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
519 }
520
521 bool isVCSrcB64() const {
522 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
523 }
524
525 bool isVCSrcTB16() const {
526 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
527 }
528
529 bool isVCSrcTB16_Lo128() const {
530 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
531 }
532
533 bool isVCSrcFake16B16_Lo128() const {
534 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
535 }
536
537 bool isVCSrc_b16() const {
538 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
539 }
540
541 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
542
543 bool isVCSrc_f32() const {
544 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
545 }
546
547 bool isVCSrcF64() const {
548 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
549 }
550
551 bool isVCSrcTBF16() const {
552 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
553 }
554
555 bool isVCSrcTF16() const {
556 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
557 }
558
559 bool isVCSrcTBF16_Lo128() const {
560 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
561 }
562
563 bool isVCSrcTF16_Lo128() const {
564 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
565 }
566
567 bool isVCSrcFake16BF16_Lo128() const {
568 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
569 }
570
571 bool isVCSrcFake16F16_Lo128() const {
572 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
573 }
574
575 bool isVCSrc_bf16() const {
576 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
577 }
578
579 bool isVCSrc_f16() const {
580 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
581 }
582
583 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
584
585 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
586
587 bool isVSrc_b32() const {
588 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
589 }
590
591 bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
592
593 bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
594
595 bool isVSrcT_b16_Lo128() const {
596 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
597 }
598
599 bool isVSrcFake16_b16_Lo128() const {
600 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
601 }
602
603 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
604
605 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
606
607 bool isVCSrcV2FP32() const {
608 return isVCSrcF64();
609 }
610
611 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
612
613 bool isVCSrcV2INT32() const {
614 return isVCSrcB64();
615 }
616
617 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
618
619 bool isVSrc_f32() const {
620 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
621 }
622
623 bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
624
625 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
626
627 bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
628
629 bool isVSrcT_bf16_Lo128() const {
630 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
631 }
632
633 bool isVSrcT_f16_Lo128() const {
634 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
635 }
636
637 bool isVSrcFake16_bf16_Lo128() const {
638 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
639 }
640
641 bool isVSrcFake16_f16_Lo128() const {
642 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
643 }
644
645 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
646
647 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
648
649 bool isVSrc_v2bf16() const {
650 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
651 }
652
653 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
654
655 bool isVISrcB32() const {
656 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
657 }
658
659 bool isVISrcB16() const {
660 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
661 }
662
663 bool isVISrcV2B16() const {
664 return isVISrcB16();
665 }
666
667 bool isVISrcF32() const {
668 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
669 }
670
671 bool isVISrcF16() const {
672 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
673 }
674
675 bool isVISrcV2F16() const {
676 return isVISrcF16() || isVISrcB32();
677 }
678
679 bool isVISrc_64_bf16() const {
680 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
681 }
682
683 bool isVISrc_64_f16() const {
684 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
685 }
686
687 bool isVISrc_64_b32() const {
688 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
689 }
690
691 bool isVISrc_64B64() const {
692 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
693 }
694
695 bool isVISrc_64_f64() const {
696 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
697 }
698
699 bool isVISrc_64V2FP32() const {
700 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
701 }
702
703 bool isVISrc_64V2INT32() const {
704 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
705 }
706
707 bool isVISrc_256_b32() const {
708 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
709 }
710
711 bool isVISrc_256_f32() const {
712 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
713 }
714
715 bool isVISrc_256B64() const {
716 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
717 }
718
719 bool isVISrc_256_f64() const {
720 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
721 }
722
723 bool isVISrc_128B16() const {
724 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
725 }
726
727 bool isVISrc_128V2B16() const {
728 return isVISrc_128B16();
729 }
730
731 bool isVISrc_128_b32() const {
732 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
733 }
734
735 bool isVISrc_128_f32() const {
736 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
737 }
738
739 bool isVISrc_256V2FP32() const {
740 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
741 }
742
743 bool isVISrc_256V2INT32() const {
744 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
745 }
746
747 bool isVISrc_512_b32() const {
748 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
749 }
750
751 bool isVISrc_512B16() const {
752 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
753 }
754
755 bool isVISrc_512V2B16() const {
756 return isVISrc_512B16();
757 }
758
759 bool isVISrc_512_f32() const {
760 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
761 }
762
763 bool isVISrc_512F16() const {
764 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
765 }
766
767 bool isVISrc_512V2F16() const {
768 return isVISrc_512F16() || isVISrc_512_b32();
769 }
770
771 bool isVISrc_1024_b32() const {
772 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
773 }
774
775 bool isVISrc_1024B16() const {
776 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
777 }
778
779 bool isVISrc_1024V2B16() const {
780 return isVISrc_1024B16();
781 }
782
783 bool isVISrc_1024_f32() const {
784 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
785 }
786
787 bool isVISrc_1024F16() const {
788 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
789 }
790
791 bool isVISrc_1024V2F16() const {
792 return isVISrc_1024F16() || isVISrc_1024_b32();
793 }
794
795 bool isAISrcB32() const {
796 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
797 }
798
799 bool isAISrcB16() const {
800 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
801 }
802
803 bool isAISrcV2B16() const {
804 return isAISrcB16();
805 }
806
807 bool isAISrcF32() const {
808 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
809 }
810
811 bool isAISrcF16() const {
812 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
813 }
814
815 bool isAISrcV2F16() const {
816 return isAISrcF16() || isAISrcB32();
817 }
818
819 bool isAISrc_64B64() const {
820 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
821 }
822
823 bool isAISrc_64_f64() const {
824 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
825 }
826
827 bool isAISrc_128_b32() const {
828 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
829 }
830
831 bool isAISrc_128B16() const {
832 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
833 }
834
835 bool isAISrc_128V2B16() const {
836 return isAISrc_128B16();
837 }
838
839 bool isAISrc_128_f32() const {
840 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
841 }
842
843 bool isAISrc_128F16() const {
844 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
845 }
846
847 bool isAISrc_128V2F16() const {
848 return isAISrc_128F16() || isAISrc_128_b32();
849 }
850
851 bool isVISrc_128_bf16() const {
852 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
853 }
854
855 bool isVISrc_128_f16() const {
856 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
857 }
858
859 bool isVISrc_128V2F16() const {
860 return isVISrc_128_f16() || isVISrc_128_b32();
861 }
862
863 bool isAISrc_256B64() const {
864 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
865 }
866
867 bool isAISrc_256_f64() const {
868 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
869 }
870
871 bool isAISrc_512_b32() const {
872 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
873 }
874
875 bool isAISrc_512B16() const {
876 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
877 }
878
879 bool isAISrc_512V2B16() const {
880 return isAISrc_512B16();
881 }
882
883 bool isAISrc_512_f32() const {
884 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
885 }
886
887 bool isAISrc_512F16() const {
888 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
889 }
890
891 bool isAISrc_512V2F16() const {
892 return isAISrc_512F16() || isAISrc_512_b32();
893 }
894
895 bool isAISrc_1024_b32() const {
896 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
897 }
898
899 bool isAISrc_1024B16() const {
900 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
901 }
902
903 bool isAISrc_1024V2B16() const {
904 return isAISrc_1024B16();
905 }
906
907 bool isAISrc_1024_f32() const {
908 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
909 }
910
911 bool isAISrc_1024F16() const {
912 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
913 }
914
915 bool isAISrc_1024V2F16() const {
916 return isAISrc_1024F16() || isAISrc_1024_b32();
917 }
918
919 bool isKImmFP32() const {
920 return isLiteralImm(MVT::f32);
921 }
922
923 bool isKImmFP16() const {
924 return isLiteralImm(MVT::f16);
925 }
926
927 bool isMem() const override {
928 return false;
929 }
930
931 bool isExpr() const {
932 return Kind == Expression;
933 }
934
935 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
936
937 bool isSWaitCnt() const;
938 bool isDepCtr() const;
939 bool isSDelayALU() const;
940 bool isHwreg() const;
941 bool isSendMsg() const;
942 bool isSplitBarrier() const;
943 bool isSwizzle() const;
944 bool isSMRDOffset8() const;
945 bool isSMEMOffset() const;
946 bool isSMRDLiteralOffset() const;
947 bool isDPP8() const;
948 bool isDPPCtrl() const;
949 bool isBLGP() const;
950 bool isCBSZ() const;
951 bool isABID() const;
952 bool isGPRIdxMode() const;
953 bool isS16Imm() const;
954 bool isU16Imm() const;
955 bool isEndpgm() const;
956 bool isWaitVDST() const;
957 bool isWaitEXP() const;
958 bool isWaitVAVDst() const;
959 bool isWaitVMVSrc() const;
960
961 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
962 return std::bind(P, *this);
963 }
964
965 StringRef getToken() const {
966 assert(isToken());
967 return StringRef(Tok.Data, Tok.Length);
968 }
969
970 int64_t getImm() const {
971 assert(isImm());
972 return Imm.Val;
973 }
974
975 void setImm(int64_t Val) {
976 assert(isImm());
977 Imm.Val = Val;
978 }
979
980 ImmTy getImmTy() const {
981 assert(isImm());
982 return Imm.Type;
983 }
984
985 MCRegister getReg() const override {
986 assert(isRegKind());
987 return Reg.RegNo;
988 }
989
990 SMLoc getStartLoc() const override {
991 return StartLoc;
992 }
993
994 SMLoc getEndLoc() const override {
995 return EndLoc;
996 }
997
998 SMRange getLocRange() const {
999 return SMRange(StartLoc, EndLoc);
1000 }
1001
1002 Modifiers getModifiers() const {
1003 assert(isRegKind() || isImmTy(ImmTyNone));
1004 return isRegKind() ? Reg.Mods : Imm.Mods;
1005 }
1006
1007 void setModifiers(Modifiers Mods) {
1008 assert(isRegKind() || isImmTy(ImmTyNone));
1009 if (isRegKind())
1010 Reg.Mods = Mods;
1011 else
1012 Imm.Mods = Mods;
1013 }
1014
1015 bool hasModifiers() const {
1016 return getModifiers().hasModifiers();
1017 }
1018
1019 bool hasFPModifiers() const {
1020 return getModifiers().hasFPModifiers();
1021 }
1022
1023 bool hasIntModifiers() const {
1024 return getModifiers().hasIntModifiers();
1025 }
1026
1027 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1028
1029 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1030
1031 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1032
1033 void addRegOperands(MCInst &Inst, unsigned N) const;
1034
1035 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1036 if (isRegKind())
1037 addRegOperands(Inst, N);
1038 else
1039 addImmOperands(Inst, N);
1040 }
1041
1042 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1043 Modifiers Mods = getModifiers();
1044 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1045 if (isRegKind()) {
1046 addRegOperands(Inst, N);
1047 } else {
1048 addImmOperands(Inst, N, false);
1049 }
1050 }
1051
1052 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1053 assert(!hasIntModifiers());
1054 addRegOrImmWithInputModsOperands(Inst, N);
1055 }
1056
1057 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1058 assert(!hasFPModifiers());
1059 addRegOrImmWithInputModsOperands(Inst, N);
1060 }
1061
1062 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1063 Modifiers Mods = getModifiers();
1064 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1065 assert(isRegKind());
1066 addRegOperands(Inst, N);
1067 }
1068
1069 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1070 assert(!hasIntModifiers());
1071 addRegWithInputModsOperands(Inst, N);
1072 }
1073
1074 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1075 assert(!hasFPModifiers());
1076 addRegWithInputModsOperands(Inst, N);
1077 }
1078
1079 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1080 // clang-format off
1081 switch (Type) {
1082 case ImmTyNone: OS << "None"; break;
1083 case ImmTyGDS: OS << "GDS"; break;
1084 case ImmTyLDS: OS << "LDS"; break;
1085 case ImmTyOffen: OS << "Offen"; break;
1086 case ImmTyIdxen: OS << "Idxen"; break;
1087 case ImmTyAddr64: OS << "Addr64"; break;
1088 case ImmTyOffset: OS << "Offset"; break;
1089 case ImmTyInstOffset: OS << "InstOffset"; break;
1090 case ImmTyOffset0: OS << "Offset0"; break;
1091 case ImmTyOffset1: OS << "Offset1"; break;
1092 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1093 case ImmTyCPol: OS << "CPol"; break;
1094 case ImmTyIndexKey8bit: OS << "index_key"; break;
1095 case ImmTyIndexKey16bit: OS << "index_key"; break;
1096 case ImmTyTFE: OS << "TFE"; break;
1097 case ImmTyD16: OS << "D16"; break;
1098 case ImmTyFORMAT: OS << "FORMAT"; break;
1099 case ImmTyClampSI: OS << "ClampSI"; break;
1100 case ImmTyOModSI: OS << "OModSI"; break;
1101 case ImmTyDPP8: OS << "DPP8"; break;
1102 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1103 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1104 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1105 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1106 case ImmTyDppFI: OS << "DppFI"; break;
1107 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1108 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1109 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1110 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1111 case ImmTyDMask: OS << "DMask"; break;
1112 case ImmTyDim: OS << "Dim"; break;
1113 case ImmTyUNorm: OS << "UNorm"; break;
1114 case ImmTyDA: OS << "DA"; break;
1115 case ImmTyR128A16: OS << "R128A16"; break;
1116 case ImmTyA16: OS << "A16"; break;
1117 case ImmTyLWE: OS << "LWE"; break;
1118 case ImmTyOff: OS << "Off"; break;
1119 case ImmTyExpTgt: OS << "ExpTgt"; break;
1120 case ImmTyExpCompr: OS << "ExpCompr"; break;
1121 case ImmTyExpVM: OS << "ExpVM"; break;
1122 case ImmTyHwreg: OS << "Hwreg"; break;
1123 case ImmTySendMsg: OS << "SendMsg"; break;
1124 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1125 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1126 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1127 case ImmTyOpSel: OS << "OpSel"; break;
1128 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1129 case ImmTyNegLo: OS << "NegLo"; break;
1130 case ImmTyNegHi: OS << "NegHi"; break;
1131 case ImmTySwizzle: OS << "Swizzle"; break;
1132 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1133 case ImmTyHigh: OS << "High"; break;
1134 case ImmTyBLGP: OS << "BLGP"; break;
1135 case ImmTyCBSZ: OS << "CBSZ"; break;
1136 case ImmTyABID: OS << "ABID"; break;
1137 case ImmTyEndpgm: OS << "Endpgm"; break;
1138 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1139 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1140 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1141 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1142 }
1143 // clang-format on
1144 }
1145
1146 void print(raw_ostream &OS) const override {
1147 switch (Kind) {
1148 case Register:
1149 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1150 break;
1151 case Immediate:
1152 OS << '<' << getImm();
1153 if (getImmTy() != ImmTyNone) {
1154 OS << " type: "; printImmTy(OS, getImmTy());
1155 }
1156 OS << " mods: " << Imm.Mods << '>';
1157 break;
1158 case Token:
1159 OS << '\'' << getToken() << '\'';
1160 break;
1161 case Expression:
1162 OS << "<expr " << *Expr << '>';
1163 break;
1164 }
1165 }
1166
1167 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1168 int64_t Val, SMLoc Loc,
1169 ImmTy Type = ImmTyNone,
1170 bool IsFPImm = false) {
1171 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1172 Op->Imm.Val = Val;
1173 Op->Imm.IsFPImm = IsFPImm;
1174 Op->Imm.Kind = ImmKindTyNone;
1175 Op->Imm.Type = Type;
1176 Op->Imm.Mods = Modifiers();
1177 Op->StartLoc = Loc;
1178 Op->EndLoc = Loc;
1179 return Op;
1180 }
1181
1182 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1183 StringRef Str, SMLoc Loc,
1184 bool HasExplicitEncodingSize = true) {
1185 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1186 Res->Tok.Data = Str.data();
1187 Res->Tok.Length = Str.size();
1188 Res->StartLoc = Loc;
1189 Res->EndLoc = Loc;
1190 return Res;
1191 }
1192
1193 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1194 unsigned RegNo, SMLoc S,
1195 SMLoc E) {
1196 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1197 Op->Reg.RegNo = RegNo;
1198 Op->Reg.Mods = Modifiers();
1199 Op->StartLoc = S;
1200 Op->EndLoc = E;
1201 return Op;
1202 }
1203
1204 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1205 const class MCExpr *Expr, SMLoc S) {
1206 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1207 Op->Expr = Expr;
1208 Op->StartLoc = S;
1209 Op->EndLoc = S;
1210 return Op;
1211 }
1212};
1213
1214raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1215 OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
1216 return OS;
1217}
1218
1219//===----------------------------------------------------------------------===//
1220// AsmParser
1221//===----------------------------------------------------------------------===//
1222
1223// Holds info related to the current kernel, e.g. count of SGPRs used.
1224// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1225// .amdgpu_hsa_kernel or at EOF.
1226class KernelScopeInfo {
1227 int SgprIndexUnusedMin = -1;
1228 int VgprIndexUnusedMin = -1;
1229 int AgprIndexUnusedMin = -1;
1230 MCContext *Ctx = nullptr;
1231 MCSubtargetInfo const *MSTI = nullptr;
1232
1233 void usesSgprAt(int i) {
1234 if (i >= SgprIndexUnusedMin) {
1235 SgprIndexUnusedMin = ++i;
1236 if (Ctx) {
1237 MCSymbol* const Sym =
1238 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1239 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1240 }
1241 }
1242 }
1243
1244 void usesVgprAt(int i) {
1245 if (i >= VgprIndexUnusedMin) {
1246 VgprIndexUnusedMin = ++i;
1247 if (Ctx) {
1248 MCSymbol* const Sym =
1249 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1250 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1251 VgprIndexUnusedMin);
1252 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1253 }
1254 }
1255 }
1256
1257 void usesAgprAt(int i) {
1258 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1259 if (!hasMAIInsts(*MSTI))
1260 return;
1261
1262 if (i >= AgprIndexUnusedMin) {
1263 AgprIndexUnusedMin = ++i;
1264 if (Ctx) {
1265 MCSymbol* const Sym =
1266 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1267 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1268
1269 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1270 MCSymbol* const vSym =
1271 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1272 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1273 VgprIndexUnusedMin);
1274 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1275 }
1276 }
1277 }
1278
1279public:
1280 KernelScopeInfo() = default;
1281
1282 void initialize(MCContext &Context) {
1283 Ctx = &Context;
1284 MSTI = Ctx->getSubtargetInfo();
1285
1286 usesSgprAt(SgprIndexUnusedMin = -1);
1287 usesVgprAt(VgprIndexUnusedMin = -1);
1288 if (hasMAIInsts(*MSTI)) {
1289 usesAgprAt(AgprIndexUnusedMin = -1);
1290 }
1291 }
1292
1293 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1294 unsigned RegWidth) {
1295 switch (RegKind) {
1296 case IS_SGPR:
1297 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1298 break;
1299 case IS_AGPR:
1300 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1301 break;
1302 case IS_VGPR:
1303 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1304 break;
1305 default:
1306 break;
1307 }
1308 }
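 // Illustrative example (not part of the original source): for s[10:11],
 // DwordRegIndex is 10 and RegWidth is 64, so divideCeil(64, 32) - 1 == 1 and
 // usesSgprAt(11) is called, raising .kernel.sgpr_count to 12.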
1309};
1310
1311class AMDGPUAsmParser : public MCTargetAsmParser {
1312 MCAsmParser &Parser;
1313
1314 unsigned ForcedEncodingSize = 0;
1315 bool ForcedDPP = false;
1316 bool ForcedSDWA = false;
1317 KernelScopeInfo KernelScope;
1318
1319 /// @name Auto-generated Match Functions
1320 /// {
1321
1322#define GET_ASSEMBLER_HEADER
1323#include "AMDGPUGenAsmMatcher.inc"
1324
1325 /// }
1326
1327private:
1328 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1329 bool OutOfRangeError(SMRange Range);
1330 /// Calculate VGPR/SGPR blocks required for given target, reserved
1331 /// registers, and user-specified NextFreeXGPR values.
1332 ///
1333 /// \param Features [in] Target features, used for bug corrections.
1334 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1335 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1336 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1337 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1338 /// descriptor field, if valid.
1339 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1340 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1341 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1342 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1343 /// \param VGPRBlocks [out] Result VGPR block count.
1344 /// \param SGPRBlocks [out] Result SGPR block count.
1345 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1346 bool FlatScrUsed, bool XNACKUsed,
1347 std::optional<bool> EnableWavefrontSize32,
1348 unsigned NextFreeVGPR, SMRange VGPRRange,
1349 unsigned NextFreeSGPR, SMRange SGPRRange,
1350 unsigned &VGPRBlocks, unsigned &SGPRBlocks);
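 // Illustrative call with hypothetical values (not from the original source),
 // e.g. for a kernel referencing registers up to v36 and s47 with VCC in use:
 //   unsigned VGPRBlocks, SGPRBlocks;
 //   calculateGPRBlocks(getFeatureBits(), /*VCCUsed=*/true,
 //                      /*FlatScrUsed=*/false, /*XNACKUsed=*/false,
 //                      /*EnableWavefrontSize32=*/std::nullopt,
 //                      /*NextFreeVGPR=*/37, VGPRRange,
 //                      /*NextFreeSGPR=*/48, SGPRRange, VGPRBlocks, SGPRBlocks);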
1351 bool ParseDirectiveAMDGCNTarget();
1352 bool ParseDirectiveAMDHSACodeObjectVersion();
1353 bool ParseDirectiveAMDHSAKernel();
1354 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1355 bool ParseDirectiveAMDKernelCodeT();
1356 // TODO: Possibly make subtargetHasRegister const.
1357 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1358 bool ParseDirectiveAMDGPUHsaKernel();
1359
1360 bool ParseDirectiveISAVersion();
1361 bool ParseDirectiveHSAMetadata();
1362 bool ParseDirectivePALMetadataBegin();
1363 bool ParseDirectivePALMetadata();
1364 bool ParseDirectiveAMDGPULDS();
1365
1366 /// Common code to parse out a block of text (typically YAML) between start and
1367 /// end directives.
1368 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1369 const char *AssemblerDirectiveEnd,
1370 std::string &CollectString);
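 // For illustration (directive names depend on the block being parsed), a
 // metadata block such as:
 //   .amdgpu_metadata
 //   amdhsa.version:
 //     - 1
 //     - 2
 //   .end_amdgpu_metadata
 // is collected verbatim into CollectString between the begin/end directives.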
1371
1372 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1373 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1374 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1375 unsigned &RegNum, unsigned &RegWidth,
1376 bool RestoreOnFailure = false);
1377 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1378 unsigned &RegNum, unsigned &RegWidth,
1379 SmallVectorImpl<AsmToken> &Tokens);
1380 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1381 unsigned &RegWidth,
1382 SmallVectorImpl<AsmToken> &Tokens);
1383 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1384 unsigned &RegWidth,
1385 SmallVectorImpl<AsmToken> &Tokens);
1386 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1387 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1388 bool ParseRegRange(unsigned& Num, unsigned& Width);
1389 unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
1390 unsigned RegWidth, SMLoc Loc);
1391
1392 bool isRegister();
1393 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1394 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1395 void initializeGprCountSymbol(RegisterKind RegKind);
1396 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1397 unsigned RegWidth);
1398 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1399 bool IsAtomic);
1400
1401public:
1402 enum AMDGPUMatchResultTy {
1403 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1404 };
1405 enum OperandMode {
1406 OperandMode_Default,
1407 OperandMode_NSA,
1408 };
1409
1410 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1411
1412 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1413 const MCInstrInfo &MII,
1414 const MCTargetOptions &Options)
1415 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1417
1418 if (getFeatureBits().none()) {
1419 // Set default features.
1420 copySTI().ToggleFeature("southern-islands");
1421 }
1422
1423 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1424
1425 {
1426 // TODO: make those pre-defined variables read-only.
1427 // Currently there is no suitable machinery in core llvm-mc for this.
1428 // MCSymbol::isRedefinable is intended for another purpose, and
1429 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1430 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1431 MCContext &Ctx = getContext();
1432 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1433 MCSymbol *Sym =
1434 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1435 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1436 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1437 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1438 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1439 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1440 } else {
1441 MCSymbol *Sym =
1442 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1443 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1444 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1445 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1446 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1447 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1448 }
1449 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1450 initializeGprCountSymbol(IS_VGPR);
1451 initializeGprCountSymbol(IS_SGPR);
1452 } else
1453 KernelScope.initialize(getContext());
1454 }
1455 }
1456
1457 bool hasMIMG_R128() const {
1458 return AMDGPU::hasMIMG_R128(getSTI());
1459 }
1460
1461 bool hasPackedD16() const {
1462 return AMDGPU::hasPackedD16(getSTI());
1463 }
1464
1465 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1466
1467 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1468
1469 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1470
1471 bool isSI() const {
1472 return AMDGPU::isSI(getSTI());
1473 }
1474
1475 bool isCI() const {
1476 return AMDGPU::isCI(getSTI());
1477 }
1478
1479 bool isVI() const {
1480 return AMDGPU::isVI(getSTI());
1481 }
1482
1483 bool isGFX9() const {
1484 return AMDGPU::isGFX9(getSTI());
1485 }
1486
1487 // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1488 bool isGFX90A() const {
1489 return AMDGPU::isGFX90A(getSTI());
1490 }
1491
1492 bool isGFX940() const {
1493 return AMDGPU::isGFX940(getSTI());
1494 }
1495
1496 bool isGFX9Plus() const {
1497 return AMDGPU::isGFX9Plus(getSTI());
1498 }
1499
1500 bool isGFX10() const {
1501 return AMDGPU::isGFX10(getSTI());
1502 }
1503
1504 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1505
1506 bool isGFX11() const {
1507 return AMDGPU::isGFX11(getSTI());
1508 }
1509
1510 bool isGFX11Plus() const {
1511 return AMDGPU::isGFX11Plus(getSTI());
1512 }
1513
1514 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1515
1516 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1517
1518 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1519
1520 bool isGFX10_BEncoding() const {
1521 return AMDGPU::isGFX10_BEncoding(getSTI());
1522 }
1523
1524 bool hasInv2PiInlineImm() const {
1525 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1526 }
1527
1528 bool hasFlatOffsets() const {
1529 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1530 }
1531
1532 bool hasArchitectedFlatScratch() const {
1533 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1534 }
1535
1536 bool hasSGPR102_SGPR103() const {
1537 return !isVI() && !isGFX9();
1538 }
1539
1540 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1541
1542 bool hasIntClamp() const {
1543 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1544 }
1545
1546 bool hasPartialNSAEncoding() const {
1547 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1548 }
1549
1550 unsigned getNSAMaxSize(bool HasSampler = false) const {
1551 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1552 }
1553
1554 unsigned getMaxNumUserSGPRs() const {
1555 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1556 }
1557
1558 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1559
1560 AMDGPUTargetStreamer &getTargetStreamer() {
1561 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1562 return static_cast<AMDGPUTargetStreamer &>(TS);
1563 }
1564
1565 const MCRegisterInfo *getMRI() const {
1566 // We need this const_cast because for some reason getContext() is not const
1567 // in MCAsmParser.
1568 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1569 }
1570
1571 const MCInstrInfo *getMII() const {
1572 return &MII;
1573 }
1574
1575 const FeatureBitset &getFeatureBits() const {
1576 return getSTI().getFeatureBits();
1577 }
1578
1579 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1580 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1581 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1582
1583 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1584 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1585 bool isForcedDPP() const { return ForcedDPP; }
1586 bool isForcedSDWA() const { return ForcedSDWA; }
1587 ArrayRef<unsigned> getMatchedVariants() const;
1588 StringRef getMatchedVariantName() const;
1589
1590 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1591 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1592 bool RestoreOnFailure);
1593 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1594 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1595 SMLoc &EndLoc) override;
1596 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1597 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1598 unsigned Kind) override;
1599 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1600 OperandVector &Operands, MCStreamer &Out,
1601 uint64_t &ErrorInfo,
1602 bool MatchingInlineAsm) override;
1603 bool ParseDirective(AsmToken DirectiveID) override;
1604 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1605 OperandMode Mode = OperandMode_Default);
1606 StringRef parseMnemonicSuffix(StringRef Name);
1607 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1608 SMLoc NameLoc, OperandVector &Operands) override;
1609 //bool ProcessInstruction(MCInst &Inst);
1610
1612
1613 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1614
1615 ParseStatus
1616 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1617 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1618 std::function<bool(int64_t &)> ConvertResult = nullptr);
1619
1620 ParseStatus parseOperandArrayWithPrefix(
1621 const char *Prefix, OperandVector &Operands,
1622 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1623 bool (*ConvertResult)(int64_t &) = nullptr);
1624
1625 ParseStatus
1626 parseNamedBit(StringRef Name, OperandVector &Operands,
1627 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1628 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1630 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1631 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1632 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1633 SMLoc &StringLoc);
1634
1635 bool isModifier();
1636 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1637 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1638 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1639 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1640 bool parseSP3NegModifier();
1641 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1642 bool HasLit = false);
1644 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1645 bool HasLit = false);
1646 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1647 bool AllowImm = true);
1648 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1649 bool AllowImm = true);
1650 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1651 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1652 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1653 ParseStatus tryParseIndexKey(OperandVector &Operands,
1654 AMDGPUOperand::ImmTy ImmTy);
1655 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1656 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1657
1658 ParseStatus parseDfmtNfmt(int64_t &Format);
1659 ParseStatus parseUfmt(int64_t &Format);
1660 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1661 int64_t &Format);
1662 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1663 int64_t &Format);
1664 ParseStatus parseFORMAT(OperandVector &Operands);
1665 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1666 ParseStatus parseNumericFormat(int64_t &Format);
1667 ParseStatus parseFlatOffset(OperandVector &Operands);
1668 ParseStatus parseR128A16(OperandVector &Operands);
1670 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1671 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1672
1673 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1674
1675 bool parseCnt(int64_t &IntVal);
1676 ParseStatus parseSWaitCnt(OperandVector &Operands);
1677
1678 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1679 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1680 ParseStatus parseDepCtr(OperandVector &Operands);
1681
1682 bool parseDelay(int64_t &Delay);
1683 ParseStatus parseSDelayALU(OperandVector &Operands);
1684
1685 ParseStatus parseHwreg(OperandVector &Operands);
1686
1687private:
1688 struct OperandInfoTy {
1689 SMLoc Loc;
1690 int64_t Val;
1691 bool IsSymbolic = false;
1692 bool IsDefined = false;
1693
1694 OperandInfoTy(int64_t Val) : Val(Val) {}
1695 };
1696
1697 struct StructuredOpField : OperandInfoTy {
1698 StringLiteral Id;
1699 StringLiteral Desc;
1700 unsigned Width;
1701 bool IsDefined = false;
1702
1703 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1704 int64_t Default)
1705 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1706 virtual ~StructuredOpField() = default;
1707
1708 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1709 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1710 return false;
1711 }
1712
1713 virtual bool validate(AMDGPUAsmParser &Parser) const {
1714 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1715 return Error(Parser, "not supported on this GPU");
1716 if (!isUIntN(Width, Val))
1717 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1718 return true;
1719 }
1720 };
1721
1722 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1723 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1724
1725 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1726 bool validateSendMsg(const OperandInfoTy &Msg,
1727 const OperandInfoTy &Op,
1728 const OperandInfoTy &Stream);
1729
1730 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1731 OperandInfoTy &Width);
1732
1733 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1734 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1735 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1736
1737 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1738 const OperandVector &Operands) const;
1739 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1740 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1741 SMLoc getLitLoc(const OperandVector &Operands,
1742 bool SearchMandatoryLiterals = false) const;
1743 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1744 SMLoc getConstLoc(const OperandVector &Operands) const;
1745 SMLoc getInstLoc(const OperandVector &Operands) const;
1746
1747 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1748 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1749 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1750 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1751 bool validateSOPLiteral(const MCInst &Inst) const;
1752 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1753 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1754 const OperandVector &Operands);
1755 bool validateIntClampSupported(const MCInst &Inst);
1756 bool validateMIMGAtomicDMask(const MCInst &Inst);
1757 bool validateMIMGGatherDMask(const MCInst &Inst);
1758 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1759 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1760 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1761 bool validateMIMGD16(const MCInst &Inst);
1762 bool validateMIMGMSAA(const MCInst &Inst);
1763 bool validateOpSel(const MCInst &Inst);
1764 bool validateNeg(const MCInst &Inst, int OpName);
1765 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1766 bool validateVccOperand(unsigned Reg) const;
1767 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1768 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1769 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1770 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1771 bool validateAGPRLdSt(const MCInst &Inst) const;
1772 bool validateVGPRAlign(const MCInst &Inst) const;
1773 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1774 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1775 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1776 bool validateDivScale(const MCInst &Inst);
1777 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1778 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1779 const SMLoc &IDLoc);
1780 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1781 const unsigned CPol);
1782 bool validateExeczVcczOperands(const OperandVector &Operands);
1783 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1784 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1785 unsigned getConstantBusLimit(unsigned Opcode) const;
1786 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1787 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1788 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1789
1790 bool isSupportedMnemo(StringRef Mnemo,
1791 const FeatureBitset &FBS);
1792 bool isSupportedMnemo(StringRef Mnemo,
1793 const FeatureBitset &FBS,
1794 ArrayRef<unsigned> Variants);
1795 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1796
1797 bool isId(const StringRef Id) const;
1798 bool isId(const AsmToken &Token, const StringRef Id) const;
1799 bool isToken(const AsmToken::TokenKind Kind) const;
1800 StringRef getId() const;
1801 bool trySkipId(const StringRef Id);
1802 bool trySkipId(const StringRef Pref, const StringRef Id);
1803 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1804 bool trySkipToken(const AsmToken::TokenKind Kind);
1805 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1806 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1807 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1808
1809 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1810 AsmToken::TokenKind getTokenKind() const;
1811 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1812 bool parseExpr(OperandVector &Operands);
1813 StringRef getTokenStr() const;
1814 AsmToken peekToken(bool ShouldSkipSpace = true);
1815 AsmToken getToken() const;
1816 SMLoc getLoc() const;
1817 void lex();
1818
1819public:
1820 void onBeginOfFile() override;
1821 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1822
1823 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1824
1825 ParseStatus parseExpTgt(OperandVector &Operands);
1826 ParseStatus parseSendMsg(OperandVector &Operands);
1827 ParseStatus parseInterpSlot(OperandVector &Operands);
1828 ParseStatus parseInterpAttr(OperandVector &Operands);
1829 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1830 ParseStatus parseBoolReg(OperandVector &Operands);
1831
1832 bool parseSwizzleOperand(int64_t &Op,
1833 const unsigned MinVal,
1834 const unsigned MaxVal,
1835 const StringRef ErrMsg,
1836 SMLoc &Loc);
1837 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1838 const unsigned MinVal,
1839 const unsigned MaxVal,
1840 const StringRef ErrMsg);
1841 ParseStatus parseSwizzle(OperandVector &Operands);
1842 bool parseSwizzleOffset(int64_t &Imm);
1843 bool parseSwizzleMacro(int64_t &Imm);
1844 bool parseSwizzleQuadPerm(int64_t &Imm);
1845 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1846 bool parseSwizzleBroadcast(int64_t &Imm);
1847 bool parseSwizzleSwap(int64_t &Imm);
1848 bool parseSwizzleReverse(int64_t &Imm);
1849
1850 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1851 int64_t parseGPRIdxMacro();
1852
1853 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1854 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1855
1856 ParseStatus parseOModSI(OperandVector &Operands);
1857
1858 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1859 OptionalImmIndexMap &OptionalIdx);
1860 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1861 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1862 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1863 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1864
1865 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1866 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1867 OptionalImmIndexMap &OptionalIdx);
1868 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1869 OptionalImmIndexMap &OptionalIdx);
1870
1871 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1872 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1873
1874 bool parseDimId(unsigned &Encoding);
1876 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1878 ParseStatus parseDPPCtrl(OperandVector &Operands);
1879 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1880 int64_t parseDPPCtrlSel(StringRef Ctrl);
1881 int64_t parseDPPCtrlPerm();
1882 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1883 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1884 cvtDPP(Inst, Operands, true);
1885 }
1886 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1887 bool IsDPP8 = false);
1888 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1889 cvtVOP3DPP(Inst, Operands, true);
1890 }
1891
1892 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1893 AMDGPUOperand::ImmTy Type);
1894 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1895 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1896 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1897 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1898 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1899 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1900 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1901 uint64_t BasicInstType,
1902 bool SkipDstVcc = false,
1903 bool SkipSrcVcc = false);
1904
1905 ParseStatus parseEndpgm(OperandVector &Operands);
1906
1908};
1909
1910} // end anonymous namespace
1911
1912// May be called with an integer type of equivalent bitwidth.
1913static const fltSemantics *getFltSemantics(unsigned Size) {
1914 switch (Size) {
1915 case 4:
1916 return &APFloat::IEEEsingle();
1917 case 8:
1918 return &APFloat::IEEEdouble();
1919 case 2:
1920 return &APFloat::IEEEhalf();
1921 default:
1922 llvm_unreachable("unsupported fp type");
1923 }
1924}
1925
1926static const fltSemantics *getFltSemantics(MVT VT) {
1927 return getFltSemantics(VT.getSizeInBits() / 8);
1928}
1929
1930static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1931 switch (OperandType) {
1932 // When a floating-point immediate is used as an operand of type i16, the
1933 // 32-bit representation of the constant truncated to the 16 LSBs should be used.
1953 return &APFloat::IEEEsingle();
1959 return &APFloat::IEEEdouble();
1968 return &APFloat::IEEEhalf();
1976 return &APFloat::BFloat();
1977 default:
1978 llvm_unreachable("unsupported fp type");
1979 }
1980}
1981
1982//===----------------------------------------------------------------------===//
1983// Operand
1984//===----------------------------------------------------------------------===//
1985
1986static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1987 bool Lost;
1988
1989 // Convert the literal to the target floating-point type.
1990 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1991 APFloat::rmNearestTiesToEven,
1992 &Lost);
1993 // We allow precision loss but not overflow or underflow
1994 if (Status != APFloat::opOK &&
1995 Lost &&
1996 ((Status & APFloat::opOverflow) != 0 ||
1997 (Status & APFloat::opUnderflow) != 0)) {
1998 return false;
1999 }
2000
2001 return true;
2002}
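// A simplified standalone sketch in the spirit of canLosslesslyConvertToFPType
// above (it checks only the overflow/underflow status bits; the helper name is
// illustrative): converting 65536.0 to IEEE half overflows, since half's largest
// finite value is 65504, and is rejected, while converting 0.1 merely loses
// precision and is still accepted.
static bool acceptsPrecisionLossOnly(double D, const fltSemantics &To) {
  bool LosesInfo = false;
  APFloat F(D);
  APFloat::opStatus St =
      F.convert(To, APFloat::rmNearestTiesToEven, &LosesInfo);
  // Accept exact conversions and plain precision loss; reject overflow/underflow.
  return (St & (APFloat::opOverflow | APFloat::opUnderflow)) == 0;
}
// acceptsPrecisionLossOnly(0.1, APFloat::IEEEhalf())     -> true
// acceptsPrecisionLossOnly(65536.0, APFloat::IEEEhalf()) -> false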
2003
2004static bool isSafeTruncation(int64_t Val, unsigned Size) {
2005 return isUIntN(Size, Val) || isIntN(Size, Val);
2006}
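// A standalone sketch of what the truncation check above accepts, using only
// plain C++ (helper name and range handling are illustrative): a value is
// safely truncatable to Size bits if it fits either the unsigned or the signed
// Size-bit range.
static bool fitsInNBits(int64_t Val, unsigned Size) {
  // Illustrative only; assumes 1 <= Size <= 63 (the parser uses 16 and 32 here).
  uint64_t UMax = (1ull << Size) - 1;      // unsigned range: [0, 2^Size - 1]
  int64_t SMin = -(1ll << (Size - 1));     // signed lower bound
  int64_t SMax = (1ll << (Size - 1)) - 1;  // signed upper bound
  bool FitsUnsigned = Val >= 0 && static_cast<uint64_t>(Val) <= UMax;
  bool FitsSigned = Val >= SMin && Val <= SMax;
  return FitsUnsigned || FitsSigned;
}
// fitsInNBits(0xFFFF, 16) and fitsInNBits(-1, 16) are true;
// fitsInNBits(0x18000, 16) is false.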
2007
2008static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2009 if (VT.getScalarType() == MVT::i16)
2010 return isInlinableLiteral32(Val, HasInv2Pi);
2011
2012 if (VT.getScalarType() == MVT::f16)
2013 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2014
2015 assert(VT.getScalarType() == MVT::bf16);
2016
2017 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2018}
2019
2020bool AMDGPUOperand::isInlinableImm(MVT type) const {
2021
2022 // This is a hack to enable named inline values like
2023 // shared_base with both 32-bit and 64-bit operands.
2024 // Note that these values are defined as
2025 // 32-bit operands only.
2026 if (isInlineValue()) {
2027 return true;
2028 }
2029
2030 if (!isImmTy(ImmTyNone)) {
2031 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2032 return false;
2033 }
2034 // TODO: We should avoid using host float here. It would be better to
2035 // check the float bit values which is what a few other places do.
2036 // We've had bot failures before due to weird NaN support on mips hosts.
2037
2038 APInt Literal(64, Imm.Val);
2039
2040 if (Imm.IsFPImm) { // We got fp literal token
2041 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2043 AsmParser->hasInv2PiInlineImm());
2044 }
2045
2046 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2047 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2048 return false;
2049
2050 if (type.getScalarSizeInBits() == 16) {
2051 bool Lost = false;
2052 switch (type.getScalarType().SimpleTy) {
2053 default:
2054 llvm_unreachable("unknown 16-bit type");
2055 case MVT::bf16:
2056 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2057 &Lost);
2058 break;
2059 case MVT::f16:
2060 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2061 &Lost);
2062 break;
2063 case MVT::i16:
2064 FPLiteral.convert(APFloatBase::IEEEsingle(),
2065 APFloat::rmNearestTiesToEven, &Lost);
2066 break;
2067 }
2068 // We need to use the 32-bit representation here because when a
2069 // floating-point inline constant is used as an i16 operand, its 32-bit
2070 // representation will be used. We will need the 32-bit value to check
2071 // whether it is an FP inline constant.
2072 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2073 return isInlineableLiteralOp16(ImmVal, type,
2074 AsmParser->hasInv2PiInlineImm());
2075 }
2076
2077 // Check if single precision literal is inlinable
2079 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2080 AsmParser->hasInv2PiInlineImm());
2081 }
2082
2083 // We got int literal token.
2084 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2086 AsmParser->hasInv2PiInlineImm());
2087 }
2088
2089 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2090 return false;
2091 }
2092
2093 if (type.getScalarSizeInBits() == 16) {
2095 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2096 type, AsmParser->hasInv2PiInlineImm());
2097 }
2098
2100 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2101 AsmParser->hasInv2PiInlineImm());
2102}
2103
2104bool AMDGPUOperand::isLiteralImm(MVT type) const {
2105 // Check that this immediate can be added as literal
2106 if (!isImmTy(ImmTyNone)) {
2107 return false;
2108 }
2109
2110 if (!Imm.IsFPImm) {
2111 // We got int literal token.
2112
2113 if (type == MVT::f64 && hasFPModifiers()) {
2114 // FP modifiers cannot be applied to int literals while preserving the same
2115 // semantics for VOP1/2/C and VOP3, because of integer truncation. To avoid
2116 // ambiguity, disable these cases.
2117 return false;
2118 }
2119
2120 unsigned Size = type.getSizeInBits();
2121 if (Size == 64)
2122 Size = 32;
2123
2124 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2125 // types.
2126 return isSafeTruncation(Imm.Val, Size);
2127 }
2128
2129 // We got fp literal token
2130 if (type == MVT::f64) { // Expected 64-bit fp operand
2131 // The low 32 bits of the literal would be set to zeroes, but we accept such literals.
2132 return true;
2133 }
2134
2135 if (type == MVT::i64) { // Expected 64-bit int operand
2136 // We don't allow fp literals in 64-bit integer instructions. It is
2137 // unclear how we should encode them.
2138 return false;
2139 }
2140
2141 // We allow fp literals with f16x2 operands assuming that the specified
2142 // literal goes into the lower half and the upper half is zero. We also
2143 // require that the literal may be losslessly converted to f16.
2144 //
2145 // For i16x2 operands, we assume that the specified literal is encoded as a
2146 // single-precision float. This is pretty odd, but it matches SP3 and what
2147 // happens in hardware.
2148 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2149 : (type == MVT::v2i16) ? MVT::f32
2150 : (type == MVT::v2f32) ? MVT::f32
2151 : type;
2152
2153 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2154 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2155}
2156
2157bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2158 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2159}
2160
2161bool AMDGPUOperand::isVRegWithInputMods() const {
2162 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2163 // GFX90A allows DPP on 64-bit operands.
2164 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2165 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2166}
2167
2168template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2169 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2170 : AMDGPU::VGPR_16_Lo128RegClassID);
2171}
2172
2173bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2174 if (AsmParser->isVI())
2175 return isVReg32();
2176 else if (AsmParser->isGFX9Plus())
2177 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2178 else
2179 return false;
2180}
2181
2182bool AMDGPUOperand::isSDWAFP16Operand() const {
2183 return isSDWAOperand(MVT::f16);
2184}
2185
2186bool AMDGPUOperand::isSDWAFP32Operand() const {
2187 return isSDWAOperand(MVT::f32);
2188}
2189
2190bool AMDGPUOperand::isSDWAInt16Operand() const {
2191 return isSDWAOperand(MVT::i16);
2192}
2193
2194bool AMDGPUOperand::isSDWAInt32Operand() const {
2195 return isSDWAOperand(MVT::i32);
2196}
2197
2198bool AMDGPUOperand::isBoolReg() const {
2199 auto FB = AsmParser->getFeatureBits();
2200 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2201 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2202}
2203
2204uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2205{
2206 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2207 assert(Size == 2 || Size == 4 || Size == 8);
2208
2209 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2210
2211 if (Imm.Mods.Abs) {
2212 Val &= ~FpSignMask;
2213 }
2214 if (Imm.Mods.Neg) {
2215 Val ^= FpSignMask;
2216 }
2217
2218 return Val;
2219}
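// A brief standalone illustration of the sign-bit manipulation above for the
// Size == 4 case (names are illustrative): 'abs' clears the sign bit of the
// 32-bit encoding, 'neg' flips it.
static uint64_t applyAbsNeg32(uint64_t Bits, bool Abs, bool Neg) {
  const uint64_t SignMask = 1ull << 31; // sign bit of a 32-bit float encoding
  if (Abs)
    Bits &= ~SignMask;
  if (Neg)
    Bits ^= SignMask;
  return Bits;
}
// applyAbsNeg32(0xBF800000 /* -1.0f */, /*Abs=*/true,  /*Neg=*/false) == 0x3F800000 /*  1.0f */
// applyAbsNeg32(0x3F800000 /*  1.0f */, /*Abs=*/false, /*Neg=*/true ) == 0xBF800000 /* -1.0f */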
2220
2221void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2222 if (isExpr()) {
2224 return;
2225 }
2226
2227 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2228 Inst.getNumOperands())) {
2229 addLiteralImmOperand(Inst, Imm.Val,
2230 ApplyModifiers &
2231 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2232 } else {
2233 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2235 setImmKindNone();
2236 }
2237}
2238
2239void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2240 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2241 auto OpNum = Inst.getNumOperands();
2242 // Check that this operand accepts literals
2243 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2244
2245 if (ApplyModifiers) {
2246 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2247 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2248 Val = applyInputFPModifiers(Val, Size);
2249 }
2250
2251 APInt Literal(64, Val);
2252 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2253
2254 if (Imm.IsFPImm) { // We got fp literal token
2255 switch (OpTy) {
2261 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2262 AsmParser->hasInv2PiInlineImm())) {
2263 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2264 setImmKindConst();
2265 return;
2266 }
2267
2268 // Non-inlineable
2269 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2270 // For fp operands we check if low 32 bits are zeros
2271 if (Literal.getLoBits(32) != 0) {
2272 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2273 "Can't encode literal as exact 64-bit floating-point operand. "
2274 "Low 32-bits will be set to zero");
2275 Val &= 0xffffffff00000000u;
2276 }
2277
2279 setImmKindLiteral();
2280 return;
2281 }
2282
2283 // We don't allow fp literals in 64-bit integer instructions. It is
2284 // unclear how we should encode them. This case should be checked earlier
2285 // in predicate methods (isLiteralImm())
2286 llvm_unreachable("fp literal in 64-bit integer instruction.");
2287
2295 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2296 // This is the 1/(2*pi) constant, which would be truncated to bf16 with a
2297 // loss of precision. The constant represents the idiomatic fp32 value of
2298 // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16 bits
2299 // cleared. Prevent the rounding below.
2300 Inst.addOperand(MCOperand::createImm(0x3e22));
2301 setImmKindLiteral();
2302 return;
2303 }
2304 [[fallthrough]];
2305
2333 bool lost;
2334 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2335 // Convert the literal to the operand's floating-point type.
2336 FPLiteral.convert(*getOpFltSemantics(OpTy),
2337 APFloat::rmNearestTiesToEven, &lost);
2338 // We allow precision loss but not overflow or underflow. This should have been
2339 // checked earlier in isLiteralImm().
2340
2341 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2342 Inst.addOperand(MCOperand::createImm(ImmVal));
2343 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2344 setImmKindMandatoryLiteral();
2345 } else {
2346 setImmKindLiteral();
2347 }
2348 return;
2349 }
2350 default:
2351 llvm_unreachable("invalid operand size");
2352 }
2353
2354 return;
2355 }
2356
2357 // We got int literal token.
2358 // Only sign extend inline immediates.
2359 switch (OpTy) {
2375 if (isSafeTruncation(Val, 32) &&
2376 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2377 AsmParser->hasInv2PiInlineImm())) {
2379 setImmKindConst();
2380 return;
2381 }
2382
2383 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2384 setImmKindLiteral();
2385 return;
2386
2392 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2394 setImmKindConst();
2395 return;
2396 }
2397
2398 Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2399 : Lo_32(Val);
2400
2402 setImmKindLiteral();
2403 return;
2404
2408 if (isSafeTruncation(Val, 16) &&
2409 AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2410 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2411 setImmKindConst();
2412 return;
2413 }
2414
2415 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2416 setImmKindLiteral();
2417 return;
2418
2423 if (isSafeTruncation(Val, 16) &&
2424 AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2425 AsmParser->hasInv2PiInlineImm())) {
2427 setImmKindConst();
2428 return;
2429 }
2430
2431 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2432 setImmKindLiteral();
2433 return;
2434
2439 if (isSafeTruncation(Val, 16) &&
2440 AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2441 AsmParser->hasInv2PiInlineImm())) {
2443 setImmKindConst();
2444 return;
2445 }
2446
2447 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2448 setImmKindLiteral();
2449 return;
2450
2453 assert(isSafeTruncation(Val, 16));
2454 assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2456 return;
2457 }
2460 assert(isSafeTruncation(Val, 16));
2461 assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2462 AsmParser->hasInv2PiInlineImm()));
2463
2465 return;
2466 }
2467
2470 assert(isSafeTruncation(Val, 16));
2471 assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2472 AsmParser->hasInv2PiInlineImm()));
2473
2475 return;
2476 }
2477
2479 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2480 setImmKindMandatoryLiteral();
2481 return;
2483 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2484 setImmKindMandatoryLiteral();
2485 return;
2486 default:
2487 llvm_unreachable("invalid operand size");
2488 }
2489}
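// A worked example for the 1/(2*pi) bf16 special case above, relying only on
// the IEEE-754 bit layout (helper name is illustrative): the fp32 encoding of
// 0.15915494 (~1/(2*pi)) is 0x3E22F983, and bf16 keeps just the upper 16 bits
// of the fp32 pattern, which is exactly the 0x3E22 immediate emitted instead
// of a rounded value.
static uint16_t fp32BitsToBF16Truncated(uint32_t Fp32Bits) {
  return static_cast<uint16_t>(Fp32Bits >> 16); // keep the upper 16 bits
}
// fp32BitsToBF16Truncated(0x3E22F983) == 0x3E22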
2490
2491void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2492 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2493}
2494
2495bool AMDGPUOperand::isInlineValue() const {
2496 return isRegKind() && ::isInlineValue(getReg());
2497}
2498
2499//===----------------------------------------------------------------------===//
2500// AsmParser
2501//===----------------------------------------------------------------------===//
2502
2503static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2504 if (Is == IS_VGPR) {
2505 switch (RegWidth) {
2506 default: return -1;
2507 case 32:
2508 return AMDGPU::VGPR_32RegClassID;
2509 case 64:
2510 return AMDGPU::VReg_64RegClassID;
2511 case 96:
2512 return AMDGPU::VReg_96RegClassID;
2513 case 128:
2514 return AMDGPU::VReg_128RegClassID;
2515 case 160:
2516 return AMDGPU::VReg_160RegClassID;
2517 case 192:
2518 return AMDGPU::VReg_192RegClassID;
2519 case 224:
2520 return AMDGPU::VReg_224RegClassID;
2521 case 256:
2522 return AMDGPU::VReg_256RegClassID;
2523 case 288:
2524 return AMDGPU::VReg_288RegClassID;
2525 case 320:
2526 return AMDGPU::VReg_320RegClassID;
2527 case 352:
2528 return AMDGPU::VReg_352RegClassID;
2529 case 384:
2530 return AMDGPU::VReg_384RegClassID;
2531 case 512:
2532 return AMDGPU::VReg_512RegClassID;
2533 case 1024:
2534 return AMDGPU::VReg_1024RegClassID;
2535 }
2536 } else if (Is == IS_TTMP) {
2537 switch (RegWidth) {
2538 default: return -1;
2539 case 32:
2540 return AMDGPU::TTMP_32RegClassID;
2541 case 64:
2542 return AMDGPU::TTMP_64RegClassID;
2543 case 128:
2544 return AMDGPU::TTMP_128RegClassID;
2545 case 256:
2546 return AMDGPU::TTMP_256RegClassID;
2547 case 512:
2548 return AMDGPU::TTMP_512RegClassID;
2549 }
2550 } else if (Is == IS_SGPR) {
2551 switch (RegWidth) {
2552 default: return -1;
2553 case 32:
2554 return AMDGPU::SGPR_32RegClassID;
2555 case 64:
2556 return AMDGPU::SGPR_64RegClassID;
2557 case 96:
2558 return AMDGPU::SGPR_96RegClassID;
2559 case 128:
2560 return AMDGPU::SGPR_128RegClassID;
2561 case 160:
2562 return AMDGPU::SGPR_160RegClassID;
2563 case 192:
2564 return AMDGPU::SGPR_192RegClassID;
2565 case 224:
2566 return AMDGPU::SGPR_224RegClassID;
2567 case 256:
2568 return AMDGPU::SGPR_256RegClassID;
2569 case 288:
2570 return AMDGPU::SGPR_288RegClassID;
2571 case 320:
2572 return AMDGPU::SGPR_320RegClassID;
2573 case 352:
2574 return AMDGPU::SGPR_352RegClassID;
2575 case 384:
2576 return AMDGPU::SGPR_384RegClassID;
2577 case 512:
2578 return AMDGPU::SGPR_512RegClassID;
2579 }
2580 } else if (Is == IS_AGPR) {
2581 switch (RegWidth) {
2582 default: return -1;
2583 case 32:
2584 return AMDGPU::AGPR_32RegClassID;
2585 case 64:
2586 return AMDGPU::AReg_64RegClassID;
2587 case 96:
2588 return AMDGPU::AReg_96RegClassID;
2589 case 128:
2590 return AMDGPU::AReg_128RegClassID;
2591 case 160:
2592 return AMDGPU::AReg_160RegClassID;
2593 case 192:
2594 return AMDGPU::AReg_192RegClassID;
2595 case 224:
2596 return AMDGPU::AReg_224RegClassID;
2597 case 256:
2598 return AMDGPU::AReg_256RegClassID;
2599 case 288:
2600 return AMDGPU::AReg_288RegClassID;
2601 case 320:
2602 return AMDGPU::AReg_320RegClassID;
2603 case 352:
2604 return AMDGPU::AReg_352RegClassID;
2605 case 384:
2606 return AMDGPU::AReg_384RegClassID;
2607 case 512:
2608 return AMDGPU::AReg_512RegClassID;
2609 case 1024:
2610 return AMDGPU::AReg_1024RegClassID;
2611 }
2612 }
2613 return -1;
2614}
2615
2616static unsigned getSpecialRegForName(StringRef RegName) {
2617 return StringSwitch<unsigned>(RegName)
2618 .Case("exec", AMDGPU::EXEC)
2619 .Case("vcc", AMDGPU::VCC)
2620 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2621 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2622 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2623 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2624 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2625 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2626 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2627 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2628 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2629 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2630 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2631 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2632 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2633 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2634 .Case("m0", AMDGPU::M0)
2635 .Case("vccz", AMDGPU::SRC_VCCZ)
2636 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2637 .Case("execz", AMDGPU::SRC_EXECZ)
2638 .Case("src_execz", AMDGPU::SRC_EXECZ)
2639 .Case("scc", AMDGPU::SRC_SCC)
2640 .Case("src_scc", AMDGPU::SRC_SCC)
2641 .Case("tba", AMDGPU::TBA)
2642 .Case("tma", AMDGPU::TMA)
2643 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2644 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2645 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2646 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2647 .Case("vcc_lo", AMDGPU::VCC_LO)
2648 .Case("vcc_hi", AMDGPU::VCC_HI)
2649 .Case("exec_lo", AMDGPU::EXEC_LO)
2650 .Case("exec_hi", AMDGPU::EXEC_HI)
2651 .Case("tma_lo", AMDGPU::TMA_LO)
2652 .Case("tma_hi", AMDGPU::TMA_HI)
2653 .Case("tba_lo", AMDGPU::TBA_LO)
2654 .Case("tba_hi", AMDGPU::TBA_HI)
2655 .Case("pc", AMDGPU::PC_REG)
2656 .Case("null", AMDGPU::SGPR_NULL)
2657 .Default(AMDGPU::NoRegister);
2658}
2659
2660bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2661 SMLoc &EndLoc, bool RestoreOnFailure) {
2662 auto R = parseRegister();
2663 if (!R) return true;
2664 assert(R->isReg());
2665 RegNo = R->getReg();
2666 StartLoc = R->getStartLoc();
2667 EndLoc = R->getEndLoc();
2668 return false;
2669}
2670
2671bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2672 SMLoc &EndLoc) {
2673 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2674}
2675
2676ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2677 SMLoc &EndLoc) {
2678 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2679 bool PendingErrors = getParser().hasPendingError();
2680 getParser().clearPendingErrors();
2681 if (PendingErrors)
2682 return ParseStatus::Failure;
2683 if (Result)
2684 return ParseStatus::NoMatch;
2685 return ParseStatus::Success;
2686}
2687
2688bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2689 RegisterKind RegKind, unsigned Reg1,
2690 SMLoc Loc) {
2691 switch (RegKind) {
2692 case IS_SPECIAL:
2693 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2694 Reg = AMDGPU::EXEC;
2695 RegWidth = 64;
2696 return true;
2697 }
2698 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2699 Reg = AMDGPU::FLAT_SCR;
2700 RegWidth = 64;
2701 return true;
2702 }
2703 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2704 Reg = AMDGPU::XNACK_MASK;
2705 RegWidth = 64;
2706 return true;
2707 }
2708 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2709 Reg = AMDGPU::VCC;
2710 RegWidth = 64;
2711 return true;
2712 }
2713 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2714 Reg = AMDGPU::TBA;
2715 RegWidth = 64;
2716 return true;
2717 }
2718 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2719 Reg = AMDGPU::TMA;
2720 RegWidth = 64;
2721 return true;
2722 }
2723 Error(Loc, "register does not fit in the list");
2724 return false;
2725 case IS_VGPR:
2726 case IS_SGPR:
2727 case IS_AGPR:
2728 case IS_TTMP:
2729 if (Reg1 != Reg + RegWidth / 32) {
2730 Error(Loc, "registers in a list must have consecutive indices");
2731 return false;
2732 }
2733 RegWidth += 32;
2734 return true;
2735 default:
2736 llvm_unreachable("unexpected register kind");
2737 }
2738}
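// A minimal sketch of the consecutiveness rule enforced above for regular
// register lists, stated in terms of register indices rather than MC register
// numbers (names are illustrative): the next 32-bit register must continue
// right after the dwords accumulated so far.
static bool isNextConsecutive(unsigned BaseIdx, unsigned AccumWidthBits,
                              unsigned NextIdx) {
  return NextIdx == BaseIdx + AccumWidthBits / 32;
}
// With [s4,s5] parsed so far (base 4, 64 bits accumulated), s6 is accepted
// (4 + 64/32 == 6) and s7 is rejected.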
2739
2740struct RegInfo {
2741 StringLiteral Name;
2742 RegisterKind Kind;
2743};
2744
2745static constexpr RegInfo RegularRegisters[] = {
2746 {{"v"}, IS_VGPR},
2747 {{"s"}, IS_SGPR},
2748 {{"ttmp"}, IS_TTMP},
2749 {{"acc"}, IS_AGPR},
2750 {{"a"}, IS_AGPR},
2751};
2752
2753static bool isRegularReg(RegisterKind Kind) {
2754 return Kind == IS_VGPR ||
2755 Kind == IS_SGPR ||
2756 Kind == IS_TTMP ||
2757 Kind == IS_AGPR;
2758}
2759
2760static const RegInfo* getRegularRegInfo(StringRef Str) {
2761 for (const RegInfo &Reg : RegularRegisters)
2762 if (Str.starts_with(Reg.Name))
2763 return &Reg;
2764 return nullptr;
2765}
2766
2767static bool getRegNum(StringRef Str, unsigned& Num) {
2768 return !Str.getAsInteger(10, Num);
2769}
2770
2771bool
2772AMDGPUAsmParser::isRegister(const AsmToken &Token,
2773 const AsmToken &NextToken) const {
2774
2775 // A list of consecutive registers: [s0,s1,s2,s3]
2776 if (Token.is(AsmToken::LBrac))
2777 return true;
2778
2779 if (!Token.is(AsmToken::Identifier))
2780 return false;
2781
2782 // A single register like s0 or a range of registers like s[0:1]
2783
2784 StringRef Str = Token.getString();
2785 const RegInfo *Reg = getRegularRegInfo(Str);
2786 if (Reg) {
2787 StringRef RegName = Reg->Name;
2788 StringRef RegSuffix = Str.substr(RegName.size());
2789 if (!RegSuffix.empty()) {
2790 RegSuffix.consume_back(".l");
2791 RegSuffix.consume_back(".h");
2792 unsigned Num;
2793 // A single register with an index: rXX
2794 if (getRegNum(RegSuffix, Num))
2795 return true;
2796 } else {
2797 // A range of registers: r[XX:YY].
2798 if (NextToken.is(AsmToken::LBrac))
2799 return true;
2800 }
2801 }
2802
2803 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2804}
2805
2806bool
2807AMDGPUAsmParser::isRegister()
2808{
2809 return isRegister(getToken(), peekToken());
2810}
2811
2812unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2813 unsigned SubReg, unsigned RegWidth,
2814 SMLoc Loc) {
2815 assert(isRegularReg(RegKind));
2816
2817 unsigned AlignSize = 1;
2818 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2819 // SGPR and TTMP registers must be aligned.
2820 // Max required alignment is 4 dwords.
2821 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2822 }
2823
2824 if (RegNum % AlignSize != 0) {
2825 Error(Loc, "invalid register alignment");
2826 return AMDGPU::NoRegister;
2827 }
2828
2829 unsigned RegIdx = RegNum / AlignSize;
2830 int RCID = getRegClass(RegKind, RegWidth);
2831 if (RCID == -1) {
2832 Error(Loc, "invalid or unsupported register size");
2833 return AMDGPU::NoRegister;
2834 }
2835
2836 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2837 const MCRegisterClass RC = TRI->getRegClass(RCID);
2838 if (RegIdx >= RC.getNumRegs()) {
2839 Error(Loc, "register index is out of range");
2840 return AMDGPU::NoRegister;
2841 }
2842
2843 unsigned Reg = RC.getRegister(RegIdx);
2844
2845 if (SubReg) {
2846 Reg = TRI->getSubReg(Reg, SubReg);
2847
2848 // Currently all regular registers have their .l and .h subregisters, so
2849 // we should never need to generate an error here.
2850 assert(Reg && "Invalid subregister!");
2851 }
2852
2853 return Reg;
2854}
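// A standalone sketch of the SGPR/TTMP alignment rule applied above (assumed
// semantics: a tuple must start at an index that is a multiple of its size in
// dwords, rounded up to a power of two and capped at 4 dwords).
static bool isValidSgprTupleStart(unsigned RegNum, unsigned RegWidthInBits) {
  unsigned AlignSize = std::min(llvm::bit_ceil(RegWidthInBits / 32u), 4u);
  return RegNum % AlignSize == 0;
}
// isValidSgprTupleStart(2, 64)  -> true   (s[2:3] is a valid 64-bit pair)
// isValidSgprTupleStart(3, 64)  -> false  (s[3:4] is misaligned)
// isValidSgprTupleStart(4, 256) -> true   (an 8-dword tuple only needs 4-alignment)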
2855
2856bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2857 int64_t RegLo, RegHi;
2858 if (!skipToken(AsmToken::LBrac, "missing register index"))
2859 return false;
2860
2861 SMLoc FirstIdxLoc = getLoc();
2862 SMLoc SecondIdxLoc;
2863
2864 if (!parseExpr(RegLo))
2865 return false;
2866
2867 if (trySkipToken(AsmToken::Colon)) {
2868 SecondIdxLoc = getLoc();
2869 if (!parseExpr(RegHi))
2870 return false;
2871 } else {
2872 RegHi = RegLo;
2873 }
2874
2875 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2876 return false;
2877
2878 if (!isUInt<32>(RegLo)) {
2879 Error(FirstIdxLoc, "invalid register index");
2880 return false;
2881 }
2882
2883 if (!isUInt<32>(RegHi)) {
2884 Error(SecondIdxLoc, "invalid register index");
2885 return false;
2886 }
2887
2888 if (RegLo > RegHi) {
2889 Error(FirstIdxLoc, "first register index should not exceed second index");
2890 return false;
2891 }
2892
2893 Num = static_cast<unsigned>(RegLo);
2894 RegWidth = 32 * ((RegHi - RegLo) + 1);
2895 return true;
2896}
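// A quick sketch of the width computation for a bracketed range such as
// s[Lo:Hi]: each index covers one dword, so the register width in bits is
// 32 * (Hi - Lo + 1); a single index yields 32 bits.
static unsigned rangeWidthInBits(unsigned Lo, unsigned Hi) {
  return 32 * (Hi - Lo + 1);
}
// rangeWidthInBits(0, 3) == 128   // e.g. s[0:3]
// rangeWidthInBits(5, 5) == 32    // e.g. v[5:5], equivalent to v5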
2897
2898unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2899 unsigned &RegNum, unsigned &RegWidth,
2900 SmallVectorImpl<AsmToken> &Tokens) {
2901 assert(isToken(AsmToken::Identifier));
2902 unsigned Reg = getSpecialRegForName(getTokenStr());
2903 if (Reg) {
2904 RegNum = 0;
2905 RegWidth = 32;
2906 RegKind = IS_SPECIAL;
2907 Tokens.push_back(getToken());
2908 lex(); // skip register name
2909 }
2910 return Reg;
2911}
2912
2913unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2914 unsigned &RegNum, unsigned &RegWidth,
2915 SmallVectorImpl<AsmToken> &Tokens) {
2916 assert(isToken(AsmToken::Identifier));
2917 StringRef RegName = getTokenStr();
2918 auto Loc = getLoc();
2919
2920 const RegInfo *RI = getRegularRegInfo(RegName);
2921 if (!RI) {
2922 Error(Loc, "invalid register name");
2923 return AMDGPU::NoRegister;
2924 }
2925
2926 Tokens.push_back(getToken());
2927 lex(); // skip register name
2928
2929 RegKind = RI->Kind;
2930 StringRef RegSuffix = RegName.substr(RI->Name.size());
2931 unsigned SubReg = NoSubRegister;
2932 if (!RegSuffix.empty()) {
2933 // We don't know the opcode until we are done parsing, so we don't know if
2934 // registers should be 16 or 32 bit. It is therefore mandatory to put .l or
2935 // .h to correctly specify 16-bit registers. We also can't determine the class,
2936 // VGPR_16_Lo128 or VGPR_16, so always parse them as VGPR_16.
2937 if (RegSuffix.consume_back(".l"))
2938 SubReg = AMDGPU::lo16;
2939 else if (RegSuffix.consume_back(".h"))
2940 SubReg = AMDGPU::hi16;
2941
2942 // Single 32-bit register: vXX.
2943 if (!getRegNum(RegSuffix, RegNum)) {
2944 Error(Loc, "invalid register index");
2945 return AMDGPU::NoRegister;
2946 }
2947 RegWidth = 32;
2948 } else {
2949 // Range of registers: v[XX:YY]. ":YY" is optional.
2950 if (!ParseRegRange(RegNum, RegWidth))
2951 return AMDGPU::NoRegister;
2952 }
2953
2954 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2955}
2956
2957unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2958 unsigned &RegWidth,
2959 SmallVectorImpl<AsmToken> &Tokens) {
2960 unsigned Reg = AMDGPU::NoRegister;
2961 auto ListLoc = getLoc();
2962
2963 if (!skipToken(AsmToken::LBrac,
2964 "expected a register or a list of registers")) {
2965 return AMDGPU::NoRegister;
2966 }
2967
2968 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2969
2970 auto Loc = getLoc();
2971 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2972 return AMDGPU::NoRegister;
2973 if (RegWidth != 32) {
2974 Error(Loc, "expected a single 32-bit register");
2975 return AMDGPU::NoRegister;
2976 }
2977
2978 for (; trySkipToken(AsmToken::Comma); ) {
2979 RegisterKind NextRegKind;
2980 unsigned NextReg, NextRegNum, NextRegWidth;
2981 Loc = getLoc();
2982
2983 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2984 NextRegNum, NextRegWidth,
2985 Tokens)) {
2986 return AMDGPU::NoRegister;
2987 }
2988 if (NextRegWidth != 32) {
2989 Error(Loc, "expected a single 32-bit register");
2990 return AMDGPU::NoRegister;
2991 }
2992 if (NextRegKind != RegKind) {
2993 Error(Loc, "registers in a list must be of the same kind");
2994 return AMDGPU::NoRegister;
2995 }
2996 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2997 return AMDGPU::NoRegister;
2998 }
2999
3000 if (!skipToken(AsmToken::RBrac,
3001 "expected a comma or a closing square bracket")) {
3002 return AMDGPU::NoRegister;
3003 }
3004
3005 if (isRegularReg(RegKind))
3006 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3007
3008 return Reg;
3009}
3010
3011bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3012 unsigned &RegNum, unsigned &RegWidth,
3013 SmallVectorImpl<AsmToken> &Tokens) {
3014 auto Loc = getLoc();
3015 Reg = AMDGPU::NoRegister;
3016
3017 if (isToken(AsmToken::Identifier)) {
3018 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3019 if (Reg == AMDGPU::NoRegister)
3020 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3021 } else {
3022 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3023 }
3024
3025 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3026 if (Reg == AMDGPU::NoRegister) {
3027 assert(Parser.hasPendingError());
3028 return false;
3029 }
3030
3031 if (!subtargetHasRegister(*TRI, Reg)) {
3032 if (Reg == AMDGPU::SGPR_NULL) {
3033 Error(Loc, "'null' operand is not supported on this GPU");
3034 } else {
3035 Error(Loc, "register not available on this GPU");
3036 }
3037 return false;
3038 }
3039
3040 return true;
3041}
3042
3043bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3044 unsigned &RegNum, unsigned &RegWidth,
3045 bool RestoreOnFailure /*=false*/) {
3046 Reg = AMDGPU::NoRegister;
3047
3049 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3050 if (RestoreOnFailure) {
3051 while (!Tokens.empty()) {
3052 getLexer().UnLex(Tokens.pop_back_val());
3053 }
3054 }
3055 return true;
3056 }
3057 return false;
3058}
3059
3060std::optional<StringRef>
3061AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3062 switch (RegKind) {
3063 case IS_VGPR:
3064 return StringRef(".amdgcn.next_free_vgpr");
3065 case IS_SGPR:
3066 return StringRef(".amdgcn.next_free_sgpr");
3067 default:
3068 return std::nullopt;
3069 }
3070}
3071
3072void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3073 auto SymbolName = getGprCountSymbolName(RegKind);
3074 assert(SymbolName && "initializing invalid register kind");
3075 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3076 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3077}
3078
3079bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3080 unsigned DwordRegIndex,
3081 unsigned RegWidth) {
3082 // Symbols are only defined for GCN targets
3083 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3084 return true;
3085
3086 auto SymbolName = getGprCountSymbolName(RegKind);
3087 if (!SymbolName)
3088 return true;
3089 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3090
3091 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3092 int64_t OldCount;
3093
3094 if (!Sym->isVariable())
3095 return !Error(getLoc(),
3096 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3097 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
3098 return !Error(
3099 getLoc(),
3100 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3101
3102 if (OldCount <= NewMax)
3103 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3104
3105 return true;
3106}
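// A minimal sketch of the bookkeeping above, with the MC symbol replaced by a
// plain integer (names are illustrative): after a register tuple starting at
// dword index Idx and spanning WidthBits, the ".amdgcn.next_free_{v,s}gpr"
// counter is raised to one past the highest dword index used.
static void noteRegisterUse(unsigned Idx, unsigned WidthBits, int64_t &NextFreeGpr) {
  int64_t HighestUsed = Idx + (WidthBits + 31) / 32 - 1; // divideCeil(WidthBits, 32) - 1
  if (NextFreeGpr <= HighestUsed)
    NextFreeGpr = HighestUsed + 1;
}
// Example: with NextFreeGpr == 0, using v[4:7] (Idx = 4, WidthBits = 128)
// raises NextFreeGpr to 8.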
3107
3108std::unique_ptr<AMDGPUOperand>
3109AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3110 const auto &Tok = getToken();
3111 SMLoc StartLoc = Tok.getLoc();
3112 SMLoc EndLoc = Tok.getEndLoc();
3113 RegisterKind RegKind;
3114 unsigned Reg, RegNum, RegWidth;
3115
3116 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3117 return nullptr;
3118 }
3119 if (isHsaAbi(getSTI())) {
3120 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3121 return nullptr;
3122 } else
3123 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3124 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3125}
3126
3127ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3128 bool HasSP3AbsModifier, bool HasLit) {
3129 // TODO: add syntactic sugar for 1/(2*PI)
3130
3131 if (isRegister())
3132 return ParseStatus::NoMatch;
3133 assert(!isModifier());
3134
3135 if (!HasLit) {
3136 HasLit = trySkipId("lit");
3137 if (HasLit) {
3138 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3139 return ParseStatus::Failure;
3140 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
3141 if (S.isSuccess() &&
3142 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3143 return ParseStatus::Failure;
3144 return S;
3145 }
3146 }
3147
3148 const auto& Tok = getToken();
3149 const auto& NextTok = peekToken();
3150 bool IsReal = Tok.is(AsmToken::Real);
3151 SMLoc S = getLoc();
3152 bool Negate = false;
3153
3154 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3155 lex();
3156 IsReal = true;
3157 Negate = true;
3158 }
3159
3160 AMDGPUOperand::Modifiers Mods;
3161 Mods.Lit = HasLit;
3162
3163 if (IsReal) {
3164 // Floating-point expressions are not supported.
3165 // Can only allow floating-point literals with an
3166 // optional sign.
3167
3168 StringRef Num = getTokenStr();
3169 lex();
3170
3171 APFloat RealVal(APFloat::IEEEdouble());
3172 auto roundMode = APFloat::rmNearestTiesToEven;
3173 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3174 return ParseStatus::Failure;
3175 if (Negate)
3176 RealVal.changeSign();
3177
3178 Operands.push_back(
3179 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3180 AMDGPUOperand::ImmTyNone, true));
3181 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3182 Op.setModifiers(Mods);
3183
3184 return ParseStatus::Success;
3185
3186 } else {
3187 int64_t IntVal;
3188 const MCExpr *Expr;
3189 SMLoc S = getLoc();
3190
3191 if (HasSP3AbsModifier) {
3192 // This is a workaround for handling expressions
3193 // as arguments of SP3 'abs' modifier, for example:
3194 // |1.0|
3195 // |-1|
3196 // |1+x|
3197 // This syntax is not compatible with syntax of standard
3198 // MC expressions (due to the trailing '|').
3199 SMLoc EndLoc;
3200 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3201 return ParseStatus::Failure;
3202 } else {
3203 if (Parser.parseExpression(Expr))
3204 return ParseStatus::Failure;
3205 }
3206
3207 if (Expr->evaluateAsAbsolute(IntVal)) {
3208 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3209 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3210 Op.setModifiers(Mods);
3211 } else {
3212 if (HasLit)
3213 return ParseStatus::NoMatch;
3214 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3215 }
3216
3217 return ParseStatus::Success;
3218 }
3219
3220 return ParseStatus::NoMatch;
3221}
3222
3223ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3224 if (!isRegister())
3225 return ParseStatus::NoMatch;
3226
3227 if (auto R = parseRegister()) {
3228 assert(R->isReg());
3229 Operands.push_back(std::move(R));
3230 return ParseStatus::Success;
3231 }
3232 return ParseStatus::Failure;
3233}
3234
3235ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3236 bool HasSP3AbsMod, bool HasLit) {
3237 ParseStatus Res = parseReg(Operands);
3238 if (!Res.isNoMatch())
3239 return Res;
3240 if (isModifier())
3241 return ParseStatus::NoMatch;
3242 return parseImm(Operands, HasSP3AbsMod, HasLit);
3243}
3244
3245bool
3246AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3247 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3248 const auto &str = Token.getString();
3249 return str == "abs" || str == "neg" || str == "sext";
3250 }
3251 return false;
3252}
3253
3254bool
3255AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3256 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3257}
3258
3259bool
3260AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3261 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3262}
3263
3264bool
3265AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3266 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3267}
3268
3269// Check if this is an operand modifier or an opcode modifier
3270// which may look like an expression but is not. We should
3271// avoid parsing these modifiers as expressions. Currently
3272// recognized sequences are:
3273// |...|
3274// abs(...)
3275// neg(...)
3276// sext(...)
3277// -reg
3278// -|...|
3279// -abs(...)
3280// name:...
3281//
3282bool
3283AMDGPUAsmParser::isModifier() {
3284
3285 AsmToken Tok = getToken();
3286 AsmToken NextToken[2];
3287 peekTokens(NextToken);
3288
3289 return isOperandModifier(Tok, NextToken[0]) ||
3290 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3291 isOpcodeModifierWithVal(Tok, NextToken[0]);
3292}
3293
3294// Check if the current token is an SP3 'neg' modifier.
3295// Currently this modifier is allowed in the following context:
3296//
3297// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3298// 2. Before an 'abs' modifier: -abs(...)
3299// 3. Before an SP3 'abs' modifier: -|...|
3300//
3301// In all other cases "-" is handled as a part
3302// of an expression that follows the sign.
3303//
3304// Note: When "-" is followed by an integer literal,
3305// this is interpreted as integer negation rather
3306// than a floating-point NEG modifier applied to the literal.
3307// Besides being counter-intuitive, such use of the floating-point
3308// NEG modifier would result in different meanings
3309// of integer literals used with VOP1/2/C and VOP3,
3310// for example:
3311// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3312// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3313// Negative fp literals with a preceding "-" are
3314// handled likewise for uniformity.
3315//
3316bool
3317AMDGPUAsmParser::parseSP3NegModifier() {
3318
3319 AsmToken NextToken[2];
3320 peekTokens(NextToken);
3321
3322 if (isToken(AsmToken::Minus) &&
3323 (isRegister(NextToken[0], NextToken[1]) ||
3324 NextToken[0].is(AsmToken::Pipe) ||
3325 isId(NextToken[0], "abs"))) {
3326 lex();
3327 return true;
3328 }
3329
3330 return false;
3331}
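// A small compile-time illustration of the note above (bit patterns only):
// integer negation of 1 and the floating-point NEG modifier, i.e. a sign-bit
// flip, applied to the literal 1 yield different 32-bit encodings, matching
// the VOP1 and VOP3 examples in the comment.
static_assert(static_cast<uint32_t>(-1) == 0xFFFFFFFFu,
              "integer negation of 1 (VOP1 form)");
static_assert((1u ^ 0x80000000u) == 0x80000001u,
              "FP NEG modifier applied to literal 1 (VOP3 form)");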
3332
3333ParseStatus
3334AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3335 bool AllowImm) {
3336 bool Neg, SP3Neg;
3337 bool Abs, SP3Abs;
3338 bool Lit;
3339 SMLoc Loc;
3340
3341 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3342 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3343 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3344
3345 SP3Neg = parseSP3NegModifier();
3346
3347 Loc = getLoc();
3348 Neg = trySkipId("neg");
3349 if (Neg && SP3Neg)
3350 return Error(Loc, "expected register or immediate");
3351 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3352 return ParseStatus::Failure;
3353
3354 Abs = trySkipId("abs");
3355 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3356 return ParseStatus::Failure;
3357
3358 Lit = trySkipId("lit");
3359 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3360 return ParseStatus::Failure;
3361
3362 Loc = getLoc();
3363 SP3Abs = trySkipToken(AsmToken::Pipe);
3364 if (Abs && SP3Abs)
3365 return Error(Loc, "expected register or immediate");
3366
3367 ParseStatus Res;
3368 if (AllowImm) {
3369 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3370 } else {
3371 Res = parseReg(Operands);
3372 }
3373 if (!Res.isSuccess())
3374 return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3375
3376 if (Lit && !Operands.back()->isImm())
3377 Error(Loc, "expected immediate with lit modifier");
3378
3379 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3380 return ParseStatus::Failure;
3381 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3382 return ParseStatus::Failure;
3383 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3384 return ParseStatus::Failure;
3385 if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3386 return ParseStatus::Failure;
3387
3388 AMDGPUOperand::Modifiers Mods;
3389 Mods.Abs = Abs || SP3Abs;
3390 Mods.Neg = Neg || SP3Neg;
3391 Mods.Lit = Lit;
3392
3393 if (Mods.hasFPModifiers() || Lit) {
3394 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3395 if (Op.isExpr())
3396 return Error(Op.getStartLoc(), "expected an absolute expression");
3397 Op.setModifiers(Mods);
3398 }
3399 return ParseStatus::Success;
3400}
3401
3402ParseStatus
3403AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3404 bool AllowImm) {
3405 bool Sext = trySkipId("sext");
3406 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3407 return ParseStatus::Failure;
3408
3409 ParseStatus Res;
3410 if (AllowImm) {
3411 Res = parseRegOrImm(Operands);
3412 } else {
3413 Res = parseReg(Operands);
3414 }
3415 if (!Res.isSuccess())
3416 return Sext ? ParseStatus::Failure : Res;
3417
3418 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3419 return ParseStatus::Failure;
3420
3421 AMDGPUOperand::Modifiers Mods;
3422 Mods.Sext = Sext;
3423
3424 if (Mods.hasIntModifiers()) {
3425 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3426 if (Op.isExpr())
3427 return Error(Op.getStartLoc(), "expected an absolute expression");
3428 Op.setModifiers(Mods);
3429 }
3430
3431 return ParseStatus::Success;
3432}
3433
3434ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3435 return parseRegOrImmWithFPInputMods(Operands, false);
3436}
3437
3438ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3439 return parseRegOrImmWithIntInputMods(Operands, false);
3440}
3441
3442ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3443 auto Loc = getLoc();
3444 if (trySkipId("off")) {
3445 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3446 AMDGPUOperand::ImmTyOff, false));
3447 return ParseStatus::Success;
3448 }
3449
3450 if (!isRegister())
3451 return ParseStatus::NoMatch;
3452
3453 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3454 if (Reg) {
3455 Operands.push_back(std::move(Reg));
3456 return ParseStatus::Success;
3457 }
3458
3459 return ParseStatus::Failure;
3460}
3461
3462unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3463 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3464
3465 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3466 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3467 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3468 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3469 return Match_InvalidOperand;
3470
3471 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3472 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3473 // v_mac_f32/16 allow only dst_sel == DWORD;
3474 auto OpNum =
3475 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3476 const auto &Op = Inst.getOperand(OpNum);
3477 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3478 return Match_InvalidOperand;
3479 }
3480 }
3481
3482 return Match_Success;
3483}
3484
3485static ArrayRef<unsigned> getAllVariants() {
3486 static const unsigned Variants[] = {
3487 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3488 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3489 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3490 };
3491
3492 return ArrayRef(Variants);
3493}
3494
3495// What asm variants we should check
3496ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3497 if (isForcedDPP() && isForcedVOP3()) {
3498 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3499 return ArrayRef(Variants);
3500 }
3501 if (getForcedEncodingSize() == 32) {
3502 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3503 return ArrayRef(Variants);
3504 }
3505
3506 if (isForcedVOP3()) {
3507 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3508 return ArrayRef(Variants);
3509 }
3510
3511 if (isForcedSDWA()) {
3512 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3513 AMDGPUAsmVariants::SDWA9};
3514 return ArrayRef(Variants);
3515 }
3516
3517 if (isForcedDPP()) {
3518 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3519 return ArrayRef(Variants);
3520 }
3521
3522 return getAllVariants();
3523}
3524
3525StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3526 if (isForcedDPP() && isForcedVOP3())
3527 return "e64_dpp";
3528
3529 if (getForcedEncodingSize() == 32)
3530 return "e32";
3531
3532 if (isForcedVOP3())
3533 return "e64";
3534
3535 if (isForcedSDWA())
3536 return "sdwa";
3537
3538 if (isForcedDPP())
3539 return "dpp";
3540
3541 return "";
3542}
3543
3544unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3545 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3546 for (MCPhysReg Reg : Desc.implicit_uses()) {
3547 switch (Reg) {
3548 case AMDGPU::FLAT_SCR:
3549 case AMDGPU::VCC:
3550 case AMDGPU::VCC_LO:
3551 case AMDGPU::VCC_HI:
3552 case AMDGPU::M0:
3553 return Reg;
3554 default:
3555 break;
3556 }
3557 }
3558 return AMDGPU::NoRegister;
3559}
3560
3561// NB: This code is correct only when used to check constant
3562// bus limitations because GFX7 supports no f16 inline constants.
3563// Note that there are no cases when a GFX7 opcode violates
3564// constant bus limitations due to the use of an f16 constant.
3565bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3566 unsigned OpIdx) const {
3567 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3568
3569 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3570 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3571 return false;
3572 }
3573
3574 const MCOperand &MO = Inst.getOperand(OpIdx);
3575
3576 int64_t Val = MO.getImm();
3577 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3578
3579 switch (OpSize) { // expected operand size
3580 case 8:
3581 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3582 case 4:
3583 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3584 case 2: {
3585 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3589 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3590
3595
3600
3605
3610 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3611
3616 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3617
3618 llvm_unreachable("invalid operand type");
3619 }
3620 default:
3621 llvm_unreachable("invalid operand size");
3622 }
3623}
3624
3625unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3626 if (!isGFX10Plus())
3627 return 1;
3628
3629 switch (Opcode) {
3630 // 64-bit shift instructions can use only one scalar value input
3631 case AMDGPU::V_LSHLREV_B64_e64:
3632 case AMDGPU::V_LSHLREV_B64_gfx10:
3633 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3634 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3635 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3636 case AMDGPU::V_LSHRREV_B64_e64:
3637 case AMDGPU::V_LSHRREV_B64_gfx10:
3638 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3639 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3640 case AMDGPU::V_ASHRREV_I64_e64:
3641 case AMDGPU::V_ASHRREV_I64_gfx10:
3642 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3643 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3644 case AMDGPU::V_LSHL_B64_e64:
3645 case AMDGPU::V_LSHR_B64_e64:
3646 case AMDGPU::V_ASHR_I64_e64:
3647 return 1;
3648 default:
3649 return 2;
3650 }
3651}
3652
3653constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3654using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3655
3656// Get regular operand indices in the same order as specified
3657// in the instruction (but append mandatory literals to the end).
3658static OperandIndices getSrcOperandIndices(unsigned Opcode,
3659 bool AddMandatoryLiterals = false) {
3660
3661 int16_t ImmIdx =
3662 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3663
3664 if (isVOPD(Opcode)) {
3665 int16_t ImmDeferredIdx =
3666 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3667 : -1;
3668
3669 return {getNamedOperandIdx(Opcode, OpName::src0X),
3670 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3671 getNamedOperandIdx(Opcode, OpName::src0Y),
3672 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3673 ImmDeferredIdx,
3674 ImmIdx};
3675 }
3676
3677 return {getNamedOperandIdx(Opcode, OpName::src0),
3678 getNamedOperandIdx(Opcode, OpName::src1),
3679 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3680}
3681
3682bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3683 const MCOperand &MO = Inst.getOperand(OpIdx);
3684 if (MO.isImm()) {
3685 return !isInlineConstant(Inst, OpIdx);
3686 } else if (MO.isReg()) {
3687 auto Reg = MO.getReg();
3688 if (!Reg) {
3689 return false;
3690 }
3691 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3692 auto PReg = mc2PseudoReg(Reg);
3693 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3694 } else {
3695 return true;
3696 }
3697}
3698
3699// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3700// Writelane is special in that it can use an SGPR and M0 (which would normally
3701// count as using the constant bus twice - but in this case it is allowed since
3702// the lane selector doesn't count as a use of the constant bus). However, it is
3703// still required to abide by the 1 SGPR rule.
3704static bool checkWriteLane(const MCInst &Inst) {
3705 const unsigned Opcode = Inst.getOpcode();
3706 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3707 return false;
3708 const MCOperand &LaneSelOp = Inst.getOperand(2);
3709 if (!LaneSelOp.isReg())
3710 return false;
3711 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3712 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3713}
3714
3715bool AMDGPUAsmParser::validateConstantBusLimitations(
3716 const MCInst &Inst, const OperandVector &Operands) {
3717 const unsigned Opcode = Inst.getOpcode();
3718 const MCInstrDesc &Desc = MII.get(Opcode);
3719 unsigned LastSGPR = AMDGPU::NoRegister;
3720 unsigned ConstantBusUseCount = 0;
3721 unsigned NumLiterals = 0;
3722 unsigned LiteralSize;
3723
3724 if (!(Desc.TSFlags &
3727 !isVOPD(Opcode))
3728 return true;
3729
3730 if (checkWriteLane(Inst))
3731 return true;
3732
3733 // Check special imm operands (used by madmk, etc)
3734 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3735 ++NumLiterals;
3736 LiteralSize = 4;
3737 }
3738
3739 SmallDenseSet<unsigned> SGPRsUsed;
3740 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3741 if (SGPRUsed != AMDGPU::NoRegister) {
3742 SGPRsUsed.insert(SGPRUsed);
3743 ++ConstantBusUseCount;
3744 }
3745
3746 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3747
3748 for (int OpIdx : OpIndices) {
3749 if (OpIdx == -1)
3750 continue;
3751
3752 const MCOperand &MO = Inst.getOperand(OpIdx);
3753 if (usesConstantBus(Inst, OpIdx)) {
3754 if (MO.isReg()) {
3755 LastSGPR = mc2PseudoReg(MO.getReg());
3756 // Pairs of registers with partial intersections like these:
3757 // s0, s[0:1]
3758 // flat_scratch_lo, flat_scratch
3759 // flat_scratch_lo, flat_scratch_hi
3760 // are theoretically valid but they are disabled anyway.
3761 // Note that this code mimics SIInstrInfo::verifyInstruction
3762 if (SGPRsUsed.insert(LastSGPR).second) {
3763 ++ConstantBusUseCount;
3764 }
3765 } else { // Expression or a literal
3766
3767 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3768 continue; // special operand like VINTERP attr_chan
3769
3770 // An instruction may use only one literal.
3771 // This has been validated on the previous step.
3772 // See validateVOPLiteral.
3773 // This literal may be used as more than one operand.
3774 // If all these operands are of the same size,
3775 // this literal counts as one scalar value.
3776 // Otherwise it counts as 2 scalar values.
3777 // See "GFX10 Shader Programming", section 3.6.2.3.
3778
3779 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3780 if (Size < 4)
3781 Size = 4;
3782
3783 if (NumLiterals == 0) {
3784 NumLiterals = 1;
3785 LiteralSize = Size;
3786 } else if (LiteralSize != Size) {
3787 NumLiterals = 2;
3788 }
3789 }
3790 }
3791 }
3792 ConstantBusUseCount += NumLiterals;
3793
3794 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3795 return true;
3796
3797 SMLoc LitLoc = getLitLoc(Operands);
3798 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3799 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3800 Error(Loc, "invalid operand (violates constant bus restrictions)");
3801 return false;
3802}
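// Editorial sketch (assumption, not from the original source): the size-based
// literal counting above means that, for example, reusing one 32-bit literal
// for two 32-bit source operands contributes a single scalar value to the
// constant bus, while mixing operand sizes (a 32-bit and a 64-bit use of the
// same literal) is charged as two scalar values and may then trip the
// "violates constant bus restrictions" error on targets whose bus limit is
// already consumed by an SGPR operand.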
3803
3804bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3805 const MCInst &Inst, const OperandVector &Operands) {
3806
3807 const unsigned Opcode = Inst.getOpcode();
3808 if (!isVOPD(Opcode))
3809 return true;
3810
3811 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3812
3813 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3814 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3815 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3816 ? Opr.getReg()
3817 : MCRegister();
3818 };
3819
3820 // On GFX12, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2 source-cache.
3821 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3822
3823 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3824 auto InvalidCompOprIdx =
3825 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3826 if (!InvalidCompOprIdx)
3827 return true;
3828
3829 auto CompOprIdx = *InvalidCompOprIdx;
3830 auto ParsedIdx =
3831 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3832 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3833 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3834
3835 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3836 if (CompOprIdx == VOPD::Component::DST) {
3837 Error(Loc, "one dst register must be even and the other odd");
3838 } else {
3839 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3840 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3841 " operands must use different VGPR banks");
3842 }
3843
3844 return false;
3845}
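// Editorial sketch (illustrative, not part of the original source): VOPD
// component sources are assigned to VGPR banks by register index, so a pair
// along the lines of
//   v_dual_add_f32 v0, v1, v2 :: v_dual_mul_f32 v3, v5, v6
// would be flagged here if v1 and v5 fall into the same bank, producing the
// "src0 operands must use different VGPR banks" diagnostic; likewise two even
// (or two odd) destination registers trigger the dst parity error above.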
3846
3847bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3848
3849 const unsigned Opc = Inst.getOpcode();
3850 const MCInstrDesc &Desc = MII.get(Opc);
3851
3852 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3853 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3854 assert(ClampIdx != -1);
3855 return Inst.getOperand(ClampIdx).getImm() == 0;
3856 }
3857
3858 return true;
3859}
3860
3861constexpr uint64_t MIMGFlags =
3862 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3863
3864bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3865 const SMLoc &IDLoc) {
3866
3867 const unsigned Opc = Inst.getOpcode();
3868 const MCInstrDesc &Desc = MII.get(Opc);
3869
3870 if ((Desc.TSFlags & MIMGFlags) == 0)
3871 return true;
3872
3873 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3874 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3875 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3876
3877 assert(VDataIdx != -1);
3878
3879 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3880 return true;
3881
3882 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3883 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3884 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3885 if (DMask == 0)
3886 DMask = 1;
3887
3888 bool IsPackedD16 = false;
3889 unsigned DataSize =
3890 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3891 if (hasPackedD16()) {
3892 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3893 IsPackedD16 = D16Idx >= 0;
3894 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3895 DataSize = (DataSize + 1) / 2;
3896 }
3897
3898 if ((VDataSize / 4) == DataSize + TFESize)
3899 return true;
3900
3901 StringRef Modifiers;
3902 if (isGFX90A())
3903 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3904 else
3905 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3906
3907 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3908 return false;
3909}
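// Editorial sketch (assumption): the check above ties the vdata register count
// to popcount(dmask), so, for example, an image load written with dmask:0x7 is
// expected to supply a 3-register vdata tuple (plus one more register when tfe
// is set), and a packed-d16 form roughly halves that requirement by rounding
// up.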
3910
3911bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3912 const SMLoc &IDLoc) {
3913 const unsigned Opc = Inst.getOpcode();
3914 const MCInstrDesc &Desc = MII.get(Opc);
3915
3916 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3917 return true;
3918
3919 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3921 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3922 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3923 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3924 int RSrcOpName = Desc.TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
3925 : AMDGPU::OpName::rsrc;
3926 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
3927 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3928 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3929
3930 assert(VAddr0Idx != -1);
3931 assert(SrsrcIdx != -1);
3932 assert(SrsrcIdx > VAddr0Idx);
3933
3934 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3935 if (BaseOpcode->BVH) {
3936 if (IsA16 == BaseOpcode->A16)
3937 return true;
3938 Error(IDLoc, "image address size does not match a16");
3939 return false;
3940 }
3941
3942 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3943 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3944 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3945 unsigned ActualAddrSize =
3946 IsNSA ? SrsrcIdx - VAddr0Idx
3947 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3948
3949 unsigned ExpectedAddrSize =
3950 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3951
3952 if (IsNSA) {
3953 if (hasPartialNSAEncoding() &&
3954 ExpectedAddrSize >
3955 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
3956 int VAddrLastIdx = SrsrcIdx - 1;
3957 unsigned VAddrLastSize =
3958 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3959
3960 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3961 }
3962 } else {
3963 if (ExpectedAddrSize > 12)
3964 ExpectedAddrSize = 16;
3965
3966 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3967 // This provides backward compatibility for assembly created
3968 // before 160b/192b/224b types were directly supported.
3969 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3970 return true;
3971 }
3972
3973 if (ActualAddrSize == ExpectedAddrSize)
3974 return true;
3975
3976 Error(IDLoc, "image address size does not match dim and a16");
3977 return false;
3978}
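// Editorial sketch (assumption): for NSA encodings the actual address size is
// simply the number of vaddr operands between vaddr0 and srsrc, while for a
// packed vaddr it is the register width of vaddr0 in dwords; e.g. a plain 2D
// sample with a16 disabled would expect two address registers, and an
// oversized tuple only passes through the explicit 8-VGPR compatibility case
// above.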
3979
3980bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3981
3982 const unsigned Opc = Inst.getOpcode();
3983 const MCInstrDesc &Desc = MII.get(Opc);
3984
3985 if ((Desc.TSFlags & MIMGFlags) == 0)
3986 return true;
3987 if (!Desc.mayLoad() || !Desc.mayStore())
3988 return true; // Not atomic
3989
3990 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3991 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3992
3993 // This is an incomplete check because image_atomic_cmpswap
3994 // may only use 0x3 and 0xf while other atomic operations
3995 // may use 0x1 and 0x3. However these limitations are
3996 // verified when we check that dmask matches dst size.
3997 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3998}
3999
4000bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4001
4002 const unsigned Opc = Inst.getOpcode();
4003 const MCInstrDesc &Desc = MII.get(Opc);
4004
4005 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4006 return true;
4007
4008 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4009 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4010
4011 // GATHER4 instructions use dmask in a different fashion compared to
4012 // other MIMG instructions. The only useful DMASK values are
4013 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4014 // (red,red,red,red) etc.) The ISA document doesn't mention
4015 // this.
4016 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4017}
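// Editorial sketch (illustrative): under the rule above a gather such as
//   image_gather4 ... dmask:0x4
// selects the blue channel and is accepted, whereas dmask:0x5 (two bits set)
// would be rejected with "only one bit must be set".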
4018
4019bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4020 const unsigned Opc = Inst.getOpcode();
4021 const MCInstrDesc &Desc = MII.get(Opc);
4022
4023 if ((Desc.TSFlags & MIMGFlags) == 0)
4024 return true;
4025
4026 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4027 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4028 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4029
4030 if (!BaseOpcode->MSAA)
4031 return true;
4032
4033 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4034 assert(DimIdx != -1);
4035
4036 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4037 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4038
4039 return DimInfo->MSAA;
4040}
4041
4042static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4043{
4044 switch (Opcode) {
4045 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4046 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4047 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4048 return true;
4049 default:
4050 return false;
4051 }
4052}
4053
4054// movrels* opcodes should only allow VGPRs as src0.
4055// This is specified in .td description for vop1/vop3,
4056// but sdwa is handled differently. See isSDWAOperand.
4057bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4058 const OperandVector &Operands) {
4059
4060 const unsigned Opc = Inst.getOpcode();
4061 const MCInstrDesc &Desc = MII.get(Opc);
4062
4063 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4064 return true;
4065
4066 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4067 assert(Src0Idx != -1);
4068
4069 SMLoc ErrLoc;
4070 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4071 if (Src0.isReg()) {
4072 auto Reg = mc2PseudoReg(Src0.getReg());
4073 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4074 if (!isSGPR(Reg, TRI))
4075 return true;
4076 ErrLoc = getRegLoc(Reg, Operands);
4077 } else {
4078 ErrLoc = getConstLoc(Operands);
4079 }
4080
4081 Error(ErrLoc, "source operand must be a VGPR");
4082 return false;
4083}
4084
4085bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4086 const OperandVector &Operands) {
4087
4088 const unsigned Opc = Inst.getOpcode();
4089
4090 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4091 return true;
4092
4093 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4094 assert(Src0Idx != -1);
4095
4096 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4097 if (!Src0.isReg())
4098 return true;
4099
4100 auto Reg = mc2PseudoReg(Src0.getReg());
4101 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4102 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4103 Error(getRegLoc(Reg, Operands),
4104 "source operand must be either a VGPR or an inline constant");
4105 return false;
4106 }
4107
4108 return true;
4109}
4110
4111bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4112 const OperandVector &Operands) {
4113 unsigned Opcode = Inst.getOpcode();
4114 const MCInstrDesc &Desc = MII.get(Opcode);
4115
4116 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4117 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4118 return true;
4119
4120 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4121 if (Src2Idx == -1)
4122 return true;
4123
4124 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4125 Error(getConstLoc(Operands),
4126 "inline constants are not allowed for this operand");
4127 return false;
4128 }
4129
4130 return true;
4131}
4132
4133bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4134 const OperandVector &Operands) {
4135 const unsigned Opc = Inst.getOpcode();
4136 const MCInstrDesc &Desc = MII.get(Opc);
4137
4138 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4139 return true;
4140
4141 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4142 if (Src2Idx == -1)
4143 return true;
4144
4145 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4146 if (!Src2.isReg())
4147 return true;
4148
4149 MCRegister Src2Reg = Src2.getReg();
4150 MCRegister DstReg = Inst.getOperand(0).getReg();
4151 if (Src2Reg == DstReg)
4152 return true;
4153
4154 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4155 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4156 return true;
4157
4158 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4159 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
4160 "source 2 operand must not partially overlap with dst");
4161 return false;
4162 }
4163
4164 return true;
4165}
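// Editorial sketch (assumption): for MFMA results wider than 128 bits the
// check above rejects a src2 accumulator that merely overlaps the destination
// tuple, e.g. dst a[0:15] with src2 a[8:23]; using exactly the same tuple for
// src2 and dst remains legal, as does any overlap for results of 128 bits or
// less.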
4166
4167bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4168 switch (Inst.getOpcode()) {
4169 default:
4170 return true;
4171 case V_DIV_SCALE_F32_gfx6_gfx7:
4172 case V_DIV_SCALE_F32_vi:
4173 case V_DIV_SCALE_F32_gfx10:
4174 case V_DIV_SCALE_F64_gfx6_gfx7:
4175 case V_DIV_SCALE_F64_vi:
4176 case V_DIV_SCALE_F64_gfx10:
4177 break;
4178 }
4179
4180 // TODO: Check that src0 = src1 or src2.
4181
4182 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4183 AMDGPU::OpName::src2_modifiers,
4184 AMDGPU::OpName::src2_modifiers}) {
4185 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4186 .getImm() &
4187 SISrcMods::ABS)
4188 return false;
4189 }
4190 }
4191
4192 return true;
4193}
4194
4195bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4196
4197 const unsigned Opc = Inst.getOpcode();
4198 const MCInstrDesc &Desc = MII.get(Opc);
4199
4200 if ((Desc.TSFlags & MIMGFlags) == 0)
4201 return true;
4202
4203 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4204 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4205 if (isCI() || isSI())
4206 return false;
4207 }
4208
4209 return true;
4210}
4211
4212static bool IsRevOpcode(const unsigned Opcode)
4213{
4214 switch (Opcode) {
4215 case AMDGPU::V_SUBREV_F32_e32:
4216 case AMDGPU::V_SUBREV_F32_e64:
4217 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4218 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4219 case AMDGPU::V_SUBREV_F32_e32_vi:
4220 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4221 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4222 case AMDGPU::V_SUBREV_F32_e64_vi:
4223
4224 case AMDGPU::V_SUBREV_CO_U32_e32:
4225 case AMDGPU::V_SUBREV_CO_U32_e64:
4226 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4227 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4228
4229 case AMDGPU::V_SUBBREV_U32_e32:
4230 case AMDGPU::V_SUBBREV_U32_e64:
4231 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4232 case AMDGPU::V_SUBBREV_U32_e32_vi:
4233 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4234 case AMDGPU::V_SUBBREV_U32_e64_vi:
4235
4236 case AMDGPU::V_SUBREV_U32_e32:
4237 case AMDGPU::V_SUBREV_U32_e64:
4238 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4239 case AMDGPU::V_SUBREV_U32_e32_vi:
4240 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4241 case AMDGPU::V_SUBREV_U32_e64_vi:
4242
4243 case AMDGPU::V_SUBREV_F16_e32:
4244 case AMDGPU::V_SUBREV_F16_e64:
4245 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4246 case AMDGPU::V_SUBREV_F16_e32_vi:
4247 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4248 case AMDGPU::V_SUBREV_F16_e64_vi:
4249
4250 case AMDGPU::V_SUBREV_U16_e32:
4251 case AMDGPU::V_SUBREV_U16_e64:
4252 case AMDGPU::V_SUBREV_U16_e32_vi:
4253 case AMDGPU::V_SUBREV_U16_e64_vi:
4254
4255 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4256 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4257 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4258
4259 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4260 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4261
4262 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4263 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4264
4265 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4266 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4267
4268 case AMDGPU::V_LSHRREV_B32_e32:
4269 case AMDGPU::V_LSHRREV_B32_e64:
4270 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4271 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4272 case AMDGPU::V_LSHRREV_B32_e32_vi:
4273 case AMDGPU::V_LSHRREV_B32_e64_vi:
4274 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4275 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4276
4277 case AMDGPU::V_ASHRREV_I32_e32:
4278 case AMDGPU::V_ASHRREV_I32_e64:
4279 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4280 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4281 case AMDGPU::V_ASHRREV_I32_e32_vi:
4282 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4283 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4284 case AMDGPU::V_ASHRREV_I32_e64_vi:
4285
4286 case AMDGPU::V_LSHLREV_B32_e32:
4287 case AMDGPU::V_LSHLREV_B32_e64:
4288 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4289 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4290 case AMDGPU::V_LSHLREV_B32_e32_vi:
4291 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4292 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4293 case AMDGPU::V_LSHLREV_B32_e64_vi:
4294
4295 case AMDGPU::V_LSHLREV_B16_e32:
4296 case AMDGPU::V_LSHLREV_B16_e64:
4297 case AMDGPU::V_LSHLREV_B16_e32_vi:
4298 case AMDGPU::V_LSHLREV_B16_e64_vi:
4299 case AMDGPU::V_LSHLREV_B16_gfx10:
4300
4301 case AMDGPU::V_LSHRREV_B16_e32:
4302 case AMDGPU::V_LSHRREV_B16_e64:
4303 case AMDGPU::V_LSHRREV_B16_e32_vi:
4304 case AMDGPU::V_LSHRREV_B16_e64_vi:
4305 case AMDGPU::V_LSHRREV_B16_gfx10:
4306
4307 case AMDGPU::V_ASHRREV_I16_e32:
4308 case AMDGPU::V_ASHRREV_I16_e64:
4309 case AMDGPU::V_ASHRREV_I16_e32_vi:
4310 case AMDGPU::V_ASHRREV_I16_e64_vi:
4311 case AMDGPU::V_ASHRREV_I16_gfx10:
4312
4313 case AMDGPU::V_LSHLREV_B64_e64:
4314 case AMDGPU::V_LSHLREV_B64_gfx10:
4315 case AMDGPU::V_LSHLREV_B64_vi:
4316
4317 case AMDGPU::V_LSHRREV_B64_e64:
4318 case AMDGPU::V_LSHRREV_B64_gfx10:
4319 case AMDGPU::V_LSHRREV_B64_vi:
4320
4321 case AMDGPU::V_ASHRREV_I64_e64:
4322 case AMDGPU::V_ASHRREV_I64_gfx10:
4323 case AMDGPU::V_ASHRREV_I64_vi:
4324
4325 case AMDGPU::V_PK_LSHLREV_B16:
4326 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4327 case AMDGPU::V_PK_LSHLREV_B16_vi:
4328
4329 case AMDGPU::V_PK_LSHRREV_B16:
4330 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4331 case AMDGPU::V_PK_LSHRREV_B16_vi:
4332 case AMDGPU::V_PK_ASHRREV_I16:
4333 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4334 case AMDGPU::V_PK_ASHRREV_I16_vi:
4335 return true;
4336 default:
4337 return false;
4338 }
4339}
4340
4341std::optional<StringRef>
4342AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4343
4344 using namespace SIInstrFlags;
4345 const unsigned Opcode = Inst.getOpcode();
4346 const MCInstrDesc &Desc = MII.get(Opcode);
4347
4348 // lds_direct register is defined so that it can be used
4349 // with 9-bit operands only. Ignore encodings which do not accept these.
4350 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4351 if ((Desc.TSFlags & Enc) == 0)
4352 return std::nullopt;
4353
4354 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4355 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4356 if (SrcIdx == -1)
4357 break;
4358 const auto &Src = Inst.getOperand(SrcIdx);
4359 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4360
4361 if (isGFX90A() || isGFX11Plus())
4362 return StringRef("lds_direct is not supported on this GPU");
4363
4364 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4365 return StringRef("lds_direct cannot be used with this instruction");
4366
4367 if (SrcName != OpName::src0)
4368 return StringRef("lds_direct may be used as src0 only");
4369 }
4370 }
4371
4372 return std::nullopt;
4373}
4374
4375SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4376 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4377 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4378 if (Op.isFlatOffset())
4379 return Op.getStartLoc();
4380 }
4381 return getLoc();
4382}
4383
4384bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4385 const OperandVector &Operands) {
4386 auto Opcode = Inst.getOpcode();
4387 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4388 if (OpNum == -1)
4389 return true;
4390
4391 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4392 if ((TSFlags & SIInstrFlags::FLAT))
4393 return validateFlatOffset(Inst, Operands);
4394
4395 if ((TSFlags & SIInstrFlags::SMRD))
4396 return validateSMEMOffset(Inst, Operands);
4397
4398 const auto &Op = Inst.getOperand(OpNum);
4399 if (isGFX12Plus() &&
4400 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4401 const unsigned OffsetSize = 24;
4402 if (!isIntN(OffsetSize, Op.getImm())) {
4403 Error(getFlatOffsetLoc(Operands),
4404 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4405 return false;
4406 }
4407 } else {
4408 const unsigned OffsetSize = 16;
4409 if (!isUIntN(OffsetSize, Op.getImm())) {
4410 Error(getFlatOffsetLoc(Operands),
4411 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4412 return false;
4413 }
4414 }
4415 return true;
4416}
4417
4418bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4419 const OperandVector &Operands) {
4420 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4421 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4422 return true;
4423
4424 auto Opcode = Inst.getOpcode();
4425 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4426 assert(OpNum != -1);
4427
4428 const auto &Op = Inst.getOperand(OpNum);
4429 if (!hasFlatOffsets() && Op.getImm() != 0) {
4430 Error(getFlatOffsetLoc(Operands),
4431 "flat offset modifier is not supported on this GPU");
4432 return false;
4433 }
4434
4435 // For pre-GFX12 FLAT instructions the offset must be positive;
4436 // MSB is ignored and forced to zero.
4437 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4438 bool AllowNegative =
4439 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4440 isGFX12Plus();
4441 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4442 Error(getFlatOffsetLoc(Operands),
4443 Twine("expected a ") +
4444 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4445 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4446 return false;
4447 }
4448
4449 return true;
4450}
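// Editorial sketch (assumption): with, say, a 13-bit signed flat offset,
// values in [-4096, 4095] pass the isIntN check above, while pre-GFX12 FLAT
// forms that are neither global nor scratch additionally reject negative
// offsets and report the narrower "unsigned offset" range in the diagnostic.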
4451
4452SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4453 // Start with second operand because SMEM Offset cannot be dst or src0.
4454 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4455 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4456 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4457 return Op.getStartLoc();
4458 }
4459 return getLoc();
4460}
4461
4462bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4463 const OperandVector &Operands) {
4464 if (isCI() || isSI())
4465 return true;
4466
4467 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4468 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4469 return true;
4470
4471 auto Opcode = Inst.getOpcode();
4472 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4473 if (OpNum == -1)
4474 return true;
4475
4476 const auto &Op = Inst.getOperand(OpNum);
4477 if (!Op.isImm())
4478 return true;
4479
4480 uint64_t Offset = Op.getImm();
4481 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4482 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4483 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4484 return true;
4485
4486 Error(getSMEMOffsetLoc(Operands),
4487 isGFX12Plus() ? "expected a 24-bit signed offset"
4488 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4489 : "expected a 21-bit signed offset");
4490
4491 return false;
4492}
4493
4494bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4495 unsigned Opcode = Inst.getOpcode();
4496 const MCInstrDesc &Desc = MII.get(Opcode);
4497 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4498 return true;
4499
4500 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4501 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4502
4503 const int OpIndices[] = { Src0Idx, Src1Idx };
4504
4505 unsigned NumExprs = 0;
4506 unsigned NumLiterals = 0;
4507 uint32_t LiteralValue;
4508
4509 for (int OpIdx : OpIndices) {
4510 if (OpIdx == -1) break;
4511
4512 const MCOperand &MO = Inst.getOperand(OpIdx);
4513 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4514 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4515 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4516 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4517 if (NumLiterals == 0 || LiteralValue != Value) {
4518 LiteralValue = Value;
4519 ++NumLiterals;
4520 }
4521 } else if (MO.isExpr()) {
4522 ++NumExprs;
4523 }
4524 }
4525 }
4526
4527 return NumLiterals + NumExprs <= 1;
4528}
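// Editorial sketch (illustrative): the counting above allows the same 32-bit
// literal to appear in both scalar sources, e.g.
//   s_add_u32 s0, 0x1234, 0x1234
// but two different literals (or a literal plus a relocatable expression)
// push NumLiterals + NumExprs past 1 and the instruction is rejected with
// "only one unique literal operand is allowed".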
4529
4530bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4531 const unsigned Opc = Inst.getOpcode();
4532 if (isPermlane16(Opc)) {
4533 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4534 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4535
4536 if (OpSel & ~3)
4537 return false;
4538 }
4539
4540 uint64_t TSFlags = MII.get(Opc).TSFlags;
4541
4542 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4543 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4544 if (OpSelIdx != -1) {
4545 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4546 return false;
4547 }
4548 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4549 if (OpSelHiIdx != -1) {
4550 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4551 return false;
4552 }
4553 }
4554
4555 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4556 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4557 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4558 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4559 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4560 if (OpSel & 3)
4561 return false;
4562 }
4563
4564 return true;
4565}
4566
4567bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
4568 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4569
4570 const unsigned Opc = Inst.getOpcode();
4571 uint64_t TSFlags = MII.get(Opc).TSFlags;
4572
4573 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4574 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4575 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4576 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4577 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4578 !(TSFlags & SIInstrFlags::IsSWMMAC))
4579 return true;
4580
4581 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4582 if (NegIdx == -1)
4583 return true;
4584
4585 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4586
4587 // Some instructions have a neg_lo or neg_hi operand, but the neg modifier is
4588 // allowed only on some of their src operands.
4589 // It is convenient that such instructions don't have a src_modifiers operand
4590 // for the src operands that don't allow neg, because they also don't allow opsel.
4591
4592 int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4593 AMDGPU::OpName::src1_modifiers,
4594 AMDGPU::OpName::src2_modifiers};
4595
4596 for (unsigned i = 0; i < 3; ++i) {
4597 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4598 if (Neg & (1 << i))
4599 return false;
4600 }
4601 }
4602
4603 return true;
4604}
4605
4606bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4607 const OperandVector &Operands) {
4608 const unsigned Opc = Inst.getOpcode();
4609 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4610 if (DppCtrlIdx >= 0) {
4611 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4612
4613 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4614 AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4615 // DP ALU DPP is supported for row_newbcast only on GFX9*
4616 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4617 Error(S, "DP ALU dpp only supports row_newbcast");
4618 return false;
4619 }
4620 }
4621
4622 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4623 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4624
4625 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4626 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4627 if (Src1Idx >= 0) {
4628 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4629 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4630 if (Src1.isImm() ||
4631 (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI))) {
4632 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[Src1Idx]);
4633 Error(Op.getStartLoc(), "invalid operand for instruction");
4634 return false;
4635 }
4636 }
4637 }
4638
4639 return true;
4640}
4641
4642// Check if VCC register matches wavefront size
4643bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4644 auto FB = getFeatureBits();
4645 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4646 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4647}
4648
4649// One unique literal can be used. VOP3 literal is only allowed in GFX10+
4650bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4651 const OperandVector &Operands) {
4652 unsigned Opcode = Inst.getOpcode();
4653 const MCInstrDesc &Desc = MII.get(Opcode);
4654 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4655 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4656 !HasMandatoryLiteral && !isVOPD(Opcode))
4657 return true;
4658
4659 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4660
4661 unsigned NumExprs = 0;
4662 unsigned NumLiterals = 0;
4663 uint64_t LiteralValue;
4664
4665 for (int OpIdx : OpIndices) {
4666 if (OpIdx == -1)
4667 continue;
4668
4669 const MCOperand &MO = Inst.getOperand(OpIdx);
4670 if (!MO.isImm() && !MO.isExpr())
4671 continue;
4672 if (!isSISrcOperand(Desc, OpIdx))
4673 continue;
4674
4675 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4676 uint64_t Value = static_cast<uint64_t>(MO.getImm());
4677 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4678 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4679 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4680
4681 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4682 Error(getLitLoc(Operands), "invalid operand for instruction");
4683 return false;
4684 }
4685
4686 if (IsFP64 && IsValid32Op)
4687 Value = Hi_32(Value);
4688
4689 if (NumLiterals == 0 || LiteralValue != Value) {
4690 LiteralValue = Value;
4691 ++NumLiterals;
4692 }
4693 } else if (MO.isExpr()) {
4694 ++NumExprs;
4695 }
4696 }
4697 NumLiterals += NumExprs;
4698
4699 if (!NumLiterals)
4700 return true;
4701
4702 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4703 Error(getLitLoc(Operands), "literal operands are not supported");
4704 return false;
4705 }
4706
4707 if (NumLiterals > 1) {
4708 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4709 return false;
4710 }
4711
4712 return true;
4713}
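// Editorial sketch (assumption): the VOP3 path above accepts at most one
// unique literal and only when FeatureVOP3Literal is present, so on pre-GFX10
// targets something like "v_fma_f32 v0, v1, v2, 0x3f800000" is diagnosed with
// "literal operands are not supported", while on GFX10+ it is accepted as long
// as every literal use carries the same value.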
4714
4715// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4716static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4717 const MCRegisterInfo *MRI) {
4718 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4719 if (OpIdx < 0)
4720 return -1;
4721
4722 const MCOperand &Op = Inst.getOperand(OpIdx);
4723 if (!Op.isReg())
4724 return -1;
4725
4726 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4727 auto Reg = Sub ? Sub : Op.getReg();
4728 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4729 return AGPR32.contains(Reg) ? 1 : 0;
4730}
4731
4732bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4733 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4734 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4735 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4736 SIInstrFlags::DS)) == 0)
4737 return true;
4738
4739 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4740 : AMDGPU::OpName::vdata;
4741
4742 const MCRegisterInfo *MRI = getMRI();
4743 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4744 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4745
4746 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4747 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4748 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4749 return false;
4750 }
4751
4752 auto FB = getFeatureBits();
4753 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4754 if (DataAreg < 0 || DstAreg < 0)
4755 return true;
4756 return DstAreg == DataAreg;
4757 }
4758
4759 return DstAreg < 1 && DataAreg < 1;
4760}
4761
4762bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4763 auto FB = getFeatureBits();
4764 if (!FB[AMDGPU::FeatureGFX90AInsts])
4765 return true;
4766
4767 const MCRegisterInfo *MRI = getMRI();
4768 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4769 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4770 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4771 const MCOperand &Op = Inst.getOperand(I);
4772 if (!Op.isReg())
4773 continue;
4774
4775 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4776 if (!Sub)
4777 continue;
4778
4779 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4780 return false;
4781 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4782 return false;
4783 }
4784
4785 return true;
4786}
4787
4788SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4789 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4790 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4791 if (Op.isBLGP())
4792 return Op.getStartLoc();
4793 }
4794 return SMLoc();
4795}
4796
4797bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4798 const OperandVector &Operands) {
4799 unsigned Opc = Inst.getOpcode();
4800 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4801 if (BlgpIdx == -1)
4802 return true;
4803 SMLoc BLGPLoc = getBLGPLoc(Operands);
4804 if (!BLGPLoc.isValid())
4805 return true;
4806 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4807 auto FB = getFeatureBits();
4808 bool UsesNeg = false;
4809 if (FB[AMDGPU::FeatureGFX940Insts]) {
4810 switch (Opc) {
4811 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4812 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4813 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4814 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4815 UsesNeg = true;
4816 }
4817 }
4818
4819 if (IsNeg == UsesNeg)
4820 return true;
4821
4822 Error(BLGPLoc,
4823 UsesNeg ? "invalid modifier: blgp is not supported"
4824 : "invalid modifier: neg is not supported");
4825
4826 return false;
4827}
4828
4829bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4830 const OperandVector &Operands) {
4831 if (!isGFX11Plus())
4832 return true;
4833
4834 unsigned Opc = Inst.getOpcode();
4835 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4836 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4837 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4838 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4839 return true;
4840
4841 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4842 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4843 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4844 if (Reg == AMDGPU::SGPR_NULL)
4845 return true;
4846
4847 SMLoc RegLoc = getRegLoc(Reg, Operands);
4848 Error(RegLoc, "src0 must be null");
4849 return false;
4850}
4851
4852bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4853 const OperandVector &Operands) {
4854 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4855 if ((TSFlags & SIInstrFlags::DS) == 0)
4856 return true;
4857 if (TSFlags & SIInstrFlags::GWS)
4858 return validateGWS(Inst, Operands);
4859 // Only validate GDS for non-GWS instructions.
4860 if (hasGDS())
4861 return true;
4862 int GDSIdx =
4863 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4864 if (GDSIdx < 0)
4865 return true;
4866 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4867 if (GDS) {
4868 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4869 Error(S, "gds modifier is not supported on this GPU");
4870 return false;
4871 }
4872 return true;
4873}
4874
4875// gfx90a has an undocumented limitation:
4876// DS_GWS opcodes must use even aligned registers.
4877bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4878 const OperandVector &Operands) {
4879 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4880 return true;
4881
4882 int Opc = Inst.getOpcode();
4883 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4884 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4885 return true;
4886
4887 const MCRegisterInfo *MRI = getMRI();
4888 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4889 int Data0Pos =
4890 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4891 assert(Data0Pos != -1);
4892 auto Reg = Inst.getOperand(Data0Pos).getReg();
4893 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4894 if (RegIdx & 1) {
4895 SMLoc RegLoc = getRegLoc(Reg, Operands);
4896 Error(RegLoc, "vgpr must be even aligned");
4897 return false;
4898 }
4899
4900 return true;
4901}
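// Editorial sketch (illustrative): on gfx90a the alignment rule above means an
// input along the lines of "ds_gws_init v1 gds" would be rejected with
// "vgpr must be even aligned", while an even data0 register such as v0 or v2
// passes.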
4902
4903bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4904 const OperandVector &Operands,
4905 const SMLoc &IDLoc) {
4906 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4907 AMDGPU::OpName::cpol);
4908 if (CPolPos == -1)
4909 return true;
4910
4911 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4912
4913 if (isGFX12Plus())
4914 return validateTHAndScopeBits(Inst, Operands, CPol);
4915
4916 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4917 if (TSFlags & SIInstrFlags::SMRD) {
4918 if (CPol && (isSI() || isCI())) {
4919 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4920 Error(S, "cache policy is not supported for SMRD instructions");
4921 return false;
4922 }
4923 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4924 Error(IDLoc, "invalid cache policy for SMEM instruction");
4925 return false;
4926 }
4927 }
4928
4929 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4930 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4931 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4932 SIInstrFlags::FLAT;
4933 if (!(TSFlags & AllowSCCModifier)) {
4934 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4935 StringRef CStr(S.getPointer());
4936 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4937 Error(S,
4938 "scc modifier is not supported for this instruction on this GPU");
4939 return false;
4940 }
4941 }
4942
4943 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
4944 return true;
4945
4946 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4947 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4948 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4949 : "instruction must use glc");
4950 return false;
4951 }
4952 } else {
4953 if (CPol & CPol::GLC) {
4954 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4955 StringRef CStr(S.getPointer());
4956 S = SMLoc::getFromPointer(
4957 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4958 Error(S, isGFX940() ? "instruction must not use sc0"
4959 : "instruction must not use glc");
4960 return false;
4961 }
4962 }
4963
4964 return true;
4965}
4966
4967bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
4968 const OperandVector &Operands,
4969 const unsigned CPol) {
4970 const unsigned TH = CPol & AMDGPU::CPol::TH;
4971 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
4972
4973 const unsigned Opcode = Inst.getOpcode();
4974 const MCInstrDesc &TID = MII.get(Opcode);
4975
4976 auto PrintError = [&](StringRef Msg) {
4977 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4978 Error(S, Msg);
4979 return false;
4980 };
4981
4982 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
4983 ((TH & AMDGPU::CPol::TH_ATOMIC_RETURN) !=
4984 AMDGPU::CPol::TH_ATOMIC_RETURN))
4985 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
4986
4987 if (TH == 0)
4988 return true;
4989
4990 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
4991 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
4992 (TH == AMDGPU::CPol::TH_NT_HT)))
4993 return PrintError("invalid th value for SMEM instruction");
4994
4995 if (TH == AMDGPU::CPol::TH_BYPASS) {
4996 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
4997 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
4998 (Scope == AMDGPU::CPol::SCOPE_SYS &&
4999 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5000 return PrintError("scope and th combination is not valid");
5001 }
5002
5003 bool IsStore = TID.mayStore();
5004 bool IsAtomic =
5005 TID.TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet);
5006
5007 if (IsAtomic) {
5008 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5009 return PrintError("invalid th value for atomic instructions");
5010 } else if (IsStore) {
5011 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5012 return PrintError("invalid th value for store instructions");
5013 } else {
5014 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5015 return PrintError("invalid th value for load instructions");
5016 }
5017
5018 return true;
5019}
5020
5021bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
5022 if (!isGFX11Plus())
5023 return true;
5024 for (auto &Operand : Operands) {
5025 if (!Operand->isReg())
5026 continue;
5027 unsigned Reg = Operand->getReg();
5028 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
5029 Error(getRegLoc(Reg, Operands),
5030 "execz and vccz are not supported on this GPU");
5031 return false;
5032 }
5033 }
5034 return true;
5035}
5036
5037bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5038 const OperandVector &Operands) {
5039 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5040 if (Desc.mayStore() &&
5041 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5042 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5043 if (Loc != getInstLoc(Operands)) {
5044 Error(Loc, "TFE modifier has no meaning for store instructions");
5045 return false;
5046 }
5047 }
5048
5049 return true;
5050}
5051
5052bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5053 const SMLoc &IDLoc,
5054 const OperandVector &Operands) {
5055 if (auto ErrMsg = validateLdsDirect(Inst)) {
5056 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
5057 return false;
5058 }
5059 if (!validateSOPLiteral(Inst)) {
5060 Error(getLitLoc(Operands),
5061 "only one unique literal operand is allowed");
5062 return false;
5063 }
5064 if (!validateVOPLiteral(Inst, Operands)) {
5065 return false;
5066 }
5067 if (!validateConstantBusLimitations(Inst, Operands)) {
5068 return false;
5069 }
5070 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
5071 return false;
5072 }
5073 if (!validateIntClampSupported(Inst)) {
5074 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
5075 "integer clamping is not supported on this GPU");
5076 return false;
5077 }
5078 if (!validateOpSel(Inst)) {
5079 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5080 "invalid op_sel operand");
5081 return false;
5082 }
5083 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5084 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5085 "invalid neg_lo operand");
5086 return false;
5087 }
5088 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5089 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5090 "invalid neg_hi operand");
5091 return false;
5092 }
5093 if (!validateDPP(Inst, Operands)) {
5094 return false;
5095 }
5096 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5097 if (!validateMIMGD16(Inst)) {
5098 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5099 "d16 modifier is not supported on this GPU");
5100 return false;
5101 }
5102 if (!validateMIMGMSAA(Inst)) {
5103 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5104 "invalid dim; must be MSAA type");
5105 return false;
5106 }
5107 if (!validateMIMGDataSize(Inst, IDLoc)) {
5108 return false;
5109 }
5110 if (!validateMIMGAddrSize(Inst, IDLoc))
5111 return false;
5112 if (!validateMIMGAtomicDMask(Inst)) {
5113 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5114 "invalid atomic image dmask");
5115 return false;
5116 }
5117 if (!validateMIMGGatherDMask(Inst)) {
5118 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5119 "invalid image_gather dmask: only one bit must be set");
5120 return false;
5121 }
5122 if (!validateMovrels(Inst, Operands)) {
5123 return false;
5124 }
5125 if (!validateOffset(Inst, Operands)) {
5126 return false;
5127 }
5128 if (!validateMAIAccWrite(Inst, Operands)) {
5129 return false;
5130 }
5131 if (!validateMAISrc2(Inst, Operands)) {
5132 return false;
5133 }
5134 if (!validateMFMA(Inst, Operands)) {
5135 return false;
5136 }
5137 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5138 return false;
5139 }
5140
5141 if (!validateAGPRLdSt(Inst)) {
5142 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5143 ? "invalid register class: data and dst should be all VGPR or AGPR"
5144 : "invalid register class: agpr loads and stores not supported on this GPU"
5145 );
5146 return false;
5147 }
5148 if (!validateVGPRAlign(Inst)) {
5149 Error(IDLoc,
5150 "invalid register class: vgpr tuples must be 64 bit aligned");
5151 return false;
5152 }
5153 if (!validateDS(Inst, Operands)) {
5154 return false;
5155 }
5156
5157 if (!validateBLGP(Inst, Operands)) {
5158 return false;
5159 }
5160
5161 if (!validateDivScale(Inst)) {
5162 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5163 return false;
5164 }
5165 if (!validateWaitCnt(Inst, Operands)) {
5166 return false;
5167 }
5168 if (!validateExeczVcczOperands(Operands)) {
5169 return false;
5170 }
5171 if (!validateTFE(Inst, Operands)) {
5172 return false;
5173 }
5174
5175 return true;
5176}
5177
5178static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5179 const FeatureBitset &FBS,
5180 unsigned VariantID = 0);
5181
5182static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5183 const FeatureBitset &AvailableFeatures,
5184 unsigned VariantID);
5185
5186bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5187 const FeatureBitset &FBS) {
5188 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5189}
5190
5191bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5192 const FeatureBitset &FBS,
5193 ArrayRef<unsigned> Variants) {
5194 for (auto Variant : Variants) {
5195 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5196 return true;
5197 }
5198
5199 return false;
5200}
5201
5202bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5203 const SMLoc &IDLoc) {
5204 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5205
5206 // Check if requested instruction variant is supported.
5207 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5208 return false;
5209
5210 // This instruction is not supported.
5211 // Clear any other pending errors because they are no longer relevant.
5212 getParser().clearPendingErrors();
5213
5214 // Requested instruction variant is not supported.
5215 // Check if any other variants are supported.
5216 StringRef VariantName = getMatchedVariantName();
5217 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5218 return Error(IDLoc,
5219 Twine(VariantName,
5220 " variant of this instruction is not supported"));
5221 }
5222
5223 // Check if this instruction may be used with a different wavesize.
5224 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5225 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5226
5227 FeatureBitset FeaturesWS32 = getFeatureBits();
5228 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5229 .flip(AMDGPU::FeatureWavefrontSize32);
5230 FeatureBitset AvailableFeaturesWS32 =
5231 ComputeAvailableFeatures(FeaturesWS32);
5232
5233 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5234 return Error(IDLoc, "instruction requires wavesize=32");
5235 }
5236
5237 // Finally check if this instruction is supported on any other GPU.
5238 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5239 return Error(IDLoc, "instruction not supported on this GPU");
5240 }
5241
5242 // Instruction not supported on any GPU. Probably a typo.
5243 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5244 return Error(IDLoc, "invalid instruction" + Suggestion);
5245}
5246
5247static bool isInvalidVOPDY(const OperandVector &Operands,
5248 uint64_t InvalidOprIdx) {
5249 assert(InvalidOprIdx < Operands.size());
5250 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5251 if (Op.isToken() && InvalidOprIdx > 1) {
5252 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5253 return PrevOp.isToken() && PrevOp.getToken() == "::";
5254 }
5255 return false;
5256}
5257
5258bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5259 OperandVector &Operands,
5260 MCStreamer &Out,
5261 uint64_t &ErrorInfo,
5262 bool MatchingInlineAsm) {
5263 MCInst Inst;
5264 unsigned Result = Match_Success;
5265 for (auto Variant : getMatchedVariants()) {
5266 uint64_t EI;
5267 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5268 Variant);
5269 // We order match statuses from least to most specific and use the most
5270 // specific status as the result:
5271 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
5272 if ((R == Match_Success) ||
5273 (R == Match_PreferE32) ||
5274 (R == Match_MissingFeature && Result != Match_PreferE32) ||
5275 (R == Match_InvalidOperand && Result != Match_MissingFeature
5276 && Result != Match_PreferE32) ||
5277 (R == Match_MnemonicFail && Result != Match_InvalidOperand
5278 && Result != Match_MissingFeature
5279 && Result != Match_PreferE32)) {
5280 Result = R;
5281 ErrorInfo = EI;
5282 }
5283 if (R == Match_Success)
5284 break;
5285 }
5286
5287 if (Result == Match_Success) {
5288 if (!validateInstruction(Inst, IDLoc, Operands)) {
5289 return true;
5290 }
5291 Inst.setLoc(IDLoc);
5292 Out.emitInstruction(Inst, getSTI());
5293 return false;
5294 }
5295
5296 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5297 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5298 return true;
5299 }
5300
5301 switch (Result) {
5302 default: break;
5303 case Match_MissingFeature:
5304 // It has been verified that the specified instruction
5305 // mnemonic is valid. A match was found but it requires
5306 // features which are not supported on this GPU.
5307 return Error(IDLoc, "operands are not valid for this GPU or mode");
5308
5309 case Match_InvalidOperand: {
5310 SMLoc ErrorLoc = IDLoc;
5311 if (ErrorInfo != ~0ULL) {
5312 if (ErrorInfo >= Operands.size()) {
5313 return Error(IDLoc, "too few operands for instruction");
5314 }
5315 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5316 if (ErrorLoc == SMLoc())
5317 ErrorLoc = IDLoc;
5318
5319 if (isInvalidVOPDY(Operands, ErrorInfo)) {
5320 return Error(ErrorLoc, "invalid VOPDY instruction");
5321 }
5322 return Error(ErrorLoc, "invalid operand for instruction");
5323 }
5324
5325 case Match_PreferE32:
5326 return Error(IDLoc, "internal error: instruction without _e64 suffix "
5327 "should be encoded as e32");
5328 case Match_MnemonicFail:
5329 llvm_unreachable("Invalid instructions should have been handled already");
5330 }
5331 llvm_unreachable("Implement any new match types added!");
5332}
5333
5334bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5335 int64_t Tmp = -1;
5336 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5337 return true;
5338 }
5339 if (getParser().parseAbsoluteExpression(Tmp)) {
5340 return true;
5341 }
5342 Ret = static_cast<uint32_t>(Tmp);
5343 return false;
5344}
5345
5346bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5347 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5348 return TokError("directive only supported for amdgcn architecture");
5349
5350 std::string TargetIDDirective;
5351 SMLoc TargetStart = getTok().getLoc();
5352 if (getParser().parseEscapedString(TargetIDDirective))
5353 return true;
5354
5355 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5356 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5357 return getParser().Error(TargetRange.Start,
5358 (Twine(".amdgcn_target directive's target id ") +
5359 Twine(TargetIDDirective) +
5360 Twine(" does not match the specified target id ") +
5361 Twine(getTargetStreamer().getTargetID()->toString())).str());
5362
5363 return false;
5364}
5365
5366bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5367 return Error(Range.Start, "value out of range", Range);
5368}
5369
5370bool AMDGPUAsmParser::calculateGPRBlocks(
5371 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
5372 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
5373 unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
5374 SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
5375 // TODO(scott.linder): These calculations are duplicated from
5376 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5377 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5378
5379 unsigned NumVGPRs = NextFreeVGPR;
5380 unsigned NumSGPRs = NextFreeSGPR;
5381
5382 if (Version.Major >= 10)
5383 NumSGPRs = 0;
5384 else {
5385 unsigned MaxAddressableNumSGPRs =
5386 IsaInfo::getAddressableNumSGPRs(&getSTI());
5387
5388 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
5389 NumSGPRs > MaxAddressableNumSGPRs)
5390 return OutOfRangeError(SGPRRange);
5391
5392 NumSGPRs +=
5393 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
5394
5395 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5396 NumSGPRs > MaxAddressableNumSGPRs)
5397 return OutOfRangeError(SGPRRange);
5398
5399 if (Features.test(FeatureSGPRInitBug))
5400 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
5401 }
5402
5403 VGPRBlocks = IsaInfo::getEncodedNumVGPRBlocks(&getSTI(), NumVGPRs,
5404 EnableWavefrontSize32);
5405 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
5406
5407 return false;
5408}
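// Editorial sketch (assumption): the encoded block counts produced above
// follow the usual granule arithmetic, roughly
//   blocks = ceil(NumRegs / granule) - 1
// so, for example, 17 VGPRs with an allocation granularity of 4 would encode
// as 4 VGPR blocks; the exact granule depends on the target and on
// EnableWavefrontSize32.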
5409
5410bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5411 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5412 return TokError("directive only supported for amdgcn architecture");
5413
5414 if (!isHsaAbi(getSTI()))
5415 return TokError("directive only supported for amdhsa OS");
5416
5417 StringRef KernelName;
5418 if (getParser().parseIdentifier(KernelName))
5419 return true;
5420
5421 AMDGPU::MCKernelDescriptor KD =
5422 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5423 &getSTI(), getContext());
5424
5425 StringSet<> Seen;
5426
5427 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5428
5429 SMRange VGPRRange;
5430 uint64_t NextFreeVGPR = 0;
5431 uint64_t AccumOffset = 0;
5432 uint64_t SharedVGPRCount = 0;
5433 uint64_t PreloadLength = 0;
5434 uint64_t PreloadOffset = 0;
5435 SMRange SGPRRange;
5436 uint64_t NextFreeSGPR = 0;
5437
5438 // Count the number of user SGPRs implied from the enabled feature bits.
5439 unsigned ImpliedUserSGPRCount = 0;
5440
5441 // Track if the asm explicitly contains the directive for the user SGPR
5442 // count.
5443 std::optional<unsigned> ExplicitUserSGPRCount;
5444 bool ReserveVCC = true;
5445 bool ReserveFlatScr = true;
5446 std::optional<bool> EnableWavefrontSize32;
5447
5448 while (true) {
5449 while (trySkipToken(AsmToken::EndOfStatement));
5450
5451 StringRef ID;
5452 SMRange IDRange = getTok().getLocRange();
5453 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5454 return true;
5455
5456 if (ID == ".end_amdhsa_kernel")
5457 break;
5458
5459 if (!Seen.insert(ID).second)
5460 return TokError(".amdhsa_ directives cannot be repeated");
5461
5462 SMLoc ValStart = getLoc();
5463 const MCExpr *ExprVal;
5464 if (getParser().parseExpression(ExprVal))
5465 return true;
5466 SMLoc ValEnd = getLoc();
5467 SMRange ValRange = SMRange(ValStart, ValEnd);
5468
5469 int64_t IVal = 0;
5470 uint64_t Val = IVal;
5471 bool EvaluatableExpr;
5472 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5473 if (IVal < 0)
5474 return OutOfRangeError(ValRange);
5475 Val = IVal;
5476 }
5477
5478#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5479 if (!isUInt<ENTRY##_WIDTH>(Val)) \
5480 return OutOfRangeError(RANGE); \
5481 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
5482 getContext());
5483
5484// Some fields use the parsed value immediately which requires the expression to
5485// be solvable.
5486#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
5487 if (!(RESOLVED)) \
5488 return Error(IDRange.Start, "directive should have resolvable expression", \
5489 IDRange);
5490
5491 if (ID == ".amdhsa_group_segment_fixed_size") {
5492 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
5493 CHAR_BIT>(Val))
5494 return OutOfRangeError(ValRange);
5495 KD.group_segment_fixed_size = ExprVal;
5496 } else if (ID == ".amdhsa_private_segment_fixed_size") {
5497 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
5498 CHAR_BIT>(Val))
5499 return OutOfRangeError(ValRange);
5500 KD.private_segment_fixed_size = ExprVal;
5501 } else if (ID == ".amdhsa_kernarg_size") {
5502 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
5503 return OutOfRangeError(ValRange);
5504 KD.kernarg_size = ExprVal;
5505 } else if (ID == ".amdhsa_user_sgpr_count") {
5506 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5507 ExplicitUserSGPRCount = Val;
5508 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5509 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5510 if (hasArchitectedFlatScratch())
5511 return Error(IDRange.Start,
5512 "directive is not supported with architected flat scratch",
5513 IDRange);
5514 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5515 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5516 ExprVal, ValRange);
5517 if (Val)
5518 ImpliedUserSGPRCount += 4;
5519 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5520 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5521 if (!hasKernargPreload())
5522 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5523
5524 if (Val > getMaxNumUserSGPRs())
5525 return OutOfRangeError(ValRange);
5526 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
5527 ValRange);
5528 if (Val) {
5529 ImpliedUserSGPRCount += Val;
5530 PreloadLength = Val;
5531 }
5532 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5533 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5534 if (!hasKernargPreload())
5535 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5536
5537 if (Val >= 1024)
5538 return OutOfRangeError(ValRange);
5539 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
5540 ValRange);
5541 if (Val)
5542 PreloadOffset = Val;
5543 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5544 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5545 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5546 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
5547 ValRange);
5548 if (Val)
5549 ImpliedUserSGPRCount += 2;
5550 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5551 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5552 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5553 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
5554 ValRange);
5555 if (Val)
5556 ImpliedUserSGPRCount += 2;
5557 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5558 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5559 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5560 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5561 ExprVal, ValRange);
5562 if (Val)
5563 ImpliedUserSGPRCount += 2;
5564 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5565 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5566 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5567 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
5568 ValRange);
5569 if (Val)
5570 ImpliedUserSGPRCount += 2;
5571 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5572 if (hasArchitectedFlatScratch())
5573 return Error(IDRange.Start,
5574 "directive is not supported with architected flat scratch",
5575 IDRange);
5576 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5577 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5578 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
5579 ExprVal, ValRange);
5580 if (Val)
5581 ImpliedUserSGPRCount += 2;
5582 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5583 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5584 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5585 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5586 ExprVal, ValRange);
5587 if (Val)
5588 ImpliedUserSGPRCount += 1;
5589 } else if (ID == ".amdhsa_wavefront_size32") {
5590 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5591 if (IVersion.Major < 10)
5592 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5593 EnableWavefrontSize32 = Val;
5594 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5595 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
5596 ValRange);
5597 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5598 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5599 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
5600 ValRange);
5601 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5602 if (hasArchitectedFlatScratch())
5603 return Error(IDRange.Start,
5604 "directive is not supported with architected flat scratch",
5605 IDRange);
5606 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5607 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5608 ValRange);
5609 } else if (ID == ".amdhsa_enable_private_segment") {
5610 if (!hasArchitectedFlatScratch())
5611 return Error(
5612 IDRange.Start,
5613 "directive is not supported without architected flat scratch",
5614 IDRange);
5615 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5616 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5617 ValRange);
5618 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5619 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5620 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
5621 ValRange);
5622 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5623 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5624 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
5625 ValRange);
5626 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5627 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5628 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
5629 ValRange);
5630 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5631 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5632 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
5633 ValRange);
5634 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5635 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5636 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
5637 ValRange);
5638 } else if (ID == ".amdhsa_next_free_vgpr") {
5639 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5640 VGPRRange = ValRange;
5641 NextFreeVGPR = Val;
5642 } else if (ID == ".amdhsa_next_free_sgpr") {
5643 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5644 SGPRRange = ValRange;
5645 NextFreeSGPR = Val;
5646 } else if (ID == ".amdhsa_accum_offset") {
5647 if (!isGFX90A())
5648 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5649 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5650 AccumOffset = Val;
5651 } else if (ID == ".amdhsa_reserve_vcc") {
5652 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5653 if (!isUInt<1>(Val))
5654 return OutOfRangeError(ValRange);
5655 ReserveVCC = Val;
5656 } else if (ID == ".amdhsa_reserve_flat_scratch") {
5657 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5658 if (IVersion.Major < 7)
5659 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5660 if (hasArchitectedFlatScratch())
5661 return Error(IDRange.Start,
5662 "directive is not supported with architected flat scratch",
5663 IDRange);
5664 if (!isUInt<1>(Val))
5665 return OutOfRangeError(ValRange);
5666 ReserveFlatScr = Val;
5667 } else if (ID == ".amdhsa_reserve_xnack_mask") {
5668 if (IVersion.Major < 8)
5669 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5670 if (!isUInt<1>(Val))
5671 return OutOfRangeError(ValRange);
5672 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5673 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5674 IDRange);
5675 } else if (ID == ".amdhsa_float_round_mode_32") {
5676 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5677 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
5678 ValRange);
5679 } else if (ID == ".amdhsa_float_round_mode_16_64") {
5680 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5681 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
5682 ValRange);
5683 } else if (ID == ".amdhsa_float_denorm_mode_32") {
5684 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5685 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
5686 ValRange);
5687 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5688 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5689 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
5690 ValRange);
5691 } else if (ID == ".amdhsa_dx10_clamp") {
5692 if (IVersion.Major >= 12)
5693 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5694 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5695 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
5696 ValRange);
5697 } else if (ID == ".amdhsa_ieee_mode") {
5698 if (IVersion.Major >= 12)
5699 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5700 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5701 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
5702 ValRange);
5703 } else if (ID == ".amdhsa_fp16_overflow") {
5704 if (IVersion.Major < 9)
5705 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5706 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5707 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
5708 ValRange);
5709 } else if (ID == ".amdhsa_tg_split") {
5710 if (!isGFX90A())
5711 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5712 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
5713 ExprVal, ValRange);
5714 } else if (ID == ".amdhsa_workgroup_processor_mode") {
5715 if (IVersion.Major < 10)
5716 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5717 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5718 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
5719 ValRange);
5720 } else if (ID == ".amdhsa_memory_ordered") {
5721 if (IVersion.Major < 10)
5722 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5723 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5724 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
5725 ValRange);
5726 } else if (ID == ".amdhsa_forward_progress") {
5727 if (IVersion.Major < 10)
5728 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5729 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5730 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
5731 ValRange);
5732 } else if (ID == ".amdhsa_shared_vgpr_count") {
5733 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5734 if (IVersion.Major < 10 || IVersion.Major >= 12)
5735 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
5736 IDRange);
5737 SharedVGPRCount = Val;
5738 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5739 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
5740 ValRange);
5741 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5742 PARSE_BITS_ENTRY(
5743 KD.compute_pgm_rsrc2,
5744 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
5745 ExprVal, ValRange);
5746 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5747 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5748 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5749 ExprVal, ValRange);
5750 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5751 PARSE_BITS_ENTRY(
5752 KD.compute_pgm_rsrc2,
5753 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
5754 ExprVal, ValRange);
5755 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5756 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5757 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5758 ExprVal, ValRange);
5759 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5760 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5761 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5762 ExprVal, ValRange);
5763 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5764 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5765 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5766 ExprVal, ValRange);
5767 } else if (ID == ".amdhsa_exception_int_div_zero") {
5768 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5769 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5770 ExprVal, ValRange);
5771 } else if (ID == ".amdhsa_round_robin_scheduling") {
5772 if (IVersion.Major < 12)
5773 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
5774 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5775 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
5776 ValRange);
5777 } else {
5778 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5779 }
5780
5781#undef PARSE_BITS_ENTRY
5782 }
5783
5784 if (!Seen.contains(".amdhsa_next_free_vgpr"))
5785 return TokError(".amdhsa_next_free_vgpr directive is required");
5786
5787 if (!Seen.contains(".amdhsa_next_free_sgpr"))
5788 return TokError(".amdhsa_next_free_sgpr directive is required");
5789
5790 unsigned VGPRBlocks;
5791 unsigned SGPRBlocks;
5792 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5793 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5794 EnableWavefrontSize32, NextFreeVGPR,
5795 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5796 SGPRBlocks))
5797 return true;
5798
5799 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5800 VGPRBlocks))
5801 return OutOfRangeError(VGPRRange);
5802 AMDGPU::MCKernelDescriptor::bits_set(
5803 KD.compute_pgm_rsrc1, MCConstantExpr::create(VGPRBlocks, getContext()),
5804 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
5805 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
5806
5807 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5808 SGPRBlocks))
5809 return OutOfRangeError(SGPRRange);
5810 AMDGPU::MCKernelDescriptor::bits_set(
5811 KD.compute_pgm_rsrc1, MCConstantExpr::create(SGPRBlocks, getContext()),
5812 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
5813 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
5814
5815 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5816 return TokError(".amdhsa_user_sgpr_count smaller than implied by "
5817 "enabled user SGPRs");
5818
5819 unsigned UserSGPRCount =
5820 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5821
5822 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5823 return TokError("too many user SGPRs enabled");
5824 AMDGPU::MCKernelDescriptor::bits_set(
5825 KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
5826 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
5827 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
5828
5829 int64_t IVal = 0;
5830 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
5831 return TokError("Kernarg size should be resolvable");
5832 uint64_t kernarg_size = IVal;
5833 if (PreloadLength && kernarg_size &&
5834 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
5835 return TokError("Kernarg preload length + offset is larger than the "
5836 "kernarg segment size");
5837
5838 if (isGFX90A()) {
5839 if (!Seen.contains(".amdhsa_accum_offset"))
5840 return TokError(".amdhsa_accum_offset directive is required");
5841 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5842 return TokError("accum_offset should be in range [4..256] in "
5843 "increments of 4");
5844 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5845 return TokError("accum_offset exceeds total VGPR allocation");
5846 AMDGPU::MCKernelDescriptor::bits_set(
5847 KD.compute_pgm_rsrc3,
5848 MCConstantExpr::create(AccumOffset / 4 - 1, getContext()),
5849 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
5850 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
5851 }
5852
5853 if (IVersion.Major >= 10 && IVersion.Major < 12) {
5854 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5855 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5856 return TokError("shared_vgpr_count directive not valid on "
5857 "wavefront size 32");
5858 }
5859 if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5860 return TokError("shared_vgpr_count*2 + "
5861 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5862 "exceed 63\n");
5863 }
5864 }
5865
5866 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
5867 NextFreeVGPR, NextFreeSGPR,
5868 ReserveVCC, ReserveFlatScr);
5869 return false;
5870}
5871
5872bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5873 uint32_t Version;
5874 if (ParseAsAbsoluteExpression(Version))
5875 return true;
5876
5877 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
5878 return false;
5879}
5880
5881bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5882 amd_kernel_code_t &Header) {
5883 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5884 // assembly for backwards compatibility.
5885 if (ID == "max_scratch_backing_memory_byte_size") {
5886 Parser.eatToEndOfStatement();
5887 return false;
5888 }
5889
5890 SmallString<40> ErrStr;
5891 raw_svector_ostream Err(ErrStr);
5892 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5893 return TokError(Err.str());
5894 }
5895 Lex();
5896
5897 if (ID == "enable_dx10_clamp") {
5898 if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) &&
5899 isGFX12Plus())
5900 return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
5901 }
5902
5903 if (ID == "enable_ieee_mode") {
5904 if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) &&
5905 isGFX12Plus())
5906 return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
5907 }
5908
5909 if (ID == "enable_wavefront_size32") {
5910 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5911 if (!isGFX10Plus())
5912 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5913 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5914 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5915 } else {
5916 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5917 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5918 }
5919 }
5920
5921 if (ID == "wavefront_size") {
5922 if (Header.wavefront_size == 5) {
5923 if (!isGFX10Plus())
5924 return TokError("wavefront_size=5 is only allowed on GFX10+");
5925 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5926 return TokError("wavefront_size=5 requires +WavefrontSize32");
5927 } else if (Header.wavefront_size == 6) {
5928 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5929 return TokError("wavefront_size=6 requires +WavefrontSize64");
5930 }
5931 }
5932
5933 if (ID == "enable_wgp_mode") {
5934 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5935 !isGFX10Plus())
5936 return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5937 }
5938
5939 if (ID == "enable_mem_ordered") {
5940 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5941 !isGFX10Plus())
5942 return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5943 }
5944
5945 if (ID == "enable_fwd_progress") {
5946 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5947 !isGFX10Plus())
5948 return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5949 }
5950
5951 return false;
5952}
5953
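// The legacy code-object form parsed below is one "field = value" pair per
// line between the begin/end markers, e.g. (field names are illustrative
// amd_kernel_code_t members):
//   .amd_kernel_code_t
//     wavefront_size = 6
//     enable_sgpr_kernarg_segment_ptr = 1
//   .end_amd_kernel_code_t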
5954bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5955 amd_kernel_code_t Header;
5956 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5957
5958 while (true) {
5959 // Lex EndOfStatement. This is in a while loop, because lexing a comment
5960 // will set the current token to EndOfStatement.
5961 while(trySkipToken(AsmToken::EndOfStatement));
5962
5963 StringRef ID;
5964 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5965 return true;
5966
5967 if (ID == ".end_amd_kernel_code_t")
5968 break;
5969
5970 if (ParseAMDKernelCodeTValue(ID, Header))
5971 return true;
5972 }
5973
5974 getTargetStreamer().EmitAMDKernelCodeT(Header);
5975
5976 return false;
5977}
5978
5979bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5980 StringRef KernelName;
5981 if (!parseId(KernelName, "expected symbol name"))
5982 return true;
5983
5984 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5985 ELF::STT_AMDGPU_HSA_KERNEL);
5986
5987 KernelScope.initialize(getContext());
5988 return false;
5989}
5990
5991bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5992 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5993 return Error(getLoc(),
5994 ".amd_amdgpu_isa directive is not available on non-amdgcn "
5995 "architectures");
5996 }
5997
5998 auto TargetIDDirective = getLexer().getTok().getStringContents();
5999 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6000 return Error(getParser().getTok().getLoc(), "target id must match options");
6001
6002 getTargetStreamer().EmitISAVersion();
6003 Lex();
6004
6005 return false;
6006}
6007
6008bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6009 assert(isHsaAbi(getSTI()));
6010
6011 std::string HSAMetadataString;
6012 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6013 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6014 return true;
6015
6016 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6017 return Error(getLoc(), "invalid HSA metadata");
6018
6019 return false;
6020}
6021
6022/// Common code to parse out a block of text (typically YAML) between start and
6023/// end directives.
6024bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6025 const char *AssemblerDirectiveEnd,
6026 std::string &CollectString) {
6027
6028 raw_string_ostream CollectStream(CollectString);
6029
6030 getLexer().setSkipSpace(false);
6031
6032 bool FoundEnd = false;
6033 while (!isToken(AsmToken::Eof)) {
6034 while (isToken(AsmToken::Space)) {
6035 CollectStream << getTokenStr();
6036 Lex();
6037 }
6038
6039 if (trySkipId(AssemblerDirectiveEnd)) {
6040 FoundEnd = true;
6041 break;
6042 }
6043
6044 CollectStream << Parser.parseStringToEndOfStatement()
6045 << getContext().getAsmInfo()->getSeparatorString();
6046
6047 Parser.eatToEndOfStatement();
6048 }
6049
6050 getLexer().setSkipSpace(true);
6051
6052 if (isToken(AsmToken::Eof) && !FoundEnd) {
6053 return TokError(Twine("expected directive ") +
6054 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6055 }
6056
6057 CollectStream.flush();
6058 return false;
6059}
6060
6061/// Parse the assembler directive for new MsgPack-format PAL metadata.
6062bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6063 std::string String;
6064 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6065 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6066 return true;
6067
6068 auto PALMetadata = getTargetStreamer().getPALMetadata();
6069 if (!PALMetadata->setFromString(String))
6070 return Error(getLoc(), "invalid PAL metadata");
6071 return false;
6072}
6073
6074/// Parse the assembler directive for old linear-format PAL metadata.
6075bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6076 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6077 return Error(getLoc(),
6078 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6079 "not available on non-amdpal OSes")).str());
6080 }
6081
6082 auto PALMetadata = getTargetStreamer().getPALMetadata();
6083 PALMetadata->setLegacy();
6084 for (;;) {
6085 uint32_t Key, Value;
6086 if (ParseAsAbsoluteExpression(Key)) {
6087 return TokError(Twine("invalid value in ") +
6088 Twine(PALMD::AssemblerDirective));
6089 }
6090 if (!trySkipToken(AsmToken::Comma)) {
6091 return TokError(Twine("expected an even number of values in ") +
6092 Twine(PALMD::AssemblerDirective));
6093 }
6094 if (ParseAsAbsoluteExpression(Value)) {
6095 return TokError(Twine("invalid value in ") +
6096 Twine(PALMD::AssemblerDirective));
6097 }
6098 PALMetadata->setRegister(Key, Value);
6099 if (!trySkipToken(AsmToken::Comma))
6100 break;
6101 }
6102 return false;
6103}
6104
6105/// ParseDirectiveAMDGPULDS
6106/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
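/// e.g. ".amdgpu_lds lds_buffer, 4096, 16" (the symbol name here is
/// illustrative; size must not exceed local memory and the alignment must be
/// a power of two).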
6107bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6108 if (getParser().checkForValidSection())
6109 return true;
6110
6111 StringRef Name;
6112 SMLoc NameLoc = getLoc();
6113 if (getParser().parseIdentifier(Name))
6114 return TokError("expected identifier in directive");
6115
6116 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6117 if (getParser().parseComma())
6118 return true;
6119
6120 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6121
6122 int64_t Size;
6123 SMLoc SizeLoc = getLoc();
6124 if (getParser().parseAbsoluteExpression(Size))
6125 return true;
6126 if (Size < 0)
6127 return Error(SizeLoc, "size must be non-negative");
6128 if (Size > LocalMemorySize)
6129 return Error(SizeLoc, "size is too large");
6130
6131 int64_t Alignment = 4;
6132 if (trySkipToken(AsmToken::Comma)) {
6133 SMLoc AlignLoc = getLoc();
6134 if (getParser().parseAbsoluteExpression(Alignment))
6135 return true;
6136 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6137 return Error(AlignLoc, "alignment must be a power of two");
6138
6139 // Alignment larger than the size of LDS is possible in theory, as long
6140 // as the linker manages to place the symbol at address 0, but we do want
6141 // to make sure the alignment fits nicely into a 32-bit integer.
6142 if (Alignment >= 1u << 31)
6143 return Error(AlignLoc, "alignment is too large");
6144 }
6145
6146 if (parseEOL())
6147 return true;
6148
6149 Symbol->redefineIfPossible();
6150 if (!Symbol->isUndefined())
6151 return Error(NameLoc, "invalid symbol redefinition");
6152
6153 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6154 return false;
6155}
6156
6157bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6158 StringRef IDVal = DirectiveID.getString();
6159
6160 if (isHsaAbi(getSTI())) {
6161 if (IDVal == ".amdhsa_kernel")
6162 return ParseDirectiveAMDHSAKernel();
6163
6164 if (IDVal == ".amdhsa_code_object_version")
6165 return ParseDirectiveAMDHSACodeObjectVersion();
6166
6167 // TODO: Restructure/combine with PAL metadata directive.
6168 if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6169 return ParseDirectiveHSAMetadata();
6170 } else {
6171 if (IDVal == ".amd_kernel_code_t")
6172 return ParseDirectiveAMDKernelCodeT();
6173
6174 if (IDVal == ".amdgpu_hsa_kernel")
6175 return ParseDirectiveAMDGPUHsaKernel();
6176
6177 if (IDVal == ".amd_amdgpu_isa")
6178 return ParseDirectiveISAVersion();
6179
6180 if (IDVal == HSAMD::AssemblerDirectiveBegin) {
6181 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6182 Twine(" directive is "
6183 "not available on non-amdhsa OSes"))
6184 .str());
6185 }
6186 }
6187
6188 if (IDVal == ".amdgcn_target")
6189 return ParseDirectiveAMDGCNTarget();
6190
6191 if (IDVal == ".amdgpu_lds")
6192 return ParseDirectiveAMDGPULDS();
6193
6194 if (IDVal == PALMD::AssemblerDirectiveBegin)
6195 return ParseDirectivePALMetadataBegin();
6196
6197 if (IDVal == PALMD::AssemblerDirective)
6198 return ParseDirectivePALMetadata();
6199
6200 return true;
6201}
6202
6203bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6204 unsigned RegNo) {
6205
6206 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
6207 return isGFX9Plus();
6208
6209 // GFX10+ has 2 more SGPRs 104 and 105.
6210 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
6211 return hasSGPR104_SGPR105();
6212
6213 switch (RegNo) {
6214 case AMDGPU::SRC_SHARED_BASE_LO:
6215 case AMDGPU::SRC_SHARED_BASE:
6216 case AMDGPU::SRC_SHARED_LIMIT_LO:
6217 case AMDGPU::SRC_SHARED_LIMIT:
6218 case AMDGPU::SRC_PRIVATE_BASE_LO:
6219 case AMDGPU::SRC_PRIVATE_BASE:
6220 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
6221 case AMDGPU::SRC_PRIVATE_LIMIT:
6222 return isGFX9Plus();
6223 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
6224 return isGFX9Plus() && !isGFX11Plus();
6225 case AMDGPU::TBA:
6226 case AMDGPU::TBA_LO:
6227 case AMDGPU::TBA_HI:
6228 case AMDGPU::TMA:
6229 case AMDGPU::TMA_LO:
6230 case AMDGPU::TMA_HI:
6231 return !isGFX9Plus();
6232 case AMDGPU::XNACK_MASK:
6233 case AMDGPU::XNACK_MASK_LO:
6234 case AMDGPU::XNACK_MASK_HI:
6235 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6236 case AMDGPU::SGPR_NULL:
6237 return isGFX10Plus();
6238 default:
6239 break;
6240 }
6241
6242 if (isCI())
6243 return true;
6244
6245 if (isSI() || isGFX10Plus()) {
6246 // No flat_scr on SI.
6247 // On GFX10Plus flat scratch is not a valid register operand and can only be
6248 // accessed with s_setreg/s_getreg.
6249 switch (RegNo) {
6250 case AMDGPU::FLAT_SCR:
6251 case AMDGPU::FLAT_SCR_LO:
6252 case AMDGPU::FLAT_SCR_HI:
6253 return false;
6254 default:
6255 return true;
6256 }
6257 }
6258
6259 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6260 // SI/CI have.
6261 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
6262 return hasSGPR102_SGPR103();
6263
6264 return true;
6265}
6266
6267ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6268 StringRef Mnemonic,
6269 OperandMode Mode) {
6270 ParseStatus Res = parseVOPD(Operands);
6271 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6272 return Res;
6273
6274 // Try to parse with a custom parser
6275 Res = MatchOperandParserImpl(Operands, Mnemonic);
6276
6277 // If we successfully parsed the operand or if there was an error parsing,
6278 // we are done.
6279 //
6280 // If we are parsing after we reach EndOfStatement then this means we
6281 // are appending default values to the Operands list. This is only done
6282 // by custom parser, so we shouldn't continue on to the generic parsing.
6283 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6284 return Res;
6285
6286 SMLoc RBraceLoc;
6287 SMLoc LBraceLoc = getLoc();
6288 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6289 unsigned Prefix = Operands.size();
6290
6291 for (;;) {
6292 auto Loc = getLoc();
6293 Res = parseReg(Operands);
6294 if (Res.isNoMatch())
6295 Error(Loc, "expected a register");
6296 if (!Res.isSuccess())
6297 return ParseStatus::Failure;
6298
6299 RBraceLoc = getLoc();
6300 if (trySkipToken(AsmToken::RBrac))
6301 break;
6302
6303 if (!skipToken(AsmToken::Comma,
6304 "expected a comma or a closing square bracket"))
6305 return ParseStatus::Failure;
6306 }
6307
6308 if (Operands.size() - Prefix > 1) {
6309 Operands.insert(Operands.begin() + Prefix,
6310 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6311 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6312 }
6313
6314 return ParseStatus::Success;
6315 }
6316
6317 return parseRegOrImm(Operands);
6318}
6319
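// Strips a forced-encoding suffix from the mnemonic and records it, e.g.
// "v_add_f32_e64" forces the 64-bit encoding and "v_add_f32_e32" the 32-bit
// one, while "_dpp" and "_sdwa" force those variants (the example mnemonic is
// illustrative).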
6320StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6321 // Clear any forced encodings from the previous instruction.
6322 setForcedEncodingSize(0);
6323 setForcedDPP(false);
6324 setForcedSDWA(false);
6325
6326 if (Name.ends_with("_e64_dpp")) {
6327 setForcedDPP(true);
6328 setForcedEncodingSize(64);
6329 return Name.substr(0, Name.size() - 8);
6330 } else if (Name.ends_with("_e64")) {
6331 setForcedEncodingSize(64);
6332 return Name.substr(0, Name.size() - 4);
6333 } else if (Name.ends_with("_e32")) {
6334 setForcedEncodingSize(32);
6335 return Name.substr(0, Name.size() - 4);
6336 } else if (Name.ends_with("_dpp")) {
6337 setForcedDPP(true);
6338 return Name.substr(0, Name.size() - 4);
6339 } else if (Name.ends_with("_sdwa")) {
6340 setForcedSDWA(true);
6341 return Name.substr(0, Name.size() - 5);
6342 }
6343 return Name;
6344}
6345
6346static void applyMnemonicAliases(StringRef &Mnemonic,
6347 const FeatureBitset &Features,
6348 unsigned VariantID);
6349
6350bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
6351 StringRef Name,
6352 SMLoc NameLoc, OperandVector &Operands) {
6353 // Add the instruction mnemonic
6354 Name = parseMnemonicSuffix(Name);
6355
6356 // If the target architecture uses MnemonicAlias, call it here to parse
6357 // operands correctly.
6358 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6359
6360 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6361
6362 bool IsMIMG = Name.starts_with("image_");
6363
6364 while (!trySkipToken(AsmToken::EndOfStatement)) {
6365 OperandMode Mode = OperandMode_Default;
6366 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6367 Mode = OperandMode_NSA;
6368 ParseStatus Res = parseOperand(Operands, Name, Mode);
6369
6370 if (!Res.isSuccess()) {
6371 checkUnsupportedInstruction(Name, NameLoc);
6372 if (!Parser.hasPendingError()) {
6373 // FIXME: use real operand location rather than the current location.
6374 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6375 : "not a valid operand.";
6376 Error(getLoc(), Msg);
6377 }
6378 while (!trySkipToken(AsmToken::EndOfStatement)) {
6379 lex();
6380 }
6381 return true;
6382 }
6383
6384 // Eat the comma or space if there is one.
6385 trySkipToken(AsmToken::Comma);
6386 }
6387
6388 return false;
6389}
6390
6391//===----------------------------------------------------------------------===//
6392// Utility functions
6393//===----------------------------------------------------------------------===//
6394
6395ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6396 OperandVector &Operands) {
6397 SMLoc S = getLoc();
6398 if (!trySkipId(Name))
6399 return ParseStatus::NoMatch;
6400
6401 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6402 return ParseStatus::Success;
6403}
6404
6405ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6406 int64_t &IntVal) {
6407
6408 if (!trySkipId(Prefix, AsmToken::Colon))
6409 return ParseStatus::NoMatch;
6410
6411 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6412}
6413
6414ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6415 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6416 std::function<bool(int64_t &)> ConvertResult) {
6417 SMLoc S = getLoc();
6418 int64_t Value = 0;
6419
6420 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6421 if (!Res.isSuccess())
6422 return Res;
6423
6424 if (ConvertResult && !ConvertResult(Value)) {
6425 Error(S, "invalid " + StringRef(Prefix) + " value.");
6426 }
6427
6428 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6429 return ParseStatus::Success;
6430}
6431
6432ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6433 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6434 bool (*ConvertResult)(int64_t &)) {
6435 SMLoc S = getLoc();
6436 if (!trySkipId(Prefix, AsmToken::Colon))
6437 return ParseStatus::NoMatch;
6438
6439 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6440 return ParseStatus::Failure;
6441
6442 unsigned Val = 0;
6443 const unsigned MaxSize = 4;
6444
6445 // FIXME: How to verify the number of elements matches the number of src
6446 // operands?
6447 for (int I = 0; ; ++I) {
6448 int64_t Op;
6449 SMLoc Loc = getLoc();
6450 if (!parseExpr(Op))
6451 return ParseStatus::Failure;
6452
6453 if (Op != 0 && Op != 1)
6454 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6455
6456 Val |= (Op << I);
6457
6458 if (trySkipToken(AsmToken::RBrac))
6459 break;
6460
6461 if (I + 1 == MaxSize)
6462 return Error(getLoc(), "expected a closing square bracket");
6463
6464 if (!skipToken(AsmToken::Comma, "expected a comma"))
6465 return ParseStatus::Failure;
6466 }
6467
6468 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6469 return ParseStatus::Success;
6470}
6471
6472ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6473 OperandVector &Operands,
6474 AMDGPUOperand::ImmTy ImmTy) {
6475 int64_t Bit;
6476 SMLoc S = getLoc();
6477
6478 if (trySkipId(Name)) {
6479 Bit = 1;
6480 } else if (trySkipId("no", Name)) {
6481 Bit = 0;
6482 } else {
6483 return ParseStatus::NoMatch;
6484 }
6485
6486 if (Name == "r128" && !hasMIMG_R128())
6487 return Error(S, "r128 modifier is not supported on this GPU");
6488 if (Name == "a16" && !hasA16())
6489 return Error(S, "a16 modifier is not supported on this GPU");
6490
6491 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6492 ImmTy = AMDGPUOperand::ImmTyR128A16;
6493
6494 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6495 return ParseStatus::Success;
6496}
6497
6498unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6499 bool &Disabling) const {
6500 Disabling = Id.consume_front("no");
6501
6502 if (isGFX940() && !Mnemo.starts_with("s_")) {
6503 return StringSwitch<unsigned>(Id)
6504 .Case("nt", AMDGPU::CPol::NT)
6505 .Case("sc0", AMDGPU::CPol::SC0)
6506 .Case("sc1", AMDGPU::CPol::SC1)
6507 .Default(0);
6508 }
6509
6510 return StringSwitch<unsigned>(Id)
6511 .Case("dlc", AMDGPU::CPol::DLC)
6512 .Case("glc", AMDGPU::CPol::GLC)
6513 .Case("scc", AMDGPU::CPol::SCC)
6514 .Case("slc", AMDGPU::CPol::SLC)
6515 .Default(0);
6516}
6517
6518ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
6519 if (isGFX12Plus()) {
6520 SMLoc StringLoc = getLoc();
6521
6522 int64_t CPolVal = 0;
6523 ParseStatus ResTH = ParseStatus::NoMatch;
6524 ParseStatus ResScope = ParseStatus::NoMatch;
6525
6526 for (;;) {
6527 if (ResTH.isNoMatch()) {
6528 int64_t TH;
6529 ResTH = parseTH(Operands, TH);
6530 if (ResTH.isFailure())
6531 return ResTH;
6532 if (ResTH.isSuccess()) {
6533 CPolVal |= TH;
6534 continue;
6535 }
6536 }
6537
6538 if (ResScope.isNoMatch()) {
6539 int64_t Scope;
6540 ResScope = parseScope(Operands, Scope);
6541 if (ResScope.isFailure())
6542 return ResScope;
6543 if (ResScope.isSuccess()) {
6544 CPolVal |= Scope;
6545 continue;
6546 }
6547 }
6548
6549 break;
6550 }
6551
6552 if (ResTH.isNoMatch() && ResScope.isNoMatch())
6553 return ParseStatus::NoMatch;
6554
6555 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
6556 AMDGPUOperand::ImmTyCPol));
6557 return ParseStatus::Success;
6558 }
6559
6560 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
6561 SMLoc OpLoc = getLoc();
6562 unsigned Enabled = 0, Seen = 0;
6563 for (;;) {
6564 SMLoc S = getLoc();
6565 bool Disabling;
6566 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6567 if (!CPol)
6568 break;
6569
6570 lex();
6571
6572 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
6573 return Error(S, "dlc modifier is not supported on this GPU");
6574
6575 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
6576 return Error(S, "scc modifier is not supported on this GPU");
6577
6578 if (Seen & CPol)
6579 return Error(S, "duplicate cache policy modifier");
6580
6581 if (!Disabling)
6582 Enabled |= CPol;
6583
6584 Seen |= CPol;
6585 }
6586
6587 if (!Seen)
6588 return ParseStatus::NoMatch;
6589
6590 Operands.push_back(
6591 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6592 return ParseStatus::Success;
6593}
6594
6595ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6596 int64_t &Scope) {
6597 Scope = AMDGPU::CPol::SCOPE_CU; // default;
6598
6599 StringRef Value;
6600 SMLoc StringLoc;
6601 ParseStatus Res;
6602
6603 Res = parseStringWithPrefix("scope", Value, StringLoc);
6604 if (!Res.isSuccess())
6605 return Res;
6606
6607 Scope = StringSwitch<int64_t>(Value)
6608 .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU)
6609 .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE)
6610 .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV)
6611 .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS)
6612 .Default(0xffffffff);
6613
6614 if (Scope == 0xffffffff)
6615 return Error(StringLoc, "invalid scope value");
6616
6617 return ParseStatus::Success;
6618}
6619
6620ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
6621 TH = AMDGPU::CPol::TH_RT; // default
6622
6623 StringRef Value;
6624 SMLoc StringLoc;
6625 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
6626 if (!Res.isSuccess())
6627 return Res;
6628
6629 if (Value == "TH_DEFAULT")
6630 TH = AMDGPU::CPol::TH_RT;
6631 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
6632 Value == "TH_LOAD_NT_WB") {
6633 return Error(StringLoc, "invalid th value");
6634 } else if (Value.consume_front("TH_ATOMIC_")) {
6635 TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
6636 } else if (Value.consume_front("TH_LOAD_")) {
6637 TH = AMDGPU::CPol::TH_TYPE_LOAD;
6638 } else if (Value.consume_front("TH_STORE_")) {
6639 TH = AMDGPU::CPol::TH_TYPE_STORE;
6640 } else {
6641 return Error(StringLoc, "invalid th value");
6642 }
6643
6644 if (Value == "BYPASS")
6645 TH |= AMDGPU::CPol::TH_REAL_BYPASS;
6646
6647 if (TH != 0) {
6648 if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
6649 TH |= StringSwitch<int64_t>(Value)
6650 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6651 .Case("RT", AMDGPU::CPol::TH_RT)
6652 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6653 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
6654 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
6655 AMDGPU::CPol::TH_ATOMIC_RETURN)
6656 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
6657 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
6658 AMDGPU::CPol::TH_ATOMIC_NT)
6659 .Default(0xffffffff);
6660 else
6661 TH |= StringSwitch<int64_t>(Value)
6662 .Case("RT", AMDGPU::CPol::TH_RT)
6663 .Case("NT", AMDGPU::CPol::TH_NT)
6664 .Case("HT", AMDGPU::CPol::TH_HT)
6665 .Case("LU", AMDGPU::CPol::TH_LU)
6666 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
6667 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
6668 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
6669 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
6670 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
6671 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
6672 .Default(0xffffffff);
6673 }
6674
6675 if (TH == 0xffffffff)
6676 return Error(StringLoc, "invalid th value");
6677
6678 return ParseStatus::Success;
6679}
6680
6681 static void addOptionalImmOperand(
6682 MCInst& Inst, const OperandVector& Operands,
6683 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6684 AMDGPUOperand::ImmTy ImmT,
6685 int64_t Default = 0) {
6686 auto i = OptionalIdx.find(ImmT);
6687 if (i != OptionalIdx.end()) {
6688 unsigned Idx = i->second;
6689 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6690 } else {
6691 Inst.addOperand(MCOperand::createImm(Default));
6692 }
6693}
6694
6695ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6696 StringRef &Value,
6697 SMLoc &StringLoc) {
6698 if (!trySkipId(Prefix, AsmToken::Colon))
6699 return ParseStatus::NoMatch;
6700
6701 StringLoc = getLoc();
6702 return parseId(Value, "expected an identifier") ? ParseStatus::Success
6703 : ParseStatus::Failure;
6704}
6705
6706//===----------------------------------------------------------------------===//
6707// MTBUF format
6708//===----------------------------------------------------------------------===//
6709
6710bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6711 int64_t MaxVal,
6712 int64_t &Fmt) {
6713 int64_t Val;
6714 SMLoc Loc = getLoc();
6715
6716 auto Res = parseIntWithPrefix(Pref, Val);
6717 if (Res.isFailure())
6718 return false;
6719 if (Res.isNoMatch())
6720 return true;
6721
6722 if (Val < 0 || Val > MaxVal) {
6723 Error(Loc, Twine("out of range ", StringRef(Pref)));
6724 return false;
6725 }
6726
6727 Fmt = Val;
6728 return true;
6729}
6730
6731ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
6732 AMDGPUOperand::ImmTy ImmTy) {
6733 const char *Pref = "index_key";
6734 int64_t ImmVal = 0;
6735 SMLoc Loc = getLoc();
6736 auto Res = parseIntWithPrefix(Pref, ImmVal);
6737 if (!Res.isSuccess())
6738 return Res;
6739
6740 if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6741 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6742
6743 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6744 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6745
6746 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
6747 return ParseStatus::Success;
6748}
6749
6750ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
6751 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
6752}
6753
6754ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
6755 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
6756}
6757
6758// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6759// values to live in a joint format operand in the MCInst encoding.
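// e.g. "dfmt:1, nfmt:2" (in either order, comma optional) is folded into the
// single format immediate by encodeDfmtNfmt below; the numeric values are
// illustrative.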
6760ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6761 using namespace llvm::AMDGPU::MTBUFFormat;
6762
6763 int64_t Dfmt = DFMT_UNDEF;
6764 int64_t Nfmt = NFMT_UNDEF;
6765
6766 // dfmt and nfmt can appear in either order, and each is optional.
6767 for (int I = 0; I < 2; ++I) {
6768 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6769 return ParseStatus::Failure;
6770
6771 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
6772 return ParseStatus::Failure;
6773
6774 // Skip optional comma between dfmt/nfmt
6775 // but guard against 2 commas following each other.
6776 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6777 !peekToken().is(AsmToken::Comma)) {
6778 trySkipToken(AsmToken::Comma);
6779 }
6780 }
6781
6782 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6783 return ParseStatus::NoMatch;
6784
6785 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6786 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6787
6788 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6789 return ParseStatus::Success;
6790}
6791
6792ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6793 using namespace llvm::AMDGPU::MTBUFFormat;
6794
6795 int64_t Fmt = UFMT_UNDEF;
6796
6797 if (!tryParseFmt("format", UFMT_MAX, Fmt))
6798 return ParseStatus::Failure;
6799
6800 if (Fmt == UFMT_UNDEF)
6801 return ParseStatus::NoMatch;
6802
6803 Format = Fmt;
6804 return ParseStatus::Success;
6805}
6806
6807bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6808 int64_t &Nfmt,
6809 StringRef FormatStr,
6810 SMLoc Loc) {
6811 using namespace llvm::AMDGPU::MTBUFFormat;
6812 int64_t Format;
6813
6814 Format = getDfmt(FormatStr);
6815 if (Format != DFMT_UNDEF) {
6816 Dfmt = Format;
6817 return true;
6818 }
6819
6820 Format = getNfmt(FormatStr, getSTI());
6821 if (Format != NFMT_UNDEF) {
6822 Nfmt = Format;
6823 return true;
6824 }
6825
6826 Error(Loc, "unsupported format");
6827 return false;
6828}
6829
6830ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6831 SMLoc FormatLoc,
6832 int64_t &Format) {
6833 using namespace llvm::AMDGPU::MTBUFFormat;
6834
6835 int64_t Dfmt = DFMT_UNDEF;
6836 int64_t Nfmt = NFMT_UNDEF;
6837 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6838 return ParseStatus::Failure;
6839
6840 if (trySkipToken(AsmToken::Comma)) {
6841 StringRef Str;
6842 SMLoc Loc = getLoc();
6843 if (!parseId(Str, "expected a format string") ||
6844 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6845 return ParseStatus::Failure;
6846 if (Dfmt == DFMT_UNDEF)
6847 return Error(Loc, "duplicate numeric format");
6848 if (Nfmt == NFMT_UNDEF)
6849 return Error(Loc, "duplicate data format");
6850 }
6851
6852 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6853 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6854
6855 if (isGFX10Plus()) {
6856 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6857 if (Ufmt == UFMT_UNDEF)
6858 return Error(FormatLoc, "unsupported format");
6859 Format = Ufmt;
6860 } else {
6861 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6862 }
6863
6864 return ParseStatus::Success;
6865}
6866
6867ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6868 SMLoc Loc,
6869 int64_t &Format) {
6870 using namespace llvm::AMDGPU::MTBUFFormat;
6871
6872 auto Id = getUnifiedFormat(FormatStr, getSTI());
6873 if (Id == UFMT_UNDEF)
6874 return ParseStatus::NoMatch;
6875
6876 if (!isGFX10Plus())
6877 return Error(Loc, "unified format is not supported on this GPU");
6878
6879 Format = Id;
6880 return ParseStatus::Success;
6881}
6882
6883ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6884 using namespace llvm::AMDGPU::MTBUFFormat;
6885 SMLoc Loc = getLoc();
6886
6887 if (!parseExpr(Format))
6888 return ParseStatus::Failure;
6889 if (!isValidFormatEncoding(Format, getSTI()))
6890 return Error(Loc, "out of range format");
6891
6892 return ParseStatus::Success;
6893}
6894
6895ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6896 using namespace llvm::AMDGPU::MTBUFFormat;
6897
6898 if (!trySkipId("format", AsmToken::Colon))
6899 return ParseStatus::NoMatch;
6900
6901 if (trySkipToken(AsmToken::LBrac)) {
6902 StringRef FormatStr;
6903 SMLoc Loc = getLoc();
6904 if (!parseId(FormatStr, "expected a format string"))
6905 return ParseStatus::Failure;
6906
6907 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6908 if (Res.isNoMatch())
6909 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6910 if (!Res.isSuccess())
6911 return Res;
6912
6913 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6914 return ParseStatus::Failure;
6915
6916 return ParseStatus::Success;
6917 }
6918
6919 return parseNumericFormat(Format);
6920}
6921
6922ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6923 using namespace llvm::AMDGPU::MTBUFFormat;
6924
6925 int64_t Format = getDefaultFormatEncoding(getSTI());
6926 ParseStatus Res;
6927 SMLoc Loc = getLoc();
6928
6929 // Parse legacy format syntax.
6930 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6931 if (Res.isFailure())
6932 return Res;
6933
6934 bool FormatFound = Res.isSuccess();
6935
6936 Operands.push_back(
6937 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6938
6939 if (FormatFound)
6940 trySkipToken(AsmToken::Comma);
6941
6942 if (isToken(AsmToken::EndOfStatement)) {
6943 // We are expecting an soffset operand,
6944 // but let matcher handle the error.
6945 return ParseStatus::Success;
6946 }
6947
6948 // Parse soffset.
6949 Res = parseRegOrImm(Operands);
6950 if (!Res.isSuccess())
6951 return Res;
6952
6953 trySkipToken(AsmToken::Comma);
6954
6955 if (!FormatFound) {
6956 Res = parseSymbolicOrNumericFormat(Format);
6957 if (Res.isFailure())
6958 return Res;
6959 if (Res.isSuccess()) {
6960 auto Size = Operands.size();
6961 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6962 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6963 Op.setImm(Format);
6964 }
6965 return ParseStatus::Success;
6966 }
6967
6968 if (isId("format") && peekToken().is(AsmToken::Colon))
6969 return Error(getLoc(), "duplicate format");
6970 return ParseStatus::Success;
6971}
6972
6973ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
6974 ParseStatus Res =
6975 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
6976 if (Res.isNoMatch()) {
6977 Res = parseIntWithPrefix("inst_offset", Operands,
6978 AMDGPUOperand::ImmTyInstOffset);
6979 }
6980 return Res;
6981}
6982
6983ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
6984 ParseStatus Res =
6985 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
6986 if (Res.isNoMatch())
6987 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
6988 return Res;
6989}
6990
6991ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
6992 ParseStatus Res =
6993 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
6994 if (Res.isNoMatch()) {
6995 Res =
6996 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
6997 }
6998 return Res;
6999}
7000
7001//===----------------------------------------------------------------------===//
7002// Exp
7003//===----------------------------------------------------------------------===//
7004
7005void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7006 OptionalImmIndexMap OptionalIdx;
7007
7008 unsigned OperandIdx[4];
7009 unsigned EnMask = 0;
7010 int SrcIdx = 0;
7011
7012 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7013 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7014
7015 // Add the register arguments
7016 if (Op.isReg()) {
7017 assert(SrcIdx < 4);
7018 OperandIdx[SrcIdx] = Inst.size();
7019 Op.addRegOperands(Inst, 1);
7020 ++SrcIdx;
7021 continue;
7022 }
7023
7024 if (Op.isOff()) {
7025 assert(SrcIdx < 4);
7026 OperandIdx[SrcIdx] = Inst.size();
7027 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
7028 ++SrcIdx;
7029 continue;
7030 }
7031
7032 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7033 Op.addImmOperands(Inst, 1);
7034 continue;
7035 }
7036
7037 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7038 continue;
7039
7040 // Handle optional arguments
7041 OptionalIdx[Op.getImmTy()] = i;
7042 }
7043
7044 assert(SrcIdx == 4);
7045
7046 bool Compr = false;
7047 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7048 Compr = true;
7049 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7050 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
7051 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
7052 }
7053
7054 for (auto i = 0; i < SrcIdx; ++i) {
7055 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
7056 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7057 }
7058 }
7059
7060 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7061 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7062
7063 Inst.addOperand(MCOperand::createImm(EnMask));
7064}
7065
7066//===----------------------------------------------------------------------===//
7067// s_waitcnt
7068//===----------------------------------------------------------------------===//
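// The s_waitcnt operand is either a list of named counters such as
// "vmcnt(0) expcnt(0) lgkmcnt(0)" or a raw immediate expression; the helpers
// below encode each named counter into the combined wait-count bitmask.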
7069
7070static bool
7072 const AMDGPU::IsaVersion ISA,
7073 int64_t &IntVal,
7074 int64_t CntVal,
7075 bool Saturate,
7076 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7077 unsigned (*decode)(const IsaVersion &Version, unsigned))
7078{
7079 bool Failed = false;
7080
7081 IntVal = encode(ISA, IntVal, CntVal);
7082 if (CntVal != decode(ISA, IntVal)) {
7083 if (Saturate) {
7084 IntVal = encode(ISA, IntVal, -1);
7085 } else {
7086 Failed = true;
7087 }
7088 }
7089 return Failed;
7090}
7091
7092bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7093
7094 SMLoc CntLoc = getLoc();
7095 StringRef CntName = getTokenStr();
7096
7097 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7098 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7099 return false;
7100
7101 int64_t CntVal;
7102 SMLoc ValLoc = getLoc();
7103 if (!parseExpr(CntVal))
7104 return false;
7105
7106 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7107
7108 bool Failed = true;
7109 bool Sat = CntName.ends_with("_sat");
7110
7111 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7112 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7113 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7114 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7115 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7116 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7117 } else {
7118 Error(CntLoc, "invalid counter name " + CntName);
7119 return false;
7120 }
7121
7122 if (Failed) {
7123 Error(ValLoc, "too large value for " + CntName);
7124 return false;
7125 }
7126
7127 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7128 return false;
7129
7130 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7131 if (isToken(AsmToken::EndOfStatement)) {
7132 Error(getLoc(), "expected a counter name");
7133 return false;
7134 }
7135 }
7136
7137 return true;
7138}
7139
7140ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7141 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7142 int64_t Waitcnt = getWaitcntBitMask(ISA);
7143 SMLoc S = getLoc();
7144
7145 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7146 while (!isToken(AsmToken::EndOfStatement)) {
7147 if (!parseCnt(Waitcnt))
7148 return ParseStatus::Failure;
7149 }
7150 } else {
7151 if (!parseExpr(Waitcnt))
7152 return ParseStatus::Failure;
7153 }
7154
7155 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7156 return ParseStatus::Success;
7157}
7158
7159bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7160 SMLoc FieldLoc = getLoc();
7161 StringRef FieldName = getTokenStr();
7162 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7163 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7164 return false;
7165
7166 SMLoc ValueLoc = getLoc();
7167 StringRef ValueName = getTokenStr();
7168 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7169 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7170 return false;
7171
7172 unsigned Shift;
7173 if (FieldName == "instid0") {
7174 Shift = 0;
7175 } else if (FieldName == "instskip") {
7176 Shift = 4;
7177 } else if (FieldName == "instid1") {
7178 Shift = 7;
7179 } else {
7180 Error(FieldLoc, "invalid field name " + FieldName);
7181 return false;
7182 }
7183
7184 int Value;
7185 if (Shift == 4) {
7186 // Parse values for instskip.
7187 Value = StringSwitch<int>(ValueName)
7188 .Case("SAME", 0)
7189 .Case("NEXT", 1)
7190 .Case("SKIP_1", 2)
7191 .Case("SKIP_2", 3)
7192 .Case("SKIP_3", 4)
7193 .Case("SKIP_4", 5)
7194 .Default(-1);
7195 } else {
7196 // Parse values for instid0 and instid1.
7197 Value = StringSwitch<int>(ValueName)
7198 .Case("NO_DEP", 0)
7199 .Case("VALU_DEP_1", 1)
7200 .Case("VALU_DEP_2", 2)
7201 .Case("VALU_DEP_3", 3)
7202 .Case("VALU_DEP_4", 4)
7203 .Case("TRANS32_DEP_1", 5)
7204 .Case("TRANS32_DEP_2", 6)
7205 .Case("TRANS32_DEP_3", 7)
7206 .Case("FMA_ACCUM_CYCLE_1", 8)
7207 .Case("SALU_CYCLE_1", 9)
7208 .Case("SALU_CYCLE_2", 10)
7209 .Case("SALU_CYCLE_3", 11)
7210 .Default(-1);
7211 }
7212 if (Value < 0) {
7213 Error(ValueLoc, "invalid value name " + ValueName);
7214 return false;
7215 }
7216
7217 Delay |= Value << Shift;
7218 return true;
7219}
7220
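// Parses an s_delay_alu operand, e.g.
// "instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)" or a raw
// immediate; the field and value names come from parseDelay above.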
7221ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7222 int64_t Delay = 0;
7223 SMLoc S = getLoc();
7224
7225 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7226 do {
7227 if (!parseDelay(Delay))
7228 return ParseStatus::Failure;
7229 } while (trySkipToken(AsmToken::Pipe));
7230 } else {
7231 if (!parseExpr(Delay))
7232 return ParseStatus::Failure;
7233 }
7234
7235 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7236 return ParseStatus::Success;
7237}
7238
7239bool
7240AMDGPUOperand::isSWaitCnt() const {
7241 return isImm();
7242}
7243
7244bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7245
7246//===----------------------------------------------------------------------===//
7247// DepCtr
7248//===----------------------------------------------------------------------===//
7249
7250void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7251 StringRef DepCtrName) {
7252 switch (ErrorId) {
7253 case OPR_ID_UNKNOWN:
7254 Error(Loc, Twine("invalid counter name ", DepCtrName));
7255 return;
7256 case OPR_ID_UNSUPPORTED:
7257 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7258 return;
7259 case OPR_ID_DUPLICATE:
7260 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7261 return;
7262 case OPR_VAL_INVALID:
7263 Error(Loc, Twine("invalid value for ", DepCtrName));
7264 return;
7265 default:
7266 assert(false);
7267 }
7268}
7269
7270bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7271
7272 using namespace llvm::AMDGPU::DepCtr;
7273
7274 SMLoc DepCtrLoc = getLoc();
7275 StringRef DepCtrName = getTokenStr();
7276
7277 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7278 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7279 return false;
7280
7281 int64_t ExprVal;
7282 if (!parseExpr(ExprVal))
7283 return false;
7284
7285 unsigned PrevOprMask = UsedOprMask;
7286 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7287
7288 if (CntVal < 0) {
7289 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7290 return false;
7291 }
7292
7293 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7294 return false;
7295
7296 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7297 if (isToken(AsmToken::EndOfStatement)) {
7298 Error(getLoc(), "expected a counter name");
7299 return false;
7300 }
7301 }
7302
7303 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7304 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7305 return true;
7306}
7307
7308ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7309 using namespace llvm::AMDGPU::DepCtr;
7310
7311 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7312 SMLoc Loc = getLoc();
7313
7314 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7315 unsigned UsedOprMask = 0;
7316 while (!isToken(AsmToken::EndOfStatement)) {
7317 if (!parseDepCtr(DepCtr, UsedOprMask))
7318 return ParseStatus::Failure;
7319 }
7320 } else {
7321 if (!parseExpr(DepCtr))
7322 return ParseStatus::Failure;
7323 }
7324
7325 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7326 return ParseStatus::Success;
7327}
7328
7329bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7330
7331//===----------------------------------------------------------------------===//
7332// hwreg
7333//===----------------------------------------------------------------------===//
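// The hwreg operand may be a raw 16-bit immediate or the functional form
// "hwreg(<name or id>[, <offset>, <size>])", e.g. "hwreg(HW_REG_MODE, 0, 32)"
// (register name chosen for illustration).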
7334
7335ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7336 OperandInfoTy &Offset,
7337 OperandInfoTy &Width) {
7338 using namespace llvm::AMDGPU::Hwreg;
7339
7340 if (!trySkipId("hwreg", AsmToken::LParen))
7341 return ParseStatus::NoMatch;
7342
7343 // The register may be specified by name or using a numeric code
7344 HwReg.Loc = getLoc();
7345 if (isToken(AsmToken::Identifier) &&
7346 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7347 HwReg.IsSymbolic = true;
7348 lex(); // skip register name
7349 } else if (!parseExpr(HwReg.Val, "a register name")) {
7350 return ParseStatus::Failure;
7351 }
7352
7353 if (trySkipToken(AsmToken::RParen))
7354 return ParseStatus::Success;
7355
7356 // parse optional params
7357 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
7358 return ParseStatus::Failure;
7359
7360 Offset.Loc = getLoc();
7361 if (!parseExpr(Offset.Val))
7362 return ParseStatus::Failure;
7363
7364 if (!skipToken(AsmToken::Comma, "expected a comma"))
7365 return ParseStatus::Failure;
7366
7367 Width.Loc = getLoc();
7368 if (!parseExpr(Width.Val) ||
7369 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7370 return ParseStatus::Failure;
7371
7372 return ParseStatus::Success;
7373}
7374
7375ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
7376 using namespace llvm::AMDGPU::Hwreg;
7377
7378 int64_t ImmVal = 0;
7379 SMLoc Loc = getLoc();
7380
7381 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
7382 HwregId::Default);
7383 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
7384 HwregOffset::Default);
7385 struct : StructuredOpField {
7386 using StructuredOpField::StructuredOpField;
7387 bool validate(AMDGPUAsmParser &Parser) const override {
7388 if (!isUIntN(Width, Val - 1))
7389 return Error(Parser, "only values from 1 to 32 are legal");
7390 return true;
7391 }
7392 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
7393 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
7394
7395 if (Res.isNoMatch())
7396 Res = parseHwregFunc(HwReg, Offset, Width);
7397
7398 if (Res.isSuccess()) {
7399 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
7400 return ParseStatus::Failure;
7401 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
7402 }
7403
7404 if (Res.isNoMatch() &&
7405 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
7406    Res = ParseStatus::Success;
7407
7408 if (!Res.isSuccess())
7409 return ParseStatus::Failure;
7410
7411 if (!isUInt<16>(ImmVal))
7412 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7413 Operands.push_back(
7414 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7415 return ParseStatus::Success;
7416}
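// Illustrative examples (assumed, not taken from this file) of the three forms
// accepted by parseHwreg(): a hwreg() macro, a structured immediate, or a raw
// 16-bit value:
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)
//   s_getreg_b32 s0, {id: 1, offset: 0, size: 32}
//   s_getreg_b32 s0, 0x1801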
7417
7418bool AMDGPUOperand::isHwreg() const {
7419 return isImmTy(ImmTyHwreg);
7420}
7421
7422//===----------------------------------------------------------------------===//
7423// sendmsg
7424//===----------------------------------------------------------------------===//
7425
7426bool
7427AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7428 OperandInfoTy &Op,
7429 OperandInfoTy &Stream) {
7430 using namespace llvm::AMDGPU::SendMsg;
7431
7432 Msg.Loc = getLoc();
7433 if (isToken(AsmToken::Identifier) &&
7434 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7435 Msg.IsSymbolic = true;
7436 lex(); // skip message name
7437 } else if (!parseExpr(Msg.Val, "a message name")) {
7438 return false;
7439 }
7440
7441 if (trySkipToken(AsmToken::Comma)) {
7442 Op.IsDefined = true;
7443 Op.Loc = getLoc();
7444 if (isToken(AsmToken::Identifier) &&
7445 (Op.Val = getMsgOpId(Msg.Val, getTokenStr())) >= 0) {
7446 lex(); // skip operation name
7447 } else if (!parseExpr(Op.Val, "an operation name")) {
7448 return false;
7449 }
7450
7451 if (trySkipToken(AsmToken::Comma)) {
7452 Stream.IsDefined = true;
7453 Stream.Loc = getLoc();
7454 if (!parseExpr(Stream.Val))
7455 return false;
7456 }
7457 }
7458
7459 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
7460}
7461
7462bool
7463AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
7464 const OperandInfoTy &Op,
7465 const OperandInfoTy &Stream) {
7466 using namespace llvm::AMDGPU::SendMsg;
7467
7468  // Validation strictness depends on whether the message is specified
7469  // in symbolic or in numeric form. In the latter case
7470  // only the possibility of encoding is checked.
7471 bool Strict = Msg.IsSymbolic;
7472
7473 if (Strict) {
7474 if (Msg.Val == OPR_ID_UNSUPPORTED) {
7475 Error(Msg.Loc, "specified message id is not supported on this GPU");
7476 return false;
7477 }
7478 } else {
7479 if (!isValidMsgId(Msg.Val, getSTI())) {
7480 Error(Msg.Loc, "invalid message id");
7481 return false;
7482 }
7483 }
7484 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
7485 if (Op.IsDefined) {
7486 Error(Op.Loc, "message does not support operations");
7487 } else {
7488 Error(Msg.Loc, "missing message operation");
7489 }
7490 return false;
7491 }
7492 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
7493 Error(Op.Loc, "invalid operation id");
7494 return false;
7495 }
7496 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
7497 Stream.IsDefined) {
7498 Error(Stream.Loc, "message operation does not support streams");
7499 return false;
7500 }
7501 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
7502 Error(Stream.Loc, "invalid message stream id");
7503 return false;
7504 }
7505 return true;
7506}
7507
7508ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
7509 using namespace llvm::AMDGPU::SendMsg;
7510
7511 int64_t ImmVal = 0;
7512 SMLoc Loc = getLoc();
7513
7514 if (trySkipId("sendmsg", AsmToken::LParen)) {
7515 OperandInfoTy Msg(OPR_ID_UNKNOWN);
7516 OperandInfoTy Op(OP_NONE_);
7517 OperandInfoTy Stream(STREAM_ID_NONE_);
7518 if (parseSendMsgBody(Msg, Op, Stream) &&
7519 validateSendMsg(Msg, Op, Stream)) {
7520 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
7521 } else {
7522 return ParseStatus::Failure;
7523 }
7524 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
7525 if (ImmVal < 0 || !isUInt<16>(ImmVal))
7526 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7527 } else {
7528 return ParseStatus::Failure;
7529 }
7530
7531 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7532 return ParseStatus::Success;
7533}
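// Illustrative examples (assumed, not taken from this file) of the forms accepted
// by parseSendMsg(): a sendmsg() macro or a raw 16-bit immediate:
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
//   s_sendmsg 0x22
// Message and operation names are resolved via getMsgId()/getMsgOpId().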
7534
7535bool AMDGPUOperand::isSendMsg() const {
7536 return isImmTy(ImmTySendMsg);
7537}
7538
7539//===----------------------------------------------------------------------===//
7540// v_interp
7541//===----------------------------------------------------------------------===//
7542
7543ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7544 StringRef Str;
7545 SMLoc S = getLoc();
7546
7547 if (!parseId(Str))
7548 return ParseStatus::NoMatch;
7549
7550 int Slot = StringSwitch<int>(Str)
7551 .Case("p10", 0)
7552 .Case("p20", 1)
7553 .Case("p0", 2)
7554 .Default(-1);
7555
7556 if (Slot == -1)
7557 return Error(S, "invalid interpolation slot");
7558
7559 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
7560 AMDGPUOperand::ImmTyInterpSlot));
7561 return ParseStatus::Success;
7562}
7563
7564ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
7565 StringRef Str;
7566 SMLoc S = getLoc();
7567
7568 if (!parseId(Str))
7569 return ParseStatus::NoMatch;
7570
7571 if (!Str.starts_with("attr"))
7572 return Error(S, "invalid interpolation attribute");
7573
7574 StringRef Chan = Str.take_back(2);
7575 int AttrChan = StringSwitch<int>(Chan)
7576 .Case(".x", 0)
7577 .Case(".y", 1)
7578 .Case(".z", 2)
7579 .Case(".w", 3)
7580 .Default(-1);
7581 if (AttrChan == -1)
7582 return Error(S, "invalid or missing interpolation attribute channel");
7583
7584 Str = Str.drop_back(2).drop_front(4);
7585
7586 uint8_t Attr;
7587 if (Str.getAsInteger(10, Attr))
7588 return Error(S, "invalid or missing interpolation attribute number");
7589
7590 if (Attr > 32)
7591 return Error(S, "out of bounds interpolation attribute number");
7592
7593 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
7594
7595 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
7596 AMDGPUOperand::ImmTyInterpAttr));
7597 Operands.push_back(AMDGPUOperand::CreateImm(
7598 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7599 return ParseStatus::Success;
7600}
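// Illustrative example (assumed, not taken from this file): parseInterpSlot and
// parseInterpAttr handle operands such as
//   v_interp_p1_f32 v0, v1, attr0.x
// where p10/p20/p0 select the slot and attr<N>.<x|y|z|w> is split into the
// attribute number (rejected when > 32) and the channel, as parsed above.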
7601
7602//===----------------------------------------------------------------------===//
7603// exp
7604//===----------------------------------------------------------------------===//
7605
7606ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
7607 using namespace llvm::AMDGPU::Exp;
7608
7609 StringRef Str;
7610 SMLoc S = getLoc();
7611
7612 if (!parseId(Str))
7613 return ParseStatus::NoMatch;
7614
7615 unsigned Id = getTgtId(Str);
7616 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
7617 return Error(S, (Id == ET_INVALID)
7618 ? "invalid exp target"
7619 : "exp target is not supported on this GPU");
7620
7621 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
7622 AMDGPUOperand::ImmTyExpTgt));
7623 return ParseStatus::Success;
7624}
7625
7626//===----------------------------------------------------------------------===//
7627// parser helpers
7628//===----------------------------------------------------------------------===//
7629
7630bool
7631AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
7632 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
7633}
7634
7635bool
7636AMDGPUAsmParser::isId(const StringRef Id) const {
7637 return isId(getToken(), Id);
7638}
7639
7640bool
7641AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7642 return getTokenKind() == Kind;
7643}
7644
7645StringRef AMDGPUAsmParser::getId() const {
7646 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7647}
7648
7649bool
7650AMDGPUAsmParser::trySkipId(const StringRef Id) {
7651 if (isId(Id)) {
7652 lex();
7653 return true;
7654 }
7655 return false;
7656}
7657
7658bool
7659AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7660 if (isToken(AsmToken::Identifier)) {
7661 StringRef Tok = getTokenStr();
7662 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
7663 lex();
7664 return true;
7665 }
7666 }
7667 return false;
7668}
7669
7670bool
7671AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7672 if (isId(Id) && peekToken().is(Kind)) {
7673 lex();
7674 lex();
7675 return true;
7676 }
7677 return false;
7678}
7679
7680bool
7681AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7682 if (isToken(Kind)) {
7683 lex();
7684 return true;
7685 }
7686 return false;
7687}
7688
7689bool
7690AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7691 const StringRef ErrMsg) {
7692 if (!trySkipToken(Kind)) {
7693 Error(getLoc(), ErrMsg);
7694 return false;
7695 }
7696 return true;
7697}
7698
7699bool
7700AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7701 SMLoc S = getLoc();
7702
7703 const MCExpr *Expr;
7704 if (Parser.parseExpression(Expr))
7705 return false;
7706
7707 if (Expr->evaluateAsAbsolute(Imm))
7708 return true;
7709
7710 if (Expected.empty()) {
7711 Error(S, "expected absolute expression");
7712 } else {
7713 Error(S, Twine("expected ", Expected) +
7714 Twine(" or an absolute expression"));
7715 }
7716 return false;
7717}
7718
7719bool
7720AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7721 SMLoc S = getLoc();
7722
7723 const MCExpr *Expr;
7724 if (Parser.parseExpression(Expr))
7725 return false;
7726
7727 int64_t IntVal;
7728 if (Expr->evaluateAsAbsolute(IntVal)) {
7729 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7730 } else {
7731 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7732 }
7733 return true;
7734}
7735
7736bool
7737AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7738 if (isToken(AsmToken::String)) {
7739 Val = getToken().getStringContents();
7740 lex();
7741 return true;
7742 } else {
7743 Error(getLoc(), ErrMsg);
7744 return false;
7745 }
7746}
7747
7748bool
7749AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7750 if (isToken(AsmToken::Identifier)) {
7751 Val = getTokenStr();
7752 lex();
7753 return true;
7754 } else {
7755 if (!ErrMsg.empty())
7756 Error(getLoc(), ErrMsg);
7757 return false;
7758 }
7759}
7760
7761AsmToken
7762AMDGPUAsmParser::getToken() const {
7763 return Parser.getTok();
7764}
7765
7766AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7767 return isToken(AsmToken::EndOfStatement)
7768 ? getToken()
7769 : getLexer().peekTok(ShouldSkipSpace);
7770}
7771
7772void
7773AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7774 auto TokCount = getLexer().peekTokens(Tokens);
7775
7776 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7777 Tokens[Idx] = AsmToken(AsmToken::Error, "");
7778}
7779
7780AsmToken::TokenKind
7781AMDGPUAsmParser::getTokenKind() const {
7782 return getLexer().getKind();
7783}
7784
7785SMLoc
7786AMDGPUAsmParser::getLoc() const {
7787 return getToken().getLoc();
7788}
7789
7790StringRef
7791AMDGPUAsmParser::getTokenStr() const {
7792 return getToken().getString();
7793}
7794
7795void
7796AMDGPUAsmParser::lex() {
7797 Parser.Lex();
7798}
7799
7800SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7801 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7802}
7803
7804SMLoc
7805AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7806 const OperandVector &Operands) const {
7807 for (unsigned i = Operands.size() - 1; i > 0; --i) {
7808 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7809 if (Test(Op))
7810 return Op.getStartLoc();
7811 }
7812 return getInstLoc(Operands);
7813}
7814
7815SMLoc
7816AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7817 const OperandVector &Operands) const {
7818 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7819 return getOperandLoc(Test, Operands);
7820}
7821
7822SMLoc
7823AMDGPUAsmParser::getRegLoc(unsigned Reg,
7824 const OperandVector &Operands) const {
7825 auto Test = [=](const AMDGPUOperand& Op) {
7826 return Op.isRegKind() && Op.getReg() == Reg;
7827 };
7828 return getOperandLoc(Test, Operands);
7829}
7830
7831SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7832 bool SearchMandatoryLiterals) const {
7833 auto Test = [](const AMDGPUOperand& Op) {
7834 return Op.IsImmKindLiteral() || Op.isExpr();
7835 };
7836 SMLoc Loc = getOperandLoc(Test, Operands);
7837 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7838 Loc = getMandatoryLitLoc(Operands);
7839 return Loc;
7840}
7841
7842SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7843 auto Test = [](const AMDGPUOperand &Op) {
7844 return Op.IsImmKindMandatoryLiteral();
7845 };
7846 return getOperandLoc(Test, Operands);
7847}
7848
7849SMLoc
7850AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7851 auto Test = [](const AMDGPUOperand& Op) {
7852 return Op.isImmKindConst();
7853 };
7854 return getOperandLoc(Test, Operands);
7855}
7856
7857ParseStatus
7858AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
7859 if (!trySkipToken(AsmToken::LCurly))
7860 return ParseStatus::NoMatch;
7861
7862 bool First = true;
7863 while (!trySkipToken(AsmToken::RCurly)) {
7864 if (!First &&
7865 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
7866 return ParseStatus::Failure;
7867
7868 StringRef Id = getTokenStr();
7869 SMLoc IdLoc = getLoc();
7870 if (!skipToken(AsmToken::Identifier, "field name expected") ||
7871 !skipToken(AsmToken::Colon, "colon expected"))
7872 return ParseStatus::Failure;
7873
7874 auto I =
7875 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
7876 if (I == Fields.end())
7877 return Error(IdLoc, "unknown field");
7878 if ((*I)->IsDefined)
7879 return Error(IdLoc, "duplicate field");
7880
7881 // TODO: Support symbolic values.
7882 (*I)->Loc = getLoc();
7883 if (!parseExpr((*I)->Val))
7884 return ParseStatus::Failure;
7885 (*I)->IsDefined = true;
7886
7887 First = false;
7888 }
7889 return ParseStatus::Success;
7890}
7891
7892bool AMDGPUAsmParser::validateStructuredOpFields(
7893    ArrayRef<const StructuredOpField *> Fields) {
7894  return all_of(Fields, [this](const StructuredOpField *F) {
7895 return F->validate(*this);
7896 });
7897}
7898
7899//===----------------------------------------------------------------------===//
7900// swizzle
7901//===----------------------------------------------------------------------===//
7902
7904static unsigned
7905encodeBitmaskPerm(const unsigned AndMask,
7906 const unsigned OrMask,
7907 const unsigned XorMask) {
7908 using namespace llvm::AMDGPU::Swizzle;
7909
7910 return BITMASK_PERM_ENC |
7911 (AndMask << BITMASK_AND_SHIFT) |
7912 (OrMask << BITMASK_OR_SHIFT) |
7913 (XorMask << BITMASK_XOR_SHIFT);
7914}
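// Assumed semantics, noted for illustration: a BITMASK_PERM swizzle remaps each
// lane roughly as ((lane & AndMask) | OrMask) ^ XorMask, which is why the
// broadcast, reverse and swap parsers below can all be expressed through
// encodeBitmaskPerm() with suitable masks.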
7915
7916bool
7917AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7918 const unsigned MinVal,
7919 const unsigned MaxVal,
7920 const StringRef ErrMsg,
7921 SMLoc &Loc) {
7922 if (!skipToken(AsmToken::Comma, "expected a comma")) {
7923 return false;
7924 }
7925 Loc = getLoc();
7926 if (!parseExpr(Op)) {
7927 return false;
7928 }
7929 if (Op < MinVal || Op > MaxVal) {
7930 Error(Loc, ErrMsg);
7931 return false;
7932 }
7933
7934 return true;
7935}
7936
7937bool
7938AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7939 const unsigned MinVal,
7940 const unsigned MaxVal,
7941 const StringRef ErrMsg) {
7942 SMLoc Loc;
7943 for (unsigned i = 0; i < OpNum; ++i) {
7944 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7945 return false;
7946 }
7947
7948 return true;
7949}
7950
7951bool
7952AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7953 using namespace llvm::AMDGPU::Swizzle;
7954
7955 int64_t Lane[LANE_NUM];
7956 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7957 "expected a 2-bit lane id")) {
7958    Imm = QUAD_PERM_ENC;
7959    for (unsigned I = 0; I < LANE_NUM; ++I) {
7960 Imm |= Lane[I] << (LANE_SHIFT * I);
7961 }
7962 return true;
7963 }
7964 return false;
7965}
7966
7967bool
7968AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7969 using namespace llvm::AMDGPU::Swizzle;
7970
7971 SMLoc Loc;
7972 int64_t GroupSize;
7973 int64_t LaneIdx;
7974
7975 if (!parseSwizzleOperand(GroupSize,
7976 2, 32,
7977 "group size must be in the interval [2,32]",
7978 Loc)) {
7979 return false;
7980 }
7981 if (!isPowerOf2_64(GroupSize)) {
7982 Error(Loc, "group size must be a power of two");
7983 return false;
7984 }
7985 if (parseSwizzleOperand(LaneIdx,
7986 0, GroupSize - 1,
7987 "lane id must be in the interval [0,group size - 1]",
7988 Loc)) {
7989 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7990 return true;
7991 }
7992 return false;
7993}
7994
7995bool
7996AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7997 using namespace llvm::AMDGPU::Swizzle;
7998
7999 SMLoc Loc;
8000 int64_t GroupSize;
8001
8002 if (!parseSwizzleOperand(GroupSize,
8003 2, 32,
8004 "group size must be in the interval [2,32]",
8005 Loc)) {
8006 return false;
8007 }
8008 if (!isPowerOf2_64(GroupSize)) {
8009 Error(Loc, "group size must be a power of two");
8010 return false;
8011 }
8012
8013 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8014 return true;
8015}
8016
8017bool
8018AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8019 using namespace llvm::AMDGPU::Swizzle;
8020
8021 SMLoc Loc;
8022 int64_t GroupSize;
8023
8024 if (!parseSwizzleOperand(GroupSize,
8025 1, 16,
8026 "group size must be in the interval [1,16]",
8027 Loc)) {
8028 return false;
8029 }
8030 if (!isPowerOf2_64(GroupSize)) {
8031 Error(Loc, "group size must be a power of two");
8032 return false;
8033 }
8034
8035 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8036 return true;
8037}
8038
8039bool
8040AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8041 using namespace llvm::AMDGPU::Swizzle;
8042
8043 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8044 return false;
8045 }
8046
8047 StringRef Ctl;
8048 SMLoc StrLoc = getLoc();
8049 if (!parseString(Ctl)) {
8050 return false;
8051 }
8052 if (Ctl.size() != BITMASK_WIDTH) {
8053 Error(StrLoc, "expected a 5-character mask");
8054 return false;
8055 }
8056
8057 unsigned AndMask = 0;
8058 unsigned OrMask = 0;
8059 unsigned XorMask = 0;
8060
8061 for (size_t i = 0; i < Ctl.size(); ++i) {
8062 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8063 switch(Ctl[i]) {
8064 default:
8065 Error(StrLoc, "invalid mask");
8066 return false;
8067 case '0':
8068 break;
8069 case '1':
8070 OrMask |= Mask;
8071 break;
8072 case 'p':
8073 AndMask |= Mask;
8074 break;
8075 case 'i':
8076 AndMask |= Mask;
8077 XorMask |= Mask;
8078 break;
8079 }
8080 }
8081
8082 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8083 return true;
8084}
8085
8086bool
8087AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8088
8089 SMLoc OffsetLoc = getLoc();
8090
8091 if (!parseExpr(Imm, "a swizzle macro")) {
8092 return false;
8093 }
8094 if (!isUInt<16>(Imm)) {
8095 Error(OffsetLoc, "expected a 16-bit offset");
8096 return false;
8097 }
8098 return true;
8099}
8100
8101bool
8102AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8103 using namespace llvm::AMDGPU::Swizzle;
8104
8105  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
8106
8107 SMLoc ModeLoc = getLoc();
8108 bool Ok = false;
8109
8110 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8111 Ok = parseSwizzleQuadPerm(Imm);
8112 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8113 Ok = parseSwizzleBitmaskPerm(Imm);
8114 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8115 Ok = parseSwizzleBroadcast(Imm);
8116 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8117 Ok = parseSwizzleSwap(Imm);
8118 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8119 Ok = parseSwizzleReverse(Imm);
8120 } else {
8121 Error(ModeLoc, "expected a swizzle mode");
8122 }
8123
8124    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
8125 }
8126
8127 return false;
8128}
8129
8130ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8131 SMLoc S = getLoc();
8132 int64_t Imm = 0;
8133
8134 if (trySkipId("offset")) {
8135
8136 bool Ok = false;
8137 if (skipToken(AsmToken::Colon, "expected a colon")) {
8138 if (trySkipId("swizzle")) {
8139 Ok = parseSwizzleMacro(Imm);
8140 } else {
8141 Ok = parseSwizzleOffset(Imm);
8142 }
8143 }
8144
8145 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8146
8147    return ParseStatus::Success;
8148  }
8149 return ParseStatus::NoMatch;
8150}
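// Illustrative examples (assumed, not taken from this file) of swizzle operands
// accepted above: a raw 16-bit offset or a swizzle() macro, e.g.
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pip")
//   ds_swizzle_b32 v0, v1 offset:0x8000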
8151
8152bool
8153AMDGPUOperand::isSwizzle() const {
8154 return isImmTy(ImmTySwizzle);
8155}
8156
8157//===----------------------------------------------------------------------===//
8158// VGPR Index Mode
8159//===----------------------------------------------------------------------===//
8160
8161int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8162
8163 using namespace llvm::AMDGPU::VGPRIndexMode;
8164
8165 if (trySkipToken(AsmToken::RParen)) {
8166 return OFF;
8167 }
8168
8169 int64_t Imm = 0;
8170
8171 while (true) {
8172 unsigned Mode = 0;
8173 SMLoc S = getLoc();
8174
8175 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8176 if (trySkipId(IdSymbolic[ModeId])) {
8177 Mode = 1 << ModeId;
8178 break;
8179 }
8180 }
8181
8182 if (Mode == 0) {
8183 Error(S, (Imm == 0)?
8184 "expected a VGPR index mode or a closing parenthesis" :
8185 "expected a VGPR index mode");
8186 return UNDEF;
8187 }
8188
8189 if (Imm & Mode) {
8190 Error(S, "duplicate VGPR index mode");
8191 return UNDEF;
8192 }
8193 Imm |= Mode;
8194
8195 if (trySkipToken(AsmToken::RParen))
8196 break;
8197 if (!skipToken(AsmToken::Comma,
8198 "expected a comma or a closing parenthesis"))
8199 return UNDEF;
8200 }
8201
8202 return Imm;
8203}
8204
8205ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8206
8207 using namespace llvm::AMDGPU::VGPRIndexMode;
8208
8209 int64_t Imm = 0;
8210 SMLoc S = getLoc();
8211
8212 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8213 Imm = parseGPRIdxMacro();
8214 if (Imm == UNDEF)
8215 return ParseStatus::Failure;
8216 } else {
8217 if (getParser().parseAbsoluteExpression(Imm))
8218 return ParseStatus::Failure;
8219 if (Imm < 0 || !isUInt<4>(Imm))
8220 return Error(S, "invalid immediate: only 4-bit values are legal");
8221 }
8222
8223 Operands.push_back(
8224 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8225 return ParseStatus::Success;
8226}
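// Illustrative example (assumed, not taken from this file): parseGPRIdxMode
// accepts a raw 4-bit immediate or a gpr_idx() macro, e.g.
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
// Mode names come from VGPRIndexMode::IdSymbolic; duplicates are rejected in
// parseGPRIdxMacro() above.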
8227
8228bool AMDGPUOperand::isGPRIdxMode() const {
8229 return isImmTy(ImmTyGprIdxMode);
8230}
8231
8232//===----------------------------------------------------------------------===//
8233// sopp branch targets
8234//===----------------------------------------------------------------------===//
8235
8236ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8237
8238 // Make sure we are not parsing something
8239 // that looks like a label or an expression but is not.
8240 // This will improve error messages.
8241 if (isRegister() || isModifier())
8242 return ParseStatus::NoMatch;
8243
8244 if (!parseExpr(Operands))
8245 return ParseStatus::Failure;
8246
8247 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8248 assert(Opr.isImm() || Opr.isExpr());
8249 SMLoc Loc = Opr.getStartLoc();
8250
8251 // Currently we do not support arbitrary expressions as branch targets.
8252 // Only labels and absolute expressions are accepted.
8253 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8254 Error(Loc, "expected an absolute expression or a label");
8255 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8256 Error(Loc, "expected a 16-bit signed jump offset");
8257 }
8258
8259 return ParseStatus::Success;
8260}
8261
8262//===----------------------------------------------------------------------===//
8263// Boolean holding registers
8264//===----------------------------------------------------------------------===//
8265
8266ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8267 return parseReg(Operands);
8268}
8269
8270//===----------------------------------------------------------------------===//
8271// mubuf
8272//===----------------------------------------------------------------------===//
8273
8274void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8275 const OperandVector &Operands,
8276 bool IsAtomic) {
8277 OptionalImmIndexMap OptionalIdx;
8278 unsigned FirstOperandIdx = 1;
8279 bool IsAtomicReturn = false;
8280
8281 if (IsAtomic) {
8282 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
8283                     SIInstrFlags::IsAtomicRet;
8284  }
8285
8286 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8287 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8288
8289 // Add the register arguments
8290 if (Op.isReg()) {
8291 Op.addRegOperands(Inst, 1);
8292 // Insert a tied src for atomic return dst.
8293 // This cannot be postponed as subsequent calls to
8294 // addImmOperands rely on correct number of MC operands.
8295 if (IsAtomicReturn && i == FirstOperandIdx)
8296 Op.addRegOperands(Inst, 1);
8297 continue;
8298 }
8299
8300 // Handle the case where soffset is an immediate
8301 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8302 Op.addImmOperands(Inst, 1);
8303 continue;
8304 }
8305
8306 // Handle tokens like 'offen' which are sometimes hard-coded into the
8307 // asm string. There are no MCInst operands for these.
8308 if (Op.isToken()) {
8309 continue;
8310 }
8311 assert(Op.isImm());
8312
8313 // Handle optional arguments
8314 OptionalIdx[Op.getImmTy()] = i;
8315 }
8316
8317 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
8318 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
8319}
8320
8321//===----------------------------------------------------------------------===//
8322// smrd
8323//===----------------------------------------------------------------------===//
8324
8325bool AMDGPUOperand::isSMRDOffset8() const {
8326 return isImmLiteral() && isUInt<8>(getImm());
8327}
8328
8329bool AMDGPUOperand::isSMEMOffset() const {
8330 // Offset range is checked later by validator.
8331 return isImmLiteral();
8332}
8333
8334bool AMDGPUOperand::isSMRDLiteralOffset() const {
8335  // 32-bit literals are only supported on CI, and we only want to use them
8336  // when the offset is > 8 bits.
8337 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8338}
8339
8340//===----------------------------------------------------------------------===//
8341// vop3
8342//===----------------------------------------------------------------------===//
8343
8344static bool ConvertOmodMul(int64_t &Mul) {
8345 if (Mul != 1 && Mul != 2 && Mul != 4)
8346 return false;
8347
8348 Mul >>= 1;
8349 return true;
8350}
8351
8352static bool ConvertOmodDiv(int64_t &Div) {
8353 if (Div == 1) {
8354 Div = 0;
8355 return true;
8356 }
8357
8358 if (Div == 2) {
8359 Div = 3;
8360 return true;
8361 }
8362
8363 return false;
8364}
8365
8366// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8367// This is intentional and ensures compatibility with sp3.
8368// See bug 35397 for details.
8369bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8370 if (BoundCtrl == 0 || BoundCtrl == 1) {
8371 if (!isGFX11Plus())
8372 BoundCtrl = 1;
8373 return true;
8374 }
8375 return false;
8376}
8377
8378void AMDGPUAsmParser::onBeginOfFile() {
8379 if (!getParser().getStreamer().getTargetStreamer() ||
8380 getSTI().getTargetTriple().getArch() == Triple::r600)
8381 return;
8382
8383 if (!getTargetStreamer().getTargetID())
8384 getTargetStreamer().initializeTargetID(getSTI(),
8385 getSTI().getFeatureString());
8386
8387 if (isHsaAbi(getSTI()))
8388 getTargetStreamer().EmitDirectiveAMDGCNTarget();
8389}
8390
8391/// Parse AMDGPU specific expressions.
8392///
8393/// expr ::= or(expr, ...) |
8394/// max(expr, ...)
8395///
8396bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
8397  using AGVK = AMDGPUVariadicMCExpr::VariadicKind;
8398
8399 if (isToken(AsmToken::Identifier)) {
8400 StringRef TokenId = getTokenStr();
8401 AGVK VK = StringSwitch<AGVK>(TokenId)
8402 .Case("max", AGVK::AGVK_Max)
8403 .Case("or", AGVK::AGVK_Or)
8404 .Default(AGVK::AGVK_None);
8405
8406 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
8407      SmallVector<const MCExpr *, 4> Exprs;
8408      uint64_t CommaCount = 0;
8409 lex(); // Eat 'max'/'or'
8410 lex(); // Eat '('
8411 while (true) {
8412 if (trySkipToken(AsmToken::RParen)) {
8413 if (Exprs.empty()) {
8414 Error(getToken().getLoc(),
8415 "empty " + Twine(TokenId) + " expression");
8416 return true;
8417 }
8418 if (CommaCount + 1 != Exprs.size()) {
8419 Error(getToken().getLoc(),
8420 "mismatch of commas in " + Twine(TokenId) + " expression");
8421 return true;
8422 }
8423 Res = AMDGPUVariadicMCExpr::create(VK, Exprs, getContext());
8424 return false;
8425 }
8426 const MCExpr *Expr;
8427 if (getParser().parseExpression(Expr, EndLoc))
8428 return true;
8429 Exprs.push_back(Expr);
8430 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
8431 if (LastTokenWasComma)
8432 CommaCount++;
8433 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
8434 Error(getToken().getLoc(),
8435 "unexpected token in " + Twine(TokenId) + " expression");
8436 return true;
8437 }
8438 }
8439 }
8440 }
8441 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
8442}
8443
8444ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8445 StringRef Name = getTokenStr();
8446 if (Name == "mul") {
8447 return parseIntWithPrefix("mul", Operands,
8448 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8449 }
8450
8451 if (Name == "div") {
8452 return parseIntWithPrefix("div", Operands,
8453 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8454 }
8455
8456 return ParseStatus::NoMatch;
8457}
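// Illustrative example (assumed, not taken from this file): the output modifier
// is written as mul:2, mul:4 or div:2, e.g.
//   v_add_f32 v0, v1, v2 mul:2
// ConvertOmodMul/ConvertOmodDiv above map these to the omod field values
// (mul:2 -> 1, mul:4 -> 2, div:2 -> 3).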
8458
8459// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8460// the number of src operands present, then copies that bit into src0_modifiers.
8461static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
8462 int Opc = Inst.getOpcode();
8463 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8464 if (OpSelIdx == -1)
8465 return;
8466
8467 int SrcNum;
8468 const int Ops[] = { AMDGPU::OpName::src0,
8469 AMDGPU::OpName::src1,
8470 AMDGPU::OpName::src2 };
8471 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
8472 ++SrcNum)
8473 ;
8474 assert(SrcNum > 0);
8475
8476 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8477
8478 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
8479 if (DstIdx == -1)
8480 return;
8481
8482 const MCOperand &DstOp = Inst.getOperand(DstIdx);
8483 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8484 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8485 if (DstOp.isReg() &&
8486 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
8487 if (AMDGPU::isHi(DstOp.getReg(), MRI))
8488 ModVal |= SISrcMods::DST_OP_SEL;
8489 } else {
8490 if ((OpSel & (1 << SrcNum)) != 0)
8491 ModVal |= SISrcMods::DST_OP_SEL;
8492 }
8493 Inst.getOperand(ModIdx).setImm(ModVal);
8494}
8495
8496void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8497 const OperandVector &Operands) {
8498 cvtVOP3P(Inst, Operands);
8499 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8500}
8501
8502void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8503 OptionalImmIndexMap &OptionalIdx) {
8504 cvtVOP3P(Inst, Operands, OptionalIdx);
8505 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8506}
8507
8508static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8509 return
8510 // 1. This operand is input modifiers
8511 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8512 // 2. This is not last operand
8513 && Desc.NumOperands > (OpNum + 1)
8514 // 3. Next operand is register class
8515 && Desc.operands()[OpNum + 1].RegClass != -1
8516 // 4. Next register is not tied to any other operand
8517 && Desc.getOperandConstraint(OpNum + 1,
8518 MCOI::OperandConstraint::TIED_TO) == -1;
8519}
8520
8521void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8522{
8523 OptionalImmIndexMap OptionalIdx;
8524 unsigned Opc = Inst.getOpcode();
8525
8526 unsigned I = 1;
8527 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8528 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8529 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8530 }
8531
8532 for (unsigned E = Operands.size(); I != E; ++I) {
8533 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8534    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8535      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8536 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
8537 Op.isInterpAttrChan()) {
8538 Inst.addOperand(MCOperand::createImm(Op.getImm()));
8539 } else if (Op.isImmModifier()) {
8540 OptionalIdx[Op.getImmTy()] = I;
8541 } else {
8542 llvm_unreachable("unhandled operand type");
8543 }
8544 }
8545
8546 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
8547 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8548 AMDGPUOperand::ImmTyHigh);
8549
8550 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8551 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8552 AMDGPUOperand::ImmTyClampSI);
8553
8554 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8555 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8556 AMDGPUOperand::ImmTyOModSI);
8557}
8558
8559void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8560{
8561 OptionalImmIndexMap OptionalIdx;
8562 unsigned Opc = Inst.getOpcode();
8563
8564 unsigned I = 1;
8565 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8566 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8567 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8568 }
8569
8570 for (unsigned E = Operands.size(); I != E; ++I) {
8571 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8572    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8573      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8574 } else if (Op.isImmModifier()) {
8575 OptionalIdx[Op.getImmTy()] = I;
8576 } else {
8577 llvm_unreachable("unhandled operand type");
8578 }
8579 }
8580
8581 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8582
8583 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8584 if (OpSelIdx != -1)
8585 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8586
8587 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8588
8589 if (OpSelIdx == -1)
8590 return;
8591
8592 const int Ops[] = { AMDGPU::OpName::src0,
8593 AMDGPU::OpName::src1,
8594 AMDGPU::OpName::src2 };
8595 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8596 AMDGPU::OpName::src1_modifiers,
8597 AMDGPU::OpName::src2_modifiers };
8598
8599 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8600
8601 for (int J = 0; J < 3; ++J) {
8602 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8603 if (OpIdx == -1)
8604 break;
8605
8606 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8607 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8608
8609 if ((OpSel & (1 << J)) != 0)
8610 ModVal |= SISrcMods::OP_SEL_0;
8611 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8612 (OpSel & (1 << 3)) != 0)
8613 ModVal |= SISrcMods::DST_OP_SEL;
8614
8615 Inst.getOperand(ModIdx).setImm(ModVal);
8616 }
8617}
8618
8619void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8620 OptionalImmIndexMap &OptionalIdx) {
8621 unsigned Opc = Inst.getOpcode();
8622
8623 unsigned I = 1;
8624 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8625 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8626 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8627 }
8628
8629 for (unsigned E = Operands.size(); I != E; ++I) {
8630 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8631    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8632      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8633 } else if (Op.isImmModifier()) {
8634 OptionalIdx[Op.getImmTy()] = I;
8635 } else if (Op.isRegOrImm()) {
8636 Op.addRegOrImmOperands(Inst, 1);
8637 } else {
8638 llvm_unreachable("unhandled operand type");
8639 }
8640 }
8641
8642 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8643 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8644 AMDGPUOperand::ImmTyClampSI);
8645
8646 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8647 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8648 AMDGPUOperand::ImmTyOModSI);
8649
8650 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8651  // they have a src2 register operand that is tied to the dst operand;
8652  // we don't allow modifiers for this operand in the assembler, so src2_modifiers
8653 // should be 0.
8654 if (isMAC(Opc)) {
8655 auto it = Inst.begin();
8656 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8657 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8658 ++it;
8659 // Copy the operand to ensure it's not invalidated when Inst grows.
8660 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8661 }
8662}
8663
8664void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8665 OptionalImmIndexMap OptionalIdx;
8666 cvtVOP3(Inst, Operands, OptionalIdx);
8667}
8668
8669void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8670 OptionalImmIndexMap &OptIdx) {
8671 const int Opc = Inst.getOpcode();
8672 const MCInstrDesc &Desc = MII.get(Opc);
8673
8674 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8675
8676 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8677 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
8678 Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_gfx12 ||
8679 Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_gfx12) {
8680 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8681 Inst.addOperand(Inst.getOperand(0));
8682 }
8683
8684 // Adding vdst_in operand is already covered for these DPP instructions in
8685 // cvtVOP3DPP.
8686 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
8687 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
8688 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
8689 Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
8690 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12)) {
8691 assert(!IsPacked);
8692 Inst.addOperand(Inst.getOperand(0));
8693 }
8694
8695 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8696 // instruction, and then figure out where to actually put the modifiers
8697
8698 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8699 if (OpSelIdx != -1) {
8700 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8701 }
8702
8703 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8704 if (OpSelHiIdx != -1) {
8705 int DefaultVal = IsPacked ? -1 : 0;
8706 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8707 DefaultVal);
8708 }
8709
8710 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8711 if (NegLoIdx != -1)
8712 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8713
8714 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8715 if (NegHiIdx != -1)
8716 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8717
8718 const int Ops[] = { AMDGPU::OpName::src0,
8719 AMDGPU::OpName::src1,
8720 AMDGPU::OpName::src2 };
8721 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8722 AMDGPU::OpName::src1_modifiers,
8723 AMDGPU::OpName::src2_modifiers };
8724
8725 unsigned OpSel = 0;
8726 unsigned OpSelHi = 0;
8727 unsigned NegLo = 0;
8728 unsigned NegHi = 0;
8729
8730 if (OpSelIdx != -1)
8731 OpSel = Inst.getOperand(OpSelIdx).getImm();
8732
8733 if (OpSelHiIdx != -1)
8734 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8735
8736 if (NegLoIdx != -1)
8737 NegLo = Inst.getOperand(NegLoIdx).getImm();
8738
8739 if (NegHiIdx != -1)
8740 NegHi = Inst.getOperand(NegHiIdx).getImm();
8741
8742 for (int J = 0; J < 3; ++J) {
8743 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8744 if (OpIdx == -1)
8745 break;
8746
8747 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8748
8749 if (ModIdx == -1)
8750 continue;
8751
8752 uint32_t ModVal = 0;
8753
8754 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
8755 if (SrcOp.isReg() && getMRI()
8756 ->getRegClass(AMDGPU::VGPR_16RegClassID)
8757 .contains(SrcOp.getReg())) {
8758 bool VGPRSuffixIsHi = AMDGPU::isHi(SrcOp.getReg(), *getMRI());
8759 if (VGPRSuffixIsHi)
8760 ModVal |= SISrcMods::OP_SEL_0;
8761 } else {
8762 if ((OpSel & (1 << J)) != 0)
8763 ModVal |= SISrcMods::OP_SEL_0;
8764 }
8765
8766 if ((OpSelHi & (1 << J)) != 0)
8767 ModVal |= SISrcMods::OP_SEL_1;
8768
8769 if ((NegLo & (1 << J)) != 0)
8770 ModVal |= SISrcMods::NEG;
8771
8772 if ((NegHi & (1 << J)) != 0)
8773 ModVal |= SISrcMods::NEG_HI;
8774
8775 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8776 }
8777}
8778
8779void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8780 OptionalImmIndexMap OptIdx;
8781 cvtVOP3(Inst, Operands, OptIdx);
8782 cvtVOP3P(Inst, Operands, OptIdx);
8783}
8784
8785static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
8786                                  unsigned i, unsigned Opc, unsigned OpName) {
8787 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
8788 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
8789 else
8790 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
8791}
8792
8793void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
8794 unsigned Opc = Inst.getOpcode();
8795
8796 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
8797 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
8798 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
8799 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
8800 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
8801
8802 OptionalImmIndexMap OptIdx;
8803 for (unsigned i = 5; i < Operands.size(); ++i) {
8804 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8805 OptIdx[Op.getImmTy()] = i;
8806 }
8807
8808 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
8809 addOptionalImmOperand(Inst, Operands, OptIdx,
8810 AMDGPUOperand::ImmTyIndexKey8bit);
8811
8812 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
8813 addOptionalImmOperand(Inst, Operands, OptIdx,
8814 AMDGPUOperand::ImmTyIndexKey16bit);
8815
8816 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8817 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClampSI);
8818
8819 cvtVOP3P(Inst, Operands, OptIdx);
8820}
8821
8822//===----------------------------------------------------------------------===//
8823// VOPD
8824//===----------------------------------------------------------------------===//
8825
8826ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8827 if (!hasVOPD(getSTI()))
8828 return ParseStatus::NoMatch;
8829
8830 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8831 SMLoc S = getLoc();
8832 lex();
8833 lex();
8834 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8835 SMLoc OpYLoc = getLoc();
8836 StringRef OpYName;
8837 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
8838 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8839 return ParseStatus::Success;
8840 }
8841 return Error(OpYLoc, "expected a VOPDY instruction after ::");
8842 }
8843 return ParseStatus::NoMatch;
8844}
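// Illustrative example (assumed, not taken from this file): the "::" token
// parsed above separates the X and Y halves of a dual-issue VOPD instruction:
//   v_dual_mov_b32 v0, v1 :: v_dual_add_f32 v2, v3, v4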
8845
8846// Create VOPD MCInst operands using parsed assembler operands.
8847void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8848 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
8849 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
8850 if (Op.isReg()) {
8851 Op.addRegOperands(Inst, 1);
8852 return;
8853 }
8854 if (Op.isImm()) {
8855 Op.addImmOperands(Inst, 1);
8856 return;
8857 }
8858 llvm_unreachable("Unhandled operand type in cvtVOPD");
8859 };
8860
8861 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8862
8863 // MCInst operands are ordered as follows:
8864 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8865
8866 for (auto CompIdx : VOPD::COMPONENTS) {
8867 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8868 }
8869
8870 for (auto CompIdx : VOPD::COMPONENTS) {
8871 const auto &CInfo = InstInfo[CompIdx];
8872 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8873 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8874 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8875 if (CInfo.hasSrc2Acc())
8876 addOp(CInfo.getIndexOfDstInParsedOperands());
8877 }
8878}
8879
8880//===----------------------------------------------------------------------===//
8881// dpp
8882//===----------------------------------------------------------------------===//
8883
8884bool AMDGPUOperand::isDPP8() const {
8885 return isImmTy(ImmTyDPP8);
8886}
8887
8888bool AMDGPUOperand::isDPPCtrl() const {
8889 using namespace AMDGPU::DPP;
8890
8891 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8892 if (result) {
8893 int64_t Imm = getImm();
8894 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8895 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8896 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8897 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8898 (Imm == DppCtrl::WAVE_SHL1) ||
8899 (Imm == DppCtrl::WAVE_ROL1) ||
8900 (Imm == DppCtrl::WAVE_SHR1) ||
8901 (Imm == DppCtrl::WAVE_ROR1) ||
8902 (Imm == DppCtrl::ROW_MIRROR) ||
8903 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8904 (Imm == DppCtrl::BCAST15) ||
8905 (Imm == DppCtrl::BCAST31) ||
8906 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8907 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8908 }
8909 return false;
8910}
8911
8912//===----------------------------------------------------------------------===//
8913// mAI
8914//===----------------------------------------------------------------------===//
8915
8916bool AMDGPUOperand::isBLGP() const {
8917 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8918}
8919
8920bool AMDGPUOperand::isCBSZ() const {
8921 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8922}
8923
8924bool AMDGPUOperand::isABID() const {
8925 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8926}
8927
8928bool AMDGPUOperand::isS16Imm() const {
8929 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8930}
8931
8932bool AMDGPUOperand::isU16Imm() const {
8933 return isImmLiteral() && isUInt<16>(getImm());
8934}
8935
8936//===----------------------------------------------------------------------===//
8937// dim
8938//===----------------------------------------------------------------------===//
8939
8940bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8941 // We want to allow "dim:1D" etc.,
8942 // but the initial 1 is tokenized as an integer.
8943 std::string Token;
8944 if (isToken(AsmToken::Integer)) {
8945 SMLoc Loc = getToken().getEndLoc();
8946 Token = std::string(getTokenStr());
8947 lex();
8948 if (getLoc() != Loc)
8949 return false;
8950 }
8951
8952 StringRef Suffix;
8953 if (!parseId(Suffix))
8954 return false;
8955 Token += Suffix;
8956
8957 StringRef DimId = Token;
8958 if (DimId.starts_with("SQ_RSRC_IMG_"))
8959 DimId = DimId.drop_front(12);
8960
8961  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8962  if (!DimInfo)
8963 return false;
8964
8965 Encoding = DimInfo->Encoding;
8966 return true;
8967}
8968
8969ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8970 if (!isGFX10Plus())
8971 return ParseStatus::NoMatch;
8972
8973 SMLoc S = getLoc();
8974
8975 if (!trySkipId("dim", AsmToken::Colon))
8976 return ParseStatus::NoMatch;
8977
8978 unsigned Encoding;
8979 SMLoc Loc = getLoc();
8980 if (!parseDimId(Encoding))
8981 return Error(Loc, "invalid dim value");
8982
8983 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8984 AMDGPUOperand::ImmTyDim));
8985 return ParseStatus::Success;
8986}
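// Illustrative example (assumed, not taken from this file): parseDim accepts the
// MIMG dimension either as a short suffix or with the SQ_RSRC_IMG_ prefix, e.g.
// dim:2D or dim:SQ_RSRC_IMG_2D_ARRAY; the leading integer of "2D" is re-joined
// with the identifier in parseDimId() above.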
8987
8988//===----------------------------------------------------------------------===//
8989// dpp
8990//===----------------------------------------------------------------------===//
8991
8992ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8993 SMLoc S = getLoc();
8994
8995 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8996 return ParseStatus::NoMatch;
8997
8998 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8999
9000 int64_t Sels[8];
9001
9002 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9003 return ParseStatus::Failure;
9004
9005 for (size_t i = 0; i < 8; ++i) {
9006 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9007 return ParseStatus::Failure;
9008
9009 SMLoc Loc = getLoc();
9010 if (getParser().parseAbsoluteExpression(Sels[i]))
9011 return ParseStatus::Failure;
9012 if (0 > Sels[i] || 7 < Sels[i])
9013 return Error(Loc, "expected a 3-bit value");
9014 }
9015
9016 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9017 return ParseStatus::Failure;
9018
9019 unsigned DPP8 = 0;
9020 for (size_t i = 0; i < 8; ++i)
9021 DPP8 |= (Sels[i] << (i * 3));
9022
9023 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9024 return ParseStatus::Success;
9025}
9026
9027bool
9028AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9029 const OperandVector &Operands) {
9030 if (Ctrl == "row_newbcast")
9031 return isGFX90A();
9032
9033 if (Ctrl == "row_share" ||
9034 Ctrl == "row_xmask")
9035 return isGFX10Plus();
9036
9037 if (Ctrl == "wave_shl" ||
9038 Ctrl == "wave_shr" ||
9039 Ctrl == "wave_rol" ||
9040 Ctrl == "wave_ror" ||
9041 Ctrl == "row_bcast")
9042 return isVI() || isGFX9();
9043
9044 return Ctrl == "row_mirror" ||
9045 Ctrl == "row_half_mirror" ||
9046 Ctrl == "quad_perm" ||
9047 Ctrl == "row_shl" ||
9048 Ctrl == "row_shr" ||
9049 Ctrl == "row_ror";
9050}
9051
9052int64_t
9053AMDGPUAsmParser::parseDPPCtrlPerm() {
9054 // quad_perm:[%d,%d,%d,%d]
9055
9056 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9057 return -1;
9058
9059 int64_t Val = 0;
9060 for (int i = 0; i < 4; ++i) {
9061 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9062 return -1;
9063
9064 int64_t Temp;
9065 SMLoc Loc = getLoc();
9066 if (getParser().parseAbsoluteExpression(Temp))
9067 return -1;
9068 if (Temp < 0 || Temp > 3) {
9069 Error(Loc, "expected a 2-bit value");
9070 return -1;
9071 }
9072
9073 Val += (Temp << i * 2);
9074 }
9075
9076 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9077 return -1;
9078
9079 return Val;
9080}
9081
9082int64_t
9083AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
9084 using namespace AMDGPU::DPP;
9085
9086 // sel:%d
9087
9088 int64_t Val;
9089 SMLoc Loc = getLoc();
9090
9091 if (getParser().parseAbsoluteExpression(Val))
9092 return -1;
9093
9094 struct DppCtrlCheck {
9095 int64_t Ctrl;
9096 int Lo;
9097 int Hi;
9098 };
9099
9100 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
9101 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
9102 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
9103 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
9104 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
9105 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
9106 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
9107 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
9108 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
9109 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
9110 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
9111 .Default({-1, 0, 0});
9112
9113 bool Valid;
9114 if (Check.Ctrl == -1) {
9115 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
9116 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
9117 } else {
9118 Valid = Check.Lo <= Val && Val <= Check.Hi;
9119 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
9120 }
9121
9122 if (!Valid) {
9123 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
9124 return -1;
9125 }
9126
9127 return Val;
9128}
9129
9130ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
9131 using namespace AMDGPU::DPP;
9132
9133 if (!isToken(AsmToken::Identifier) ||
9134 !isSupportedDPPCtrl(getTokenStr(), Operands))
9135 return ParseStatus::NoMatch;
9136
9137 SMLoc S = getLoc();
9138 int64_t Val = -1;
9139  StringRef Ctrl;
9140
9141 parseId(Ctrl);
9142
9143 if (Ctrl == "row_mirror") {
9144 Val = DppCtrl::ROW_MIRROR;
9145 } else if (Ctrl == "row_half_mirror") {
9146 Val = DppCtrl::ROW_HALF_MIRROR;
9147 } else {
9148 if (skipToken(AsmToken::Colon, "expected a colon")) {
9149 if (Ctrl == "quad_perm") {
9150 Val = parseDPPCtrlPerm();
9151 } else {
9152 Val = parseDPPCtrlSel(Ctrl);
9153 }
9154 }
9155 }
9156
9157 if (Val == -1)
9158 return ParseStatus::Failure;
9159
9160 Operands.push_back(
9161 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
9162 return ParseStatus::Success;
9163}
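// Illustrative examples (assumed, not taken from this file) of dpp controls
// accepted above: quad_perm:[0,1,2,3], row_shl:1, row_mirror, row_share:0
// (GFX10+); row_bcast:15 and row_bcast:31 map to BCAST15/BCAST31 in
// parseDPPCtrlSel().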
9164
9165void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
9166 bool IsDPP8) {
9167 OptionalImmIndexMap OptionalIdx;
9168 unsigned Opc = Inst.getOpcode();
9169 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9170
9171  // MAC instructions are special because they have an 'old'
9172  // operand which is not tied to dst (but is assumed to be).
9173  // They also have a dummy, unused src2_modifiers operand.
9174 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
9175 int Src2ModIdx =
9176 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
9177 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
9178 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
9179
9180 unsigned I = 1;
9181 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9182 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9183 }
9184
9185 int Fi = 0;
9186 for (unsigned E = Operands.size(); I != E; ++I) {
9187
9188 if (IsMAC) {
9189 int NumOperands = Inst.getNumOperands();
9190 if (OldIdx == NumOperands) {
9191 // Handle old operand
9192 constexpr int DST_IDX = 0;
9193 Inst.addOperand(Inst.getOperand(DST_IDX));
9194 } else if (Src2ModIdx == NumOperands) {
9195 // Add unused dummy src2_modifiers
9196        Inst.addOperand(MCOperand::createImm(0));
9197      }
9198 }
9199
9200 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
9201 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
9202 Inst.addOperand(Inst.getOperand(0));
9203 }
9204
9205 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
9206 Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12 ||
9207 Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
9208 Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12;
9209 if (IsVOP3CvtSrDpp) {
9210 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
9211 Inst.addOperand(MCOperand::createImm(0));
9212 Inst.addOperand(MCOperand::createReg(0));
9213 }
9214 }
9215
9216 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9217 MCOI::TIED_TO);
9218 if (TiedTo != -1) {
9219 assert((unsigned)TiedTo < Inst.getNumOperands());
9220 // handle tied old or src2 for MAC instructions
9221 Inst.addOperand(Inst.getOperand(TiedTo));
9222 }
9223 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9224 // Add the register arguments
9225 if (IsDPP8 && Op.isDppFI()) {
9226 Fi = Op.getImm();
9227 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9228 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9229 } else if (Op.isReg()) {
9230 Op.addRegOperands(Inst, 1);
9231 } else if (Op.isImm() &&
9232 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
9233 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
9234 Op.addImmOperands(Inst, 1);
9235 } else if (Op.isImm()) {
9236 OptionalIdx[Op.getImmTy()] = I;
9237 } else {
9238 llvm_unreachable("unhandled operand type");
9239 }
9240 }
9241 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9242 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
9243
9244 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9245 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
9246
9247 if (Desc.TSFlags & SIInstrFlags::VOP3P)
9248 cvtVOP3P(Inst, Operands, OptionalIdx);
9249 else if (Desc.TSFlags & SIInstrFlags::VOP3)
9250 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
9251 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9252 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9253 }
9254
9255 if (IsDPP8) {
9256 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
9257 using namespace llvm::AMDGPU::DPP;
9258 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9259 } else {
9260 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
9261 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9262 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9263 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9264
9265 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
9266 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9267 AMDGPUOperand::ImmTyDppFI);
9268 }
9269}
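// Illustrative sketch (not from this file): how quad_perm lane selects pack
// into dpp_ctrl, which is why 0xe4 above is the identity default -- the
// permutation [0,1,2,3] packs to 0b11'10'01'00 == 0xE4.
static unsigned packQuadPerm(unsigned Sel0, unsigned Sel1, unsigned Sel2,
                             unsigned Sel3) {
  return (Sel0 & 3) | ((Sel1 & 3) << 2) | ((Sel2 & 3) << 4) | ((Sel3 & 3) << 6);
}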
9270
9271void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
9272 OptionalImmIndexMap OptionalIdx;
9273
9274 unsigned I = 1;
9275 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9276 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9277 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9278 }
9279
9280 int Fi = 0;
9281 for (unsigned E = Operands.size(); I != E; ++I) {
9282 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9283 MCOI::TIED_TO);
9284 if (TiedTo != -1) {
9285 assert((unsigned)TiedTo < Inst.getNumOperands());
9286 // handle tied old or src2 for MAC instructions
9287 Inst.addOperand(Inst.getOperand(TiedTo));
9288 }
9289 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9290 // Add the register arguments
9291 if (Op.isReg() && validateVccOperand(Op.getReg())) {
9292 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
9293 // Skip it.
9294 continue;
9295 }
9296
9297 if (IsDPP8) {
9298 if (Op.isDPP8()) {
9299 Op.addImmOperands(Inst, 1);
9300 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9301 Op.addRegWithFPInputModsOperands(Inst, 2);
9302 } else if (Op.isDppFI()) {
9303 Fi = Op.getImm();
9304 } else if (Op.isReg()) {
9305 Op.addRegOperands(Inst, 1);
9306 } else {
9307 llvm_unreachable("Invalid operand type");
9308 }
9309 } else {
9310 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9311 Op.addRegWithFPInputModsOperands(Inst, 2);
9312 } else if (Op.isReg()) {
9313 Op.addRegOperands(Inst, 1);
9314 } else if (Op.isDPPCtrl()) {
9315 Op.addImmOperands(Inst, 1);
9316 } else if (Op.isImm()) {
9317 // Handle optional arguments
9318 OptionalIdx[Op.getImmTy()] = I;
9319 } else {
9320 llvm_unreachable("Invalid operand type");
9321 }
9322 }
9323 }
9324
9325 if (IsDPP8) {
9326 using namespace llvm::AMDGPU::DPP;
9327 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9328 } else {
9329 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9330 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9331 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9332 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
9333 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9334 AMDGPUOperand::ImmTyDppFI);
9335 }
9336 }
9337}
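// Illustrative form handled by the validateVccOperand() skip above (assembly
// is illustrative, not taken from this file): VOP2b DPP spells the carry
// register explicitly, but the "vcc" token contributes no encoded operand:
//   v_add_u32_dpp v0, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf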
9338
9339//===----------------------------------------------------------------------===//
9340// sdwa
9341//===----------------------------------------------------------------------===//
9342
9343ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
9344 StringRef Prefix,
9345 AMDGPUOperand::ImmTy Type) {
9346 using namespace llvm::AMDGPU::SDWA;
9347
9348 SMLoc S = getLoc();
9349 StringRef Value;
9350
9351 SMLoc StringLoc;
9352 ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
9353 if (!Res.isSuccess())
9354 return Res;
9355
9356 int64_t Int;
9357 Int = StringSwitch<int64_t>(Value)
9358 .Case("BYTE_0", SdwaSel::BYTE_0)
9359 .Case("BYTE_1", SdwaSel::BYTE_1)
9360 .Case("BYTE_2", SdwaSel::BYTE_2)
9361 .Case("BYTE_3", SdwaSel::BYTE_3)
9362 .Case("WORD_0", SdwaSel::WORD_0)
9363 .Case("WORD_1", SdwaSel::WORD_1)
9364 .Case("DWORD", SdwaSel::DWORD)
9365 .Default(0xffffffff);
9366
9367 if (Int == 0xffffffff)
9368 return Error(StringLoc, "invalid " + Twine(Prefix) + " value");
9369
9370 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
9371 return ParseStatus::Success;
9372}
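// Illustrative SDWA operand syntax routed through parseSDWASel above; each
// "<prefix>:<NAME>" pair maps to an SdwaSel value via the StringSwitch
// (assembly is illustrative, not taken from this file):
//   v_mov_b32_sdwa v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1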
9373
9374ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
9375 using namespace llvm::AMDGPU::SDWA;
9376
9377 SMLoc S = getLoc();
9378 StringRef Value;
9379
9380 SMLoc StringLoc;
9381 ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
9382 if (!Res.isSuccess())
9383 return Res;
9384
9385 int64_t Int;
9386 Int = StringSwitch<int64_t>(Value)
9387 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
9388 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
9389 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
9390 .Default(0xffffffff);
9391
9392 if (Int == 0xffffffff)
9393 return Error(StringLoc, "invalid dst_unused value");
9394
9395 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
9396 return ParseStatus::Success;
9397}
9398
9399void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
9400 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
9401}
9402
9403void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
9404 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
9405}
9406
9407void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
9408 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
9409}
9410
9411void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
9412 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
9413}
9414
9415void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
9416 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
9417}
9418
9419void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
9420 uint64_t BasicInstType,
9421 bool SkipDstVcc,
9422 bool SkipSrcVcc) {
9423 using namespace llvm::AMDGPU::SDWA;
9424
9425 OptionalImmIndexMap OptionalIdx;
9426 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9427 bool SkippedVcc = false;
9428
9429 unsigned I = 1;
9430 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9431 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9432 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9433 }
9434
9435 for (unsigned E = Operands.size(); I != E; ++I) {
9436 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9437 if (SkipVcc && !SkippedVcc && Op.isReg() &&
9438 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
9439 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
9440 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
9441 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
9442 // Skip VCC only if we didn't skip it on previous iteration.
9443 // Note that src0 and src1 occupy 2 slots each because of modifiers.
9444 if (BasicInstType == SIInstrFlags::VOP2 &&
9445 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9446 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9447 SkippedVcc = true;
9448 continue;
9449 } else if (BasicInstType == SIInstrFlags::VOPC &&
9450 Inst.getNumOperands() == 0) {
9451 SkippedVcc = true;
9452 continue;
9453 }
9454 }
9455 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9456 Op.addRegOrImmWithInputModsOperands(Inst, 2);
9457 } else if (Op.isImm()) {
9458 // Handle optional arguments
9459 OptionalIdx[Op.getImmTy()] = I;
9460 } else {
9461 llvm_unreachable("Invalid operand type");
9462 }
9463 SkippedVcc = false;
9464 }
9465
9466 const unsigned Opc = Inst.getOpcode();
9467 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9468 Opc != AMDGPU::V_NOP_sdwa_vi) {
9469 // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
9470 switch (BasicInstType) {
9471 case SIInstrFlags::VOP1:
9472 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9473 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9474 AMDGPUOperand::ImmTyClampSI, 0);
9475
9476 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9477 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9478 AMDGPUOperand::ImmTyOModSI, 0);
9479
9480 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
9481 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9482 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9483
9484 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
9485 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9486 AMDGPUOperand::ImmTySDWADstUnused,
9487 DstUnused::UNUSED_PRESERVE);
9488
9489 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9490 break;
9491
9492 case SIInstrFlags::VOP2:
9493 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9494
9495 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
9496 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9497
9498 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9499 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
9500 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9501 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9502 break;
9503
9504 case SIInstrFlags::VOPC:
9505 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
9506 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9507 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9508 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9509 break;
9510
9511 default:
9512 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9513 }
9514 }
9515
9516 // special case v_mac_{f16, f32}:
9517 // it has src2 register operand that is tied to dst operand
9518 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9519 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
9520 auto it = Inst.begin();
9521 std::advance(
9522 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9523 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9524 }
9525}
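// Worked example for the vcc-skip logic above (illustrative assembly, based
// on the forms named in the comments in cvtSDWA):
//   v_addc_u32_sdwa v1, vcc, v2, v3, vcc
// After the destination is converted, Inst holds 1 operand, so the first
// "vcc" (carry-out) is skipped; src0 and src1 each add 2 MCInst operands
// (modifiers + register), so the second "vcc" (carry-in) is reached when Inst
// holds 5 operands and is skipped as well.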
9526
9527/// Force static initialization.
9528extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
9529 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
9530 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
9531}
9532
9533#define GET_REGISTER_MATCHER
9534#define GET_MATCHER_IMPLEMENTATION
9535#define GET_MNEMONIC_SPELL_CHECKER
9536#define GET_MNEMONIC_CHECKER
9537#include "AMDGPUGenAsmMatcher.inc"
9538
9539ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
9540 unsigned MCK) {
9541 switch (MCK) {
9542 case MCK_addr64:
9543 return parseTokenOp("addr64", Operands);
9544 case MCK_done:
9545 return parseTokenOp("done", Operands);
9546 case MCK_idxen:
9547 return parseTokenOp("idxen", Operands);
9548 case MCK_lds:
9549 return parseTokenOp("lds", Operands);
9550 case MCK_offen:
9551 return parseTokenOp("offen", Operands);
9552 case MCK_off:
9553 return parseTokenOp("off", Operands);
9554 case MCK_row_95_en:
9555 return parseTokenOp("row_en", Operands);
9556 case MCK_gds:
9557 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
9558 case MCK_tfe:
9559 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9560 }
9561 return tryCustomParseOperand(Operands, MCK);
9562}
9563
9564// This function should be defined after auto-generated include so that we have
9565// MatchClassKind enum defined
9566unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9567 unsigned Kind) {
9568 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
9569 // But MatchInstructionImpl() expects to see a token and fails to validate the
9570 // operand. This method checks whether we were given an immediate operand where
9571 // the matcher expects the corresponding token.
9572 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9573 switch (Kind) {
9574 case MCK_addr64:
9575 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9576 case MCK_gds:
9577 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9578 case MCK_lds:
9579 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9580 case MCK_idxen:
9581 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9582 case MCK_offen:
9583 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9584 case MCK_tfe:
9585 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9586 case MCK_SSrc_b32:
9587 // When operands have expression values, they will return true for isToken,
9588 // because it is not possible to distinguish between a token and an
9589 // expression at parse time. MatchInstructionImpl() will always try to
9590 // match an operand as a token, when isToken returns true, and when the
9591 // name of the expression is not a valid token, the match will fail,
9592 // so we need to handle it here.
9593 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
9594 case MCK_SSrc_f32:
9595 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
9596 case MCK_SOPPBrTarget:
9597 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9598 case MCK_VReg32OrOff:
9599 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9600 case MCK_InterpSlot:
9601 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9602 case MCK_InterpAttr:
9603 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9604 case MCK_InterpAttrChan:
9605 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9606 case MCK_SReg_64:
9607 case MCK_SReg_64_XEXEC:
9608 // Null is defined as a 32-bit register but
9609 // it should also be enabled with 64-bit operands.
9610 // The following code enables it for SReg_64 operands
9611 // used as source and destination. Remaining source
9612 // operands are handled in isInlinableImm.
9613 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9614 default:
9615 return Match_InvalidOperand;
9616 }
9617}
9618
9619//===----------------------------------------------------------------------===//
9620// endpgm
9621//===----------------------------------------------------------------------===//
9622
9623ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
9624 SMLoc S = getLoc();
9625 int64_t Imm = 0;
9626
9627 if (!parseExpr(Imm)) {
9628 // The operand is optional, if not present default to 0
9629 Imm = 0;
9630 }
9631
9632 if (!isUInt<16>(Imm))
9633 return Error(S, "expected a 16-bit value");
9634
9635 Operands.push_back(
9636 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9637 return ParseStatus::Success;
9638}
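// Illustrative use of the optional s_endpgm immediate parsed above: both
// forms are accepted, and the value must fit in 16 bits (assembly is
// illustrative, not taken from this file):
//   s_endpgm
//   s_endpgm 3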
9639
9640bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9641
9642//===----------------------------------------------------------------------===//
9643// LDSDIR
9644//===----------------------------------------------------------------------===//
9645
9646bool AMDGPUOperand::isWaitVDST() const {
9647 return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
9648}
9649
9650bool AMDGPUOperand::isWaitVAVDst() const {
9651 return isImmTy(ImmTyWaitVAVDst) && isUInt<4>(getImm());
9652}
9653
9654bool AMDGPUOperand::isWaitVMVSrc() const {
9655 return isImmTy(ImmTyWaitVMVSrc) && isUInt<1>(getImm());
9656}
9657
9658//===----------------------------------------------------------------------===//
9659// VINTERP
9660//===----------------------------------------------------------------------===//
9661
9662bool AMDGPUOperand::isWaitEXP() const {
9663 return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
9664}
9665
9666//===----------------------------------------------------------------------===//
9667// Split Barrier
9668//===----------------------------------------------------------------------===//
9669
9670bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }