AMDGPUAsmParser.cpp
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
38#include "llvm/MC/MCSymbol.h"
46#include <optional>
47
48using namespace llvm;
49using namespace llvm::AMDGPU;
50using namespace llvm::amdhsa;
51
52namespace {
53
54class AMDGPUAsmParser;
55
56enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
57
58//===----------------------------------------------------------------------===//
59// Operand
60//===----------------------------------------------------------------------===//
61
62class AMDGPUOperand : public MCParsedAsmOperand {
63 enum KindTy {
64 Token,
65 Immediate,
66 Register,
67 Expression
68 } Kind;
69
70 SMLoc StartLoc, EndLoc;
71 const AMDGPUAsmParser *AsmParser;
72
73public:
74 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
75 : Kind(Kind_), AsmParser(AsmParser_) {}
76
77 using Ptr = std::unique_ptr<AMDGPUOperand>;
78
79 struct Modifiers {
80 bool Abs = false;
81 bool Neg = false;
82 bool Sext = false;
83 LitModifier Lit = LitModifier::None;
84
85 bool hasFPModifiers() const { return Abs || Neg; }
86 bool hasIntModifiers() const { return Sext; }
87 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
88
89 int64_t getFPModifiersOperand() const {
90 int64_t Operand = 0;
91 Operand |= Abs ? SISrcMods::ABS : 0u;
92 Operand |= Neg ? SISrcMods::NEG : 0u;
93 return Operand;
94 }
95
96 int64_t getIntModifiersOperand() const {
97 int64_t Operand = 0;
98 Operand |= Sext ? SISrcMods::SEXT : 0u;
99 return Operand;
100 }
101
102 int64_t getModifiersOperand() const {
103 assert(!(hasFPModifiers() && hasIntModifiers())
104 && "fp and int modifiers should not be used simultaneously");
105 if (hasFPModifiers())
106 return getFPModifiersOperand();
107 if (hasIntModifiers())
108 return getIntModifiersOperand();
109 return 0;
110 }
111
112 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
113 };
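 // Editorial sketch (not part of the original file): how these modifiers are
 // encoded. For a source operand written with SP3 syntax such as "-|v0|", the
 // parser sets Neg and Abs, and getModifiersOperand() folds them into the
 // SISrcMods immediate that is emitted just before the value operand:
 //
 //   AMDGPUOperand::Modifiers Mods;
 //   Mods.Abs = true;                                // |v0|
 //   Mods.Neg = true;                                // -|v0|
 //   int64_t Enc = Mods.getModifiersOperand();       // SISrcMods::ABS | SISrcMods::NEG
 //   Inst.addOperand(MCOperand::createImm(Enc));     // srcN_modifiers, then srcN itself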
114
115 enum ImmTy {
116 ImmTyNone,
117 ImmTyGDS,
118 ImmTyLDS,
119 ImmTyOffen,
120 ImmTyIdxen,
121 ImmTyAddr64,
122 ImmTyOffset,
123 ImmTyInstOffset,
124 ImmTyOffset0,
125 ImmTyOffset1,
126 ImmTySMEMOffsetMod,
127 ImmTyCPol,
128 ImmTyTFE,
129 ImmTyD16,
130 ImmTyClamp,
131 ImmTyOModSI,
132 ImmTySDWADstSel,
133 ImmTySDWASrc0Sel,
134 ImmTySDWASrc1Sel,
135 ImmTySDWADstUnused,
136 ImmTyDMask,
137 ImmTyDim,
138 ImmTyUNorm,
139 ImmTyDA,
140 ImmTyR128A16,
141 ImmTyA16,
142 ImmTyLWE,
143 ImmTyExpTgt,
144 ImmTyExpCompr,
145 ImmTyExpVM,
146 ImmTyFORMAT,
147 ImmTyHwreg,
148 ImmTyOff,
149 ImmTySendMsg,
150 ImmTyInterpSlot,
151 ImmTyInterpAttr,
152 ImmTyInterpAttrChan,
153 ImmTyOpSel,
154 ImmTyOpSelHi,
155 ImmTyNegLo,
156 ImmTyNegHi,
157 ImmTyIndexKey8bit,
158 ImmTyIndexKey16bit,
159 ImmTyIndexKey32bit,
160 ImmTyDPP8,
161 ImmTyDppCtrl,
162 ImmTyDppRowMask,
163 ImmTyDppBankMask,
164 ImmTyDppBoundCtrl,
165 ImmTyDppFI,
166 ImmTySwizzle,
167 ImmTyGprIdxMode,
168 ImmTyHigh,
169 ImmTyBLGP,
170 ImmTyCBSZ,
171 ImmTyABID,
172 ImmTyEndpgm,
173 ImmTyWaitVDST,
174 ImmTyWaitEXP,
175 ImmTyWaitVAVDst,
176 ImmTyWaitVMVSrc,
177 ImmTyBitOp3,
178 ImmTyMatrixAFMT,
179 ImmTyMatrixBFMT,
180 ImmTyMatrixAScale,
181 ImmTyMatrixBScale,
182 ImmTyMatrixAScaleFmt,
183 ImmTyMatrixBScaleFmt,
184 ImmTyMatrixAReuse,
185 ImmTyMatrixBReuse,
186 ImmTyScaleSel,
187 ImmTyByteSel,
188 };
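 // Editorial note (not in the original source): most ImmTy values tag "named"
 // modifier operands rather than plain literals; e.g. parsing "gds" on a DS
 // instruction produces an Immediate operand whose Imm.Type is ImmTyGDS, which
 // the isGDS() predicate below then matches during operand classification.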
189
190private:
191 struct TokOp {
192 const char *Data;
193 unsigned Length;
194 };
195
196 struct ImmOp {
197 int64_t Val;
198 ImmTy Type;
199 bool IsFPImm;
200 Modifiers Mods;
201 };
202
203 struct RegOp {
204 MCRegister RegNo;
205 Modifiers Mods;
206 };
207
208 union {
209 TokOp Tok;
210 ImmOp Imm;
211 RegOp Reg;
212 const MCExpr *Expr;
213 };
214
215 // The index of the associated MCInst operand.
216 mutable int MCOpIdx = -1;
217
218public:
219 bool isToken() const override { return Kind == Token; }
220
221 bool isSymbolRefExpr() const {
222 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
223 }
224
225 bool isImm() const override {
226 return Kind == Immediate;
227 }
228
229 bool isInlinableImm(MVT type) const;
230 bool isLiteralImm(MVT type) const;
231
232 bool isRegKind() const {
233 return Kind == Register;
234 }
235
236 bool isReg() const override {
237 return isRegKind() && !hasModifiers();
238 }
239
240 bool isRegOrInline(unsigned RCID, MVT type) const {
241 return isRegClass(RCID) || isInlinableImm(type);
242 }
243
244 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
245 return isRegOrInline(RCID, type) || isLiteralImm(type);
246 }
247
248 bool isRegOrImmWithInt16InputMods() const {
249 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
250 }
251
 252 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
 253 return isRegOrImmWithInputMods(
 254 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
 255 }
256
257 bool isRegOrImmWithInt32InputMods() const {
258 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
259 }
260
261 bool isRegOrInlineImmWithInt16InputMods() const {
262 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
263 }
264
265 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
266 return isRegOrInline(
267 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
268 }
269
270 bool isRegOrInlineImmWithInt32InputMods() const {
271 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
272 }
273
274 bool isRegOrImmWithInt64InputMods() const {
275 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
276 }
277
278 bool isRegOrImmWithFP16InputMods() const {
279 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
280 }
281
 282 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
 283 return isRegOrImmWithInputMods(
 284 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
 285 }
286
287 bool isRegOrImmWithFP32InputMods() const {
288 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
289 }
290
291 bool isRegOrImmWithFP64InputMods() const {
292 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
293 }
294
295 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
296 return isRegOrInline(
297 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
298 }
299
300 bool isRegOrInlineImmWithFP32InputMods() const {
301 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
302 }
303
304 bool isRegOrInlineImmWithFP64InputMods() const {
305 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
306 }
307
308 bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
309
310 bool isVRegWithFP32InputMods() const {
311 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
312 }
313
314 bool isVRegWithFP64InputMods() const {
315 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
316 }
317
318 bool isPackedFP16InputMods() const {
319 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
320 }
321
322 bool isPackedVGPRFP32InputMods() const {
323 return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
324 }
325
326 bool isVReg() const {
327 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
328 isRegClass(AMDGPU::VReg_64RegClassID) ||
329 isRegClass(AMDGPU::VReg_96RegClassID) ||
330 isRegClass(AMDGPU::VReg_128RegClassID) ||
331 isRegClass(AMDGPU::VReg_160RegClassID) ||
332 isRegClass(AMDGPU::VReg_192RegClassID) ||
333 isRegClass(AMDGPU::VReg_256RegClassID) ||
334 isRegClass(AMDGPU::VReg_512RegClassID) ||
335 isRegClass(AMDGPU::VReg_1024RegClassID);
336 }
337
338 bool isVReg32() const {
339 return isRegClass(AMDGPU::VGPR_32RegClassID);
340 }
341
342 bool isVReg32OrOff() const {
343 return isOff() || isVReg32();
344 }
345
346 bool isNull() const {
347 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
348 }
349
350 bool isVRegWithInputMods() const;
351 template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
352 template <bool IsFake16> bool isT16VRegWithInputMods() const;
353
354 bool isSDWAOperand(MVT type) const;
355 bool isSDWAFP16Operand() const;
356 bool isSDWAFP32Operand() const;
357 bool isSDWAInt16Operand() const;
358 bool isSDWAInt32Operand() const;
359
360 bool isImmTy(ImmTy ImmT) const {
361 return isImm() && Imm.Type == ImmT;
362 }
363
364 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
365
366 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
367
368 bool isImmModifier() const {
369 return isImm() && Imm.Type != ImmTyNone;
370 }
371
372 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
373 bool isDim() const { return isImmTy(ImmTyDim); }
374 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
375 bool isOff() const { return isImmTy(ImmTyOff); }
376 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
377 bool isOffen() const { return isImmTy(ImmTyOffen); }
378 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
379 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
380 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
381 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
382 bool isGDS() const { return isImmTy(ImmTyGDS); }
383 bool isLDS() const { return isImmTy(ImmTyLDS); }
384 bool isCPol() const { return isImmTy(ImmTyCPol); }
385 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
386 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
387 bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
388 bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
389 bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
390 bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
391 bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
392 bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
393 bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
394 bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
395 bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
396 bool isTFE() const { return isImmTy(ImmTyTFE); }
397 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
398 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
399 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
400 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
401 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
402 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
403 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
404 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
405 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
406 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
407 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
408 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
409 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
410 bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
411
412 bool isRegOrImm() const {
413 return isReg() || isImm();
414 }
415
416 bool isRegClass(unsigned RCID) const;
417
418 bool isInlineValue() const;
419
420 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
421 return isRegOrInline(RCID, type) && !hasModifiers();
422 }
423
424 bool isSCSrcB16() const {
425 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
426 }
427
428 bool isSCSrcV2B16() const {
429 return isSCSrcB16();
430 }
431
432 bool isSCSrc_b32() const {
433 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
434 }
435
436 bool isSCSrc_b64() const {
437 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
438 }
439
440 bool isBoolReg() const;
441
442 bool isSCSrcF16() const {
443 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
444 }
445
446 bool isSCSrcV2F16() const {
447 return isSCSrcF16();
448 }
449
450 bool isSCSrcF32() const {
451 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
452 }
453
454 bool isSCSrcF64() const {
455 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
456 }
457
458 bool isSSrc_b32() const {
459 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
460 }
461
462 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
463
464 bool isSSrcV2B16() const {
465 llvm_unreachable("cannot happen");
466 return isSSrc_b16();
467 }
468
469 bool isSSrc_b64() const {
470 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
471 // See isVSrc64().
472 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
473 (((const MCTargetAsmParser *)AsmParser)
474 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
475 isExpr());
476 }
477
478 bool isSSrc_f32() const {
479 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
480 }
481
482 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
483
484 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
485
486 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
487
488 bool isSSrcV2F16() const {
489 llvm_unreachable("cannot happen");
490 return isSSrc_f16();
491 }
492
493 bool isSSrcV2FP32() const {
494 llvm_unreachable("cannot happen");
495 return isSSrc_f32();
496 }
497
498 bool isSCSrcV2FP32() const {
499 llvm_unreachable("cannot happen");
500 return isSCSrcF32();
501 }
502
503 bool isSSrcV2INT32() const {
504 llvm_unreachable("cannot happen");
505 return isSSrc_b32();
506 }
507
508 bool isSCSrcV2INT32() const {
509 llvm_unreachable("cannot happen");
510 return isSCSrc_b32();
511 }
512
513 bool isSSrcOrLds_b32() const {
514 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
515 isLiteralImm(MVT::i32) || isExpr();
516 }
517
518 bool isVCSrc_b32() const {
519 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
520 }
521
522 bool isVCSrc_b32_Lo256() const {
523 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
524 }
525
526 bool isVCSrc_b64_Lo256() const {
527 return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
528 }
529
530 bool isVCSrc_b64() const {
531 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
532 }
533
534 bool isVCSrcT_b16() const {
535 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
536 }
537
538 bool isVCSrcTB16_Lo128() const {
539 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
540 }
541
542 bool isVCSrcFake16B16_Lo128() const {
543 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
544 }
545
546 bool isVCSrc_b16() const {
547 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
548 }
549
550 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
551
552 bool isVCSrc_f32() const {
553 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
554 }
555
556 bool isVCSrc_f64() const {
557 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
558 }
559
560 bool isVCSrcTBF16() const {
561 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
562 }
563
564 bool isVCSrcT_f16() const {
565 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
566 }
567
568 bool isVCSrcT_bf16() const {
569 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
570 }
571
572 bool isVCSrcTBF16_Lo128() const {
573 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
574 }
575
576 bool isVCSrcTF16_Lo128() const {
577 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
578 }
579
580 bool isVCSrcFake16BF16_Lo128() const {
581 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
582 }
583
584 bool isVCSrcFake16F16_Lo128() const {
585 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
586 }
587
588 bool isVCSrc_bf16() const {
589 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
590 }
591
592 bool isVCSrc_f16() const {
593 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
594 }
595
596 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
597
598 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
599
600 bool isVSrc_b32() const {
601 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
602 }
603
604 bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
605
606 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
607
608 bool isVSrcT_b16_Lo128() const {
609 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
610 }
611
612 bool isVSrcFake16_b16_Lo128() const {
613 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
614 }
615
616 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
617
618 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
619
620 bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
621
622 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
623
624 bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
625
626 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
627
628 bool isVSrc_f32() const {
629 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
630 }
631
632 bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
633
634 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
635
636 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
637
638 bool isVSrcT_bf16_Lo128() const {
639 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
640 }
641
642 bool isVSrcT_f16_Lo128() const {
643 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
644 }
645
646 bool isVSrcFake16_bf16_Lo128() const {
647 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
648 }
649
650 bool isVSrcFake16_f16_Lo128() const {
651 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
652 }
653
654 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
655
656 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
657
658 bool isVSrc_v2bf16() const {
659 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
660 }
661
662 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
663
664 bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
665
666 bool isVISrcB32() const {
667 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
668 }
669
670 bool isVISrcB16() const {
671 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
672 }
673
674 bool isVISrcV2B16() const {
675 return isVISrcB16();
676 }
677
678 bool isVISrcF32() const {
679 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
680 }
681
682 bool isVISrcF16() const {
683 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
684 }
685
686 bool isVISrcV2F16() const {
687 return isVISrcF16() || isVISrcB32();
688 }
689
690 bool isVISrc_64_bf16() const {
691 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
692 }
693
694 bool isVISrc_64_f16() const {
695 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
696 }
697
698 bool isVISrc_64_b32() const {
699 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
700 }
701
702 bool isVISrc_64B64() const {
703 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
704 }
705
706 bool isVISrc_64_f64() const {
707 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
708 }
709
710 bool isVISrc_64V2FP32() const {
711 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
712 }
713
714 bool isVISrc_64V2INT32() const {
715 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
716 }
717
718 bool isVISrc_256_b32() const {
719 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
720 }
721
722 bool isVISrc_256_f32() const {
723 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
724 }
725
726 bool isVISrc_256B64() const {
727 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
728 }
729
730 bool isVISrc_256_f64() const {
731 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
732 }
733
734 bool isVISrc_512_f64() const {
735 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
736 }
737
738 bool isVISrc_128B16() const {
739 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
740 }
741
742 bool isVISrc_128V2B16() const {
743 return isVISrc_128B16();
744 }
745
746 bool isVISrc_128_b32() const {
747 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
748 }
749
750 bool isVISrc_128_f32() const {
751 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
752 }
753
754 bool isVISrc_256V2FP32() const {
755 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
756 }
757
758 bool isVISrc_256V2INT32() const {
759 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
760 }
761
762 bool isVISrc_512_b32() const {
763 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
764 }
765
766 bool isVISrc_512B16() const {
767 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
768 }
769
770 bool isVISrc_512V2B16() const {
771 return isVISrc_512B16();
772 }
773
774 bool isVISrc_512_f32() const {
775 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
776 }
777
778 bool isVISrc_512F16() const {
779 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
780 }
781
782 bool isVISrc_512V2F16() const {
783 return isVISrc_512F16() || isVISrc_512_b32();
784 }
785
786 bool isVISrc_1024_b32() const {
787 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
788 }
789
790 bool isVISrc_1024B16() const {
791 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
792 }
793
794 bool isVISrc_1024V2B16() const {
795 return isVISrc_1024B16();
796 }
797
798 bool isVISrc_1024_f32() const {
799 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
800 }
801
802 bool isVISrc_1024F16() const {
803 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
804 }
805
806 bool isVISrc_1024V2F16() const {
807 return isVISrc_1024F16() || isVISrc_1024_b32();
808 }
809
810 bool isAISrcB32() const {
811 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
812 }
813
814 bool isAISrcB16() const {
815 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
816 }
817
818 bool isAISrcV2B16() const {
819 return isAISrcB16();
820 }
821
822 bool isAISrcF32() const {
823 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
824 }
825
826 bool isAISrcF16() const {
827 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
828 }
829
830 bool isAISrcV2F16() const {
831 return isAISrcF16() || isAISrcB32();
832 }
833
834 bool isAISrc_64B64() const {
835 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
836 }
837
838 bool isAISrc_64_f64() const {
839 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
840 }
841
842 bool isAISrc_128_b32() const {
843 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
844 }
845
846 bool isAISrc_128B16() const {
847 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
848 }
849
850 bool isAISrc_128V2B16() const {
851 return isAISrc_128B16();
852 }
853
854 bool isAISrc_128_f32() const {
855 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
856 }
857
858 bool isAISrc_128F16() const {
859 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
860 }
861
862 bool isAISrc_128V2F16() const {
863 return isAISrc_128F16() || isAISrc_128_b32();
864 }
865
866 bool isVISrc_128_bf16() const {
867 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
868 }
869
870 bool isVISrc_128_f16() const {
871 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
872 }
873
874 bool isVISrc_128V2F16() const {
875 return isVISrc_128_f16() || isVISrc_128_b32();
876 }
877
878 bool isAISrc_256B64() const {
879 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
880 }
881
882 bool isAISrc_256_f64() const {
883 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
884 }
885
886 bool isAISrc_512_b32() const {
887 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
888 }
889
890 bool isAISrc_512B16() const {
891 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
892 }
893
894 bool isAISrc_512V2B16() const {
895 return isAISrc_512B16();
896 }
897
898 bool isAISrc_512_f32() const {
899 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
900 }
901
902 bool isAISrc_512F16() const {
903 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
904 }
905
906 bool isAISrc_512V2F16() const {
907 return isAISrc_512F16() || isAISrc_512_b32();
908 }
909
910 bool isAISrc_1024_b32() const {
911 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
912 }
913
914 bool isAISrc_1024B16() const {
915 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
916 }
917
918 bool isAISrc_1024V2B16() const {
919 return isAISrc_1024B16();
920 }
921
922 bool isAISrc_1024_f32() const {
923 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
924 }
925
926 bool isAISrc_1024F16() const {
927 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
928 }
929
930 bool isAISrc_1024V2F16() const {
931 return isAISrc_1024F16() || isAISrc_1024_b32();
932 }
933
934 bool isKImmFP32() const {
935 return isLiteralImm(MVT::f32);
936 }
937
938 bool isKImmFP16() const {
939 return isLiteralImm(MVT::f16);
940 }
941
942 bool isKImmFP64() const { return isLiteralImm(MVT::f64); }
943
944 bool isMem() const override {
945 return false;
946 }
947
948 bool isExpr() const {
949 return Kind == Expression;
950 }
951
952 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
953
954 bool isSWaitCnt() const;
955 bool isDepCtr() const;
956 bool isSDelayALU() const;
957 bool isHwreg() const;
958 bool isSendMsg() const;
959 bool isSplitBarrier() const;
960 bool isSwizzle() const;
961 bool isSMRDOffset8() const;
962 bool isSMEMOffset() const;
963 bool isSMRDLiteralOffset() const;
964 bool isDPP8() const;
965 bool isDPPCtrl() const;
966 bool isBLGP() const;
967 bool isGPRIdxMode() const;
968 bool isS16Imm() const;
969 bool isU16Imm() const;
970 bool isEndpgm() const;
971
972 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
973 return [this, P]() { return P(*this); };
974 }
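 // Editorial note (not in the original source): getPredicate() binds the
 // operand into a nullary callable, so a check can be captured once and
 // evaluated later, e.g.:
 //
 //   auto IsLiteral = Op.getPredicate(
 //       [](const AMDGPUOperand &O) { return O.isImmLiteral(); });
 //   if (IsLiteral()) { /* ... */ }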
975
976 StringRef getToken() const {
977 assert(isToken());
978 return StringRef(Tok.Data, Tok.Length);
979 }
980
981 int64_t getImm() const {
982 assert(isImm());
983 return Imm.Val;
984 }
985
986 void setImm(int64_t Val) {
987 assert(isImm());
988 Imm.Val = Val;
989 }
990
991 ImmTy getImmTy() const {
992 assert(isImm());
993 return Imm.Type;
994 }
995
996 MCRegister getReg() const override {
997 assert(isRegKind());
998 return Reg.RegNo;
999 }
1000
1001 SMLoc getStartLoc() const override {
1002 return StartLoc;
1003 }
1004
1005 SMLoc getEndLoc() const override {
1006 return EndLoc;
1007 }
1008
1009 SMRange getLocRange() const {
1010 return SMRange(StartLoc, EndLoc);
1011 }
1012
1013 int getMCOpIdx() const { return MCOpIdx; }
1014
1015 Modifiers getModifiers() const {
1016 assert(isRegKind() || isImmTy(ImmTyNone));
1017 return isRegKind() ? Reg.Mods : Imm.Mods;
1018 }
1019
1020 void setModifiers(Modifiers Mods) {
1021 assert(isRegKind() || isImmTy(ImmTyNone));
1022 if (isRegKind())
1023 Reg.Mods = Mods;
1024 else
1025 Imm.Mods = Mods;
1026 }
1027
1028 bool hasModifiers() const {
1029 return getModifiers().hasModifiers();
1030 }
1031
1032 bool hasFPModifiers() const {
1033 return getModifiers().hasFPModifiers();
1034 }
1035
1036 bool hasIntModifiers() const {
1037 return getModifiers().hasIntModifiers();
1038 }
1039
1040 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1041
1042 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1043
1044 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1045
1046 void addRegOperands(MCInst &Inst, unsigned N) const;
1047
1048 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1049 if (isRegKind())
1050 addRegOperands(Inst, N);
1051 else
1052 addImmOperands(Inst, N);
1053 }
1054
1055 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1056 Modifiers Mods = getModifiers();
1057 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1058 if (isRegKind()) {
1059 addRegOperands(Inst, N);
1060 } else {
1061 addImmOperands(Inst, N, false);
1062 }
1063 }
1064
1065 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1066 assert(!hasIntModifiers());
1067 addRegOrImmWithInputModsOperands(Inst, N);
1068 }
1069
1070 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1071 assert(!hasFPModifiers());
1072 addRegOrImmWithInputModsOperands(Inst, N);
1073 }
1074
1075 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1076 Modifiers Mods = getModifiers();
1077 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1078 assert(isRegKind());
1079 addRegOperands(Inst, N);
1080 }
1081
1082 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1083 assert(!hasIntModifiers());
1084 addRegWithInputModsOperands(Inst, N);
1085 }
1086
1087 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1088 assert(!hasFPModifiers());
1089 addRegWithInputModsOperands(Inst, N);
1090 }
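 // Editorial note (not in the original source): each addRegOrImmWith*Mods /
 // addRegWith*Mods helper above emits two MCInst operands for one assembly
 // operand: first the srcN_modifiers immediate built by getModifiersOperand(),
 // then the register or immediate value itself. For immediates the value is
 // added with ApplyModifiers = false, since any FP modifiers are already
 // carried by the preceding modifiers operand.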
1091
1092 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1093 // clang-format off
1094 switch (Type) {
1095 case ImmTyNone: OS << "None"; break;
1096 case ImmTyGDS: OS << "GDS"; break;
1097 case ImmTyLDS: OS << "LDS"; break;
1098 case ImmTyOffen: OS << "Offen"; break;
1099 case ImmTyIdxen: OS << "Idxen"; break;
1100 case ImmTyAddr64: OS << "Addr64"; break;
1101 case ImmTyOffset: OS << "Offset"; break;
1102 case ImmTyInstOffset: OS << "InstOffset"; break;
1103 case ImmTyOffset0: OS << "Offset0"; break;
1104 case ImmTyOffset1: OS << "Offset1"; break;
1105 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1106 case ImmTyCPol: OS << "CPol"; break;
1107 case ImmTyIndexKey8bit: OS << "index_key"; break;
1108 case ImmTyIndexKey16bit: OS << "index_key"; break;
1109 case ImmTyIndexKey32bit: OS << "index_key"; break;
1110 case ImmTyTFE: OS << "TFE"; break;
1111 case ImmTyD16: OS << "D16"; break;
1112 case ImmTyFORMAT: OS << "FORMAT"; break;
1113 case ImmTyClamp: OS << "Clamp"; break;
1114 case ImmTyOModSI: OS << "OModSI"; break;
1115 case ImmTyDPP8: OS << "DPP8"; break;
1116 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1117 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1118 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1119 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1120 case ImmTyDppFI: OS << "DppFI"; break;
1121 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1122 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1123 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1124 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1125 case ImmTyDMask: OS << "DMask"; break;
1126 case ImmTyDim: OS << "Dim"; break;
1127 case ImmTyUNorm: OS << "UNorm"; break;
1128 case ImmTyDA: OS << "DA"; break;
1129 case ImmTyR128A16: OS << "R128A16"; break;
1130 case ImmTyA16: OS << "A16"; break;
1131 case ImmTyLWE: OS << "LWE"; break;
1132 case ImmTyOff: OS << "Off"; break;
1133 case ImmTyExpTgt: OS << "ExpTgt"; break;
1134 case ImmTyExpCompr: OS << "ExpCompr"; break;
1135 case ImmTyExpVM: OS << "ExpVM"; break;
1136 case ImmTyHwreg: OS << "Hwreg"; break;
1137 case ImmTySendMsg: OS << "SendMsg"; break;
1138 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1139 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1140 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1141 case ImmTyOpSel: OS << "OpSel"; break;
1142 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1143 case ImmTyNegLo: OS << "NegLo"; break;
1144 case ImmTyNegHi: OS << "NegHi"; break;
1145 case ImmTySwizzle: OS << "Swizzle"; break;
1146 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1147 case ImmTyHigh: OS << "High"; break;
1148 case ImmTyBLGP: OS << "BLGP"; break;
1149 case ImmTyCBSZ: OS << "CBSZ"; break;
1150 case ImmTyABID: OS << "ABID"; break;
1151 case ImmTyEndpgm: OS << "Endpgm"; break;
1152 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1153 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1154 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1155 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1156 case ImmTyBitOp3: OS << "BitOp3"; break;
1157 case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
1158 case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
1159 case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
1160 case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
1161 case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
1162 case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
1163 case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
1164 case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
1165 case ImmTyScaleSel: OS << "ScaleSel" ; break;
1166 case ImmTyByteSel: OS << "ByteSel" ; break;
1167 }
1168 // clang-format on
1169 }
1170
1171 void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
1172 switch (Kind) {
1173 case Register:
1174 OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
1175 << " mods: " << Reg.Mods << '>';
1176 break;
1177 case Immediate:
1178 OS << '<' << getImm();
1179 if (getImmTy() != ImmTyNone) {
1180 OS << " type: "; printImmTy(OS, getImmTy());
1181 }
1182 OS << " mods: " << Imm.Mods << '>';
1183 break;
1184 case Token:
1185 OS << '\'' << getToken() << '\'';
1186 break;
1187 case Expression:
1188 OS << "<expr ";
1189 MAI.printExpr(OS, *Expr);
1190 OS << '>';
1191 break;
1192 }
1193 }
1194
1195 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1196 int64_t Val, SMLoc Loc,
1197 ImmTy Type = ImmTyNone,
1198 bool IsFPImm = false) {
1199 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1200 Op->Imm.Val = Val;
1201 Op->Imm.IsFPImm = IsFPImm;
1202 Op->Imm.Type = Type;
1203 Op->Imm.Mods = Modifiers();
1204 Op->StartLoc = Loc;
1205 Op->EndLoc = Loc;
1206 return Op;
1207 }
1208
1209 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1210 StringRef Str, SMLoc Loc,
1211 bool HasExplicitEncodingSize = true) {
1212 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1213 Res->Tok.Data = Str.data();
1214 Res->Tok.Length = Str.size();
1215 Res->StartLoc = Loc;
1216 Res->EndLoc = Loc;
1217 return Res;
1218 }
1219
1220 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1221 MCRegister Reg, SMLoc S, SMLoc E) {
1222 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1223 Op->Reg.RegNo = Reg;
1224 Op->Reg.Mods = Modifiers();
1225 Op->StartLoc = S;
1226 Op->EndLoc = E;
1227 return Op;
1228 }
1229
1230 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1231 const class MCExpr *Expr, SMLoc S) {
1232 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1233 Op->Expr = Expr;
1234 Op->StartLoc = S;
1235 Op->EndLoc = S;
1236 return Op;
1237 }
1238};
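// Editorial sketch (not part of the original file): how a parser routine
// typically populates its OperandVector with the Create* factories above.
// The operand values and locations here are made up for illustration.
//
//   Operands.push_back(AMDGPUOperand::CreateToken(this, "offen", S));
//   Operands.push_back(AMDGPUOperand::CreateReg(this, AMDGPU::VGPR0, S, E));
//   Operands.push_back(AMDGPUOperand::CreateImm(this, 16, S,
//                                               AMDGPUOperand::ImmTyOffset));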
1239
1240raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1241 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1242 return OS;
1243}
1244
1245//===----------------------------------------------------------------------===//
1246// AsmParser
1247//===----------------------------------------------------------------------===//
1248
1249// TODO: define GET_SUBTARGET_FEATURE_NAME
1250#define GET_REGISTER_MATCHER
1251#include "AMDGPUGenAsmMatcher.inc"
1252#undef GET_REGISTER_MATCHER
1253#undef GET_SUBTARGET_FEATURE_NAME
1254
1255// Holds info related to the current kernel, e.g. count of SGPRs used.
1256// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1257// .amdgpu_hsa_kernel or at EOF.
1258class KernelScopeInfo {
1259 int SgprIndexUnusedMin = -1;
1260 int VgprIndexUnusedMin = -1;
1261 int AgprIndexUnusedMin = -1;
1262 MCContext *Ctx = nullptr;
1263 MCSubtargetInfo const *MSTI = nullptr;
1264
1265 void usesSgprAt(int i) {
1266 if (i >= SgprIndexUnusedMin) {
1267 SgprIndexUnusedMin = ++i;
1268 if (Ctx) {
1269 MCSymbol* const Sym =
1270 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1271 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1272 }
1273 }
1274 }
1275
1276 void usesVgprAt(int i) {
1277 if (i >= VgprIndexUnusedMin) {
1278 VgprIndexUnusedMin = ++i;
1279 if (Ctx) {
1280 MCSymbol* const Sym =
1281 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1282 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1283 VgprIndexUnusedMin);
1284 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1285 }
1286 }
1287 }
1288
1289 void usesAgprAt(int i) {
1290 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1291 if (!hasMAIInsts(*MSTI))
1292 return;
1293
1294 if (i >= AgprIndexUnusedMin) {
1295 AgprIndexUnusedMin = ++i;
1296 if (Ctx) {
1297 MCSymbol* const Sym =
1298 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1299 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1300
1301 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1302 MCSymbol* const vSym =
1303 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1304 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1305 VgprIndexUnusedMin);
1306 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1307 }
1308 }
1309 }
1310
1311public:
1312 KernelScopeInfo() = default;
1313
1314 void initialize(MCContext &Context) {
1315 Ctx = &Context;
1316 MSTI = Ctx->getSubtargetInfo();
1317
1318 usesSgprAt(SgprIndexUnusedMin = -1);
1319 usesVgprAt(VgprIndexUnusedMin = -1);
1320 if (hasMAIInsts(*MSTI)) {
1321 usesAgprAt(AgprIndexUnusedMin = -1);
1322 }
1323 }
1324
1325 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1326 unsigned RegWidth) {
1327 switch (RegKind) {
1328 case IS_SGPR:
1329 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1330 break;
1331 case IS_AGPR:
1332 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1333 break;
1334 case IS_VGPR:
1335 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1336 break;
1337 default:
1338 break;
1339 }
1340 }
1341};
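// Editorial sketch (not part of the original file): how the tracker above
// feeds the .kernel.*_count symbols. Registering a 64-bit wide VGPR starting
// at v5 marks v5..v6 as used, so the first unused index becomes 7:
//
//   KernelScopeInfo KernelScope;
//   KernelScope.initialize(Ctx);                  // creates the count symbols
//   KernelScope.usesRegister(IS_VGPR, /*DwordRegIndex=*/5, /*RegWidth=*/64);
//   // .kernel.vgpr_count now evaluates to 7 (gfx90a-style targets additionally
//   // fold the AGPR count in via getTotalNumVGPRs).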
1342
1343class AMDGPUAsmParser : public MCTargetAsmParser {
1344 MCAsmParser &Parser;
1345
1346 unsigned ForcedEncodingSize = 0;
1347 bool ForcedDPP = false;
1348 bool ForcedSDWA = false;
1349 KernelScopeInfo KernelScope;
1350 const unsigned HwMode;
1351
1352 /// @name Auto-generated Match Functions
1353 /// {
1354
1355#define GET_ASSEMBLER_HEADER
1356#include "AMDGPUGenAsmMatcher.inc"
1357
1358 /// }
1359
1360 /// Get size of register operand
1361 unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
1362 assert(OpNo < Desc.NumOperands);
1363 int16_t RCID = MII.getOpRegClassID(Desc.operands()[OpNo], HwMode);
1364 return getRegBitWidth(RCID) / 8;
1365 }
1366
1367private:
1368 void createConstantSymbol(StringRef Id, int64_t Val);
1369
1370 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1371 bool OutOfRangeError(SMRange Range);
1372 /// Calculate VGPR/SGPR blocks required for given target, reserved
1373 /// registers, and user-specified NextFreeXGPR values.
1374 ///
1375 /// \param Features [in] Target features, used for bug corrections.
1376 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1377 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1378 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1379 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1380 /// descriptor field, if valid.
1381 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1382 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1383 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1384 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1385 /// \param VGPRBlocks [out] Result VGPR block count.
1386 /// \param SGPRBlocks [out] Result SGPR block count.
1387 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1388 const MCExpr *FlatScrUsed, bool XNACKUsed,
1389 std::optional<bool> EnableWavefrontSize32,
1390 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1391 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1392 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
1393 bool ParseDirectiveAMDGCNTarget();
1394 bool ParseDirectiveAMDHSACodeObjectVersion();
1395 bool ParseDirectiveAMDHSAKernel();
1396 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1397 bool ParseDirectiveAMDKernelCodeT();
1398 // TODO: Possibly make subtargetHasRegister const.
1399 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1400 bool ParseDirectiveAMDGPUHsaKernel();
1401
1402 bool ParseDirectiveISAVersion();
1403 bool ParseDirectiveHSAMetadata();
1404 bool ParseDirectivePALMetadataBegin();
1405 bool ParseDirectivePALMetadata();
1406 bool ParseDirectiveAMDGPULDS();
1407
1408 /// Common code to parse out a block of text (typically YAML) between start and
1409 /// end directives.
1410 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1411 const char *AssemblerDirectiveEnd,
1412 std::string &CollectString);
1413
1414 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1415 RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
1416 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1417 unsigned &RegNum, unsigned &RegWidth,
1418 bool RestoreOnFailure = false);
1419 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1420 unsigned &RegNum, unsigned &RegWidth,
1421 SmallVectorImpl<AsmToken> &Tokens);
1422 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1423 unsigned &RegWidth,
1424 SmallVectorImpl<AsmToken> &Tokens);
1425 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1426 unsigned &RegWidth,
1427 SmallVectorImpl<AsmToken> &Tokens);
1428 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1429 unsigned &RegWidth,
1430 SmallVectorImpl<AsmToken> &Tokens);
1431 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1432 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1433 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1434
1435 bool isRegister();
1436 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1437 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1438 void initializeGprCountSymbol(RegisterKind RegKind);
1439 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1440 unsigned RegWidth);
1441 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1442 bool IsAtomic);
1443
1444public:
1445 enum OperandMode {
1446 OperandMode_Default,
1447 OperandMode_NSA,
1448 };
1449
1450 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1451
1452 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1453 const MCInstrInfo &MII, const MCTargetOptions &Options)
1454 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser),
1455 HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
1457
1458 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1459
1460 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1461 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1462 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1463 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1464 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1465 } else {
1466 createConstantSymbol(".option.machine_version_major", ISA.Major);
1467 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1468 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1469 }
1470 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1471 initializeGprCountSymbol(IS_VGPR);
1472 initializeGprCountSymbol(IS_SGPR);
1473 } else
1474 KernelScope.initialize(getContext());
1475
1476 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1477 createConstantSymbol(Symbol, Code);
1478
1479 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1480 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1481 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1482 }
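 // Editorial note (not in the original source): the symbols created above are
 // absolute constants, so assembly input can branch on them in expressions,
 // e.g. on HSA ABI targets:
 //
 //   .if .amdgcn.gfx_generation_number >= 10
 //     // gfx10+-only code
 //   .endif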
1483
1484 bool hasMIMG_R128() const {
1485 return AMDGPU::hasMIMG_R128(getSTI());
1486 }
1487
1488 bool hasPackedD16() const {
1489 return AMDGPU::hasPackedD16(getSTI());
1490 }
1491
1492 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1493
1494 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1495
1496 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1497
1498 bool isSI() const {
1499 return AMDGPU::isSI(getSTI());
1500 }
1501
1502 bool isCI() const {
1503 return AMDGPU::isCI(getSTI());
1504 }
1505
1506 bool isVI() const {
1507 return AMDGPU::isVI(getSTI());
1508 }
1509
1510 bool isGFX9() const {
1511 return AMDGPU::isGFX9(getSTI());
1512 }
1513
1514 // TODO: isGFX90A is also true for GFX940. We need to clean it.
1515 bool isGFX90A() const {
1516 return AMDGPU::isGFX90A(getSTI());
1517 }
1518
1519 bool isGFX940() const {
1520 return AMDGPU::isGFX940(getSTI());
1521 }
1522
1523 bool isGFX9Plus() const {
1524 return AMDGPU::isGFX9Plus(getSTI());
1525 }
1526
1527 bool isGFX10() const {
1528 return AMDGPU::isGFX10(getSTI());
1529 }
1530
1531 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1532
1533 bool isGFX11() const {
1534 return AMDGPU::isGFX11(getSTI());
1535 }
1536
1537 bool isGFX11Plus() const {
1538 return AMDGPU::isGFX11Plus(getSTI());
1539 }
1540
1541 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1542
1543 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1544
1545 bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
1546
1547 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1548
1549 bool isGFX10_BEncoding() const {
1550 return AMDGPU::isGFX10_BEncoding(getSTI());
1551 }
1552
1553 bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }
1554
1555 bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }
1556
1557 bool hasInv2PiInlineImm() const {
1558 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1559 }
1560
1561 bool has64BitLiterals() const {
1562 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1563 }
1564
1565 bool hasFlatOffsets() const {
1566 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1567 }
1568
1569 bool hasTrue16Insts() const {
1570 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1571 }
1572
1573 bool hasArchitectedFlatScratch() const {
1574 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1575 }
1576
1577 bool hasSGPR102_SGPR103() const {
1578 return !isVI() && !isGFX9();
1579 }
1580
1581 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1582
1583 bool hasIntClamp() const {
1584 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1585 }
1586
1587 bool hasPartialNSAEncoding() const {
1588 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1589 }
1590
1591 bool hasGloballyAddressableScratch() const {
1592 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
1593 }
1594
1595 unsigned getNSAMaxSize(bool HasSampler = false) const {
1596 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1597 }
1598
1599 unsigned getMaxNumUserSGPRs() const {
1600 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1601 }
1602
1603 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1604
1605 AMDGPUTargetStreamer &getTargetStreamer() {
1606 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1607 return static_cast<AMDGPUTargetStreamer &>(TS);
1608 }
1609
1610 MCContext &getContext() const {
1611 // We need this const_cast because for some reason getContext() is not const
1612 // in MCAsmParser.
1613 return const_cast<AMDGPUAsmParser *>(this)->MCTargetAsmParser::getContext();
1614 }
1615
1616 const MCRegisterInfo *getMRI() const {
1617 return getContext().getRegisterInfo();
1618 }
1619
1620 const MCInstrInfo *getMII() const {
1621 return &MII;
1622 }
1623
1624 // FIXME: This should not be used. Instead, should use queries derived from
1625 // getAvailableFeatures().
1626 const FeatureBitset &getFeatureBits() const {
1627 return getSTI().getFeatureBits();
1628 }
1629
1630 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1631 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1632 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1633
1634 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1635 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1636 bool isForcedDPP() const { return ForcedDPP; }
1637 bool isForcedSDWA() const { return ForcedSDWA; }
1638 ArrayRef<unsigned> getMatchedVariants() const;
1639 StringRef getMatchedVariantName() const;
1640
1641 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1642 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1643 bool RestoreOnFailure);
1644 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1645 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1646 SMLoc &EndLoc) override;
1647 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1648 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1649 unsigned Kind) override;
1650 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1651 OperandVector &Operands, MCStreamer &Out,
1652 uint64_t &ErrorInfo,
1653 bool MatchingInlineAsm) override;
1654 bool ParseDirective(AsmToken DirectiveID) override;
1655 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1656 OperandMode Mode = OperandMode_Default);
1657 StringRef parseMnemonicSuffix(StringRef Name);
1658 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1659 SMLoc NameLoc, OperandVector &Operands) override;
1660 //bool ProcessInstruction(MCInst &Inst);
1661
1662 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1663
1664 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1665
1666 ParseStatus
1667 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1668 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1669 std::function<bool(int64_t &)> ConvertResult = nullptr);
1670
1671 ParseStatus parseOperandArrayWithPrefix(
1672 const char *Prefix, OperandVector &Operands,
1673 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1674 bool (*ConvertResult)(int64_t &) = nullptr);
1675
1676 ParseStatus
1677 parseNamedBit(StringRef Name, OperandVector &Operands,
1678 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1679 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1680 ParseStatus parseCPol(OperandVector &Operands);
1681 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1682 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1683 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1684 SMLoc &StringLoc);
1685 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1686 StringRef Name,
1687 ArrayRef<const char *> Ids,
1688 int64_t &IntVal);
1689 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1690 StringRef Name,
1691 ArrayRef<const char *> Ids,
1692 AMDGPUOperand::ImmTy Type);
1693
1694 bool isModifier();
1695 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1696 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1697 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1698 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1699 bool parseSP3NegModifier();
1700 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1701 LitModifier Lit = LitModifier::None);
1702 ParseStatus parseReg(OperandVector &Operands);
1703 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1704 LitModifier Lit = LitModifier::None);
1705 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1706 bool AllowImm = true);
1707 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1708 bool AllowImm = true);
1709 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1710 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1711 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1712 ParseStatus tryParseIndexKey(OperandVector &Operands,
1713 AMDGPUOperand::ImmTy ImmTy);
1714 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1715 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1716 ParseStatus parseIndexKey32bit(OperandVector &Operands);
1717 ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
1718 AMDGPUOperand::ImmTy Type);
1719 ParseStatus parseMatrixAFMT(OperandVector &Operands);
1720 ParseStatus parseMatrixBFMT(OperandVector &Operands);
1721 ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
1722 AMDGPUOperand::ImmTy Type);
1723 ParseStatus parseMatrixAScale(OperandVector &Operands);
1724 ParseStatus parseMatrixBScale(OperandVector &Operands);
1725 ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
1726 AMDGPUOperand::ImmTy Type);
1727 ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
1728 ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
1729
1730 ParseStatus parseDfmtNfmt(int64_t &Format);
1731 ParseStatus parseUfmt(int64_t &Format);
1732 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1733 int64_t &Format);
1734 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1735 int64_t &Format);
1736 ParseStatus parseFORMAT(OperandVector &Operands);
1737 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1738 ParseStatus parseNumericFormat(int64_t &Format);
1739 ParseStatus parseFlatOffset(OperandVector &Operands);
1740 ParseStatus parseR128A16(OperandVector &Operands);
1741 ParseStatus parseBLGP(OperandVector &Operands);
1742 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1743 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1744
1745 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1746
1747 bool parseCnt(int64_t &IntVal);
1748 ParseStatus parseSWaitCnt(OperandVector &Operands);
1749
1750 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1751 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1752 ParseStatus parseDepCtr(OperandVector &Operands);
1753
1754 bool parseDelay(int64_t &Delay);
1755 ParseStatus parseSDelayALU(OperandVector &Operands);
1756
1757 ParseStatus parseHwreg(OperandVector &Operands);
1758
1759private:
1760 struct OperandInfoTy {
1761 SMLoc Loc;
1762 int64_t Val;
1763 bool IsSymbolic = false;
1764 bool IsDefined = false;
1765
1766 OperandInfoTy(int64_t Val) : Val(Val) {}
1767 };
1768
1769 struct StructuredOpField : OperandInfoTy {
1770 StringLiteral Id;
1771 StringLiteral Desc;
1772 unsigned Width;
1773 bool IsDefined = false;
1774
1775 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1776 int64_t Default)
1777 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1778 virtual ~StructuredOpField() = default;
1779
1780 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1781 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1782 return false;
1783 }
1784
1785 virtual bool validate(AMDGPUAsmParser &Parser) const {
1786 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1787 return Error(Parser, "not supported on this GPU");
1788 if (!isUIntN(Width, Val))
1789 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1790 return true;
1791 }
1792 };
1793
1794 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1795 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1796
1797 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1798 bool validateSendMsg(const OperandInfoTy &Msg,
1799 const OperandInfoTy &Op,
1800 const OperandInfoTy &Stream);
1801
1802 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1803 OperandInfoTy &Width);
1804
1805 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1806
1807 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1808 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1809 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1810
1811 SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
1812 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1813 const OperandVector &Operands) const;
1814 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
1815 const OperandVector &Operands) const;
1816 SMLoc getInstLoc(const OperandVector &Operands) const;
1817
1818 bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
1819 const OperandVector &Operands);
1820 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1821 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1822 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1823 bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1824 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1825 std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1826 bool AsVOPD3);
1827 bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1828 bool tryVOPD(const MCInst &Inst);
1829 bool tryVOPD3(const MCInst &Inst);
1830 bool tryAnotherVOPDEncoding(const MCInst &Inst);
1831
1832 bool validateIntClampSupported(const MCInst &Inst);
1833 bool validateMIMGAtomicDMask(const MCInst &Inst);
1834 bool validateMIMGGatherDMask(const MCInst &Inst);
1835 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1836 bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
1837 bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
1838 bool validateMIMGD16(const MCInst &Inst);
1839 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1840 bool validateTensorR128(const MCInst &Inst);
1841 bool validateMIMGMSAA(const MCInst &Inst);
1842 bool validateOpSel(const MCInst &Inst);
1843 bool validateTrue16OpSel(const MCInst &Inst);
1844 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1845 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1846 bool validateVccOperand(MCRegister Reg) const;
1847 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1848 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1849 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1850 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1851 bool validateAGPRLdSt(const MCInst &Inst) const;
1852 bool validateVGPRAlign(const MCInst &Inst) const;
1853 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1854 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1855 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1856 bool validateDivScale(const MCInst &Inst);
1857 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1858 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1859 SMLoc IDLoc);
1860 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1861 const unsigned CPol);
1862 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1863 bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
1864 bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1865 unsigned getConstantBusLimit(unsigned Opcode) const;
1866 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1867 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1868 MCRegister findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1869
1870 bool isSupportedMnemo(StringRef Mnemo,
1871 const FeatureBitset &FBS);
1872 bool isSupportedMnemo(StringRef Mnemo,
1873 const FeatureBitset &FBS,
1874 ArrayRef<unsigned> Variants);
1875 bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
1876
1877 bool isId(const StringRef Id) const;
1878 bool isId(const AsmToken &Token, const StringRef Id) const;
1879 bool isToken(const AsmToken::TokenKind Kind) const;
1880 StringRef getId() const;
1881 bool trySkipId(const StringRef Id);
1882 bool trySkipId(const StringRef Pref, const StringRef Id);
1883 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1884 bool trySkipToken(const AsmToken::TokenKind Kind);
1885 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1886 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1887 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1888
1889 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1890 AsmToken::TokenKind getTokenKind() const;
1891 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1892 bool parseExpr(OperandVector &Operands);
1893 StringRef getTokenStr() const;
1894 AsmToken peekToken(bool ShouldSkipSpace = true);
1895 AsmToken getToken() const;
1896 SMLoc getLoc() const;
1897 void lex();
1898
1899public:
1900 void onBeginOfFile() override;
1901 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1902
1903 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1904
1905 ParseStatus parseExpTgt(OperandVector &Operands);
1906 ParseStatus parseSendMsg(OperandVector &Operands);
1907 ParseStatus parseInterpSlot(OperandVector &Operands);
1908 ParseStatus parseInterpAttr(OperandVector &Operands);
1909 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1910 ParseStatus parseBoolReg(OperandVector &Operands);
1911
1912 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1913 const unsigned MaxVal, const Twine &ErrMsg,
1914 SMLoc &Loc);
1915 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1916 const unsigned MinVal,
1917 const unsigned MaxVal,
1918 const StringRef ErrMsg);
1919 ParseStatus parseSwizzle(OperandVector &Operands);
1920 bool parseSwizzleOffset(int64_t &Imm);
1921 bool parseSwizzleMacro(int64_t &Imm);
1922 bool parseSwizzleQuadPerm(int64_t &Imm);
1923 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1924 bool parseSwizzleBroadcast(int64_t &Imm);
1925 bool parseSwizzleSwap(int64_t &Imm);
1926 bool parseSwizzleReverse(int64_t &Imm);
1927 bool parseSwizzleFFT(int64_t &Imm);
1928 bool parseSwizzleRotate(int64_t &Imm);
1929
1930 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1931 int64_t parseGPRIdxMacro();
1932
1933 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1934 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1935
1936 ParseStatus parseOModSI(OperandVector &Operands);
1937
1938 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1939 OptionalImmIndexMap &OptionalIdx);
1940 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1941 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1942 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1943 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1944 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1945
1946 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1947 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1948 OptionalImmIndexMap &OptionalIdx);
1949 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1950 OptionalImmIndexMap &OptionalIdx);
1951
1952 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1953 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1954 void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);
1955
1956 bool parseDimId(unsigned &Encoding);
1957 ParseStatus parseDim(OperandVector &Operands);
1958 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1959 ParseStatus parseDPP8(OperandVector &Operands);
1960 ParseStatus parseDPPCtrl(OperandVector &Operands);
1961 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1962 int64_t parseDPPCtrlSel(StringRef Ctrl);
1963 int64_t parseDPPCtrlPerm();
1964 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1965 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1966 cvtDPP(Inst, Operands, true);
1967 }
1968 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1969 bool IsDPP8 = false);
1970 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1971 cvtVOP3DPP(Inst, Operands, true);
1972 }
1973
1974 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1975 AMDGPUOperand::ImmTy Type);
1976 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1977 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1978 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1979 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1980 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1981 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1982 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1983 uint64_t BasicInstType,
1984 bool SkipDstVcc = false,
1985 bool SkipSrcVcc = false);
1986
1987 ParseStatus parseEndpgm(OperandVector &Operands);
1988
1989 ParseStatus parseVOPD(OperandVector &Operands);
1990};
1991
1992} // end anonymous namespace
1993
1994 // May be called with an integer type of equivalent bitwidth.
1995static const fltSemantics *getFltSemantics(unsigned Size) {
1996 switch (Size) {
1997 case 4:
1998 return &APFloat::IEEEsingle();
1999 case 8:
2000 return &APFloat::IEEEdouble();
2001 case 2:
2002 return &APFloat::IEEEhalf();
2003 default:
2004 llvm_unreachable("unsupported fp type");
2005 }
2006}
2007
2008 static const fltSemantics *getFltSemantics(MVT VT) {
2009 return getFltSemantics(VT.getSizeInBits() / 8);
2010}
2011
2012 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
2013 switch (OperandType) {
2014 // When floating-point immediate is used as operand of type i16, the 32-bit
2015 // representation of the constant truncated to the 16 LSBs should be used.
2030 return &APFloat::IEEEsingle();
2037 return &APFloat::IEEEdouble();
2044 return &APFloat::IEEEhalf();
2049 return &APFloat::BFloat();
2050 default:
2051 llvm_unreachable("unsupported fp type");
2052 }
2053}
2054
2055//===----------------------------------------------------------------------===//
2056// Operand
2057//===----------------------------------------------------------------------===//
2058
2059static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2060 bool Lost;
2061
2062 // Convert literal to single precision
2063 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
2064 APFloat::rmNearestTiesToEven,
2065 &Lost);
2066 // We allow precision loss but not overflow or underflow
2067 if (Status != APFloat::opOK &&
2068 Lost &&
2069 ((Status & APFloat::opOverflow) != 0 ||
2070 (Status & APFloat::opUnderflow) != 0)) {
2071 return false;
2072 }
2073
2074 return true;
2075}
2076
2077static bool isSafeTruncation(int64_t Val, unsigned Size) {
2078 return isUIntN(Size, Val) || isIntN(Size, Val);
2079}
2080
2081static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2082 if (VT.getScalarType() == MVT::i16)
2083 return isInlinableLiteral32(Val, HasInv2Pi);
2084
2085 if (VT.getScalarType() == MVT::f16)
2086 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2087
2088 assert(VT.getScalarType() == MVT::bf16);
2089
2090 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2091}
2092
2093bool AMDGPUOperand::isInlinableImm(MVT type) const {
2094
2095 // This is a hack to enable named inline values like
2096 // shared_base with both 32-bit and 64-bit operands.
2097 // Note that these values are defined as
2098 // 32-bit operands only.
2099 if (isInlineValue()) {
2100 return true;
2101 }
2102
2103 if (!isImmTy(ImmTyNone)) {
2104 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2105 return false;
2106 }
2107
2108 if (getModifiers().Lit != LitModifier::None)
2109 return false;
2110
2111 // TODO: We should avoid using host float here. It would be better to
2112 // check the float bit values which is what a few other places do.
2113 // We've had bot failures before due to weird NaN support on mips hosts.
2114
2115 APInt Literal(64, Imm.Val);
2116
2117 if (Imm.IsFPImm) { // We got fp literal token
2118 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2119 return AMDGPU::isInlinableLiteral64(Imm.Val,
2120 AsmParser->hasInv2PiInlineImm());
2121 }
2122
2123 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2124 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2125 return false;
2126
2127 if (type.getScalarSizeInBits() == 16) {
2128 bool Lost = false;
2129 switch (type.getScalarType().SimpleTy) {
2130 default:
2131 llvm_unreachable("unknown 16-bit type");
2132 case MVT::bf16:
2133 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2134 &Lost);
2135 break;
2136 case MVT::f16:
2137 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2138 &Lost);
2139 break;
2140 case MVT::i16:
2141 FPLiteral.convert(APFloatBase::IEEEsingle(),
2142 APFloat::rmNearestTiesToEven, &Lost);
2143 break;
2144 }
2145 // We need to use 32-bit representation here because when a floating-point
2146 // inline constant is used as an i16 operand, its 32-bit representation
2147 // will be used. We need the 32-bit value to check whether it is an FP
2148 // inline constant.
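// Illustrative example: the literal 1.0 used on an i16 operand is checked
// via its f32 bit pattern 0x3f800000 rather than the f16 pattern 0x3c00.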
2149 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2150 return isInlineableLiteralOp16(ImmVal, type,
2151 AsmParser->hasInv2PiInlineImm());
2152 }
2153
2154 // Check if single precision literal is inlinable
2155 return AMDGPU::isInlinableLiteral32(
2156 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2157 AsmParser->hasInv2PiInlineImm());
2158 }
2159
2160 // We got int literal token.
2161 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2162 return AMDGPU::isInlinableLiteral64(Imm.Val,
2163 AsmParser->hasInv2PiInlineImm());
2164 }
2165
2166 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2167 return false;
2168 }
2169
2170 if (type.getScalarSizeInBits() == 16) {
2171 return isInlineableLiteralOp16(
2172 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2173 type, AsmParser->hasInv2PiInlineImm());
2174 }
2175
2176 return AMDGPU::isInlinableLiteral32(
2177 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2178 AsmParser->hasInv2PiInlineImm());
2179}
2180
2181bool AMDGPUOperand::isLiteralImm(MVT type) const {
2182 // Check that this immediate can be added as a literal
2183 if (!isImmTy(ImmTyNone)) {
2184 return false;
2185 }
2186
2187 bool Allow64Bit =
2188 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2189
2190 if (!Imm.IsFPImm) {
2191 // We got int literal token.
2192
2193 if (type == MVT::f64 && hasFPModifiers()) {
2194 // FP modifiers cannot be applied to int literals while preserving the same
2195 // semantics for VOP1/2/C and VOP3, because of integer truncation. To avoid
2196 // ambiguity, these cases are disabled.
2197 return false;
2198 }
2199
2200 unsigned Size = type.getSizeInBits();
2201 if (Size == 64) {
2202 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2203 return true;
2204 Size = 32;
2205 }
2206
2207 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2208 // types.
2209 return isSafeTruncation(Imm.Val, Size);
2210 }
2211
2212 // We got fp literal token
2213 if (type == MVT::f64) { // Expected 64-bit fp operand
2214 // The low 32 bits of the literal would be set to zeroes, but we accept such literals.
2215 return true;
2216 }
2217
2218 if (type == MVT::i64) { // Expected 64-bit int operand
2219 // We don't allow fp literals in 64-bit integer instructions. It is
2220 // unclear how we should encode them.
2221 return false;
2222 }
2223
2224 // We allow fp literals with f16x2 operands assuming that the specified
2225 // literal goes into the lower half and the upper half is zero. We also
2226 // require that the literal may be losslessly converted to f16.
2227 //
2228 // For i16x2 operands, we assume that the specified literal is encoded as a
2229 // single-precision float. This is pretty odd, but it matches SP3 and what
2230 // happens in hardware.
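// Illustrative example: 0.5 is accepted for a v2f16 operand because it
// converts to f16 losslessly, while for a v2i16 operand the same literal is
// checked against its f32 encoding instead.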
2231 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2232 : (type == MVT::v2i16) ? MVT::f32
2233 : (type == MVT::v2f32) ? MVT::f32
2234 : type;
2235
2236 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2237 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2238}
2239
2240bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2241 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2242}
2243
2244bool AMDGPUOperand::isVRegWithInputMods() const {
2245 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2246 // GFX90A allows DPP on 64-bit operands.
2247 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2248 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2249}
2250
2251template <bool IsFake16>
2252bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2253 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2254 : AMDGPU::VGPR_16_Lo128RegClassID);
2255}
2256
2257template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2258 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2259 : AMDGPU::VGPR_16RegClassID);
2260}
2261
2262bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2263 if (AsmParser->isVI())
2264 return isVReg32();
2265 if (AsmParser->isGFX9Plus())
2266 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2267 return false;
2268}
2269
2270bool AMDGPUOperand::isSDWAFP16Operand() const {
2271 return isSDWAOperand(MVT::f16);
2272}
2273
2274bool AMDGPUOperand::isSDWAFP32Operand() const {
2275 return isSDWAOperand(MVT::f32);
2276}
2277
2278bool AMDGPUOperand::isSDWAInt16Operand() const {
2279 return isSDWAOperand(MVT::i16);
2280}
2281
2282bool AMDGPUOperand::isSDWAInt32Operand() const {
2283 return isSDWAOperand(MVT::i32);
2284}
2285
2286bool AMDGPUOperand::isBoolReg() const {
2287 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2288 (AsmParser->isWave32() && isSCSrc_b32()));
2289}
2290
2291uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2292{
2293 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2294 assert(Size == 2 || Size == 4 || Size == 8);
2295
2296 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
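// abs clears the sign bit and neg flips it; e.g. for a 32-bit operand, neg
// turns 1.0 (0x3f800000) into -1.0 (0xbf800000).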
2297
2298 if (Imm.Mods.Abs) {
2299 Val &= ~FpSignMask;
2300 }
2301 if (Imm.Mods.Neg) {
2302 Val ^= FpSignMask;
2303 }
2304
2305 return Val;
2306}
2307
2308void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2309 MCOpIdx = Inst.getNumOperands();
2310
2311 if (isExpr()) {
2312 Inst.addOperand(MCOperand::createExpr(Expr));
2313 return;
2314 }
2315
2316 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2317 Inst.getNumOperands())) {
2318 addLiteralImmOperand(Inst, Imm.Val,
2319 ApplyModifiers &
2320 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2321 } else {
2322 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2323 Inst.addOperand(MCOperand::createImm(Imm.Val));
2324 }
2325}
2326
2327void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2328 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2329 auto OpNum = Inst.getNumOperands();
2330 // Check that this operand accepts literals
2331 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2332
2333 if (ApplyModifiers) {
2334 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2335 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2336 Val = applyInputFPModifiers(Val, Size);
2337 }
2338
2339 APInt Literal(64, Val);
2340 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2341
2342 bool CanUse64BitLiterals =
2343 AsmParser->has64BitLiterals() &&
2344 !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
2345 LitModifier Lit = getModifiers().Lit;
2346 MCContext &Ctx = AsmParser->getContext();
2347
2348 if (Imm.IsFPImm) { // We got fp literal token
2349 switch (OpTy) {
2355 if (Lit == LitModifier::None &&
2356 AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2357 AsmParser->hasInv2PiInlineImm())) {
2358 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2359 return;
2360 }
2361
2362 // Non-inlineable
2363 if (AMDGPU::isSISrcFPOperand(InstDesc,
2364 OpNum)) { // Expected 64-bit fp operand
2365 bool HasMandatoryLiteral =
2366 AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
2367 // For fp operands we check whether the low 32 bits are zero
2368 if (Literal.getLoBits(32) != 0 &&
2369 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2370 !HasMandatoryLiteral) {
2371 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
2372 Inst.getLoc(),
2373 "Can't encode literal as exact 64-bit floating-point operand. "
2374 "Low 32-bits will be set to zero");
2375 Val &= 0xffffffff00000000u;
2376 }
2377
2378 if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 ||
2381 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2382 (isInt<32>(Val) || isUInt<32>(Val))) {
2383 // The floating-point operand will be verbalized as an
2384 // integer one. If that integer happens to fit 32 bits, on
2385 // re-assembling it will be interpreted as the high half of
2386 // the actual value, so we have to wrap it into lit64().
2387 Lit = LitModifier::Lit64;
2388 } else if (Lit == LitModifier::Lit) {
2389 // For FP64 operands lit() specifies the high half of the value.
2390 Val = Hi_32(Val);
2391 }
2392 }
2393 break;
2394 }
2395
2396 // We don't allow fp literals in 64-bit integer instructions. It is
2397 // unclear how we should encode them. This case should be checked earlier
2398 // in predicate methods (isLiteralImm())
2399 llvm_unreachable("fp literal in 64-bit integer instruction.");
2400
2402 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2403 (isInt<32>(Val) || isUInt<32>(Val)))
2404 Lit = LitModifier::Lit64;
2405 break;
2406
2411 if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
2412 Literal == 0x3fc45f306725feed) {
2413 // This is the 1/(2*pi) which is going to be truncated to bf16 with the
2414 // loss of precision. The constant represents the idiomatic fp32 value of
2415 // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
2416 // bits. Prevent rounding below.
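// (Illustration: the f32 encoding of 1/(2*pi) is 0x3e22f983; truncating the
// low 16 bits yields the bf16 pattern 0x3e22, whereas rounding to nearest
// would have produced 0x3e23.)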
2417 Inst.addOperand(MCOperand::createImm(0x3e22));
2418 return;
2419 }
2420 [[fallthrough]];
2421
2442 bool lost;
2443 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2444 // Convert literal to single precision
2445 FPLiteral.convert(*getOpFltSemantics(OpTy),
2446 APFloat::rmNearestTiesToEven, &lost);
2447 // We allow precision loss but not overflow or underflow. This should be
2448 // checked earlier in isLiteralImm()
2449
2450 Val = FPLiteral.bitcastToAPInt().getZExtValue();
2451 break;
2452 }
2453 default:
2454 llvm_unreachable("invalid operand size");
2455 }
2456
2457 if (Lit != LitModifier::None) {
2458 Inst.addOperand(
2460 } else {
2462 }
2463 return;
2464 }
2465
2466 // We got int literal token.
2467 // Only sign extend inline immediates.
2468 switch (OpTy) {
2482 break;
2483
2486 if (Lit == LitModifier::None &&
2487 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2488 Inst.addOperand(MCOperand::createImm(Val));
2489 return;
2490 }
2491
2492 // When the 32 MSBs are not zero (which effectively means the value can't be
2493 // safely truncated to uint32_t), and either the target doesn't support
2494 // 64-bit literals or the lit modifier is explicitly used, we need to
2495 // truncate the value to the 32 LSBs.
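// E.g. a value of 0x100000001 reaching this point with the lit modifier is
// truncated to 0x00000001.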
2496 if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit)
2497 Val = Lo_32(Val);
2498 break;
2499
2503 if (Lit == LitModifier::None &&
2504 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2505 Inst.addOperand(MCOperand::createImm(Val));
2506 return;
2507 }
2508
2509 // If the target doesn't support 64-bit literals, we need to use the
2510 // constant as the high 32 MSBs of a double-precision floating point value.
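// For example, the integer literal 0x3ff00000 placed in the high half yields
// the f64 bit pattern 0x3ff0000000000000, i.e. 1.0.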
2511 if (!AsmParser->has64BitLiterals()) {
2512 Val = static_cast<uint64_t>(Val) << 32;
2513 } else {
2514 // Now that the target supports 64-bit literals, there are two cases
2515 // where we still want to use src_literal encoding:
2516 // 1) explicitly forced by using lit modifier;
2517 // 2) the value is a valid 32-bit representation (signed or unsigned),
2518 // and is not forced by the lit64 modifier.
2519 if (Lit == LitModifier::Lit ||
2520 (Lit != LitModifier::Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
2521 Val = static_cast<uint64_t>(Val) << 32;
2522 }
2523
2524 // For FP64 operands lit() specifies the high half of the value.
2525 if (Lit == LitModifier::Lit)
2526 Val = Hi_32(Val);
2527 break;
2528
2540 break;
2541
2543 if ((isInt<32>(Val) || isUInt<32>(Val)) && Lit != LitModifier::Lit64)
2544 Val <<= 32;
2545 break;
2546
2547 default:
2548 llvm_unreachable("invalid operand type");
2549 }
2550
2551 if (Lit != LitModifier::None) {
2552 Inst.addOperand(
2554 } else {
2556 }
2557}
2558
2559void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2560 MCOpIdx = Inst.getNumOperands();
2561 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2562}
2563
2564bool AMDGPUOperand::isInlineValue() const {
2565 return isRegKind() && ::isInlineValue(getReg());
2566}
2567
2568//===----------------------------------------------------------------------===//
2569// AsmParser
2570//===----------------------------------------------------------------------===//
2571
2572void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2573 // TODO: make those pre-defined variables read-only.
2574 // Currently there is no suitable machinery in the core llvm-mc for this.
2575 // MCSymbol::isRedefinable is intended for another purpose, and
2576 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
2577 MCContext &Ctx = getContext();
2578 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2580}
2581
2582static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2583 if (Is == IS_VGPR) {
2584 switch (RegWidth) {
2585 default: return -1;
2586 case 32:
2587 return AMDGPU::VGPR_32RegClassID;
2588 case 64:
2589 return AMDGPU::VReg_64RegClassID;
2590 case 96:
2591 return AMDGPU::VReg_96RegClassID;
2592 case 128:
2593 return AMDGPU::VReg_128RegClassID;
2594 case 160:
2595 return AMDGPU::VReg_160RegClassID;
2596 case 192:
2597 return AMDGPU::VReg_192RegClassID;
2598 case 224:
2599 return AMDGPU::VReg_224RegClassID;
2600 case 256:
2601 return AMDGPU::VReg_256RegClassID;
2602 case 288:
2603 return AMDGPU::VReg_288RegClassID;
2604 case 320:
2605 return AMDGPU::VReg_320RegClassID;
2606 case 352:
2607 return AMDGPU::VReg_352RegClassID;
2608 case 384:
2609 return AMDGPU::VReg_384RegClassID;
2610 case 512:
2611 return AMDGPU::VReg_512RegClassID;
2612 case 1024:
2613 return AMDGPU::VReg_1024RegClassID;
2614 }
2615 } else if (Is == IS_TTMP) {
2616 switch (RegWidth) {
2617 default: return -1;
2618 case 32:
2619 return AMDGPU::TTMP_32RegClassID;
2620 case 64:
2621 return AMDGPU::TTMP_64RegClassID;
2622 case 128:
2623 return AMDGPU::TTMP_128RegClassID;
2624 case 256:
2625 return AMDGPU::TTMP_256RegClassID;
2626 case 512:
2627 return AMDGPU::TTMP_512RegClassID;
2628 }
2629 } else if (Is == IS_SGPR) {
2630 switch (RegWidth) {
2631 default: return -1;
2632 case 32:
2633 return AMDGPU::SGPR_32RegClassID;
2634 case 64:
2635 return AMDGPU::SGPR_64RegClassID;
2636 case 96:
2637 return AMDGPU::SGPR_96RegClassID;
2638 case 128:
2639 return AMDGPU::SGPR_128RegClassID;
2640 case 160:
2641 return AMDGPU::SGPR_160RegClassID;
2642 case 192:
2643 return AMDGPU::SGPR_192RegClassID;
2644 case 224:
2645 return AMDGPU::SGPR_224RegClassID;
2646 case 256:
2647 return AMDGPU::SGPR_256RegClassID;
2648 case 288:
2649 return AMDGPU::SGPR_288RegClassID;
2650 case 320:
2651 return AMDGPU::SGPR_320RegClassID;
2652 case 352:
2653 return AMDGPU::SGPR_352RegClassID;
2654 case 384:
2655 return AMDGPU::SGPR_384RegClassID;
2656 case 512:
2657 return AMDGPU::SGPR_512RegClassID;
2658 }
2659 } else if (Is == IS_AGPR) {
2660 switch (RegWidth) {
2661 default: return -1;
2662 case 32:
2663 return AMDGPU::AGPR_32RegClassID;
2664 case 64:
2665 return AMDGPU::AReg_64RegClassID;
2666 case 96:
2667 return AMDGPU::AReg_96RegClassID;
2668 case 128:
2669 return AMDGPU::AReg_128RegClassID;
2670 case 160:
2671 return AMDGPU::AReg_160RegClassID;
2672 case 192:
2673 return AMDGPU::AReg_192RegClassID;
2674 case 224:
2675 return AMDGPU::AReg_224RegClassID;
2676 case 256:
2677 return AMDGPU::AReg_256RegClassID;
2678 case 288:
2679 return AMDGPU::AReg_288RegClassID;
2680 case 320:
2681 return AMDGPU::AReg_320RegClassID;
2682 case 352:
2683 return AMDGPU::AReg_352RegClassID;
2684 case 384:
2685 return AMDGPU::AReg_384RegClassID;
2686 case 512:
2687 return AMDGPU::AReg_512RegClassID;
2688 case 1024:
2689 return AMDGPU::AReg_1024RegClassID;
2690 }
2691 }
2692 return -1;
2693}
2694
2697 .Case("exec", AMDGPU::EXEC)
2698 .Case("vcc", AMDGPU::VCC)
2699 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2700 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2701 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2702 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2703 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2704 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2705 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2706 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2707 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2708 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2709 .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2710 .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2711 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2712 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2713 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2714 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2715 .Case("m0", AMDGPU::M0)
2716 .Case("vccz", AMDGPU::SRC_VCCZ)
2717 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2718 .Case("execz", AMDGPU::SRC_EXECZ)
2719 .Case("src_execz", AMDGPU::SRC_EXECZ)
2720 .Case("scc", AMDGPU::SRC_SCC)
2721 .Case("src_scc", AMDGPU::SRC_SCC)
2722 .Case("tba", AMDGPU::TBA)
2723 .Case("tma", AMDGPU::TMA)
2724 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2725 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2726 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2727 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2728 .Case("vcc_lo", AMDGPU::VCC_LO)
2729 .Case("vcc_hi", AMDGPU::VCC_HI)
2730 .Case("exec_lo", AMDGPU::EXEC_LO)
2731 .Case("exec_hi", AMDGPU::EXEC_HI)
2732 .Case("tma_lo", AMDGPU::TMA_LO)
2733 .Case("tma_hi", AMDGPU::TMA_HI)
2734 .Case("tba_lo", AMDGPU::TBA_LO)
2735 .Case("tba_hi", AMDGPU::TBA_HI)
2736 .Case("pc", AMDGPU::PC_REG)
2737 .Case("null", AMDGPU::SGPR_NULL)
2738 .Default(AMDGPU::NoRegister);
2739}
2740
2741bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2742 SMLoc &EndLoc, bool RestoreOnFailure) {
2743 auto R = parseRegister();
2744 if (!R) return true;
2745 assert(R->isReg());
2746 RegNo = R->getReg();
2747 StartLoc = R->getStartLoc();
2748 EndLoc = R->getEndLoc();
2749 return false;
2750}
2751
2752bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2753 SMLoc &EndLoc) {
2754 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2755}
2756
2757ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2758 SMLoc &EndLoc) {
2759 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2760 bool PendingErrors = getParser().hasPendingError();
2761 getParser().clearPendingErrors();
2762 if (PendingErrors)
2763 return ParseStatus::Failure;
2764 if (Result)
2765 return ParseStatus::NoMatch;
2766 return ParseStatus::Success;
2767}
2768
2769bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2770 RegisterKind RegKind,
2771 MCRegister Reg1, SMLoc Loc) {
2772 switch (RegKind) {
2773 case IS_SPECIAL:
2774 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2775 Reg = AMDGPU::EXEC;
2776 RegWidth = 64;
2777 return true;
2778 }
2779 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2780 Reg = AMDGPU::FLAT_SCR;
2781 RegWidth = 64;
2782 return true;
2783 }
2784 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2785 Reg = AMDGPU::XNACK_MASK;
2786 RegWidth = 64;
2787 return true;
2788 }
2789 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2790 Reg = AMDGPU::VCC;
2791 RegWidth = 64;
2792 return true;
2793 }
2794 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2795 Reg = AMDGPU::TBA;
2796 RegWidth = 64;
2797 return true;
2798 }
2799 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2800 Reg = AMDGPU::TMA;
2801 RegWidth = 64;
2802 return true;
2803 }
2804 Error(Loc, "register does not fit in the list");
2805 return false;
2806 case IS_VGPR:
2807 case IS_SGPR:
2808 case IS_AGPR:
2809 case IS_TTMP:
2810 if (Reg1 != Reg + RegWidth / 32) {
2811 Error(Loc, "registers in a list must have consecutive indices");
2812 return false;
2813 }
2814 RegWidth += 32;
2815 return true;
2816 default:
2817 llvm_unreachable("unexpected register kind");
2818 }
2819}
2820
2821struct RegInfo {
2822 StringLiteral Name;
2823 RegisterKind Kind;
2824};
2825
2826static constexpr RegInfo RegularRegisters[] = {
2827 {{"v"}, IS_VGPR},
2828 {{"s"}, IS_SGPR},
2829 {{"ttmp"}, IS_TTMP},
2830 {{"acc"}, IS_AGPR},
2831 {{"a"}, IS_AGPR},
2832};
2833
2834static bool isRegularReg(RegisterKind Kind) {
2835 return Kind == IS_VGPR ||
2836 Kind == IS_SGPR ||
2837 Kind == IS_TTMP ||
2838 Kind == IS_AGPR;
2839}
2840
2841 static const RegInfo *getRegularRegInfo(StringRef Str) {
2842 for (const RegInfo &Reg : RegularRegisters)
2843 if (Str.starts_with(Reg.Name))
2844 return &Reg;
2845 return nullptr;
2846}
2847
2848static bool getRegNum(StringRef Str, unsigned& Num) {
2849 return !Str.getAsInteger(10, Num);
2850}
2851
2852bool
2853AMDGPUAsmParser::isRegister(const AsmToken &Token,
2854 const AsmToken &NextToken) const {
2855
2856 // A list of consecutive registers: [s0,s1,s2,s3]
2857 if (Token.is(AsmToken::LBrac))
2858 return true;
2859
2860 if (!Token.is(AsmToken::Identifier))
2861 return false;
2862
2863 // A single register like s0 or a range of registers like s[0:1]
2864
2865 StringRef Str = Token.getString();
2866 const RegInfo *Reg = getRegularRegInfo(Str);
2867 if (Reg) {
2868 StringRef RegName = Reg->Name;
2869 StringRef RegSuffix = Str.substr(RegName.size());
2870 if (!RegSuffix.empty()) {
2871 RegSuffix.consume_back(".l");
2872 RegSuffix.consume_back(".h");
2873 unsigned Num;
2874 // A single register with an index: rXX
2875 if (getRegNum(RegSuffix, Num))
2876 return true;
2877 } else {
2878 // A range of registers: r[XX:YY].
2879 if (NextToken.is(AsmToken::LBrac))
2880 return true;
2881 }
2882 }
2883
2884 return getSpecialRegForName(Str).isValid();
2885}
2886
2887bool
2888AMDGPUAsmParser::isRegister()
2889{
2890 return isRegister(getToken(), peekToken());
2891}
2892
2893MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2894 unsigned SubReg, unsigned RegWidth,
2895 SMLoc Loc) {
2896 assert(isRegularReg(RegKind));
2897
2898 unsigned AlignSize = 1;
2899 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2900 // SGPR and TTMP registers must be aligned.
2901 // Max required alignment is 4 dwords.
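// For example, a 64-bit pair such as s[2:3] is accepted, while s[1:2] is
// rejected below as misaligned.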
2902 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2903 }
2904
2905 if (RegNum % AlignSize != 0) {
2906 Error(Loc, "invalid register alignment");
2907 return MCRegister();
2908 }
2909
2910 unsigned RegIdx = RegNum / AlignSize;
2911 int RCID = getRegClass(RegKind, RegWidth);
2912 if (RCID == -1) {
2913 Error(Loc, "invalid or unsupported register size");
2914 return MCRegister();
2915 }
2916
2917 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2918 const MCRegisterClass RC = TRI->getRegClass(RCID);
2919 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
2920 Error(Loc, "register index is out of range");
2921 return AMDGPU::NoRegister;
2922 }
2923
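// Note: on targets other than gfx1250 a VGPR range must not extend past v255,
// as the check below enforces.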
2924 if (RegKind == IS_VGPR && !isGFX1250() && RegIdx + RegWidth / 32 > 256) {
2925 Error(Loc, "register index is out of range");
2926 return MCRegister();
2927 }
2928
2929 MCRegister Reg = RC.getRegister(RegIdx);
2930
2931 if (SubReg) {
2932 Reg = TRI->getSubReg(Reg, SubReg);
2933
2934 // Currently all regular registers have their .l and .h subregisters, so
2935 // we should never need to generate an error here.
2936 assert(Reg && "Invalid subregister!");
2937 }
2938
2939 return Reg;
2940}
2941
2942bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
2943 unsigned &SubReg) {
2944 int64_t RegLo, RegHi;
2945 if (!skipToken(AsmToken::LBrac, "missing register index"))
2946 return false;
2947
2948 SMLoc FirstIdxLoc = getLoc();
2949 SMLoc SecondIdxLoc;
2950
2951 if (!parseExpr(RegLo))
2952 return false;
2953
2954 if (trySkipToken(AsmToken::Colon)) {
2955 SecondIdxLoc = getLoc();
2956 if (!parseExpr(RegHi))
2957 return false;
2958 } else {
2959 RegHi = RegLo;
2960 }
2961
2962 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2963 return false;
2964
2965 if (!isUInt<32>(RegLo)) {
2966 Error(FirstIdxLoc, "invalid register index");
2967 return false;
2968 }
2969
2970 if (!isUInt<32>(RegHi)) {
2971 Error(SecondIdxLoc, "invalid register index");
2972 return false;
2973 }
2974
2975 if (RegLo > RegHi) {
2976 Error(FirstIdxLoc, "first register index should not exceed second index");
2977 return false;
2978 }
2979
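// A 16-bit half may also be selected on a single-register range, e.g.
// (illustration) v[1].l or v[1:1].h.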
2980 if (RegHi == RegLo) {
2981 StringRef RegSuffix = getTokenStr();
2982 if (RegSuffix == ".l") {
2983 SubReg = AMDGPU::lo16;
2984 lex();
2985 } else if (RegSuffix == ".h") {
2986 SubReg = AMDGPU::hi16;
2987 lex();
2988 }
2989 }
2990
2991 Num = static_cast<unsigned>(RegLo);
2992 RegWidth = 32 * ((RegHi - RegLo) + 1);
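// E.g. s[4:7] yields Num = 4 and RegWidth = 128.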
2993
2994 return true;
2995}
2996
2997MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2998 unsigned &RegNum,
2999 unsigned &RegWidth,
3000 SmallVectorImpl<AsmToken> &Tokens) {
3001 assert(isToken(AsmToken::Identifier));
3002 MCRegister Reg = getSpecialRegForName(getTokenStr());
3003 if (Reg) {
3004 RegNum = 0;
3005 RegWidth = 32;
3006 RegKind = IS_SPECIAL;
3007 Tokens.push_back(getToken());
3008 lex(); // skip register name
3009 }
3010 return Reg;
3011}
3012
3013MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3014 unsigned &RegNum,
3015 unsigned &RegWidth,
3016 SmallVectorImpl<AsmToken> &Tokens) {
3017 assert(isToken(AsmToken::Identifier));
3018 StringRef RegName = getTokenStr();
3019 auto Loc = getLoc();
3020
3021 const RegInfo *RI = getRegularRegInfo(RegName);
3022 if (!RI) {
3023 Error(Loc, "invalid register name");
3024 return MCRegister();
3025 }
3026
3027 Tokens.push_back(getToken());
3028 lex(); // skip register name
3029
3030 RegKind = RI->Kind;
3031 StringRef RegSuffix = RegName.substr(RI->Name.size());
3032 unsigned SubReg = NoSubRegister;
3033 if (!RegSuffix.empty()) {
3034 if (RegSuffix.consume_back(".l"))
3035 SubReg = AMDGPU::lo16;
3036 else if (RegSuffix.consume_back(".h"))
3037 SubReg = AMDGPU::hi16;
3038
3039 // Single 32-bit register: vXX.
3040 if (!getRegNum(RegSuffix, RegNum)) {
3041 Error(Loc, "invalid register index");
3042 return MCRegister();
3043 }
3044 RegWidth = 32;
3045 } else {
3046 // Range of registers: v[XX:YY]. ":YY" is optional.
3047 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3048 return MCRegister();
3049 }
3050
3051 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3052}
3053
3054MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3055 unsigned &RegNum, unsigned &RegWidth,
3056 SmallVectorImpl<AsmToken> &Tokens) {
3057 MCRegister Reg;
3058 auto ListLoc = getLoc();
3059
3060 if (!skipToken(AsmToken::LBrac,
3061 "expected a register or a list of registers")) {
3062 return MCRegister();
3063 }
3064
3065 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3066
3067 auto Loc = getLoc();
3068 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3069 return MCRegister();
3070 if (RegWidth != 32) {
3071 Error(Loc, "expected a single 32-bit register");
3072 return MCRegister();
3073 }
3074
3075 for (; trySkipToken(AsmToken::Comma); ) {
3076 RegisterKind NextRegKind;
3077 MCRegister NextReg;
3078 unsigned NextRegNum, NextRegWidth;
3079 Loc = getLoc();
3080
3081 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3082 NextRegNum, NextRegWidth,
3083 Tokens)) {
3084 return MCRegister();
3085 }
3086 if (NextRegWidth != 32) {
3087 Error(Loc, "expected a single 32-bit register");
3088 return MCRegister();
3089 }
3090 if (NextRegKind != RegKind) {
3091 Error(Loc, "registers in a list must be of the same kind");
3092 return MCRegister();
3093 }
3094 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3095 return MCRegister();
3096 }
3097
3098 if (!skipToken(AsmToken::RBrac,
3099 "expected a comma or a closing square bracket")) {
3100 return MCRegister();
3101 }
3102
3103 if (isRegularReg(RegKind))
3104 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3105
3106 return Reg;
3107}
3108
3109bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3110 MCRegister &Reg, unsigned &RegNum,
3111 unsigned &RegWidth,
3112 SmallVectorImpl<AsmToken> &Tokens) {
3113 auto Loc = getLoc();
3114 Reg = MCRegister();
3115
3116 if (isToken(AsmToken::Identifier)) {
3117 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3118 if (!Reg)
3119 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3120 } else {
3121 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3122 }
3123
3124 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3125 if (!Reg) {
3126 assert(Parser.hasPendingError());
3127 return false;
3128 }
3129
3130 if (!subtargetHasRegister(*TRI, Reg)) {
3131 if (Reg == AMDGPU::SGPR_NULL) {
3132 Error(Loc, "'null' operand is not supported on this GPU");
3133 } else {
3135 " register not available on this GPU");
3136 }
3137 return false;
3138 }
3139
3140 return true;
3141}
3142
3143bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3144 MCRegister &Reg, unsigned &RegNum,
3145 unsigned &RegWidth,
3146 bool RestoreOnFailure /*=false*/) {
3147 Reg = MCRegister();
3148
3149 SmallVector<AsmToken, 1> Tokens;
3150 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3151 if (RestoreOnFailure) {
3152 while (!Tokens.empty()) {
3153 getLexer().UnLex(Tokens.pop_back_val());
3154 }
3155 }
3156 return true;
3157 }
3158 return false;
3159}
3160
3161std::optional<StringRef>
3162AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3163 switch (RegKind) {
3164 case IS_VGPR:
3165 return StringRef(".amdgcn.next_free_vgpr");
3166 case IS_SGPR:
3167 return StringRef(".amdgcn.next_free_sgpr");
3168 default:
3169 return std::nullopt;
3170 }
3171}
3172
3173void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3174 auto SymbolName = getGprCountSymbolName(RegKind);
3175 assert(SymbolName && "initializing invalid register kind");
3176 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3178 Sym->setRedefinable(true);
3179}
3180
3181bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3182 unsigned DwordRegIndex,
3183 unsigned RegWidth) {
3184 // Symbols are only defined for GCN targets
3185 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3186 return true;
3187
3188 auto SymbolName = getGprCountSymbolName(RegKind);
3189 if (!SymbolName)
3190 return true;
3191 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3192
3193 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
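// E.g. a use of v[8:11] gives DwordRegIndex 8 and RegWidth 128, so NewMax = 11.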
3194 int64_t OldCount;
3195
3196 if (!Sym->isVariable())
3197 return !Error(getLoc(),
3198 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3199 if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
3200 return !Error(
3201 getLoc(),
3202 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3203
3204 if (OldCount <= NewMax)
3206
3207 return true;
3208}
3209
3210std::unique_ptr<AMDGPUOperand>
3211AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3212 const auto &Tok = getToken();
3213 SMLoc StartLoc = Tok.getLoc();
3214 SMLoc EndLoc = Tok.getEndLoc();
3215 RegisterKind RegKind;
3216 MCRegister Reg;
3217 unsigned RegNum, RegWidth;
3218
3219 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3220 return nullptr;
3221 }
3222 if (isHsaAbi(getSTI())) {
3223 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3224 return nullptr;
3225 } else
3226 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3227 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3228}
3229
3230ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3231 bool HasSP3AbsModifier, LitModifier Lit) {
3232 // TODO: add syntactic sugar for 1/(2*PI)
3233
3234 if (isRegister() || isModifier())
3235 return ParseStatus::NoMatch;
3236
3237 if (Lit == LitModifier::None) {
3238 if (trySkipId("lit"))
3239 Lit = LitModifier::Lit;
3240 else if (trySkipId("lit64"))
3241 Lit = LitModifier::Lit64;
3242
3243 if (Lit != LitModifier::None) {
3244 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3245 return ParseStatus::Failure;
3246 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
3247 if (S.isSuccess() &&
3248 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3249 return ParseStatus::Failure;
3250 return S;
3251 }
3252 }
3253
3254 const auto& Tok = getToken();
3255 const auto& NextTok = peekToken();
3256 bool IsReal = Tok.is(AsmToken::Real);
3257 SMLoc S = getLoc();
3258 bool Negate = false;
3259
3260 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3261 lex();
3262 IsReal = true;
3263 Negate = true;
3264 }
3265
3266 AMDGPUOperand::Modifiers Mods;
3267 Mods.Lit = Lit;
3268
3269 if (IsReal) {
3270 // Floating-point expressions are not supported.
3271 // Only floating-point literals with an optional
3272 // sign are allowed here.
3273
3274 StringRef Num = getTokenStr();
3275 lex();
3276
3277 APFloat RealVal(APFloat::IEEEdouble());
3278 auto roundMode = APFloat::rmNearestTiesToEven;
3279 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3280 return ParseStatus::Failure;
3281 if (Negate)
3282 RealVal.changeSign();
3283
3284 Operands.push_back(
3285 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3286 AMDGPUOperand::ImmTyNone, true));
3287 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3288 Op.setModifiers(Mods);
3289
3290 return ParseStatus::Success;
3291
3292 } else {
3293 int64_t IntVal;
3294 const MCExpr *Expr;
3295 SMLoc S = getLoc();
3296
3297 if (HasSP3AbsModifier) {
3298 // This is a workaround for handling expressions
3299 // as arguments of SP3 'abs' modifier, for example:
3300 // |1.0|
3301 // |-1|
3302 // |1+x|
3303 // This syntax is not compatible with the syntax of standard
3304 // MC expressions (due to the trailing '|').
3305 SMLoc EndLoc;
3306 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3307 return ParseStatus::Failure;
3308 } else {
3309 if (Parser.parseExpression(Expr))
3310 return ParseStatus::Failure;
3311 }
3312
3313 if (Expr->evaluateAsAbsolute(IntVal)) {
3314 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3315 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3316 Op.setModifiers(Mods);
3317 } else {
3318 if (Lit != LitModifier::None)
3319 return ParseStatus::NoMatch;
3320 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3321 }
3322
3323 return ParseStatus::Success;
3324 }
3325
3326 return ParseStatus::NoMatch;
3327}
3328
3329ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3330 if (!isRegister())
3331 return ParseStatus::NoMatch;
3332
3333 if (auto R = parseRegister()) {
3334 assert(R->isReg());
3335 Operands.push_back(std::move(R));
3336 return ParseStatus::Success;
3337 }
3338 return ParseStatus::Failure;
3339}
3340
3341ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3342 bool HasSP3AbsMod, LitModifier Lit) {
3343 ParseStatus Res = parseReg(Operands);
3344 if (!Res.isNoMatch())
3345 return Res;
3346 if (isModifier())
3347 return ParseStatus::NoMatch;
3348 return parseImm(Operands, HasSP3AbsMod, Lit);
3349}
3350
3351bool
3352AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3353 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3354 const auto &str = Token.getString();
3355 return str == "abs" || str == "neg" || str == "sext";
3356 }
3357 return false;
3358}
3359
3360bool
3361AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3362 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3363}
3364
3365bool
3366AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3367 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3368}
3369
3370bool
3371AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3372 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3373}
3374
3375// Check if this is an operand modifier or an opcode modifier
3376 // which may look like an expression but is not. We should
3377// avoid parsing these modifiers as expressions. Currently
3378// recognized sequences are:
3379// |...|
3380// abs(...)
3381// neg(...)
3382// sext(...)
3383// -reg
3384// -|...|
3385// -abs(...)
3386// name:...
3387//
3388bool
3389AMDGPUAsmParser::isModifier() {
3390
3391 AsmToken Tok = getToken();
3392 AsmToken NextToken[2];
3393 peekTokens(NextToken);
3394
3395 return isOperandModifier(Tok, NextToken[0]) ||
3396 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3397 isOpcodeModifierWithVal(Tok, NextToken[0]);
3398}
3399
3400// Check if the current token is an SP3 'neg' modifier.
3401// Currently this modifier is allowed in the following context:
3402//
3403// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3404// 2. Before an 'abs' modifier: -abs(...)
3405// 3. Before an SP3 'abs' modifier: -|...|
3406//
3407// In all other cases "-" is handled as a part
3408// of an expression that follows the sign.
3409//
3410// Note: When "-" is followed by an integer literal,
3411// this is interpreted as integer negation rather
3412 // than a floating-point NEG modifier applied to the literal.
3413 // Besides being counter-intuitive, such use of a floating-point
3414 // NEG modifier would have resulted in a different meaning
3415// of integer literals used with VOP1/2/C and VOP3,
3416// for example:
3417// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3418// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3419// Negative fp literals with preceding "-" are
3420// handled likewise for uniformity
3421//
3422bool
3423AMDGPUAsmParser::parseSP3NegModifier() {
3424
3425 AsmToken NextToken[2];
3426 peekTokens(NextToken);
3427
3428 if (isToken(AsmToken::Minus) &&
3429 (isRegister(NextToken[0], NextToken[1]) ||
3430 NextToken[0].is(AsmToken::Pipe) ||
3431 isId(NextToken[0], "abs"))) {
3432 lex();
3433 return true;
3434 }
3435
3436 return false;
3437}
3438
3439ParseStatus
3440AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3441 bool AllowImm) {
3442 bool Neg, SP3Neg;
3443 bool Abs, SP3Abs;
3444 SMLoc Loc;
3445
3446 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3447 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3448 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3449
3450 SP3Neg = parseSP3NegModifier();
3451
3452 Loc = getLoc();
3453 Neg = trySkipId("neg");
3454 if (Neg && SP3Neg)
3455 return Error(Loc, "expected register or immediate");
3456 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3457 return ParseStatus::Failure;
3458
3459 Abs = trySkipId("abs");
3460 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3461 return ParseStatus::Failure;
3462
3463 LitModifier Lit = LitModifier::None;
3464 if (trySkipId("lit")) {
3465 Lit = LitModifier::Lit;
3466 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3467 return ParseStatus::Failure;
3468 } else if (trySkipId("lit64")) {
3469 Lit = LitModifier::Lit64;
3470 if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
3471 return ParseStatus::Failure;
3472 if (!has64BitLiterals())
3473 return Error(Loc, "lit64 is not supported on this GPU");
3474 }
3475
3476 Loc = getLoc();
3477 SP3Abs = trySkipToken(AsmToken::Pipe);
3478 if (Abs && SP3Abs)
3479 return Error(Loc, "expected register or immediate");
3480
3481 ParseStatus Res;
3482 if (AllowImm) {
3483 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3484 } else {
3485 Res = parseReg(Operands);
3486 }
3487 if (!Res.isSuccess())
3488 return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
3489 ? ParseStatus::Failure
3490 : Res;
3491
3492 if (Lit != LitModifier::None && !Operands.back()->isImm())
3493 Error(Loc, "expected immediate with lit modifier");
3494
3495 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3496 return ParseStatus::Failure;
3497 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3498 return ParseStatus::Failure;
3499 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3500 return ParseStatus::Failure;
3501 if (Lit != LitModifier::None &&
3502 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3503 return ParseStatus::Failure;
3504
3505 AMDGPUOperand::Modifiers Mods;
3506 Mods.Abs = Abs || SP3Abs;
3507 Mods.Neg = Neg || SP3Neg;
3508 Mods.Lit = Lit;
3509
3510 if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
3511 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3512 if (Op.isExpr())
3513 return Error(Op.getStartLoc(), "expected an absolute expression");
3514 Op.setModifiers(Mods);
3515 }
3516 return ParseStatus::Success;
3517}
3518
3519ParseStatus
3520AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3521 bool AllowImm) {
3522 bool Sext = trySkipId("sext");
3523 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3524 return ParseStatus::Failure;
3525
3526 ParseStatus Res;
3527 if (AllowImm) {
3528 Res = parseRegOrImm(Operands);
3529 } else {
3530 Res = parseReg(Operands);
3531 }
3532 if (!Res.isSuccess())
3533 return Sext ? ParseStatus::Failure : Res;
3534
3535 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3536 return ParseStatus::Failure;
3537
3538 AMDGPUOperand::Modifiers Mods;
3539 Mods.Sext = Sext;
3540
3541 if (Mods.hasIntModifiers()) {
3542 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3543 if (Op.isExpr())
3544 return Error(Op.getStartLoc(), "expected an absolute expression");
3545 Op.setModifiers(Mods);
3546 }
3547
3548 return ParseStatus::Success;
3549}
3550
3551ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3552 return parseRegOrImmWithFPInputMods(Operands, false);
3553}
3554
3555ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3556 return parseRegOrImmWithIntInputMods(Operands, false);
3557}
3558
3559ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3560 auto Loc = getLoc();
3561 if (trySkipId("off")) {
3562 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3563 AMDGPUOperand::ImmTyOff, false));
3564 return ParseStatus::Success;
3565 }
3566
3567 if (!isRegister())
3568 return ParseStatus::NoMatch;
3569
3570 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3571 if (Reg) {
3572 Operands.push_back(std::move(Reg));
3573 return ParseStatus::Success;
3574 }
3575
3576 return ParseStatus::Failure;
3577}
3578
3579unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3580 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3581
3582 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3583 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3584 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3585 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3586 return Match_InvalidOperand;
3587
3588 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3589 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3590 // v_mac_f32/16 allow only dst_sel == DWORD;
3591 auto OpNum =
3592 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3593 const auto &Op = Inst.getOperand(OpNum);
3594 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3595 return Match_InvalidOperand;
3596 }
3597 }
3598
3599 // Asm can first try to match VOPD or VOPD3. By failing early here with
3600 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3601 // Checking later during validateInstruction does not give a chance to retry
3602 // parsing as a different encoding.
3603 if (tryAnotherVOPDEncoding(Inst))
3604 return Match_InvalidOperand;
3605
3606 return Match_Success;
3607}
3608
3618
3619// What asm variants we should check
3620ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3621 if (isForcedDPP() && isForcedVOP3()) {
3622 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3623 return ArrayRef(Variants);
3624 }
3625 if (getForcedEncodingSize() == 32) {
3626 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3627 return ArrayRef(Variants);
3628 }
3629
3630 if (isForcedVOP3()) {
3631 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3632 return ArrayRef(Variants);
3633 }
3634
3635 if (isForcedSDWA()) {
3636 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
 3637 AMDGPUAsmVariants::SDWA9};
 3638 return ArrayRef(Variants);
3639 }
3640
3641 if (isForcedDPP()) {
3642 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3643 return ArrayRef(Variants);
3644 }
3645
3646 return getAllVariants();
3647}
3648
3649StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3650 if (isForcedDPP() && isForcedVOP3())
3651 return "e64_dpp";
3652
3653 if (getForcedEncodingSize() == 32)
3654 return "e32";
3655
3656 if (isForcedVOP3())
3657 return "e64";
3658
3659 if (isForcedSDWA())
3660 return "sdwa";
3661
3662 if (isForcedDPP())
3663 return "dpp";
3664
3665 return "";
3666}
3667
3668MCRegister
3669AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3670 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3671 for (MCPhysReg Reg : Desc.implicit_uses()) {
3672 switch (Reg) {
3673 case AMDGPU::FLAT_SCR:
3674 case AMDGPU::VCC:
3675 case AMDGPU::VCC_LO:
3676 case AMDGPU::VCC_HI:
3677 case AMDGPU::M0:
3678 return Reg;
3679 default:
3680 break;
3681 }
3682 }
3683 return MCRegister();
3684}
3685
 3686// NB: This code is correct only when used to check constant
 3687// bus limitations, because GFX7 supports no f16 inline constants.
 3688// Note that there are no cases where a GFX7 opcode violates
 3689// constant bus limitations due to the use of an f16 constant.
3690bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3691 unsigned OpIdx) const {
3692 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3693
3696 return false;
3697 }
3698
3699 const MCOperand &MO = Inst.getOperand(OpIdx);
3700
3701 int64_t Val = MO.isImm() ? MO.getImm() : getLitValue(MO.getExpr());
3702 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3703
3704 switch (OpSize) { // expected operand size
3705 case 8:
3706 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3707 case 4:
3708 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3709 case 2: {
3710 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3713 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3714
3718
3722
3726
3729 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3730
3733 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3734
3736 return false;
3737
3738 llvm_unreachable("invalid operand type");
3739 }
3740 default:
3741 llvm_unreachable("invalid operand size");
3742 }
3743}
3744
3745unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3746 if (!isGFX10Plus())
3747 return 1;
3748
3749 switch (Opcode) {
3750 // 64-bit shift instructions can use only one scalar value input
3751 case AMDGPU::V_LSHLREV_B64_e64:
3752 case AMDGPU::V_LSHLREV_B64_gfx10:
3753 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3754 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3755 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3756 case AMDGPU::V_LSHRREV_B64_e64:
3757 case AMDGPU::V_LSHRREV_B64_gfx10:
3758 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3759 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3760 case AMDGPU::V_ASHRREV_I64_e64:
3761 case AMDGPU::V_ASHRREV_I64_gfx10:
3762 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3763 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3764 case AMDGPU::V_LSHL_B64_e64:
3765 case AMDGPU::V_LSHR_B64_e64:
3766 case AMDGPU::V_ASHR_I64_e64:
3767 return 1;
3768 default:
3769 return 2;
3770 }
3771}
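// For example, "v_add_f32 v0, s0, s1" reads two different SGPRs and is
// therefore rejected on targets with a constant bus limit of 1 (pre-GFX10),
// but accepted on GFX10+, where most VALU instructions may read two scalar
// values (the 64-bit shifts listed above remain limited to one).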
3772
3773constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
 3774using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
 3775
3776// Get regular operand indices in the same order as specified
3777// in the instruction (but append mandatory literals to the end).
 3778static OperandIndices getSrcOperandIndices(unsigned Opcode,
 3779 bool AddMandatoryLiterals = false) {
3780
3781 int16_t ImmIdx =
3782 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3783
3784 if (isVOPD(Opcode)) {
3785 int16_t ImmXIdx =
3786 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3787
3788 return {getNamedOperandIdx(Opcode, OpName::src0X),
3789 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3790 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3791 getNamedOperandIdx(Opcode, OpName::src0Y),
3792 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3793 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3794 ImmXIdx,
3795 ImmIdx};
3796 }
3797
3798 return {getNamedOperandIdx(Opcode, OpName::src0),
3799 getNamedOperandIdx(Opcode, OpName::src1),
3800 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3801}
3802
3803bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3804 const MCOperand &MO = Inst.getOperand(OpIdx);
3805 if (MO.isImm())
3806 return !isInlineConstant(Inst, OpIdx);
3807 if (MO.isReg()) {
3808 auto Reg = MO.getReg();
3809 if (!Reg)
3810 return false;
3811 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3812 auto PReg = mc2PseudoReg(Reg);
3813 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3814 }
3815 return true;
3816}
3817
3818// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3819// Writelane is special in that it can use SGPR and M0 (which would normally
3820// count as using the constant bus twice - but in this case it is allowed since
3821// the lane selector doesn't count as a use of the constant bus). However, it is
3822// still required to abide by the 1 SGPR rule.
3823static bool checkWriteLane(const MCInst &Inst) {
3824 const unsigned Opcode = Inst.getOpcode();
3825 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3826 return false;
3827 const MCOperand &LaneSelOp = Inst.getOperand(2);
3828 if (!LaneSelOp.isReg())
3829 return false;
3830 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3831 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3832}
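// For example, "v_writelane_b32 v1, s2, m0" reads both s2 and m0, but only
// the s2 read counts toward the constant bus limit because the m0 lane
// selector is exempt; using a second non-M0 SGPR would still be rejected.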
3833
3834bool AMDGPUAsmParser::validateConstantBusLimitations(
3835 const MCInst &Inst, const OperandVector &Operands) {
3836 const unsigned Opcode = Inst.getOpcode();
3837 const MCInstrDesc &Desc = MII.get(Opcode);
3838 MCRegister LastSGPR;
3839 unsigned ConstantBusUseCount = 0;
3840 unsigned NumLiterals = 0;
3841 unsigned LiteralSize;
3842
 3843 if (!(Desc.TSFlags &
 3844 (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
 3845 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
 3846 !isVOPD(Opcode))
3847 return true;
3848
3849 if (checkWriteLane(Inst))
3850 return true;
3851
3852 // Check special imm operands (used by madmk, etc)
3853 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3854 ++NumLiterals;
3855 LiteralSize = 4;
3856 }
3857
3858 SmallDenseSet<MCRegister> SGPRsUsed;
3859 MCRegister SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3860 if (SGPRUsed) {
3861 SGPRsUsed.insert(SGPRUsed);
3862 ++ConstantBusUseCount;
3863 }
3864
3865 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3866
3867 unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
3868
3869 for (int OpIdx : OpIndices) {
3870 if (OpIdx == -1)
3871 continue;
3872
3873 const MCOperand &MO = Inst.getOperand(OpIdx);
3874 if (usesConstantBus(Inst, OpIdx)) {
3875 if (MO.isReg()) {
3876 LastSGPR = mc2PseudoReg(MO.getReg());
 3877 // Pairs of registers with partial intersections like these:
3878 // s0, s[0:1]
3879 // flat_scratch_lo, flat_scratch
3880 // flat_scratch_lo, flat_scratch_hi
3881 // are theoretically valid but they are disabled anyway.
3882 // Note that this code mimics SIInstrInfo::verifyInstruction
3883 if (SGPRsUsed.insert(LastSGPR).second) {
3884 ++ConstantBusUseCount;
3885 }
3886 } else { // Expression or a literal
3887
3888 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3889 continue; // special operand like VINTERP attr_chan
3890
3891 // An instruction may use only one literal.
3892 // This has been validated on the previous step.
3893 // See validateVOPLiteral.
3894 // This literal may be used as more than one operand.
3895 // If all these operands are of the same size,
3896 // this literal counts as one scalar value.
3897 // Otherwise it counts as 2 scalar values.
3898 // See "GFX10 Shader Programming", section 3.6.2.3.
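 // For example, if the same 32-bit literal is used for two 32-bit source
 // operands it counts as a single scalar value, but if it also feeds an
 // operand of a different size it counts as two scalar values.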
 3899
 3900 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3901 if (Size < 4)
3902 Size = 4;
3903
3904 if (NumLiterals == 0) {
3905 NumLiterals = 1;
3906 LiteralSize = Size;
3907 } else if (LiteralSize != Size) {
3908 NumLiterals = 2;
3909 }
3910 }
3911 }
3912
3913 if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
3914 Error(getOperandLoc(Operands, OpIdx),
3915 "invalid operand (violates constant bus restrictions)");
3916 return false;
3917 }
3918 }
3919 return true;
3920}
3921
3922std::optional<unsigned>
3923AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
3924
3925 const unsigned Opcode = Inst.getOpcode();
3926 if (!isVOPD(Opcode))
3927 return {};
3928
3929 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3930
3931 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3932 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3933 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3934 ? Opr.getReg()
3935 : MCRegister();
3936 };
3937
 3938 // On GFX12+, if both OpX and OpY are V_MOV_B32 then OpY uses the SRC2
 3939 // source cache.
3940 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
3941 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
3942 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
3943 bool AllowSameVGPR = isGFX1250();
3944
3945 if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
3946 for (auto OpName : {OpName::src0X, OpName::src0Y}) {
3947 int I = getNamedOperandIdx(Opcode, OpName);
3948 const MCOperand &Op = Inst.getOperand(I);
3949 if (!Op.isImm())
3950 continue;
3951 int64_t Imm = Op.getImm();
3952 if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
3953 !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
3954 return (unsigned)I;
3955 }
3956
3957 for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
3958 OpName::vsrc2Y, OpName::imm}) {
3959 int I = getNamedOperandIdx(Opcode, OpName);
3960 if (I == -1)
3961 continue;
3962 const MCOperand &Op = Inst.getOperand(I);
3963 if (Op.isImm())
3964 return (unsigned)I;
3965 }
3966 }
3967
3968 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3969 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
3970 getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
3971
3972 return InvalidCompOprIdx;
3973}
3974
3975bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
3976 const OperandVector &Operands) {
3977
3978 unsigned Opcode = Inst.getOpcode();
3979 bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
3980
3981 if (AsVOPD3) {
3982 for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
3983 AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
3984 if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
3985 (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
3986 Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
3987 }
3988 }
3989
3990 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
3991 if (!InvalidCompOprIdx.has_value())
3992 return true;
3993
3994 auto CompOprIdx = *InvalidCompOprIdx;
3995 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3996 auto ParsedIdx =
3997 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3998 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3999 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
4000
4001 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4002 if (CompOprIdx == VOPD::Component::DST) {
4003 if (AsVOPD3)
4004 Error(Loc, "dst registers must be distinct");
4005 else
4006 Error(Loc, "one dst register must be even and the other odd");
4007 } else {
4008 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4009 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
4010 " operands must use different VGPR banks");
4011 }
4012
4013 return false;
4014}
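// For example, a VOPD pair whose X and Y destinations are v2 and v5 satisfies
// the even/odd rule above, while v2 and v4 (both even) is rejected; likewise,
// corresponding src operands of the two halves must use different VGPR banks.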
4015
4016// \returns true if \p Inst does not satisfy VOPD constraints, but can be
4017// potentially used as VOPD3 with the same operands.
4018bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4019 // First check if it fits VOPD
4020 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
4021 if (!InvalidCompOprIdx.has_value())
4022 return false;
4023
4024 // Then if it fits VOPD3
4025 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
4026 if (InvalidCompOprIdx.has_value()) {
4027 // If failed operand is dst it is better to show error about VOPD3
4028 // instruction as it has more capabilities and error message will be
4029 // more informative. If the dst is not legal for VOPD3, then it is not
4030 // legal for VOPD either.
4031 if (*InvalidCompOprIdx == VOPD::Component::DST)
4032 return true;
4033
4034 // Otherwise prefer VOPD as we may find ourselves in an awkward situation
 4035 // with a conflict in the tied implicit src2 of fmac and no asm operand
 4036 // to point to.
4037 return false;
4038 }
4039 return true;
4040}
4041
 4042// \returns true if a VOPD3 instruction can also be represented as a shorter
4043// VOPD encoding.
4044bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
4045 const unsigned Opcode = Inst.getOpcode();
4046 const auto &II = getVOPDInstInfo(Opcode, &MII);
4047 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
4048 if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
4049 !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
4050 return false;
4051
4052 // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
4053 // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
4054 // be parsed as VOPD which does not accept src2.
4055 if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4056 II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4057 return false;
4058
4059 // If any modifiers are set this cannot be VOPD.
4060 for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4061 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4062 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4063 int I = getNamedOperandIdx(Opcode, OpName);
4064 if (I == -1)
4065 continue;
4066 if (Inst.getOperand(I).getImm())
4067 return false;
4068 }
4069
4070 return !tryVOPD3(Inst);
4071}
4072
 4073// VOPD3 has more relaxed register constraints than VOPD. We prefer the shorter
 4074// VOPD form but switch to VOPD3 otherwise.
4075bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4076 const unsigned Opcode = Inst.getOpcode();
4077 if (!isGFX1250() || !isVOPD(Opcode))
4078 return false;
4079
4080 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4081 return tryVOPD(Inst);
4082 return tryVOPD3(Inst);
4083}
4084
4085bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4086
4087 const unsigned Opc = Inst.getOpcode();
4088 const MCInstrDesc &Desc = MII.get(Opc);
4089
4090 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4091 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4092 assert(ClampIdx != -1);
4093 return Inst.getOperand(ClampIdx).getImm() == 0;
4094 }
4095
4096 return true;
4097}
4098
 4099constexpr uint64_t MIMGFlags =
 4100 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
 4101
4102bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) {
4103
4104 const unsigned Opc = Inst.getOpcode();
4105 const MCInstrDesc &Desc = MII.get(Opc);
4106
4107 if ((Desc.TSFlags & MIMGFlags) == 0)
4108 return true;
4109
4110 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
4111 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4112 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
4113
4114 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
4115 return true;
4116
4117 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
4118 return true;
4119
4120 unsigned VDataSize = getRegOperandSize(Desc, VDataIdx);
4121 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
4122 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4123 if (DMask == 0)
4124 DMask = 1;
4125
4126 bool IsPackedD16 = false;
4127 unsigned DataSize =
4128 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
4129 if (hasPackedD16()) {
4130 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4131 IsPackedD16 = D16Idx >= 0;
4132 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
4133 DataSize = (DataSize + 1) / 2;
4134 }
4135
4136 if ((VDataSize / 4) == DataSize + TFESize)
4137 return true;
4138
4139 StringRef Modifiers;
4140 if (isGFX90A())
4141 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
4142 else
4143 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
4144
4145 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
4146 return false;
4147}
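// For example, "dmask:0x7" selects three components and normally requires a
// 3-register vdata tuple, one extra register if tfe is set, and only two
// registers when packed d16 folds the components into half-sized slots.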
4148
4149bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
4150 const unsigned Opc = Inst.getOpcode();
4151 const MCInstrDesc &Desc = MII.get(Opc);
4152
4153 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
4154 return true;
4155
4156 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4157
 4158 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
 4159 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4160 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
4161 AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
4162 ? AMDGPU::OpName::srsrc
4163 : AMDGPU::OpName::rsrc;
4164 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
4165 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4166 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
4167
4168 assert(VAddr0Idx != -1);
4169 assert(SrsrcIdx != -1);
4170 assert(SrsrcIdx > VAddr0Idx);
4171
4172 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
4173 if (BaseOpcode->BVH) {
4174 if (IsA16 == BaseOpcode->A16)
4175 return true;
4176 Error(IDLoc, "image address size does not match a16");
4177 return false;
4178 }
4179
4180 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4181 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4182 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4183 unsigned ActualAddrSize =
4184 IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(Desc, VAddr0Idx) / 4;
4185
4186 unsigned ExpectedAddrSize =
4187 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
4188
4189 if (IsNSA) {
4190 if (hasPartialNSAEncoding() &&
 4191 ExpectedAddrSize >
 4192 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
4193 int VAddrLastIdx = SrsrcIdx - 1;
4194 unsigned VAddrLastSize = getRegOperandSize(Desc, VAddrLastIdx) / 4;
4195
4196 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4197 }
4198 } else {
4199 if (ExpectedAddrSize > 12)
4200 ExpectedAddrSize = 16;
4201
4202 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
4203 // This provides backward compatibility for assembly created
4204 // before 160b/192b/224b types were directly supported.
4205 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4206 return true;
4207 }
4208
4209 if (ActualAddrSize == ExpectedAddrSize)
4210 return true;
4211
4212 Error(IDLoc, "image address size does not match dim and a16");
4213 return false;
4214}
4215
4216bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4217
4218 const unsigned Opc = Inst.getOpcode();
4219 const MCInstrDesc &Desc = MII.get(Opc);
4220
4221 if ((Desc.TSFlags & MIMGFlags) == 0)
4222 return true;
4223 if (!Desc.mayLoad() || !Desc.mayStore())
4224 return true; // Not atomic
4225
4226 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4227 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4228
4229 // This is an incomplete check because image_atomic_cmpswap
4230 // may only use 0x3 and 0xf while other atomic operations
4231 // may use 0x1 and 0x3. However these limitations are
4232 // verified when we check that dmask matches dst size.
4233 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4234}
4235
4236bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4237
4238 const unsigned Opc = Inst.getOpcode();
4239 const MCInstrDesc &Desc = MII.get(Opc);
4240
4241 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4242 return true;
4243
4244 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4245 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4246
4247 // GATHER4 instructions use dmask in a different fashion compared to
4248 // other MIMG instructions. The only useful DMASK values are
4249 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4250 // (red,red,red,red) etc.) The ISA document doesn't mention
4251 // this.
4252 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4253}
4254
4255bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4256 const OperandVector &Operands) {
4257 if (!isGFX10Plus())
4258 return true;
4259
4260 const unsigned Opc = Inst.getOpcode();
4261 const MCInstrDesc &Desc = MII.get(Opc);
4262
4263 if ((Desc.TSFlags & MIMGFlags) == 0)
4264 return true;
4265
4266 // image_bvh_intersect_ray instructions do not have dim
4268 return true;
4269
4270 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4271 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4272 if (Op.isDim())
4273 return true;
4274 }
4275 return false;
4276}
4277
4278bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4279 const unsigned Opc = Inst.getOpcode();
4280 const MCInstrDesc &Desc = MII.get(Opc);
4281
4282 if ((Desc.TSFlags & MIMGFlags) == 0)
4283 return true;
4284
4285 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
 4286 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
 4287 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4288
4289 if (!BaseOpcode->MSAA)
4290 return true;
4291
4292 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4293 assert(DimIdx != -1);
4294
4295 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4296 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4297
4298 return DimInfo->MSAA;
4299}
4300
4301static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4302{
4303 switch (Opcode) {
4304 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4305 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4306 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4307 return true;
4308 default:
4309 return false;
4310 }
4311}
4312
 4313// movrels* opcodes should only allow VGPRs as src0.
4314// This is specified in .td description for vop1/vop3,
4315// but sdwa is handled differently. See isSDWAOperand.
4316bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4317 const OperandVector &Operands) {
4318
4319 const unsigned Opc = Inst.getOpcode();
4320 const MCInstrDesc &Desc = MII.get(Opc);
4321
4322 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4323 return true;
4324
4325 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4326 assert(Src0Idx != -1);
4327
4328 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4329 if (Src0.isReg()) {
4330 auto Reg = mc2PseudoReg(Src0.getReg());
4331 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4332 if (!isSGPR(Reg, TRI))
4333 return true;
4334 }
4335
4336 Error(getOperandLoc(Operands, Src0Idx), "source operand must be a VGPR");
4337 return false;
4338}
4339
4340bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4341 const OperandVector &Operands) {
4342
4343 const unsigned Opc = Inst.getOpcode();
4344
4345 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4346 return true;
4347
4348 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4349 assert(Src0Idx != -1);
4350
4351 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4352 if (!Src0.isReg())
4353 return true;
4354
4355 auto Reg = mc2PseudoReg(Src0.getReg());
4356 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4357 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4358 Error(getOperandLoc(Operands, Src0Idx),
4359 "source operand must be either a VGPR or an inline constant");
4360 return false;
4361 }
4362
4363 return true;
4364}
4365
4366bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4367 const OperandVector &Operands) {
4368 unsigned Opcode = Inst.getOpcode();
4369 const MCInstrDesc &Desc = MII.get(Opcode);
4370
4371 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4372 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4373 return true;
4374
4375 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4376 if (Src2Idx == -1)
4377 return true;
4378
4379 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4380 Error(getOperandLoc(Operands, Src2Idx),
4381 "inline constants are not allowed for this operand");
4382 return false;
4383 }
4384
4385 return true;
4386}
4387
4388bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4389 const OperandVector &Operands) {
4390 const unsigned Opc = Inst.getOpcode();
4391 const MCInstrDesc &Desc = MII.get(Opc);
4392
4393 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4394 return true;
4395
4396 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4397 if (BlgpIdx != -1) {
4398 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
4399 int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
4400
4401 unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
4402 unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();
4403
4404 // Validate the correct register size was used for the floating point
4405 // format operands
4406
4407 bool Success = true;
4408 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
4409 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4410 Error(getOperandLoc(Operands, Src0Idx),
4411 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4412 Success = false;
4413 }
4414
4415 if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
4416 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4417 Error(getOperandLoc(Operands, Src1Idx),
4418 "wrong register tuple size for blgp value " + Twine(BLGP));
4419 Success = false;
4420 }
4421
4422 return Success;
4423 }
4424 }
4425
4426 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4427 if (Src2Idx == -1)
4428 return true;
4429
4430 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4431 if (!Src2.isReg())
4432 return true;
4433
4434 MCRegister Src2Reg = Src2.getReg();
4435 MCRegister DstReg = Inst.getOperand(0).getReg();
4436 if (Src2Reg == DstReg)
4437 return true;
4438
4439 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4440 if (TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[0], HwMode))
4441 .getSizeInBits() <= 128)
4442 return true;
4443
4444 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4445 Error(getOperandLoc(Operands, Src2Idx),
4446 "source 2 operand must not partially overlap with dst");
4447 return false;
4448 }
4449
4450 return true;
4451}
4452
4453bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4454 switch (Inst.getOpcode()) {
4455 default:
4456 return true;
4457 case V_DIV_SCALE_F32_gfx6_gfx7:
4458 case V_DIV_SCALE_F32_vi:
4459 case V_DIV_SCALE_F32_gfx10:
4460 case V_DIV_SCALE_F64_gfx6_gfx7:
4461 case V_DIV_SCALE_F64_vi:
4462 case V_DIV_SCALE_F64_gfx10:
4463 break;
4464 }
4465
4466 // TODO: Check that src0 = src1 or src2.
4467
4468 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4469 AMDGPU::OpName::src2_modifiers,
4470 AMDGPU::OpName::src2_modifiers}) {
4471 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
 4472 .getImm() &
 4473 SISrcMods::ABS) {
4474 return false;
4475 }
4476 }
4477
4478 return true;
4479}
4480
4481bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4482
4483 const unsigned Opc = Inst.getOpcode();
4484 const MCInstrDesc &Desc = MII.get(Opc);
4485
4486 if ((Desc.TSFlags & MIMGFlags) == 0)
4487 return true;
4488
4489 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4490 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4491 if (isCI() || isSI())
4492 return false;
4493 }
4494
4495 return true;
4496}
4497
4498bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4499 const unsigned Opc = Inst.getOpcode();
4500 const MCInstrDesc &Desc = MII.get(Opc);
4501
4502 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4503 return true;
4504
4505 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4506
4507 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4508}
4509
4510static bool IsRevOpcode(const unsigned Opcode)
4511{
4512 switch (Opcode) {
4513 case AMDGPU::V_SUBREV_F32_e32:
4514 case AMDGPU::V_SUBREV_F32_e64:
4515 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4516 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4517 case AMDGPU::V_SUBREV_F32_e32_vi:
4518 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4519 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4520 case AMDGPU::V_SUBREV_F32_e64_vi:
4521
4522 case AMDGPU::V_SUBREV_CO_U32_e32:
4523 case AMDGPU::V_SUBREV_CO_U32_e64:
4524 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4525 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4526
4527 case AMDGPU::V_SUBBREV_U32_e32:
4528 case AMDGPU::V_SUBBREV_U32_e64:
4529 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4530 case AMDGPU::V_SUBBREV_U32_e32_vi:
4531 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4532 case AMDGPU::V_SUBBREV_U32_e64_vi:
4533
4534 case AMDGPU::V_SUBREV_U32_e32:
4535 case AMDGPU::V_SUBREV_U32_e64:
4536 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4537 case AMDGPU::V_SUBREV_U32_e32_vi:
4538 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4539 case AMDGPU::V_SUBREV_U32_e64_vi:
4540
4541 case AMDGPU::V_SUBREV_F16_e32:
4542 case AMDGPU::V_SUBREV_F16_e64:
4543 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4544 case AMDGPU::V_SUBREV_F16_e32_vi:
4545 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4546 case AMDGPU::V_SUBREV_F16_e64_vi:
4547
4548 case AMDGPU::V_SUBREV_U16_e32:
4549 case AMDGPU::V_SUBREV_U16_e64:
4550 case AMDGPU::V_SUBREV_U16_e32_vi:
4551 case AMDGPU::V_SUBREV_U16_e64_vi:
4552
4553 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4554 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4555 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4556
4557 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4558 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4559
4560 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4561 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4562
4563 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4564 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4565
4566 case AMDGPU::V_LSHRREV_B32_e32:
4567 case AMDGPU::V_LSHRREV_B32_e64:
4568 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4569 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4570 case AMDGPU::V_LSHRREV_B32_e32_vi:
4571 case AMDGPU::V_LSHRREV_B32_e64_vi:
4572 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4573 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4574
4575 case AMDGPU::V_ASHRREV_I32_e32:
4576 case AMDGPU::V_ASHRREV_I32_e64:
4577 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4578 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4579 case AMDGPU::V_ASHRREV_I32_e32_vi:
4580 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4581 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4582 case AMDGPU::V_ASHRREV_I32_e64_vi:
4583
4584 case AMDGPU::V_LSHLREV_B32_e32:
4585 case AMDGPU::V_LSHLREV_B32_e64:
4586 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4587 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4588 case AMDGPU::V_LSHLREV_B32_e32_vi:
4589 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4590 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4591 case AMDGPU::V_LSHLREV_B32_e64_vi:
4592
4593 case AMDGPU::V_LSHLREV_B16_e32:
4594 case AMDGPU::V_LSHLREV_B16_e64:
4595 case AMDGPU::V_LSHLREV_B16_e32_vi:
4596 case AMDGPU::V_LSHLREV_B16_e64_vi:
4597 case AMDGPU::V_LSHLREV_B16_gfx10:
4598
4599 case AMDGPU::V_LSHRREV_B16_e32:
4600 case AMDGPU::V_LSHRREV_B16_e64:
4601 case AMDGPU::V_LSHRREV_B16_e32_vi:
4602 case AMDGPU::V_LSHRREV_B16_e64_vi:
4603 case AMDGPU::V_LSHRREV_B16_gfx10:
4604
4605 case AMDGPU::V_ASHRREV_I16_e32:
4606 case AMDGPU::V_ASHRREV_I16_e64:
4607 case AMDGPU::V_ASHRREV_I16_e32_vi:
4608 case AMDGPU::V_ASHRREV_I16_e64_vi:
4609 case AMDGPU::V_ASHRREV_I16_gfx10:
4610
4611 case AMDGPU::V_LSHLREV_B64_e64:
4612 case AMDGPU::V_LSHLREV_B64_gfx10:
4613 case AMDGPU::V_LSHLREV_B64_vi:
4614
4615 case AMDGPU::V_LSHRREV_B64_e64:
4616 case AMDGPU::V_LSHRREV_B64_gfx10:
4617 case AMDGPU::V_LSHRREV_B64_vi:
4618
4619 case AMDGPU::V_ASHRREV_I64_e64:
4620 case AMDGPU::V_ASHRREV_I64_gfx10:
4621 case AMDGPU::V_ASHRREV_I64_vi:
4622
4623 case AMDGPU::V_PK_LSHLREV_B16:
4624 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4625 case AMDGPU::V_PK_LSHLREV_B16_vi:
4626
4627 case AMDGPU::V_PK_LSHRREV_B16:
4628 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4629 case AMDGPU::V_PK_LSHRREV_B16_vi:
4630 case AMDGPU::V_PK_ASHRREV_I16:
4631 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4632 case AMDGPU::V_PK_ASHRREV_I16_vi:
4633 return true;
4634 default:
4635 return false;
4636 }
4637}
4638
4639bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
4640 const OperandVector &Operands) {
4641 using namespace SIInstrFlags;
4642 const unsigned Opcode = Inst.getOpcode();
4643 const MCInstrDesc &Desc = MII.get(Opcode);
4644
4645 // lds_direct register is defined so that it can be used
4646 // with 9-bit operands only. Ignore encodings which do not accept these.
4647 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4648 if ((Desc.TSFlags & Enc) == 0)
4649 return true;
4650
4651 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4652 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4653 if (SrcIdx == -1)
4654 break;
4655 const auto &Src = Inst.getOperand(SrcIdx);
4656 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4657
4658 if (isGFX90A() || isGFX11Plus()) {
4659 Error(getOperandLoc(Operands, SrcIdx),
4660 "lds_direct is not supported on this GPU");
4661 return false;
4662 }
4663
4664 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) {
4665 Error(getOperandLoc(Operands, SrcIdx),
4666 "lds_direct cannot be used with this instruction");
4667 return false;
4668 }
4669
4670 if (SrcName != OpName::src0) {
4671 Error(getOperandLoc(Operands, SrcIdx),
4672 "lds_direct may be used as src0 only");
4673 return false;
4674 }
4675 }
4676 }
4677
4678 return true;
4679}
4680
4681SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4682 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4683 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4684 if (Op.isFlatOffset())
4685 return Op.getStartLoc();
4686 }
4687 return getLoc();
4688}
4689
4690bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4691 const OperandVector &Operands) {
4692 auto Opcode = Inst.getOpcode();
4693 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4694 if (OpNum == -1)
4695 return true;
4696
4697 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4698 if ((TSFlags & SIInstrFlags::FLAT))
4699 return validateFlatOffset(Inst, Operands);
4700
4701 if ((TSFlags & SIInstrFlags::SMRD))
4702 return validateSMEMOffset(Inst, Operands);
4703
4704 const auto &Op = Inst.getOperand(OpNum);
 4705 // GFX12+ buffer ops: InstOffset is a signed 24-bit field, but must not be negative.
4706 if (isGFX12Plus() &&
4707 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4708 const unsigned OffsetSize = 24;
4709 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
4710 Error(getFlatOffsetLoc(Operands),
4711 Twine("expected a ") + Twine(OffsetSize - 1) +
4712 "-bit unsigned offset for buffer ops");
4713 return false;
4714 }
4715 } else {
4716 const unsigned OffsetSize = 16;
4717 if (!isUIntN(OffsetSize, Op.getImm())) {
4718 Error(getFlatOffsetLoc(Operands),
4719 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4720 return false;
4721 }
4722 }
4723 return true;
4724}
4725
4726bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4727 const OperandVector &Operands) {
4728 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4729 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4730 return true;
4731
4732 auto Opcode = Inst.getOpcode();
4733 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4734 assert(OpNum != -1);
4735
4736 const auto &Op = Inst.getOperand(OpNum);
4737 if (!hasFlatOffsets() && Op.getImm() != 0) {
4738 Error(getFlatOffsetLoc(Operands),
4739 "flat offset modifier is not supported on this GPU");
4740 return false;
4741 }
4742
4743 // For pre-GFX12 FLAT instructions the offset must be positive;
4744 // MSB is ignored and forced to zero.
4745 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
 4746 bool AllowNegative =
 4747 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4748 isGFX12Plus();
4749 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4750 Error(getFlatOffsetLoc(Operands),
4751 Twine("expected a ") +
4752 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4753 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4754 return false;
4755 }
4756
4757 return true;
4758}
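// For example, a negative "offset:" on a plain FLAT access is rejected before
// GFX12, while the global/scratch variants accept signed offsets within the
// target's flat-offset bit width.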
4759
4760SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4761 // Start with second operand because SMEM Offset cannot be dst or src0.
4762 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4763 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4764 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4765 return Op.getStartLoc();
4766 }
4767 return getLoc();
4768}
4769
4770bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4771 const OperandVector &Operands) {
4772 if (isCI() || isSI())
4773 return true;
4774
4775 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4776 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4777 return true;
4778
4779 auto Opcode = Inst.getOpcode();
4780 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4781 if (OpNum == -1)
4782 return true;
4783
4784 const auto &Op = Inst.getOperand(OpNum);
4785 if (!Op.isImm())
4786 return true;
4787
4788 uint64_t Offset = Op.getImm();
4789 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4792 return true;
4793
4794 Error(getSMEMOffsetLoc(Operands),
4795 isGFX12Plus() && IsBuffer
4796 ? "expected a 23-bit unsigned offset for buffer ops"
4797 : isGFX12Plus() ? "expected a 24-bit signed offset"
4798 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4799 : "expected a 21-bit signed offset");
4800
4801 return false;
4802}
4803
4804bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
4805 const OperandVector &Operands) {
4806 unsigned Opcode = Inst.getOpcode();
4807 const MCInstrDesc &Desc = MII.get(Opcode);
4808 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4809 return true;
4810
4811 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4812 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4813
4814 const int OpIndices[] = { Src0Idx, Src1Idx };
4815
4816 unsigned NumExprs = 0;
4817 unsigned NumLiterals = 0;
4818 int64_t LiteralValue;
4819
4820 for (int OpIdx : OpIndices) {
4821 if (OpIdx == -1) break;
4822
4823 const MCOperand &MO = Inst.getOperand(OpIdx);
4824 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
 4825 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
 4826 bool IsLit = false;
4827 std::optional<int64_t> Imm;
4828 if (MO.isImm()) {
4829 Imm = MO.getImm();
4830 } else if (MO.isExpr()) {
4831 if (isLitExpr(MO.getExpr())) {
4832 IsLit = true;
4833 Imm = getLitValue(MO.getExpr());
4834 }
4835 } else {
4836 continue;
4837 }
4838
4839 if (!Imm.has_value()) {
4840 ++NumExprs;
4841 } else if (!isInlineConstant(Inst, OpIdx)) {
4842 auto OpType = static_cast<AMDGPU::OperandType>(
4843 Desc.operands()[OpIdx].OperandType);
4844 int64_t Value = encode32BitLiteral(*Imm, OpType, IsLit);
4845 if (NumLiterals == 0 || LiteralValue != Value) {
 4846 LiteralValue = Value;
 4847 ++NumLiterals;
4848 }
4849 }
4850 }
4851 }
4852
4853 if (NumLiterals + NumExprs <= 1)
4854 return true;
4855
4856 Error(getOperandLoc(Operands, Src1Idx),
4857 "only one unique literal operand is allowed");
4858 return false;
4859}
4860
4861bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4862 const unsigned Opc = Inst.getOpcode();
4863 if (isPermlane16(Opc)) {
4864 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4865 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4866
4867 if (OpSel & ~3)
4868 return false;
4869 }
4870
4871 uint64_t TSFlags = MII.get(Opc).TSFlags;
4872
4873 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4874 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4875 if (OpSelIdx != -1) {
4876 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4877 return false;
4878 }
4879 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4880 if (OpSelHiIdx != -1) {
4881 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4882 return false;
4883 }
4884 }
4885
4886 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4887 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4888 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4889 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4890 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4891 if (OpSel & 3)
4892 return false;
4893 }
4894
4895 // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
4896 // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
4897 // the first SGPR and use it for both the low and high operations.
4898 if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
4899 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4900 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4901 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4902 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4903
4904 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4905 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4906 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4907 unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
4908
4909 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4910
4911 auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
4912 unsigned Mask = 1U << Index;
4913 return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
4914 };
4915
4916 if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
4917 !VerifyOneSGPR(/*Index=*/0))
4918 return false;
4919 if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
4920 !VerifyOneSGPR(/*Index=*/1))
4921 return false;
4922
4923 int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4924 if (Src2Idx != -1) {
4925 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4926 if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
4927 !VerifyOneSGPR(/*Index=*/2))
4928 return false;
4929 }
4930 }
4931
4932 return true;
4933}
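// For example, on GFX12+ a packed "v_pk_add_f32" whose src0 is an SGPR must
// keep the op_sel and op_sel_hi bits for that operand clear, since the
// hardware reads the same (first) SGPR for both the low and high halves.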
4934
4935bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
4936 if (!hasTrue16Insts())
4937 return true;
4938 const MCRegisterInfo *MRI = getMRI();
4939 const unsigned Opc = Inst.getOpcode();
4940 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4941 if (OpSelIdx == -1)
4942 return true;
4943 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
4944 // If the value is 0 we could have a default OpSel Operand, so conservatively
4945 // allow it.
4946 if (OpSelOpValue == 0)
4947 return true;
4948 unsigned OpCount = 0;
4949 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
4950 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
4951 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
4952 if (OpIdx == -1)
4953 continue;
4954 const MCOperand &Op = Inst.getOperand(OpIdx);
4955 if (Op.isReg() &&
4956 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
4957 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
4958 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
4959 if (OpSelOpIsHi != VGPRSuffixIsHi)
4960 return false;
4961 }
4962 ++OpCount;
4963 }
4964
4965 return true;
4966}
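// For example, with true16 instructions a source written as "v1.h" requires
// the corresponding op_sel bit to be set, while "v1.l" requires it to be
// clear; a mismatch between the register suffix and op_sel is rejected.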
4967
4968bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
4969 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4970
4971 const unsigned Opc = Inst.getOpcode();
4972 uint64_t TSFlags = MII.get(Opc).TSFlags;
4973
4974 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4975 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4976 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4977 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4978 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4979 !(TSFlags & SIInstrFlags::IsSWMMAC))
4980 return true;
4981
4982 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4983 if (NegIdx == -1)
4984 return true;
4985
4986 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4987
 4988 // Some instructions have a neg_lo or neg_hi operand, but the neg modifier is
 4989 // only allowed on a subset of their src operands.
 4990 // Conveniently, such instructions have no src_modifiers operand for the src
 4991 // operands that disallow neg, because those operands also disallow opsel.
4992
4993 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4994 AMDGPU::OpName::src1_modifiers,
4995 AMDGPU::OpName::src2_modifiers};
4996
4997 for (unsigned i = 0; i < 3; ++i) {
4998 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4999 if (Neg & (1 << i))
5000 return false;
5001 }
5002 }
5003
5004 return true;
5005}
5006
5007bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
5008 const OperandVector &Operands) {
5009 const unsigned Opc = Inst.getOpcode();
5010 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
5011 if (DppCtrlIdx >= 0) {
5012 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
5013
5014 if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
5015 AMDGPU::isDPALU_DPP(MII.get(Opc), MII, getSTI())) {
5016 // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
5017 // only on GFX12.
5018 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5019 Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
5020 : "DP ALU dpp only supports row_newbcast");
5021 return false;
5022 }
5023 }
5024
5025 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
5026 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5027
5028 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
5029 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5030 if (Src1Idx >= 0) {
5031 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5032 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5033 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
5034 Error(getOperandLoc(Operands, Src1Idx),
5035 "invalid operand for instruction");
5036 return false;
5037 }
5038 if (Src1.isImm()) {
5039 Error(getInstLoc(Operands),
5040 "src1 immediate operand invalid for instruction");
5041 return false;
5042 }
5043 }
5044 }
5045
5046 return true;
5047}
5048
5049// Check if VCC register matches wavefront size
5050bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5051 return (Reg == AMDGPU::VCC && isWave64()) ||
5052 (Reg == AMDGPU::VCC_LO && isWave32());
5053}
5054
 5055// Only one unique literal can be used. A VOP3 literal is only allowed on GFX10+.
5056bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
5057 const OperandVector &Operands) {
5058 unsigned Opcode = Inst.getOpcode();
5059 const MCInstrDesc &Desc = MII.get(Opcode);
5060 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
5061 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
5062 !HasMandatoryLiteral && !isVOPD(Opcode))
5063 return true;
5064
5065 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
5066
5067 std::optional<unsigned> LiteralOpIdx;
5068 std::optional<uint64_t> LiteralValue;
5069
5070 for (int OpIdx : OpIndices) {
5071 if (OpIdx == -1)
5072 continue;
5073
5074 const MCOperand &MO = Inst.getOperand(OpIdx);
5075 if (!MO.isImm() && !MO.isExpr())
5076 continue;
5077 if (!isSISrcOperand(Desc, OpIdx))
5078 continue;
5079
5080 std::optional<int64_t> Imm;
5081 if (MO.isImm())
5082 Imm = MO.getImm();
5083 else if (MO.isExpr() && isLitExpr(MO.getExpr()))
5084 Imm = getLitValue(MO.getExpr());
5085
5086 bool IsAnotherLiteral = false;
5087 if (!Imm.has_value()) {
 5088 // Literal value not known, so we conservatively assume it's different.
5089 IsAnotherLiteral = true;
5090 } else if (!isInlineConstant(Inst, OpIdx)) {
5091 uint64_t Value = *Imm;
5092 bool IsForcedFP64 =
5093 Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
5095 HasMandatoryLiteral);
5096 bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
5097 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
5098 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
5099
5100 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
5101 !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
5102 Error(getOperandLoc(Operands, OpIdx),
5103 "invalid operand for instruction");
5104 return false;
5105 }
5106
5107 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5108 Value = Hi_32(Value);
5109
5110 IsAnotherLiteral = !LiteralValue || *LiteralValue != Value;
 5111 LiteralValue = Value;
 5112 }
5113
5114 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5115 !getFeatureBits()[FeatureVOP3Literal]) {
5116 Error(getOperandLoc(Operands, OpIdx),
5117 "literal operands are not supported");
5118 return false;
5119 }
5120
5121 if (LiteralOpIdx && IsAnotherLiteral) {
5122 Error(getLaterLoc(getOperandLoc(Operands, OpIdx),
5123 getOperandLoc(Operands, *LiteralOpIdx)),
5124 "only one unique literal operand is allowed");
5125 return false;
5126 }
5127
5128 if (IsAnotherLiteral)
5129 LiteralOpIdx = OpIdx;
5130 }
5131
5132 return true;
5133}
5134
5135// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5136static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5137 const MCRegisterInfo *MRI) {
5138 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5139 if (OpIdx < 0)
5140 return -1;
5141
5142 const MCOperand &Op = Inst.getOperand(OpIdx);
5143 if (!Op.isReg())
5144 return -1;
5145
5146 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5147 auto Reg = Sub ? Sub : Op.getReg();
5148 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5149 return AGPR32.contains(Reg) ? 1 : 0;
5150}
5151
5152bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5153 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
 5154 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
 5155 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5156 SIInstrFlags::DS)) == 0)
5157 return true;
5158
5159 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5160 ? AMDGPU::OpName::data0
5161 : AMDGPU::OpName::vdata;
5162
5163 const MCRegisterInfo *MRI = getMRI();
5164 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5165 int DataAreg = IsAGPROperand(Inst, DataName, MRI);
5166
5167 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
5168 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5169 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5170 return false;
5171 }
5172
5173 auto FB = getFeatureBits();
5174 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5175 if (DataAreg < 0 || DstAreg < 0)
5176 return true;
5177 return DstAreg == DataAreg;
5178 }
5179
5180 return DstAreg < 1 && DataAreg < 1;
5181}
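// For example, on gfx90a a DS or FLAT access may use either AGPRs or VGPRs for
// its data and destination operands, but the two must agree (no mixing), while
// targets without gfx90a-style AGPR memory ops reject AGPR data here entirely.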
5182
5183bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5184 auto FB = getFeatureBits();
5185 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5186 return true;
5187
5188 unsigned Opc = Inst.getOpcode();
5189 const MCRegisterInfo *MRI = getMRI();
 5190 // DS_READ_B96_TR_B6 is the only DS instruction in GFX950 that allows an
 5191 // unaligned VGPR. All others only allow even-aligned VGPRs.
5192 if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5193 return true;
5194
5195 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5196 switch (Opc) {
5197 default:
5198 break;
5199 case AMDGPU::DS_LOAD_TR6_B96:
5200 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
 5201 // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250 that
 5202 // allows an unaligned VGPR. All others only allow even-aligned VGPRs.
5203 return true;
5204 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5205 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
 5206 // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250 that
 5207 // allows an unaligned VGPR for vdst, but other operands still only allow
 5208 // even-aligned VGPRs.
5209 int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
5210 if (VAddrIdx != -1) {
5211 const MCOperand &Op = Inst.getOperand(VAddrIdx);
5212 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5213 if ((Sub - AMDGPU::VGPR0) & 1)
5214 return false;
5215 }
5216 return true;
5217 }
5218 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5219 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5220 return true;
5221 }
5222 }
5223
5224 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5225 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5226 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
5227 const MCOperand &Op = Inst.getOperand(I);
5228 if (!Op.isReg())
5229 continue;
5230
5231 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5232 if (!Sub)
5233 continue;
5234
5235 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
5236 return false;
5237 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
5238 return false;
5239 }
5240
5241 return true;
5242}
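// For example, on targets requiring aligned VGPRs a 64-bit tuple such as
// v[2:3] is accepted, while v[1:2] starts at an odd register and is rejected
// (apart from the transpose-load exceptions handled above).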
5243
5244SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5245 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5246 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5247 if (Op.isBLGP())
5248 return Op.getStartLoc();
5249 }
5250 return SMLoc();
5251}
5252
5253bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5254 const OperandVector &Operands) {
5255 unsigned Opc = Inst.getOpcode();
5256 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5257 if (BlgpIdx == -1)
5258 return true;
5259 SMLoc BLGPLoc = getBLGPLoc(Operands);
5260 if (!BLGPLoc.isValid())
5261 return true;
5262 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5263 auto FB = getFeatureBits();
5264 bool UsesNeg = false;
5265 if (FB[AMDGPU::FeatureGFX940Insts]) {
5266 switch (Opc) {
5267 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5268 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5269 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5270 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5271 UsesNeg = true;
5272 }
5273 }
5274
5275 if (IsNeg == UsesNeg)
5276 return true;
5277
5278 Error(BLGPLoc,
5279 UsesNeg ? "invalid modifier: blgp is not supported"
5280 : "invalid modifier: neg is not supported");
5281
5282 return false;
5283}
5284
5285bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5286 const OperandVector &Operands) {
5287 if (!isGFX11Plus())
5288 return true;
5289
5290 unsigned Opc = Inst.getOpcode();
5291 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5292 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5293 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5294 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5295 return true;
5296
5297 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5298 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5299 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5300 if (Reg == AMDGPU::SGPR_NULL)
5301 return true;
5302
5303 Error(getOperandLoc(Operands, Src0Idx), "src0 must be null");
5304 return false;
5305}
5306
5307bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5308 const OperandVector &Operands) {
5309 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5310 if ((TSFlags & SIInstrFlags::DS) == 0)
5311 return true;
5312 if (TSFlags & SIInstrFlags::GWS)
5313 return validateGWS(Inst, Operands);
5314 // Only validate GDS for non-GWS instructions.
5315 if (hasGDS())
5316 return true;
5317 int GDSIdx =
5318 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5319 if (GDSIdx < 0)
5320 return true;
5321 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5322 if (GDS) {
5323 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5324 Error(S, "gds modifier is not supported on this GPU");
5325 return false;
5326 }
5327 return true;
5328}
5329
5330// gfx90a has an undocumented limitation:
5331// DS_GWS opcodes must use even aligned registers.
5332bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5333 const OperandVector &Operands) {
5334 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5335 return true;
5336
5337 int Opc = Inst.getOpcode();
5338 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5339 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5340 return true;
5341
5342 const MCRegisterInfo *MRI = getMRI();
5343 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5344 int Data0Pos =
5345 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5346 assert(Data0Pos != -1);
5347 auto Reg = Inst.getOperand(Data0Pos).getReg();
5348 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5349 if (RegIdx & 1) {
5350 Error(getOperandLoc(Operands, Data0Pos), "vgpr must be even aligned");
5351 return false;
5352 }
5353
5354 return true;
5355}
5356
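// Validate cache policy (cpol) modifiers such as glc/slc/scc, nv and
// scale_offset against the target generation and the instruction kind.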
5357bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
5358 const OperandVector &Operands,
5359 SMLoc IDLoc) {
5360 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
5361 AMDGPU::OpName::cpol);
5362 if (CPolPos == -1)
5363 return true;
5364
5365 unsigned CPol = Inst.getOperand(CPolPos).getImm();
5366
5367 if (!isGFX1250()) {
5368 if (CPol & CPol::SCAL) {
5369 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5370 StringRef CStr(S.getPointer());
5371 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5372 Error(S, "scale_offset is not supported on this GPU");
5373 }
5374 if (CPol & CPol::NV) {
5375 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5376 StringRef CStr(S.getPointer());
5377 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
5378 Error(S, "nv is not supported on this GPU");
5379 }
5380 }
5381
5382 if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
5383 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5384 StringRef CStr(S.getPointer());
5385 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5386 Error(S, "scale_offset is not supported for this instruction");
5387 }
5388
5389 if (isGFX12Plus())
5390 return validateTHAndScopeBits(Inst, Operands, CPol);
5391
5392 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5393 if (TSFlags & SIInstrFlags::SMRD) {
5394 if (CPol && (isSI() || isCI())) {
5395 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5396 Error(S, "cache policy is not supported for SMRD instructions");
5397 return false;
5398 }
5399 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
5400 Error(IDLoc, "invalid cache policy for SMEM instruction");
5401 return false;
5402 }
5403 }
5404
5405 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
5406 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
5407 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5408 SIInstrFlags::FLAT;
5409 if (!(TSFlags & AllowSCCModifier)) {
5410 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5411 StringRef CStr(S.getPointer());
5412 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
5413 Error(S,
5414 "scc modifier is not supported for this instruction on this GPU");
5415 return false;
5416 }
5417 }
5418
5419 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
5420 return true;
5421
5422 if (TSFlags & SIInstrFlags::IsAtomicRet) {
5423 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5424 Error(IDLoc, isGFX940() ? "instruction must use sc0"
5425 : "instruction must use glc");
5426 return false;
5427 }
5428 } else {
5429 if (CPol & CPol::GLC) {
5430 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5431 StringRef CStr(S.getPointer());
5432 S = SMLoc::getFromPointer(
5433 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
5434 Error(S, isGFX940() ? "instruction must not use sc0"
5435 : "instruction must not use glc");
5436 return false;
5437 }
5438 }
5439
5440 return true;
5441}
5442
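// On GFX12+ the cache policy is expressed as a temporal hint (th) plus a
// scope; check that the combination is legal for this instruction class.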
5443bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5444 const OperandVector &Operands,
5445 const unsigned CPol) {
5446 const unsigned TH = CPol & AMDGPU::CPol::TH;
5447 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5448
5449 const unsigned Opcode = Inst.getOpcode();
5450 const MCInstrDesc &TID = MII.get(Opcode);
5451
5452 auto PrintError = [&](StringRef Msg) {
5453 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5454 Error(S, Msg);
5455 return false;
5456 };
5457
5458 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5461 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5462
5463 if (TH == 0)
5464 return true;
5465
5466 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5467 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5468 (TH == AMDGPU::CPol::TH_NT_HT)))
5469 return PrintError("invalid th value for SMEM instruction");
5470
5471 if (TH == AMDGPU::CPol::TH_BYPASS) {
5472 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5473 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
5474 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5475 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5476 return PrintError("scope and th combination is not valid");
5477 }
5478
5479 unsigned THType = AMDGPU::getTemporalHintType(TID);
5480 if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
5481 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5482 return PrintError("invalid th value for atomic instructions");
5483 } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
5484 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5485 return PrintError("invalid th value for store instructions");
5486 } else {
5487 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5488 return PrintError("invalid th value for load instructions");
5489 }
5490
5491 return true;
5492}
5493
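// The tfe modifier has no meaning for buffer stores; diagnose it only when
// the user wrote it explicitly in the operand list.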
5494bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5495 const OperandVector &Operands) {
5496 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5497 if (Desc.mayStore() &&
5498 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5499 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5500 if (Loc != getInstLoc(Operands)) {
5501 Error(Loc, "TFE modifier has no meaning for store instructions");
5502 return false;
5503 }
5504 }
5505
5506 return true;
5507}
5508
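// For WMMA instructions with matrix format modifiers, the register tuple
// size of each source must match the size implied by its matrix_*_fmt value.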
5509bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
5510 const OperandVector &Operands) {
5511 unsigned Opc = Inst.getOpcode();
5512 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5513 const MCInstrDesc &Desc = MII.get(Opc);
5514
5515 auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
5516 int FmtIdx = AMDGPU::getNamedOperandIdx(Opc, FmtOp);
5517 if (FmtIdx == -1)
5518 return true;
5519 unsigned Fmt = Inst.getOperand(FmtIdx).getImm();
5520 int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
5521 unsigned RegSize =
5522 TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[SrcIdx], HwMode))
5523 .getSizeInBits();
5524
5525 if (RegSize == AMDGPU::wmmaScaleF8F6F4FormatToNumRegs(Fmt) * 32)
5526 return true;
5527
5528 static const char *FmtNames[] = {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
5529 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
5530 "MATRIX_FMT_FP4"};
5531
5532 Error(getOperandLoc(Operands, SrcIdx),
5533 "wrong register tuple size for " + Twine(FmtNames[Fmt]));
5534 return false;
5535 };
5536
5537 return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
5538 validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
5539}
5540
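// Perform target-specific semantic checks on a successfully matched
// instruction before it is emitted.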
5541bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc,
5542 const OperandVector &Operands) {
5543 if (!validateLdsDirect(Inst, Operands))
5544 return false;
5545 if (!validateTrue16OpSel(Inst)) {
5546 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5547 "op_sel operand conflicts with 16-bit operand suffix");
5548 return false;
5549 }
5550 if (!validateSOPLiteral(Inst, Operands))
5551 return false;
5552 if (!validateVOPLiteral(Inst, Operands)) {
5553 return false;
5554 }
5555 if (!validateConstantBusLimitations(Inst, Operands)) {
5556 return false;
5557 }
5558 if (!validateVOPD(Inst, Operands)) {
5559 return false;
5560 }
5561 if (!validateIntClampSupported(Inst)) {
5562 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5563 "integer clamping is not supported on this GPU");
5564 return false;
5565 }
5566 if (!validateOpSel(Inst)) {
5567 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5568 "invalid op_sel operand");
5569 return false;
5570 }
5571 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5572 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5573 "invalid neg_lo operand");
5574 return false;
5575 }
5576 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5577 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5578 "invalid neg_hi operand");
5579 return false;
5580 }
5581 if (!validateDPP(Inst, Operands)) {
5582 return false;
5583 }
5584 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5585 if (!validateMIMGD16(Inst)) {
5586 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5587 "d16 modifier is not supported on this GPU");
5588 return false;
5589 }
5590 if (!validateMIMGDim(Inst, Operands)) {
5591 Error(IDLoc, "missing dim operand");
5592 return false;
5593 }
5594 if (!validateTensorR128(Inst)) {
5595 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5596 "instruction must set modifier r128=0");
5597 return false;
5598 }
5599 if (!validateMIMGMSAA(Inst)) {
5600 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5601 "invalid dim; must be MSAA type");
5602 return false;
5603 }
5604 if (!validateMIMGDataSize(Inst, IDLoc)) {
5605 return false;
5606 }
5607 if (!validateMIMGAddrSize(Inst, IDLoc))
5608 return false;
5609 if (!validateMIMGAtomicDMask(Inst)) {
5610 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5611 "invalid atomic image dmask");
5612 return false;
5613 }
5614 if (!validateMIMGGatherDMask(Inst)) {
5615 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5616 "invalid image_gather dmask: only one bit must be set");
5617 return false;
5618 }
5619 if (!validateMovrels(Inst, Operands)) {
5620 return false;
5621 }
5622 if (!validateOffset(Inst, Operands)) {
5623 return false;
5624 }
5625 if (!validateMAIAccWrite(Inst, Operands)) {
5626 return false;
5627 }
5628 if (!validateMAISrc2(Inst, Operands)) {
5629 return false;
5630 }
5631 if (!validateMFMA(Inst, Operands)) {
5632 return false;
5633 }
5634 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5635 return false;
5636 }
5637
5638 if (!validateAGPRLdSt(Inst)) {
5639 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5640 ? "invalid register class: data and dst should be all VGPR or AGPR"
5641 : "invalid register class: agpr loads and stores not supported on this GPU"
5642 );
5643 return false;
5644 }
5645 if (!validateVGPRAlign(Inst)) {
5646 Error(IDLoc,
5647 "invalid register class: vgpr tuples must be 64 bit aligned");
5648 return false;
5649 }
5650 if (!validateDS(Inst, Operands)) {
5651 return false;
5652 }
5653
5654 if (!validateBLGP(Inst, Operands)) {
5655 return false;
5656 }
5657
5658 if (!validateDivScale(Inst)) {
5659 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5660 return false;
5661 }
5662 if (!validateWaitCnt(Inst, Operands)) {
5663 return false;
5664 }
5665 if (!validateTFE(Inst, Operands)) {
5666 return false;
5667 }
5668 if (!validateWMMA(Inst, Operands)) {
5669 return false;
5670 }
5671
5672 return true;
5673}
5674
5675static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5676 const FeatureBitset &FBS,
5677 unsigned VariantID = 0);
5678
5679static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5680 const FeatureBitset &AvailableFeatures,
5681 unsigned VariantID);
5682
5683bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5684 const FeatureBitset &FBS) {
5685 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5686}
5687
5688bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5689 const FeatureBitset &FBS,
5690 ArrayRef<unsigned> Variants) {
5691 for (auto Variant : Variants) {
5692 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5693 return true;
5694 }
5695
5696 return false;
5697}
5698
5699bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5700 SMLoc IDLoc) {
5701 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5702
5703 // Check if requested instruction variant is supported.
5704 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5705 return false;
5706
5707 // This instruction is not supported.
5708 // Clear any other pending errors because they are no longer relevant.
5709 getParser().clearPendingErrors();
5710
5711 // Requested instruction variant is not supported.
5712 // Check if any other variants are supported.
5713 StringRef VariantName = getMatchedVariantName();
5714 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5715 return Error(IDLoc,
5716 Twine(VariantName,
5717 " variant of this instruction is not supported"));
5718 }
5719
5720 // Check if this instruction may be used with a different wavesize.
5721 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5722 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5723 // FIXME: Use getAvailableFeatures, and do not manually recompute
5724 FeatureBitset FeaturesWS32 = getFeatureBits();
5725 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5726 .flip(AMDGPU::FeatureWavefrontSize32);
5727 FeatureBitset AvailableFeaturesWS32 =
5728 ComputeAvailableFeatures(FeaturesWS32);
5729
5730 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5731 return Error(IDLoc, "instruction requires wavesize=32");
5732 }
5733
5734 // Finally check if this instruction is supported on any other GPU.
5735 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5736 return Error(IDLoc, "instruction not supported on this GPU");
5737 }
5738
5739 // Instruction not supported on any GPU. Probably a typo.
5740 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5741 return Error(IDLoc, "invalid instruction" + Suggestion);
5742}
5743
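// Heuristic for VOPD diagnostics: an offending operand that directly follows
// a '::' separator belongs to the VOPDY half of the instruction.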
5744static bool isInvalidVOPDY(const OperandVector &Operands,
5745 uint64_t InvalidOprIdx) {
5746 assert(InvalidOprIdx < Operands.size());
5747 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5748 if (Op.isToken() && InvalidOprIdx > 1) {
5749 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5750 return PrevOp.isToken() && PrevOp.getToken() == "::";
5751 }
5752 return false;
5753}
5754
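// Try every enabled matcher variant, keep the most specific failure status,
// then validate the matched instruction and emit it on success.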
5755bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5756 OperandVector &Operands,
5757 MCStreamer &Out,
5758 uint64_t &ErrorInfo,
5759 bool MatchingInlineAsm) {
5760 MCInst Inst;
5761 Inst.setLoc(IDLoc);
5762 unsigned Result = Match_Success;
5763 for (auto Variant : getMatchedVariants()) {
5764 uint64_t EI;
5765 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5766 Variant);
5767 // We order match statuses from least to most specific and keep the most
5768 // specific status as the result:
5769 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5770 if (R == Match_Success || R == Match_MissingFeature ||
5771 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5772 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5773 Result != Match_MissingFeature)) {
5774 Result = R;
5775 ErrorInfo = EI;
5776 }
5777 if (R == Match_Success)
5778 break;
5779 }
5780
5781 if (Result == Match_Success) {
5782 if (!validateInstruction(Inst, IDLoc, Operands)) {
5783 return true;
5784 }
5785 Out.emitInstruction(Inst, getSTI());
5786 return false;
5787 }
5788
5789 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5790 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5791 return true;
5792 }
5793
5794 switch (Result) {
5795 default: break;
5796 case Match_MissingFeature:
5797 // It has been verified that the specified instruction
5798 // mnemonic is valid. A match was found but it requires
5799 // features which are not supported on this GPU.
5800 return Error(IDLoc, "operands are not valid for this GPU or mode");
5801
5802 case Match_InvalidOperand: {
5803 SMLoc ErrorLoc = IDLoc;
5804 if (ErrorInfo != ~0ULL) {
5805 if (ErrorInfo >= Operands.size()) {
5806 return Error(IDLoc, "too few operands for instruction");
5807 }
5808 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5809 if (ErrorLoc == SMLoc())
5810 ErrorLoc = IDLoc;
5811
5812 if (isInvalidVOPDY(Operands, ErrorInfo))
5813 return Error(ErrorLoc, "invalid VOPDY instruction");
5814 }
5815 return Error(ErrorLoc, "invalid operand for instruction");
5816 }
5817
5818 case Match_MnemonicFail:
5819 llvm_unreachable("Invalid instructions should have been handled already");
5820 }
5821 llvm_unreachable("Implement any new match types added!");
5822}
5823
5824bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5825 int64_t Tmp = -1;
5826 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5827 return true;
5828 }
5829 if (getParser().parseAbsoluteExpression(Tmp)) {
5830 return true;
5831 }
5832 Ret = static_cast<uint32_t>(Tmp);
5833 return false;
5834}
5835
5836bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5837 if (!getSTI().getTargetTriple().isAMDGCN())
5838 return TokError("directive only supported for amdgcn architecture");
5839
5840 std::string TargetIDDirective;
5841 SMLoc TargetStart = getTok().getLoc();
5842 if (getParser().parseEscapedString(TargetIDDirective))
5843 return true;
5844
5845 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5846 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5847 return getParser().Error(TargetRange.Start,
5848 (Twine(".amdgcn_target directive's target id ") +
5849 Twine(TargetIDDirective) +
5850 Twine(" does not match the specified target id ") +
5851 Twine(getTargetStreamer().getTargetID()->toString())).str());
5852
5853 return false;
5854}
5855
5856bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5857 return Error(Range.Start, "value out of range", Range);
5858}
5859
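// Compute the granulated VGPR and SGPR block counts that are encoded in
// compute_pgm_rsrc1 of the kernel descriptor.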
5860bool AMDGPUAsmParser::calculateGPRBlocks(
5861 const FeatureBitset &Features, const MCExpr *VCCUsed,
5862 const MCExpr *FlatScrUsed, bool XNACKUsed,
5863 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5864 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5865 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5866 // TODO(scott.linder): These calculations are duplicated from
5867 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5868 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5869 MCContext &Ctx = getContext();
5870
5871 const MCExpr *NumSGPRs = NextFreeSGPR;
5872 int64_t EvaluatedSGPRs;
5873
5874 if (Version.Major >= 10)
5875 NumSGPRs = MCConstantExpr::create(0, Ctx);
5876 else {
5877 unsigned MaxAddressableNumSGPRs =
5878 IsaInfo::getAddressableNumSGPRs(&getSTI());
5879
5880 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5881 !Features.test(FeatureSGPRInitBug) &&
5882 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5883 return OutOfRangeError(SGPRRange);
5884
5885 const MCExpr *ExtraSGPRs =
5886 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5887 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5888
5889 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5890 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5891 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5892 return OutOfRangeError(SGPRRange);
5893
5894 if (Features.test(FeatureSGPRInitBug))
5895 NumSGPRs =
5896 MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5897 }
5898
5899 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5900 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
5901 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5902 unsigned Granule) -> const MCExpr * {
5903 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5904 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5905 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5906 const MCExpr *AlignToGPR =
5907 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5908 const MCExpr *DivGPR =
5909 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5910 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5911 return SubGPR;
5912 };
5913
5914 VGPRBlocks = GetNumGPRBlocks(
5915 NextFreeVGPR,
5916 IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
5917 SGPRBlocks =
5918 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
5919
5920 return false;
5921}
5922
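// Parse a .amdhsa_kernel ... .end_amdhsa_kernel block and emit the resulting
// kernel descriptor via the target streamer.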
5923bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5924 if (!getSTI().getTargetTriple().isAMDGCN())
5925 return TokError("directive only supported for amdgcn architecture");
5926
5927 if (!isHsaAbi(getSTI()))
5928 return TokError("directive only supported for amdhsa OS");
5929
5930 StringRef KernelName;
5931 if (getParser().parseIdentifier(KernelName))
5932 return true;
5933
5934 AMDGPU::MCKernelDescriptor KD =
5935 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5936 &getSTI(), getContext());
5937
5938 StringSet<> Seen;
5939
5940 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5941
5942 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
5943 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
5944
5945 SMRange VGPRRange;
5946 const MCExpr *NextFreeVGPR = ZeroExpr;
5947 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
5948 const MCExpr *NamedBarCnt = ZeroExpr;
5949 uint64_t SharedVGPRCount = 0;
5950 uint64_t PreloadLength = 0;
5951 uint64_t PreloadOffset = 0;
5952 SMRange SGPRRange;
5953 const MCExpr *NextFreeSGPR = ZeroExpr;
5954
5955 // Count the number of user SGPRs implied from the enabled feature bits.
5956 unsigned ImpliedUserSGPRCount = 0;
5957
5958 // Track if the asm explicitly contains the directive for the user SGPR
5959 // count.
5960 std::optional<unsigned> ExplicitUserSGPRCount;
5961 const MCExpr *ReserveVCC = OneExpr;
5962 const MCExpr *ReserveFlatScr = OneExpr;
5963 std::optional<bool> EnableWavefrontSize32;
5964
5965 while (true) {
5966 while (trySkipToken(AsmToken::EndOfStatement));
5967
5968 StringRef ID;
5969 SMRange IDRange = getTok().getLocRange();
5970 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5971 return true;
5972
5973 if (ID == ".end_amdhsa_kernel")
5974 break;
5975
5976 if (!Seen.insert(ID).second)
5977 return TokError(".amdhsa_ directives cannot be repeated");
5978
5979 SMLoc ValStart = getLoc();
5980 const MCExpr *ExprVal;
5981 if (getParser().parseExpression(ExprVal))
5982 return true;
5983 SMLoc ValEnd = getLoc();
5984 SMRange ValRange = SMRange(ValStart, ValEnd);
5985
5986 int64_t IVal = 0;
5987 uint64_t Val = IVal;
5988 bool EvaluatableExpr;
5989 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5990 if (IVal < 0)
5991 return OutOfRangeError(ValRange);
5992 Val = IVal;
5993 }
5994
5995#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5996 if (!isUInt<ENTRY##_WIDTH>(Val)) \
5997 return OutOfRangeError(RANGE); \
5998 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
5999 getContext());
6000
6001// Some fields use the parsed value immediately which requires the expression to
6002// be solvable.
6003#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6004 if (!(RESOLVED)) \
6005 return Error(IDRange.Start, "directive should have resolvable expression", \
6006 IDRange);
6007
6008 if (ID == ".amdhsa_group_segment_fixed_size") {
6009 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
6010 CHAR_BIT>(Val))
6011 return OutOfRangeError(ValRange);
6012 KD.group_segment_fixed_size = ExprVal;
6013 } else if (ID == ".amdhsa_private_segment_fixed_size") {
6014 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
6015 CHAR_BIT>(Val))
6016 return OutOfRangeError(ValRange);
6017 KD.private_segment_fixed_size = ExprVal;
6018 } else if (ID == ".amdhsa_kernarg_size") {
6019 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
6020 return OutOfRangeError(ValRange);
6021 KD.kernarg_size = ExprVal;
6022 } else if (ID == ".amdhsa_user_sgpr_count") {
6023 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6024 ExplicitUserSGPRCount = Val;
6025 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
6026 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6027 if (hasArchitectedFlatScratch())
6028 return Error(IDRange.Start,
6029 "directive is not supported with architected flat scratch",
6030 IDRange);
6031 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6032 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6033 ExprVal, ValRange);
6034 if (Val)
6035 ImpliedUserSGPRCount += 4;
6036 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
6037 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6038 if (!hasKernargPreload())
6039 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6040
6041 if (Val > getMaxNumUserSGPRs())
6042 return OutOfRangeError(ValRange);
6043 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
6044 ValRange);
6045 if (Val) {
6046 ImpliedUserSGPRCount += Val;
6047 PreloadLength = Val;
6048 }
6049 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
6050 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6051 if (!hasKernargPreload())
6052 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6053
6054 if (Val >= 1024)
6055 return OutOfRangeError(ValRange);
6056 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6057 ValRange);
6058 if (Val)
6059 PreloadOffset = Val;
6060 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6061 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6063 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6064 ValRange);
6065 if (Val)
6066 ImpliedUserSGPRCount += 2;
6067 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6068 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6070 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6071 ValRange);
6072 if (Val)
6073 ImpliedUserSGPRCount += 2;
6074 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6075 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6077 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6078 ExprVal, ValRange);
6079 if (Val)
6080 ImpliedUserSGPRCount += 2;
6081 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6082 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6084 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6085 ValRange);
6086 if (Val)
6087 ImpliedUserSGPRCount += 2;
6088 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6089 if (hasArchitectedFlatScratch())
6090 return Error(IDRange.Start,
6091 "directive is not supported with architected flat scratch",
6092 IDRange);
6093 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6094 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6095 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6096 ExprVal, ValRange);
6097 if (Val)
6098 ImpliedUserSGPRCount += 2;
6099 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6100 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6102 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6103 ExprVal, ValRange);
6104 if (Val)
6105 ImpliedUserSGPRCount += 1;
6106 } else if (ID == ".amdhsa_wavefront_size32") {
6107 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6108 if (IVersion.Major < 10)
6109 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6110 EnableWavefrontSize32 = Val;
6112 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6113 ValRange);
6114 } else if (ID == ".amdhsa_uses_dynamic_stack") {
6116 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6117 ValRange);
6118 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6119 if (hasArchitectedFlatScratch())
6120 return Error(IDRange.Start,
6121 "directive is not supported with architected flat scratch",
6122 IDRange);
6123 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6124 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6125 ValRange);
6126 } else if (ID == ".amdhsa_enable_private_segment") {
6127 if (!hasArchitectedFlatScratch())
6128 return Error(
6129 IDRange.Start,
6130 "directive is not supported without architected flat scratch",
6131 IDRange);
6132 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6133 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6134 ValRange);
6135 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6137 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6138 ValRange);
6139 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6141 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6142 ValRange);
6143 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6145 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6146 ValRange);
6147 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6149 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6150 ValRange);
6151 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6153 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6154 ValRange);
6155 } else if (ID == ".amdhsa_next_free_vgpr") {
6156 VGPRRange = ValRange;
6157 NextFreeVGPR = ExprVal;
6158 } else if (ID == ".amdhsa_next_free_sgpr") {
6159 SGPRRange = ValRange;
6160 NextFreeSGPR = ExprVal;
6161 } else if (ID == ".amdhsa_accum_offset") {
6162 if (!isGFX90A())
6163 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6164 AccumOffset = ExprVal;
6165 } else if (ID == ".amdhsa_named_barrier_count") {
6166 if (!isGFX1250())
6167 return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
6168 NamedBarCnt = ExprVal;
6169 } else if (ID == ".amdhsa_reserve_vcc") {
6170 if (EvaluatableExpr && !isUInt<1>(Val))
6171 return OutOfRangeError(ValRange);
6172 ReserveVCC = ExprVal;
6173 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6174 if (IVersion.Major < 7)
6175 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
6176 if (hasArchitectedFlatScratch())
6177 return Error(IDRange.Start,
6178 "directive is not supported with architected flat scratch",
6179 IDRange);
6180 if (EvaluatableExpr && !isUInt<1>(Val))
6181 return OutOfRangeError(ValRange);
6182 ReserveFlatScr = ExprVal;
6183 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6184 if (IVersion.Major < 8)
6185 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
6186 if (!isUInt<1>(Val))
6187 return OutOfRangeError(ValRange);
6188 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6189 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
6190 IDRange);
6191 } else if (ID == ".amdhsa_float_round_mode_32") {
6193 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6194 ValRange);
6195 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6197 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6198 ValRange);
6199 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6201 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6202 ValRange);
6203 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6205 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6206 ValRange);
6207 } else if (ID == ".amdhsa_dx10_clamp") {
6208 if (IVersion.Major >= 12)
6209 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6211 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6212 ValRange);
6213 } else if (ID == ".amdhsa_ieee_mode") {
6214 if (IVersion.Major >= 12)
6215 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6217 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6218 ValRange);
6219 } else if (ID == ".amdhsa_fp16_overflow") {
6220 if (IVersion.Major < 9)
6221 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
6223 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6224 ValRange);
6225 } else if (ID == ".amdhsa_tg_split") {
6226 if (!isGFX90A())
6227 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6228 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6229 ExprVal, ValRange);
6230 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6231 if (!supportsWGP(getSTI()))
6232 return Error(IDRange.Start,
6233 "directive unsupported on " + getSTI().getCPU(), IDRange);
6235 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6236 ValRange);
6237 } else if (ID == ".amdhsa_memory_ordered") {
6238 if (IVersion.Major < 10)
6239 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6241 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6242 ValRange);
6243 } else if (ID == ".amdhsa_forward_progress") {
6244 if (IVersion.Major < 10)
6245 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6247 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6248 ValRange);
6249 } else if (ID == ".amdhsa_shared_vgpr_count") {
6250 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6251 if (IVersion.Major < 10 || IVersion.Major >= 12)
6252 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
6253 IDRange);
6254 SharedVGPRCount = Val;
6256 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6257 ValRange);
6258 } else if (ID == ".amdhsa_inst_pref_size") {
6259 if (IVersion.Major < 11)
6260 return Error(IDRange.Start, "directive requires gfx11+", IDRange);
6261 if (IVersion.Major == 11) {
6263 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6264 ValRange);
6265 } else {
6267 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6268 ValRange);
6269 }
6270 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6273 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6274 ExprVal, ValRange);
6275 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6277 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6278 ExprVal, ValRange);
6279 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6282 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6283 ExprVal, ValRange);
6284 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6286 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6287 ExprVal, ValRange);
6288 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6290 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6291 ExprVal, ValRange);
6292 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6294 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6295 ExprVal, ValRange);
6296 } else if (ID == ".amdhsa_exception_int_div_zero") {
6298 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6299 ExprVal, ValRange);
6300 } else if (ID == ".amdhsa_round_robin_scheduling") {
6301 if (IVersion.Major < 12)
6302 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
6304 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6305 ValRange);
6306 } else {
6307 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
6308 }
6309
6310#undef PARSE_BITS_ENTRY
6311 }
6312
6313 if (!Seen.contains(".amdhsa_next_free_vgpr"))
6314 return TokError(".amdhsa_next_free_vgpr directive is required");
6315
6316 if (!Seen.contains(".amdhsa_next_free_sgpr"))
6317 return TokError(".amdhsa_next_free_sgpr directive is required");
6318
6319 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6320
6321 // Consider the case where the total number of user SGPRs, including the
6322 // trailing allocated preload SGPRs, is greater than the number of
6323 // explicitly referenced SGPRs.
6324 if (PreloadLength) {
6325 MCContext &Ctx = getContext();
6326 NextFreeSGPR = AMDGPUMCExpr::createMax(
6327 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6328 }
6329
6330 const MCExpr *VGPRBlocks;
6331 const MCExpr *SGPRBlocks;
6332 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6333 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6334 EnableWavefrontSize32, NextFreeVGPR,
6335 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6336 SGPRBlocks))
6337 return true;
6338
6339 int64_t EvaluatedVGPRBlocks;
6340 bool VGPRBlocksEvaluatable =
6341 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6342 if (VGPRBlocksEvaluatable &&
6343 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
6344 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6345 return OutOfRangeError(VGPRRange);
6346 }
6347 AMDGPU::MCKernelDescriptor::bits_set(
6348 KD.compute_pgm_rsrc1, VGPRBlocks,
6349 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6350 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6351
6352 int64_t EvaluatedSGPRBlocks;
6353 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6354 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
6355 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6356 return OutOfRangeError(SGPRRange);
6357 AMDGPU::MCKernelDescriptor::bits_set(
6358 KD.compute_pgm_rsrc1, SGPRBlocks,
6359 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6360 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6361
6362 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6363 return TokError("amdgpu_user_sgpr_count smaller than implied by "
6364 "enabled user SGPRs");
6365
6366 if (isGFX1250()) {
6367 if (!isUInt<COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
6368 return TokError("too many user SGPRs enabled");
6369 AMDGPU::MCKernelDescriptor::bits_set(
6370 KD.compute_pgm_rsrc2,
6371 MCConstantExpr::create(UserSGPRCount, getContext()),
6372 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6373 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
6374 } else {
6375 if (!isUInt<COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_WIDTH>(
6376 UserSGPRCount))
6377 return TokError("too many user SGPRs enabled");
6378 AMDGPU::MCKernelDescriptor::bits_set(
6379 KD.compute_pgm_rsrc2,
6380 MCConstantExpr::create(UserSGPRCount, getContext()),
6381 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6382 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
6383 }
6384
6385 int64_t IVal = 0;
6386 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6387 return TokError("Kernarg size should be resolvable");
6388 uint64_t kernarg_size = IVal;
6389 if (PreloadLength && kernarg_size &&
6390 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6391 return TokError("Kernarg preload length + offset is larger than the "
6392 "kernarg segment size");
6393
6394 if (isGFX90A()) {
6395 if (!Seen.contains(".amdhsa_accum_offset"))
6396 return TokError(".amdhsa_accum_offset directive is required");
6397 int64_t EvaluatedAccum;
6398 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6399 uint64_t UEvaluatedAccum = EvaluatedAccum;
6400 if (AccumEvaluatable &&
6401 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6402 return TokError("accum_offset should be in range [4..256] in "
6403 "increments of 4");
6404
6405 int64_t EvaluatedNumVGPR;
6406 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6407 AccumEvaluatable &&
6408 UEvaluatedAccum >
6409 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6410 return TokError("accum_offset exceeds total VGPR allocation");
6411 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6412 MCBinaryExpr::createDiv(
6413 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
6414 MCConstantExpr::create(1, getContext()), getContext());
6415 AMDGPU::MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum,
6416 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6417 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6418 getContext());
6419 }
6420
6421 if (isGFX1250())
6422 AMDGPU::MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, NamedBarCnt,
6423 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6424 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6425 getContext());
6426
6427 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6428 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
6429 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6430 return TokError("shared_vgpr_count directive not valid on "
6431 "wavefront size 32");
6432 }
6433
6434 if (VGPRBlocksEvaluatable &&
6435 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6436 63)) {
6437 return TokError("shared_vgpr_count*2 + "
6438 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6439 "exceed 63\n");
6440 }
6441 }
6442
6443 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6444 NextFreeVGPR, NextFreeSGPR,
6445 ReserveVCC, ReserveFlatScr);
6446 return false;
6447}
6448
6449bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6450 uint32_t Version;
6451 if (ParseAsAbsoluteExpression(Version))
6452 return true;
6453
6454 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6455 return false;
6456}
6457
6458bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6459 AMDGPUMCKernelCodeT &C) {
6460 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6461 // assembly for backwards compatibility.
6462 if (ID == "max_scratch_backing_memory_byte_size") {
6463 Parser.eatToEndOfStatement();
6464 return false;
6465 }
6466
6467 SmallString<40> ErrStr;
6468 raw_svector_ostream Err(ErrStr);
6469 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6470 return TokError(Err.str());
6471 }
6472 Lex();
6473
6474 if (ID == "enable_wavefront_size32") {
6475 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6476 if (!isGFX10Plus())
6477 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6478 if (!isWave32())
6479 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6480 } else {
6481 if (!isWave64())
6482 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6483 }
6484 }
6485
6486 if (ID == "wavefront_size") {
6487 if (C.wavefront_size == 5) {
6488 if (!isGFX10Plus())
6489 return TokError("wavefront_size=5 is only allowed on GFX10+");
6490 if (!isWave32())
6491 return TokError("wavefront_size=5 requires +WavefrontSize32");
6492 } else if (C.wavefront_size == 6) {
6493 if (!isWave64())
6494 return TokError("wavefront_size=6 requires +WavefrontSize64");
6495 }
6496 }
6497
6498 return false;
6499}
6500
6501bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6502 AMDGPUMCKernelCodeT KernelCode;
6503 KernelCode.initDefault(&getSTI(), getContext());
6504
6505 while (true) {
6506 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6507 // will set the current token to EndOfStatement.
6508 while(trySkipToken(AsmToken::EndOfStatement));
6509
6510 StringRef ID;
6511 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6512 return true;
6513
6514 if (ID == ".end_amd_kernel_code_t")
6515 break;
6516
6517 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6518 return true;
6519 }
6520
6521 KernelCode.validate(&getSTI(), getContext());
6522 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6523
6524 return false;
6525}
6526
6527bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6528 StringRef KernelName;
6529 if (!parseId(KernelName, "expected symbol name"))
6530 return true;
6531
6532 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6533 ELF::STT_AMDGPU_HSA_KERNEL);
6534
6535 KernelScope.initialize(getContext());
6536 return false;
6537}
6538
6539bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6540 if (!getSTI().getTargetTriple().isAMDGCN()) {
6541 return Error(getLoc(),
6542 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6543 "architectures");
6544 }
6545
6546 auto TargetIDDirective = getLexer().getTok().getStringContents();
6547 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6548 return Error(getParser().getTok().getLoc(), "target id must match options");
6549
6550 getTargetStreamer().EmitISAVersion();
6551 Lex();
6552
6553 return false;
6554}
6555
6556bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6557 assert(isHsaAbi(getSTI()));
6558
6559 std::string HSAMetadataString;
6560 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6561 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6562 return true;
6563
6564 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6565 return Error(getLoc(), "invalid HSA metadata");
6566
6567 return false;
6568}
6569
6570/// Common code to parse out a block of text (typically YAML) between start and
6571/// end directives.
6572bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6573 const char *AssemblerDirectiveEnd,
6574 std::string &CollectString) {
6575
6576 raw_string_ostream CollectStream(CollectString);
6577
6578 getLexer().setSkipSpace(false);
6579
6580 bool FoundEnd = false;
6581 while (!isToken(AsmToken::Eof)) {
6582 while (isToken(AsmToken::Space)) {
6583 CollectStream << getTokenStr();
6584 Lex();
6585 }
6586
6587 if (trySkipId(AssemblerDirectiveEnd)) {
6588 FoundEnd = true;
6589 break;
6590 }
6591
6592 CollectStream << Parser.parseStringToEndOfStatement()
6593 << getContext().getAsmInfo()->getSeparatorString();
6594
6595 Parser.eatToEndOfStatement();
6596 }
6597
6598 getLexer().setSkipSpace(true);
6599
6600 if (isToken(AsmToken::Eof) && !FoundEnd) {
6601 return TokError(Twine("expected directive ") +
6602 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6603 }
6604
6605 return false;
6606}
6607
6608/// Parse the assembler directive for new MsgPack-format PAL metadata.
6609bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6610 std::string String;
6611 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6612 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6613 return true;
6614
6615 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6616 if (!PALMetadata->setFromString(String))
6617 return Error(getLoc(), "invalid PAL metadata");
6618 return false;
6619}
6620
6621/// Parse the assembler directive for old linear-format PAL metadata.
6622bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6623 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6624 return Error(getLoc(),
6625 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6626 "not available on non-amdpal OSes")).str());
6627 }
6628
6629 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6630 PALMetadata->setLegacy();
6631 for (;;) {
6632 uint32_t Key, Value;
6633 if (ParseAsAbsoluteExpression(Key)) {
6634 return TokError(Twine("invalid value in ") +
6635 Twine(PALMD::AssemblerDirective));
6636 }
6637 if (!trySkipToken(AsmToken::Comma)) {
6638 return TokError(Twine("expected an even number of values in ") +
6639 Twine(PALMD::AssemblerDirective));
6640 }
6641 if (ParseAsAbsoluteExpression(Value)) {
6642 return TokError(Twine("invalid value in ") +
6643 Twine(PALMD::AssemblerDirective));
6644 }
6645 PALMetadata->setRegister(Key, Value);
6646 if (!trySkipToken(AsmToken::Comma))
6647 break;
6648 }
6649 return false;
6650}
6651
6652/// ParseDirectiveAMDGPULDS
6653/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6654bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6655 if (getParser().checkForValidSection())
6656 return true;
6657
6658 StringRef Name;
6659 SMLoc NameLoc = getLoc();
6660 if (getParser().parseIdentifier(Name))
6661 return TokError("expected identifier in directive");
6662
6663 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6664 if (getParser().parseComma())
6665 return true;
6666
6667 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6668
6669 int64_t Size;
6670 SMLoc SizeLoc = getLoc();
6671 if (getParser().parseAbsoluteExpression(Size))
6672 return true;
6673 if (Size < 0)
6674 return Error(SizeLoc, "size must be non-negative");
6675 if (Size > LocalMemorySize)
6676 return Error(SizeLoc, "size is too large");
6677
6678 int64_t Alignment = 4;
6679 if (trySkipToken(AsmToken::Comma)) {
6680 SMLoc AlignLoc = getLoc();
6681 if (getParser().parseAbsoluteExpression(Alignment))
6682 return true;
6683 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6684 return Error(AlignLoc, "alignment must be a power of two");
6685
6686 // Alignment larger than the size of LDS is possible in theory, as long
6687 // as the linker manages to place the symbol at address 0, but we do want
6688 // to make sure the alignment fits nicely into a 32-bit integer.
6689 if (Alignment >= 1u << 31)
6690 return Error(AlignLoc, "alignment is too large");
6691 }
6692
6693 if (parseEOL())
6694 return true;
6695
6696 Symbol->redefineIfPossible();
6697 if (!Symbol->isUndefined())
6698 return Error(NameLoc, "invalid symbol redefinition");
6699
6700 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6701 return false;
6702}
6703
6704bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6705 StringRef IDVal = DirectiveID.getString();
6706
6707 if (isHsaAbi(getSTI())) {
6708 if (IDVal == ".amdhsa_kernel")
6709 return ParseDirectiveAMDHSAKernel();
6710
6711 if (IDVal == ".amdhsa_code_object_version")
6712 return ParseDirectiveAMDHSACodeObjectVersion();
6713
6714 // TODO: Restructure/combine with PAL metadata directive.
6715 if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6716 return ParseDirectiveHSAMetadata();
6717 } else {
6718 if (IDVal == ".amd_kernel_code_t")
6719 return ParseDirectiveAMDKernelCodeT();
6720
6721 if (IDVal == ".amdgpu_hsa_kernel")
6722 return ParseDirectiveAMDGPUHsaKernel();
6723
6724 if (IDVal == ".amd_amdgpu_isa")
6725 return ParseDirectiveISAVersion();
6726
6727 if (IDVal == HSAMD::AssemblerDirectiveBegin) {
6728 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6729 Twine(" directive is "
6730 "not available on non-amdhsa OSes"))
6731 .str());
6732 }
6733 }
6734
6735 if (IDVal == ".amdgcn_target")
6736 return ParseDirectiveAMDGCNTarget();
6737
6738 if (IDVal == ".amdgpu_lds")
6739 return ParseDirectiveAMDGPULDS();
6740
6741 if (IDVal == PALMD::AssemblerDirectiveBegin)
6742 return ParseDirectivePALMetadataBegin();
6743
6744 if (IDVal == PALMD::AssemblerDirective)
6745 return ParseDirectivePALMetadata();
6746
6747 return true;
6748}
6749
6750bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6751 MCRegister Reg) {
6752 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
6753 return isGFX9Plus();
6754
6755 // GFX10+ has 2 more SGPRs 104 and 105.
6756 if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
6757 return hasSGPR104_SGPR105();
6758
6759 switch (Reg.id()) {
6760 case SRC_SHARED_BASE_LO:
6761 case SRC_SHARED_BASE:
6762 case SRC_SHARED_LIMIT_LO:
6763 case SRC_SHARED_LIMIT:
6764 case SRC_PRIVATE_BASE_LO:
6765 case SRC_PRIVATE_BASE:
6766 case SRC_PRIVATE_LIMIT_LO:
6767 case SRC_PRIVATE_LIMIT:
6768 return isGFX9Plus();
6769 case SRC_FLAT_SCRATCH_BASE_LO:
6770 case SRC_FLAT_SCRATCH_BASE_HI:
6771 return hasGloballyAddressableScratch();
6772 case SRC_POPS_EXITING_WAVE_ID:
6773 return isGFX9Plus() && !isGFX11Plus();
6774 case TBA:
6775 case TBA_LO:
6776 case TBA_HI:
6777 case TMA:
6778 case TMA_LO:
6779 case TMA_HI:
6780 return !isGFX9Plus();
6781 case XNACK_MASK:
6782 case XNACK_MASK_LO:
6783 case XNACK_MASK_HI:
6784 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6785 case SGPR_NULL:
6786 return isGFX10Plus();
6787 case SRC_EXECZ:
6788 case SRC_VCCZ:
6789 return !isGFX11Plus();
6790 default:
6791 break;
6792 }
6793
6794 if (isCI())
6795 return true;
6796
6797 if (isSI() || isGFX10Plus()) {
6798 // No flat_scr on SI.
6799 // On GFX10Plus flat scratch is not a valid register operand and can only be
6800 // accessed with s_setreg/s_getreg.
6801 switch (Reg.id()) {
6802 case FLAT_SCR:
6803 case FLAT_SCR_LO:
6804 case FLAT_SCR_HI:
6805 return false;
6806 default:
6807 return true;
6808 }
6809 }
6810
6811 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6812 // SI/CI have.
6813 if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
6814 return hasSGPR102_SGPR103();
6815
6816 return true;
6817}
6818
6819ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6820 StringRef Mnemonic,
6821 OperandMode Mode) {
6822 ParseStatus Res = parseVOPD(Operands);
6823 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6824 return Res;
6825
6826 // Try to parse with a custom parser
6827 Res = MatchOperandParserImpl(Operands, Mnemonic);
6828
6829 // If we successfully parsed the operand or if there was an error parsing,
6830 // we are done.
6831 //
6832 // If we are parsing after we reach EndOfStatement then this means we
6833 // are appending default values to the Operands list. This is only done
6834 // by custom parser, so we shouldn't continue on to the generic parsing.
6835 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6836 return Res;
6837
6838 SMLoc RBraceLoc;
6839 SMLoc LBraceLoc = getLoc();
6840 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6841 unsigned Prefix = Operands.size();
6842
6843 for (;;) {
6844 auto Loc = getLoc();
6845 Res = parseReg(Operands);
6846 if (Res.isNoMatch())
6847 Error(Loc, "expected a register");
6848 if (!Res.isSuccess())
6849 return ParseStatus::Failure;
6850
6851 RBraceLoc = getLoc();
6852 if (trySkipToken(AsmToken::RBrac))
6853 break;
6854
6855 if (!skipToken(AsmToken::Comma,
6856 "expected a comma or a closing square bracket"))
6857 return ParseStatus::Failure;
6858 }
6859
6860 if (Operands.size() - Prefix > 1) {
6861 Operands.insert(Operands.begin() + Prefix,
6862 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6863 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6864 }
6865
6866 return ParseStatus::Success;
6867 }
6868
6869 return parseRegOrImm(Operands);
6870}
6871
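// Strip a forced-encoding suffix (_e32, _e64, _dpp, _sdwa, _e64_dpp) from the
// mnemonic and remember which encoding was requested.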
6872StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6873 // Clear any forced encodings from the previous instruction.
6874 setForcedEncodingSize(0);
6875 setForcedDPP(false);
6876 setForcedSDWA(false);
6877
6878 if (Name.consume_back("_e64_dpp")) {
6879 setForcedDPP(true);
6880 setForcedEncodingSize(64);
6881 return Name;
6882 }
6883 if (Name.consume_back("_e64")) {
6884 setForcedEncodingSize(64);
6885 return Name;
6886 }
6887 if (Name.consume_back("_e32")) {
6888 setForcedEncodingSize(32);
6889 return Name;
6890 }
6891 if (Name.consume_back("_dpp")) {
6892 setForcedDPP(true);
6893 return Name;
6894 }
6895 if (Name.consume_back("_sdwa")) {
6896 setForcedSDWA(true);
6897 return Name;
6898 }
6899 return Name;
6900}
6901
6902static void applyMnemonicAliases(StringRef &Mnemonic,
6903 const FeatureBitset &Features,
6904 unsigned VariantID);
6905
6906bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
6907 StringRef Name, SMLoc NameLoc,
6908 OperandVector &Operands) {
6909 // Add the instruction mnemonic
6910 Name = parseMnemonicSuffix(Name);
6911
6912 // If the target architecture uses MnemonicAlias, call it here to parse
6913 // operands correctly.
6914 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6915
6916 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6917
6918 bool IsMIMG = Name.starts_with("image_");
6919
6920 while (!trySkipToken(AsmToken::EndOfStatement)) {
6921 OperandMode Mode = OperandMode_Default;
6922 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6923 Mode = OperandMode_NSA;
6924 ParseStatus Res = parseOperand(Operands, Name, Mode);
6925
6926 if (!Res.isSuccess()) {
6927 checkUnsupportedInstruction(Name, NameLoc);
6928 if (!Parser.hasPendingError()) {
6929 // FIXME: use real operand location rather than the current location.
6930 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6931 : "not a valid operand.";
6932 Error(getLoc(), Msg);
6933 }
6934 while (!trySkipToken(AsmToken::EndOfStatement)) {
6935 lex();
6936 }
6937 return true;
6938 }
6939
6940 // Eat the comma or space if there is one.
6941 trySkipToken(AsmToken::Comma);
6942 }
6943
6944 return false;
6945}
6946
6947//===----------------------------------------------------------------------===//
6948// Utility functions
6949//===----------------------------------------------------------------------===//
6950
6951ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6952 OperandVector &Operands) {
6953 SMLoc S = getLoc();
6954 if (!trySkipId(Name))
6955 return ParseStatus::NoMatch;
6956
6957 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6958 return ParseStatus::Success;
6959}
6960
6961ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6962 int64_t &IntVal) {
6963
6964 if (!trySkipId(Prefix, AsmToken::Colon))
6965 return ParseStatus::NoMatch;
6966
6967 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6968}
6969
6970ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6971 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6972 std::function<bool(int64_t &)> ConvertResult) {
6973 SMLoc S = getLoc();
6974 int64_t Value = 0;
6975
6976 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6977 if (!Res.isSuccess())
6978 return Res;
6979
6980 if (ConvertResult && !ConvertResult(Value)) {
6981 Error(S, "invalid " + StringRef(Prefix) + " value.");
6982 }
6983
6984 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6985 return ParseStatus::Success;
6986}
6987
6988ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6989 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6990 bool (*ConvertResult)(int64_t &)) {
6991 SMLoc S = getLoc();
6992 if (!trySkipId(Prefix, AsmToken::Colon))
6993 return ParseStatus::NoMatch;
6994
6995 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6996 return ParseStatus::Failure;
6997
6998 unsigned Val = 0;
6999 const unsigned MaxSize = 4;
7000
7001 // FIXME: How to verify the number of elements matches the number of src
7002 // operands?
7003 for (int I = 0; ; ++I) {
7004 int64_t Op;
7005 SMLoc Loc = getLoc();
7006 if (!parseExpr(Op))
7007 return ParseStatus::Failure;
7008
7009 if (Op != 0 && Op != 1)
7010 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
7011
7012 Val |= (Op << I);
7013
7014 if (trySkipToken(AsmToken::RBrac))
7015 break;
7016
7017 if (I + 1 == MaxSize)
7018 return Error(getLoc(), "expected a closing square bracket");
7019
7020 if (!skipToken(AsmToken::Comma, "expected a comma"))
7021 return ParseStatus::Failure;
7022 }
7023
7024 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
7025 return ParseStatus::Success;
7026}
7027
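// Parse a boolean modifier spelled either as 'name' (enabled) or 'noname'
// (disabled).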
7028ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7029 OperandVector &Operands,
7030 AMDGPUOperand::ImmTy ImmTy) {
7031 int64_t Bit;
7032 SMLoc S = getLoc();
7033
7034 if (trySkipId(Name)) {
7035 Bit = 1;
7036 } else if (trySkipId("no", Name)) {
7037 Bit = 0;
7038 } else {
7039 return ParseStatus::NoMatch;
7040 }
7041
7042 if (Name == "r128" && !hasMIMG_R128())
7043 return Error(S, "r128 modifier is not supported on this GPU");
7044 if (Name == "a16" && !hasA16())
7045 return Error(S, "a16 modifier is not supported on this GPU");
7046
7047 if (Bit == 0 && Name == "gds") {
7048 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7049 if (Mnemo.starts_with("ds_gws"))
7050 return Error(S, "nogds is not allowed");
7051 }
7052
7053 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7054 ImmTy = AMDGPUOperand::ImmTyR128A16;
7055
7056 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
7057 return ParseStatus::Success;
7058}
7059
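// Map a cache policy token to its CPol bit; a leading "no" marks the bit as
// explicitly disabled.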
7060unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7061 bool &Disabling) const {
7062 Disabling = Id.consume_front("no");
7063
7064 if (isGFX940() && !Mnemo.starts_with("s_")) {
7065 return StringSwitch<unsigned>(Id)
7066 .Case("nt", AMDGPU::CPol::NT)
7067 .Case("sc0", AMDGPU::CPol::SC0)
7068 .Case("sc1", AMDGPU::CPol::SC1)
7069 .Default(0);
7070 }
7071
7072 return StringSwitch<unsigned>(Id)
7073 .Case("dlc", AMDGPU::CPol::DLC)
7074 .Case("glc", AMDGPU::CPol::GLC)
7075 .Case("scc", AMDGPU::CPol::SCC)
7076 .Case("slc", AMDGPU::CPol::SLC)
7077 .Default(0);
7078}
7079
7080ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
7081 if (isGFX12Plus()) {
7082 SMLoc StringLoc = getLoc();
7083
7084 int64_t CPolVal = 0;
7085 ParseStatus ResTH = ParseStatus::NoMatch;
7086 ParseStatus ResScope = ParseStatus::NoMatch;
7087 ParseStatus ResNV = ParseStatus::NoMatch;
7088 ParseStatus ResScal = ParseStatus::NoMatch;
7089
7090 for (;;) {
7091 if (ResTH.isNoMatch()) {
7092 int64_t TH;
7093 ResTH = parseTH(Operands, TH);
7094 if (ResTH.isFailure())
7095 return ResTH;
7096 if (ResTH.isSuccess()) {
7097 CPolVal |= TH;
7098 continue;
7099 }
7100 }
7101
7102 if (ResScope.isNoMatch()) {
7103 int64_t Scope;
7104 ResScope = parseScope(Operands, Scope);
7105 if (ResScope.isFailure())
7106 return ResScope;
7107 if (ResScope.isSuccess()) {
7108 CPolVal |= Scope;
7109 continue;
7110 }
7111 }
7112
7113 // The NV bit exists on GFX12+, but only takes effect starting from
7114 // GFX1250. Allow parsing it on all GFX12 and reject it during validation
7115 // for better diagnostics.
7116 if (ResNV.isNoMatch()) {
7117 if (trySkipId("nv")) {
7118 ResNV = ParseStatus::Success;
7119 CPolVal |= CPol::NV;
7120 continue;
7121 } else if (trySkipId("no", "nv")) {
7122 ResNV = ParseStatus::Success;
7123 continue;
7124 }
7125 }
7126
7127 if (ResScal.isNoMatch()) {
7128 if (trySkipId("scale_offset")) {
7129 ResScal = ParseStatus::Success;
7130 CPolVal |= CPol::SCAL;
7131 continue;
7132 } else if (trySkipId("no", "scale_offset")) {
7133 ResScal = ParseStatus::Success;
7134 continue;
7135 }
7136 }
7137
7138 break;
7139 }
7140
7141 if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
7142 ResScal.isNoMatch())
7143 return ParseStatus::NoMatch;
7144
7145 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
7146 AMDGPUOperand::ImmTyCPol));
7147 return ParseStatus::Success;
7148 }
7149
7150 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7151 SMLoc OpLoc = getLoc();
7152 unsigned Enabled = 0, Seen = 0;
7153 for (;;) {
7154 SMLoc S = getLoc();
7155 bool Disabling;
7156 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7157 if (!CPol)
7158 break;
7159
7160 lex();
7161
7162 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
7163 return Error(S, "dlc modifier is not supported on this GPU");
7164
7165 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
7166 return Error(S, "scc modifier is not supported on this GPU");
7167
7168 if (Seen & CPol)
7169 return Error(S, "duplicate cache policy modifier");
7170
7171 if (!Disabling)
7172 Enabled |= CPol;
7173
7174 Seen |= CPol;
7175 }
7176
7177 if (!Seen)
7178 return ParseStatus::NoMatch;
7179
7180 Operands.push_back(
7181 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7182 return ParseStatus::Success;
7183}
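// For illustration, a sketch of the cache-policy spellings handled above
// (instruction mnemonics are examples only):
//   buffer_load_dword v0, off, s[0:3], 0 glc slc              ; pre-GFX12 keywords
//   global_load_b32 v0, v[2:3], off th:TH_NT scope:SCOPE_SYS  ; GFX12+ form
// Each keyword may be negated with a "no" prefix, and duplicates are rejected.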
7184
7185ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7186 int64_t &Scope) {
7187 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7188 CPol::SCOPE_DEV, CPol::SCOPE_SYS};
7189
7190 ParseStatus Res = parseStringOrIntWithPrefix(
7191 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7192 Scope);
7193
7194 if (Res.isSuccess())
7195 Scope = Scopes[Scope];
7196
7197 return Res;
7198}
7199
7200ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
7201 TH = AMDGPU::CPol::TH_RT; // default
7202
7203 StringRef Value;
7204 SMLoc StringLoc;
7205 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
7206 if (!Res.isSuccess())
7207 return Res;
7208
7209 if (Value == "TH_DEFAULT")
7210 TH = AMDGPU::CPol::TH_RT;
7211 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
7212 Value == "TH_LOAD_NT_WB") {
7213 return Error(StringLoc, "invalid th value");
7214 } else if (Value.consume_front("TH_ATOMIC_")) {
7215 TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
7216 } else if (Value.consume_front("TH_LOAD_")) {
7217 TH = AMDGPU::CPol::TH_TYPE_LOAD;
7218 } else if (Value.consume_front("TH_STORE_")) {
7219 TH = AMDGPU::CPol::TH_TYPE_STORE;
7220 } else {
7221 return Error(StringLoc, "invalid th value");
7222 }
7223
7224 if (Value == "BYPASS")
7225 TH |= AMDGPU::CPol::TH_REAL_BYPASS;
7226
7227 if (TH != 0) {
7228 if (TH == AMDGPU::CPol::TH_TYPE_ATOMIC)
7229 TH |= StringSwitch<int64_t>(Value)
7230 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7231 .Case("RT", AMDGPU::CPol::TH_RT)
7232 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7233 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
7234 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
7235 AMDGPU::CPol::TH_ATOMIC_RETURN)
7236 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
7237 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
7238 AMDGPU::CPol::TH_ATOMIC_NT)
7239 .Default(0xffffffff);
7240 else
7241 TH |= StringSwitch<int64_t>(Value)
7242 .Case("RT", AMDGPU::CPol::TH_RT)
7243 .Case("NT", AMDGPU::CPol::TH_NT)
7244 .Case("HT", AMDGPU::CPol::TH_HT)
7245 .Case("LU", AMDGPU::CPol::TH_LU)
7246 .Case("WB", AMDGPU::CPol::TH_WB)
7247 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
7248 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
7249 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
7250 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
7251 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
7252 .Default(0xffffffff);
7253 }
7254
7255 if (TH == 0xffffffff)
7256 return Error(StringLoc, "invalid th value");
7257
7258 return ParseStatus::Success;
7259}
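// For illustration: the th: value selects both a type and a hint. The
// TH_ATOMIC_/TH_LOAD_/TH_STORE_ prefix picks which table the remaining
// suffix is looked up in, so "th:TH_LOAD_NT" yields the non-temporal load
// hint while "th:TH_ATOMIC_RETURN" combines the atomic type with the RETURN
// bit.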
7260
7261static void
7262addOptionalImmOperand(MCInst& Inst, const OperandVector& Operands,
7263 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7264 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7265 std::optional<unsigned> InsertAt = std::nullopt) {
7266 auto i = OptionalIdx.find(ImmT);
7267 if (i != OptionalIdx.end()) {
7268 unsigned Idx = i->second;
7269 const AMDGPUOperand &Op =
7270 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7271 if (InsertAt)
7272 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
7273 else
7274 Op.addImmOperands(Inst, 1);
7275 } else {
7276 if (InsertAt.has_value())
7277 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
7278 else
7279 Inst.addOperand(MCOperand::createImm(Default));
7280 }
7281}
7282
7283ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7284 StringRef &Value,
7285 SMLoc &StringLoc) {
7286 if (!trySkipId(Prefix, AsmToken::Colon))
7287 return ParseStatus::NoMatch;
7288
7289 StringLoc = getLoc();
7290 return parseId(Value, "expected an identifier") ? ParseStatus::Success
7291 : ParseStatus::Failure;
7292}
7293
7294ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7295 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7296 int64_t &IntVal) {
7297 if (!trySkipId(Name, AsmToken::Colon))
7298 return ParseStatus::NoMatch;
7299
7300 SMLoc StringLoc = getLoc();
7301
7302 StringRef Value;
7303 if (isToken(AsmToken::Identifier)) {
7304 Value = getTokenStr();
7305 lex();
7306
7307 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7308 if (Value == Ids[IntVal])
7309 break;
7310 } else if (!parseExpr(IntVal))
7311 return ParseStatus::Failure;
7312
7313 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7314 return Error(StringLoc, "invalid " + Twine(Name) + " value");
7315
7316 return ParseStatus::Success;
7317}
7318
7319ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7320 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7321 AMDGPUOperand::ImmTy Type) {
7322 SMLoc S = getLoc();
7323 int64_t IntVal;
7324
7325 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7326 if (Res.isSuccess())
7327 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7328
7329 return Res;
7330}
7331
7332//===----------------------------------------------------------------------===//
7333// MTBUF format
7334//===----------------------------------------------------------------------===//
7335
7336bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7337 int64_t MaxVal,
7338 int64_t &Fmt) {
7339 int64_t Val;
7340 SMLoc Loc = getLoc();
7341
7342 auto Res = parseIntWithPrefix(Pref, Val);
7343 if (Res.isFailure())
7344 return false;
7345 if (Res.isNoMatch())
7346 return true;
7347
7348 if (Val < 0 || Val > MaxVal) {
7349 Error(Loc, Twine("out of range ", StringRef(Pref)));
7350 return false;
7351 }
7352
7353 Fmt = Val;
7354 return true;
7355}
7356
7357ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7358 AMDGPUOperand::ImmTy ImmTy) {
7359 const char *Pref = "index_key";
7360 int64_t ImmVal = 0;
7361 SMLoc Loc = getLoc();
7362 auto Res = parseIntWithPrefix(Pref, ImmVal);
7363 if (!Res.isSuccess())
7364 return Res;
7365
7366 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7367 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7368 (ImmVal < 0 || ImmVal > 1))
7369 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7370
7371 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7372 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7373
7374 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7375 return ParseStatus::Success;
7376}
7377
7378ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
7379 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7380}
7381
7382ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
7383 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7384}
7385
7386ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
7387 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7388}
7389
7390ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
7391 StringRef Name,
7392 AMDGPUOperand::ImmTy Type) {
7393 return parseStringOrIntWithPrefix(Operands, Name,
7394 {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
7395 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
7396 "MATRIX_FMT_FP4"},
7397 Type);
7398}
7399
7400ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
7401 return tryParseMatrixFMT(Operands, "matrix_a_fmt",
7402 AMDGPUOperand::ImmTyMatrixAFMT);
7403}
7404
7405ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
7406 return tryParseMatrixFMT(Operands, "matrix_b_fmt",
7407 AMDGPUOperand::ImmTyMatrixBFMT);
7408}
7409
7410ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
7411 StringRef Name,
7412 AMDGPUOperand::ImmTy Type) {
7413 return parseStringOrIntWithPrefix(
7414 Operands, Name, {"MATRIX_SCALE_ROW0", "MATRIX_SCALE_ROW1"}, Type);
7415}
7416
7417ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
7418 return tryParseMatrixScale(Operands, "matrix_a_scale",
7419 AMDGPUOperand::ImmTyMatrixAScale);
7420}
7421
7422ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
7423 return tryParseMatrixScale(Operands, "matrix_b_scale",
7424 AMDGPUOperand::ImmTyMatrixBScale);
7425}
7426
7427ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
7428 StringRef Name,
7429 AMDGPUOperand::ImmTy Type) {
7430 return parseStringOrIntWithPrefix(
7431 Operands, Name,
7432 {"MATRIX_SCALE_FMT_E8", "MATRIX_SCALE_FMT_E5M3", "MATRIX_SCALE_FMT_E4M3"},
7433 Type);
7434}
7435
7436ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
7437 return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
7438 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7439}
7440
7441ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
7442 return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
7443 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7444}
7445
7446// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7447// values to live in a joint format operand in the MCInst encoding.
7448ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
7449 using namespace llvm::AMDGPU::MTBUFFormat;
7450
7451 int64_t Dfmt = DFMT_UNDEF;
7452 int64_t Nfmt = NFMT_UNDEF;
7453
7454 // dfmt and nfmt can appear in either order, and each is optional.
7455 for (int I = 0; I < 2; ++I) {
7456 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
7457 return ParseStatus::Failure;
7458
7459 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
7460 return ParseStatus::Failure;
7461
7462 // Skip optional comma between dfmt/nfmt
7463 // but guard against 2 commas following each other.
7464 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7465 !peekToken().is(AsmToken::Comma)) {
7466 trySkipToken(AsmToken::Comma);
7467 }
7468 }
7469
7470 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7471 return ParseStatus::NoMatch;
7472
7473 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7474 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7475
7476 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7477 return ParseStatus::Success;
7478}
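// Worked example (a sketch, assuming the usual MTBUF layout with dfmt in the
// low four bits and nfmt above them): "dfmt:4 nfmt:2" and "nfmt:2, dfmt:4"
// both produce encodeDfmtNfmt(4, 2) == 4 | (2 << 4) == 0x24 as the single
// format operand.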
7479
7480ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7481 using namespace llvm::AMDGPU::MTBUFFormat;
7482
7483 int64_t Fmt = UFMT_UNDEF;
7484
7485 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7486 return ParseStatus::Failure;
7487
7488 if (Fmt == UFMT_UNDEF)
7489 return ParseStatus::NoMatch;
7490
7491 Format = Fmt;
7492 return ParseStatus::Success;
7493}
7494
7495bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7496 int64_t &Nfmt,
7497 StringRef FormatStr,
7498 SMLoc Loc) {
7499 using namespace llvm::AMDGPU::MTBUFFormat;
7500 int64_t Format;
7501
7502 Format = getDfmt(FormatStr);
7503 if (Format != DFMT_UNDEF) {
7504 Dfmt = Format;
7505 return true;
7506 }
7507
7508 Format = getNfmt(FormatStr, getSTI());
7509 if (Format != NFMT_UNDEF) {
7510 Nfmt = Format;
7511 return true;
7512 }
7513
7514 Error(Loc, "unsupported format");
7515 return false;
7516}
7517
7518ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7519 SMLoc FormatLoc,
7520 int64_t &Format) {
7521 using namespace llvm::AMDGPU::MTBUFFormat;
7522
7523 int64_t Dfmt = DFMT_UNDEF;
7524 int64_t Nfmt = NFMT_UNDEF;
7525 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7526 return ParseStatus::Failure;
7527
7528 if (trySkipToken(AsmToken::Comma)) {
7529 StringRef Str;
7530 SMLoc Loc = getLoc();
7531 if (!parseId(Str, "expected a format string") ||
7532 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7533 return ParseStatus::Failure;
7534 if (Dfmt == DFMT_UNDEF)
7535 return Error(Loc, "duplicate numeric format");
7536 if (Nfmt == NFMT_UNDEF)
7537 return Error(Loc, "duplicate data format");
7538 }
7539
7540 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7541 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7542
7543 if (isGFX10Plus()) {
7544 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
7545 if (Ufmt == UFMT_UNDEF)
7546 return Error(FormatLoc, "unsupported format");
7547 Format = Ufmt;
7548 } else {
7549 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7550 }
7551
7552 return ParseStatus::Success;
7553}
7554
7555ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7556 SMLoc Loc,
7557 int64_t &Format) {
7558 using namespace llvm::AMDGPU::MTBUFFormat;
7559
7560 auto Id = getUnifiedFormat(FormatStr, getSTI());
7561 if (Id == UFMT_UNDEF)
7562 return ParseStatus::NoMatch;
7563
7564 if (!isGFX10Plus())
7565 return Error(Loc, "unified format is not supported on this GPU");
7566
7567 Format = Id;
7568 return ParseStatus::Success;
7569}
7570
7571ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7572 using namespace llvm::AMDGPU::MTBUFFormat;
7573 SMLoc Loc = getLoc();
7574
7575 if (!parseExpr(Format))
7576 return ParseStatus::Failure;
7577 if (!isValidFormatEncoding(Format, getSTI()))
7578 return Error(Loc, "out of range format");
7579
7580 return ParseStatus::Success;
7581}
7582
7583ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7584 using namespace llvm::AMDGPU::MTBUFFormat;
7585
7586 if (!trySkipId("format", AsmToken::Colon))
7587 return ParseStatus::NoMatch;
7588
7589 if (trySkipToken(AsmToken::LBrac)) {
7590 StringRef FormatStr;
7591 SMLoc Loc = getLoc();
7592 if (!parseId(FormatStr, "expected a format string"))
7593 return ParseStatus::Failure;
7594
7595 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7596 if (Res.isNoMatch())
7597 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7598 if (!Res.isSuccess())
7599 return Res;
7600
7601 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7602 return ParseStatus::Failure;
7603
7604 return ParseStatus::Success;
7605 }
7606
7607 return parseNumericFormat(Format);
7608}
7609
7610ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
7611 using namespace llvm::AMDGPU::MTBUFFormat;
7612
7613 int64_t Format = getDefaultFormatEncoding(getSTI());
7614 ParseStatus Res;
7615 SMLoc Loc = getLoc();
7616
7617 // Parse legacy format syntax.
7618 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
7619 if (Res.isFailure())
7620 return Res;
7621
7622 bool FormatFound = Res.isSuccess();
7623
7624 Operands.push_back(
7625 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7626
7627 if (FormatFound)
7628 trySkipToken(AsmToken::Comma);
7629
7630 if (isToken(AsmToken::EndOfStatement)) {
7631 // We are expecting an soffset operand,
7632 // but let the matcher handle the error.
7633 return ParseStatus::Success;
7634 }
7635
7636 // Parse soffset.
7637 Res = parseRegOrImm(Operands);
7638 if (!Res.isSuccess())
7639 return Res;
7640
7641 trySkipToken(AsmToken::Comma);
7642
7643 if (!FormatFound) {
7644 Res = parseSymbolicOrNumericFormat(Format);
7645 if (Res.isFailure())
7646 return Res;
7647 if (Res.isSuccess()) {
7648 auto Size = Operands.size();
7649 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7650 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7651 Op.setImm(Format);
7652 }
7653 return ParseStatus::Success;
7654 }
7655
7656 if (isId("format") && peekToken().is(AsmToken::Colon))
7657 return Error(getLoc(), "duplicate format");
7658 return ParseStatus::Success;
7659}
7660
7661ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7662 ParseStatus Res =
7663 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7664 if (Res.isNoMatch()) {
7665 Res = parseIntWithPrefix("inst_offset", Operands,
7666 AMDGPUOperand::ImmTyInstOffset);
7667 }
7668 return Res;
7669}
7670
7671ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7672 ParseStatus Res =
7673 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7674 if (Res.isNoMatch())
7675 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7676 return Res;
7677}
7678
7679ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7680 ParseStatus Res =
7681 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7682 if (Res.isNoMatch()) {
7683 Res =
7684 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7685 }
7686 return Res;
7687}
7688
7689//===----------------------------------------------------------------------===//
7690// Exp
7691//===----------------------------------------------------------------------===//
7692
7693void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7694 OptionalImmIndexMap OptionalIdx;
7695
7696 unsigned OperandIdx[4];
7697 unsigned EnMask = 0;
7698 int SrcIdx = 0;
7699
7700 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7701 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7702
7703 // Add the register arguments
7704 if (Op.isReg()) {
7705 assert(SrcIdx < 4);
7706 OperandIdx[SrcIdx] = Inst.size();
7707 Op.addRegOperands(Inst, 1);
7708 ++SrcIdx;
7709 continue;
7710 }
7711
7712 if (Op.isOff()) {
7713 assert(SrcIdx < 4);
7714 OperandIdx[SrcIdx] = Inst.size();
7715 Inst.addOperand(MCOperand::createReg(MCRegister()));
7716 ++SrcIdx;
7717 continue;
7718 }
7719
7720 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7721 Op.addImmOperands(Inst, 1);
7722 continue;
7723 }
7724
7725 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7726 continue;
7727
7728 // Handle optional arguments
7729 OptionalIdx[Op.getImmTy()] = i;
7730 }
7731
7732 assert(SrcIdx == 4);
7733
7734 bool Compr = false;
7735 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7736 Compr = true;
7737 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7738 Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
7739 Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
7740 }
7741
7742 for (auto i = 0; i < SrcIdx; ++i) {
7743 if (Inst.getOperand(OperandIdx[i]).getReg()) {
7744 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7745 }
7746 }
7747
7748 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7749 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7750
7751 Inst.addOperand(MCOperand::createImm(EnMask));
7752}
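// For illustration: for "exp mrt0 v0, v1, v2, v3 done" all four sources are
// registers, so EnMask ends up as 0xf. With "compr" the third source is
// copied into the second slot, the last two slots are cleared, and each
// surviving slot contributes two enable bits (0x3) instead of one.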
7753
7754//===----------------------------------------------------------------------===//
7755// s_waitcnt
7756//===----------------------------------------------------------------------===//
7757
7758static bool
7759encodeCnt(
7760 const AMDGPU::IsaVersion ISA,
7761 int64_t &IntVal,
7762 int64_t CntVal,
7763 bool Saturate,
7764 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7765 unsigned (*decode)(const IsaVersion &Version, unsigned))
7766{
7767 bool Failed = false;
7768
7769 IntVal = encode(ISA, IntVal, CntVal);
7770 if (CntVal != decode(ISA, IntVal)) {
7771 if (Saturate) {
7772 IntVal = encode(ISA, IntVal, -1);
7773 } else {
7774 Failed = true;
7775 }
7776 }
7777 return Failed;
7778}
7779
7780bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7781
7782 SMLoc CntLoc = getLoc();
7783 StringRef CntName = getTokenStr();
7784
7785 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7786 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7787 return false;
7788
7789 int64_t CntVal;
7790 SMLoc ValLoc = getLoc();
7791 if (!parseExpr(CntVal))
7792 return false;
7793
7794 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7795
7796 bool Failed = true;
7797 bool Sat = CntName.ends_with("_sat");
7798
7799 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7800 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7801 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7802 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7803 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7804 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7805 } else {
7806 Error(CntLoc, "invalid counter name " + CntName);
7807 return false;
7808 }
7809
7810 if (Failed) {
7811 Error(ValLoc, "too large value for " + CntName);
7812 return false;
7813 }
7814
7815 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7816 return false;
7817
7818 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7819 if (isToken(AsmToken::EndOfStatement)) {
7820 Error(getLoc(), "expected a counter name");
7821 return false;
7822 }
7823 }
7824
7825 return true;
7826}
7827
7828ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7829 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7830 int64_t Waitcnt = getWaitcntBitMask(ISA);
7831 SMLoc S = getLoc();
7832
7833 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7834 while (!isToken(AsmToken::EndOfStatement)) {
7835 if (!parseCnt(Waitcnt))
7836 return ParseStatus::Failure;
7837 }
7838 } else {
7839 if (!parseExpr(Waitcnt))
7840 return ParseStatus::Failure;
7841 }
7842
7843 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7844 return ParseStatus::Success;
7845}
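// For illustration, both accepted operand forms (the mnemonic is just an
// example):
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)   ; named counters, '&' or ','
//                                             ; separators also accepted
//   s_waitcnt 0                               ; raw immediate bitmask
// Counters that are not mentioned keep their all-ones default from
// getWaitcntBitMask.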
7846
7847bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7848 SMLoc FieldLoc = getLoc();
7849 StringRef FieldName = getTokenStr();
7850 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7851 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7852 return false;
7853
7854 SMLoc ValueLoc = getLoc();
7855 StringRef ValueName = getTokenStr();
7856 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7857 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7858 return false;
7859
7860 unsigned Shift;
7861 if (FieldName == "instid0") {
7862 Shift = 0;
7863 } else if (FieldName == "instskip") {
7864 Shift = 4;
7865 } else if (FieldName == "instid1") {
7866 Shift = 7;
7867 } else {
7868 Error(FieldLoc, "invalid field name " + FieldName);
7869 return false;
7870 }
7871
7872 int Value;
7873 if (Shift == 4) {
7874 // Parse values for instskip.
7875 Value = StringSwitch<int>(ValueName)
7876 .Case("SAME", 0)
7877 .Case("NEXT", 1)
7878 .Case("SKIP_1", 2)
7879 .Case("SKIP_2", 3)
7880 .Case("SKIP_3", 4)
7881 .Case("SKIP_4", 5)
7882 .Default(-1);
7883 } else {
7884 // Parse values for instid0 and instid1.
7885 Value = StringSwitch<int>(ValueName)
7886 .Case("NO_DEP", 0)
7887 .Case("VALU_DEP_1", 1)
7888 .Case("VALU_DEP_2", 2)
7889 .Case("VALU_DEP_3", 3)
7890 .Case("VALU_DEP_4", 4)
7891 .Case("TRANS32_DEP_1", 5)
7892 .Case("TRANS32_DEP_2", 6)
7893 .Case("TRANS32_DEP_3", 7)
7894 .Case("FMA_ACCUM_CYCLE_1", 8)
7895 .Case("SALU_CYCLE_1", 9)
7896 .Case("SALU_CYCLE_2", 10)
7897 .Case("SALU_CYCLE_3", 11)
7898 .Default(-1);
7899 }
7900 if (Value < 0) {
7901 Error(ValueLoc, "invalid value name " + ValueName);
7902 return false;
7903 }
7904
7905 Delay |= Value << Shift;
7906 return true;
7907}
7908
7909ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7910 int64_t Delay = 0;
7911 SMLoc S = getLoc();
7912
7913 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7914 do {
7915 if (!parseDelay(Delay))
7916 return ParseStatus::Failure;
7917 } while (trySkipToken(AsmToken::Pipe));
7918 } else {
7919 if (!parseExpr(Delay))
7920 return ParseStatus::Failure;
7921 }
7922
7923 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7924 return ParseStatus::Success;
7925}
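// Worked example (a sketch): "s_delay_alu instid0(VALU_DEP_1) |
// instskip(NEXT) | instid1(VALU_DEP_2)" is parsed field by field; with the
// shifts used above it packs to 1 | (1 << 4) | (2 << 7) = 273.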
7926
7927bool
7928AMDGPUOperand::isSWaitCnt() const {
7929 return isImm();
7930}
7931
7932bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7933
7934//===----------------------------------------------------------------------===//
7935// DepCtr
7936//===----------------------------------------------------------------------===//
7937
7938void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7939 StringRef DepCtrName) {
7940 switch (ErrorId) {
7941 case OPR_ID_UNKNOWN:
7942 Error(Loc, Twine("invalid counter name ", DepCtrName));
7943 return;
7944 case OPR_ID_UNSUPPORTED:
7945 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7946 return;
7947 case OPR_ID_DUPLICATE:
7948 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7949 return;
7950 case OPR_VAL_INVALID:
7951 Error(Loc, Twine("invalid value for ", DepCtrName));
7952 return;
7953 default:
7954 assert(false);
7955 }
7956}
7957
7958bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7959
7960 using namespace llvm::AMDGPU::DepCtr;
7961
7962 SMLoc DepCtrLoc = getLoc();
7963 StringRef DepCtrName = getTokenStr();
7964
7965 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7966 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7967 return false;
7968
7969 int64_t ExprVal;
7970 if (!parseExpr(ExprVal))
7971 return false;
7972
7973 unsigned PrevOprMask = UsedOprMask;
7974 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7975
7976 if (CntVal < 0) {
7977 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7978 return false;
7979 }
7980
7981 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7982 return false;
7983
7984 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7985 if (isToken(AsmToken::EndOfStatement)) {
7986 Error(getLoc(), "expected a counter name");
7987 return false;
7988 }
7989 }
7990
7991 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7992 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7993 return true;
7994}
7995
7996ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7997 using namespace llvm::AMDGPU::DepCtr;
7998
7999 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
8000 SMLoc Loc = getLoc();
8001
8002 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8003 unsigned UsedOprMask = 0;
8004 while (!isToken(AsmToken::EndOfStatement)) {
8005 if (!parseDepCtr(DepCtr, UsedOprMask))
8006 return ParseStatus::Failure;
8007 }
8008 } else {
8009 if (!parseExpr(DepCtr))
8010 return ParseStatus::Failure;
8011 }
8012
8013 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
8014 return ParseStatus::Success;
8015}
8016
8017bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8018
8019//===----------------------------------------------------------------------===//
8020// hwreg
8021//===----------------------------------------------------------------------===//
8022
8023ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8024 OperandInfoTy &Offset,
8025 OperandInfoTy &Width) {
8026 using namespace llvm::AMDGPU::Hwreg;
8027
8028 if (!trySkipId("hwreg", AsmToken::LParen))
8029 return ParseStatus::NoMatch;
8030
8031 // The register may be specified by name or using a numeric code
8032 HwReg.Loc = getLoc();
8033 if (isToken(AsmToken::Identifier) &&
8034 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8035 HwReg.IsSymbolic = true;
8036 lex(); // skip register name
8037 } else if (!parseExpr(HwReg.Val, "a register name")) {
8038 return ParseStatus::Failure;
8039 }
8040
8041 if (trySkipToken(AsmToken::RParen))
8042 return ParseStatus::Success;
8043
8044 // parse optional params
8045 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
8046 return ParseStatus::Failure;
8047
8048 Offset.Loc = getLoc();
8049 if (!parseExpr(Offset.Val))
8050 return ParseStatus::Failure;
8051
8052 if (!skipToken(AsmToken::Comma, "expected a comma"))
8053 return ParseStatus::Failure;
8054
8055 Width.Loc = getLoc();
8056 if (!parseExpr(Width.Val) ||
8057 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8058 return ParseStatus::Failure;
8059
8060 return ParseStatus::Success;
8061}
8062
8063ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8064 using namespace llvm::AMDGPU::Hwreg;
8065
8066 int64_t ImmVal = 0;
8067 SMLoc Loc = getLoc();
8068
8069 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8070 HwregId::Default);
8071 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8072 HwregOffset::Default);
8073 struct : StructuredOpField {
8074 using StructuredOpField::StructuredOpField;
8075 bool validate(AMDGPUAsmParser &Parser) const override {
8076 if (!isUIntN(Width, Val - 1))
8077 return Error(Parser, "only values from 1 to 32 are legal");
8078 return true;
8079 }
8080 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8081 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
8082
8083 if (Res.isNoMatch())
8084 Res = parseHwregFunc(HwReg, Offset, Width);
8085
8086 if (Res.isSuccess()) {
8087 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8088 return ParseStatus::Failure;
8089 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8090 }
8091
8092 if (Res.isNoMatch() &&
8093 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8094 Res = ParseStatus::Success;
8095
8096 if (!Res.isSuccess())
8097 return ParseStatus::Failure;
8098
8099 if (!isUInt<16>(ImmVal))
8100 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8101 Operands.push_back(
8102 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8103 return ParseStatus::Success;
8104}
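// For illustration, the operand forms accepted above (register names are
// target dependent and shown only as examples):
//   s_getreg_b32 s0, hwreg(HW_REG_MODE)             ; whole register
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)      ; explicit offset/size
//   s_getreg_b32 s0, {id: 1, offset: 0, size: 32}   ; structured fields
// A plain 16-bit immediate is also accepted; all forms end up as the value
// produced by HwregEncoding::encode.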
8105
8106bool AMDGPUOperand::isHwreg() const {
8107 return isImmTy(ImmTyHwreg);
8108}
8109
8110//===----------------------------------------------------------------------===//
8111// sendmsg
8112//===----------------------------------------------------------------------===//
8113
8114bool
8115AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8116 OperandInfoTy &Op,
8117 OperandInfoTy &Stream) {
8118 using namespace llvm::AMDGPU::SendMsg;
8119
8120 Msg.Loc = getLoc();
8121 if (isToken(AsmToken::Identifier) &&
8122 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8123 Msg.IsSymbolic = true;
8124 lex(); // skip message name
8125 } else if (!parseExpr(Msg.Val, "a message name")) {
8126 return false;
8127 }
8128
8129 if (trySkipToken(AsmToken::Comma)) {
8130 Op.IsDefined = true;
8131 Op.Loc = getLoc();
8132 if (isToken(AsmToken::Identifier) &&
8133 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8134 OPR_ID_UNKNOWN) {
8135 lex(); // skip operation name
8136 } else if (!parseExpr(Op.Val, "an operation name")) {
8137 return false;
8138 }
8139
8140 if (trySkipToken(AsmToken::Comma)) {
8141 Stream.IsDefined = true;
8142 Stream.Loc = getLoc();
8143 if (!parseExpr(Stream.Val))
8144 return false;
8145 }
8146 }
8147
8148 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
8149}
8150
8151bool
8152AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
8153 const OperandInfoTy &Op,
8154 const OperandInfoTy &Stream) {
8155 using namespace llvm::AMDGPU::SendMsg;
8156
8157 // Validation strictness depends on whether the message is specified
8158 // in symbolic or numeric form. In the latter case,
8159 // only the possibility of encoding is checked.
8160 bool Strict = Msg.IsSymbolic;
8161
8162 if (Strict) {
8163 if (Msg.Val == OPR_ID_UNSUPPORTED) {
8164 Error(Msg.Loc, "specified message id is not supported on this GPU");
8165 return false;
8166 }
8167 } else {
8168 if (!isValidMsgId(Msg.Val, getSTI())) {
8169 Error(Msg.Loc, "invalid message id");
8170 return false;
8171 }
8172 }
8173 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
8174 if (Op.IsDefined) {
8175 Error(Op.Loc, "message does not support operations");
8176 } else {
8177 Error(Msg.Loc, "missing message operation");
8178 }
8179 return false;
8180 }
8181 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
8182 if (Op.Val == OPR_ID_UNSUPPORTED)
8183 Error(Op.Loc, "specified operation id is not supported on this GPU");
8184 else
8185 Error(Op.Loc, "invalid operation id");
8186 return false;
8187 }
8188 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
8189 Stream.IsDefined) {
8190 Error(Stream.Loc, "message operation does not support streams");
8191 return false;
8192 }
8193 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
8194 Error(Stream.Loc, "invalid message stream id");
8195 return false;
8196 }
8197 return true;
8198}
8199
8200ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
8201 using namespace llvm::AMDGPU::SendMsg;
8202
8203 int64_t ImmVal = 0;
8204 SMLoc Loc = getLoc();
8205
8206 if (trySkipId("sendmsg", AsmToken::LParen)) {
8207 OperandInfoTy Msg(OPR_ID_UNKNOWN);
8208 OperandInfoTy Op(OP_NONE_);
8209 OperandInfoTy Stream(STREAM_ID_NONE_);
8210 if (parseSendMsgBody(Msg, Op, Stream) &&
8211 validateSendMsg(Msg, Op, Stream)) {
8212 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
8213 } else {
8214 return ParseStatus::Failure;
8215 }
8216 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
8217 if (ImmVal < 0 || !isUInt<16>(ImmVal))
8218 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8219 } else {
8220 return ParseStatus::Failure;
8221 }
8222
8223 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8224 return ParseStatus::Success;
8225}
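// For illustration (message and operation names depend on the target; these
// are common pre-GFX11 examples):
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)   ; message, operation, stream
//   s_sendmsg 0x3                              ; raw 16-bit immediate
// The symbolic form is validated strictly; a numeric form only needs to be
// encodable.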
8226
8227bool AMDGPUOperand::isSendMsg() const {
8228 return isImmTy(ImmTySendMsg);
8229}
8230
8231//===----------------------------------------------------------------------===//
8232// v_interp
8233//===----------------------------------------------------------------------===//
8234
8235ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8236 StringRef Str;
8237 SMLoc S = getLoc();
8238
8239 if (!parseId(Str))
8240 return ParseStatus::NoMatch;
8241
8242 int Slot = StringSwitch<int>(Str)
8243 .Case("p10", 0)
8244 .Case("p20", 1)
8245 .Case("p0", 2)
8246 .Default(-1);
8247
8248 if (Slot == -1)
8249 return Error(S, "invalid interpolation slot");
8250
8251 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8252 AMDGPUOperand::ImmTyInterpSlot));
8253 return ParseStatus::Success;
8254}
8255
8256ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8257 StringRef Str;
8258 SMLoc S = getLoc();
8259
8260 if (!parseId(Str))
8261 return ParseStatus::NoMatch;
8262
8263 if (!Str.starts_with("attr"))
8264 return Error(S, "invalid interpolation attribute");
8265
8266 StringRef Chan = Str.take_back(2);
8267 int AttrChan = StringSwitch<int>(Chan)
8268 .Case(".x", 0)
8269 .Case(".y", 1)
8270 .Case(".z", 2)
8271 .Case(".w", 3)
8272 .Default(-1);
8273 if (AttrChan == -1)
8274 return Error(S, "invalid or missing interpolation attribute channel");
8275
8276 Str = Str.drop_back(2).drop_front(4);
8277
8278 uint8_t Attr;
8279 if (Str.getAsInteger(10, Attr))
8280 return Error(S, "invalid or missing interpolation attribute number");
8281
8282 if (Attr > 32)
8283 return Error(S, "out of bounds interpolation attribute number");
8284
8285 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
8286
8287 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
8288 AMDGPUOperand::ImmTyInterpAttr));
8289 Operands.push_back(AMDGPUOperand::CreateImm(
8290 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8291 return ParseStatus::Success;
8292}
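// For illustration: "attr3.y" is split by the code above into attribute
// number 3 (ImmTyInterpAttr) and channel 1 (ImmTyInterpAttrChan), as in
//   v_interp_p1_f32 v0, v1, attr3.y
// while the p10/p20/p0 slot is handled separately by parseInterpSlot.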
8293
8294//===----------------------------------------------------------------------===//
8295// exp
8296//===----------------------------------------------------------------------===//
8297
8298ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8299 using namespace llvm::AMDGPU::Exp;
8300
8301 StringRef Str;
8302 SMLoc S = getLoc();
8303
8304 if (!parseId(Str))
8305 return ParseStatus::NoMatch;
8306
8307 unsigned Id = getTgtId(Str);
8308 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
8309 return Error(S, (Id == ET_INVALID)
8310 ? "invalid exp target"
8311 : "exp target is not supported on this GPU");
8312
8313 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
8314 AMDGPUOperand::ImmTyExpTgt));
8315 return ParseStatus::Success;
8316}
8317
8318//===----------------------------------------------------------------------===//
8319// parser helpers
8320//===----------------------------------------------------------------------===//
8321
8322bool
8323AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8324 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8325}
8326
8327bool
8328AMDGPUAsmParser::isId(const StringRef Id) const {
8329 return isId(getToken(), Id);
8330}
8331
8332bool
8333AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8334 return getTokenKind() == Kind;
8335}
8336
8337StringRef AMDGPUAsmParser::getId() const {
8338 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8339}
8340
8341bool
8342AMDGPUAsmParser::trySkipId(const StringRef Id) {
8343 if (isId(Id)) {
8344 lex();
8345 return true;
8346 }
8347 return false;
8348}
8349
8350bool
8351AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8352 if (isToken(AsmToken::Identifier)) {
8353 StringRef Tok = getTokenStr();
8354 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8355 lex();
8356 return true;
8357 }
8358 }
8359 return false;
8360}
8361
8362bool
8363AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8364 if (isId(Id) && peekToken().is(Kind)) {
8365 lex();
8366 lex();
8367 return true;
8368 }
8369 return false;
8370}
8371
8372bool
8373AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8374 if (isToken(Kind)) {
8375 lex();
8376 return true;
8377 }
8378 return false;
8379}
8380
8381bool
8382AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8383 const StringRef ErrMsg) {
8384 if (!trySkipToken(Kind)) {
8385 Error(getLoc(), ErrMsg);
8386 return false;
8387 }
8388 return true;
8389}
8390
8391bool
8392AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8393 SMLoc S = getLoc();
8394
8395 const MCExpr *Expr;
8396 if (Parser.parseExpression(Expr))
8397 return false;
8398
8399 if (Expr->evaluateAsAbsolute(Imm))
8400 return true;
8401
8402 if (Expected.empty()) {
8403 Error(S, "expected absolute expression");
8404 } else {
8405 Error(S, Twine("expected ", Expected) +
8406 Twine(" or an absolute expression"));
8407 }
8408 return false;
8409}
8410
8411bool
8412AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8413 SMLoc S = getLoc();
8414
8415 const MCExpr *Expr;
8416 if (Parser.parseExpression(Expr))
8417 return false;
8418
8419 int64_t IntVal;
8420 if (Expr->evaluateAsAbsolute(IntVal)) {
8421 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8422 } else {
8423 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8424 }
8425 return true;
8426}
8427
8428bool
8429AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8430 if (isToken(AsmToken::String)) {
8431 Val = getToken().getStringContents();
8432 lex();
8433 return true;
8434 }
8435 Error(getLoc(), ErrMsg);
8436 return false;
8437}
8438
8439bool
8440AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8441 if (isToken(AsmToken::Identifier)) {
8442 Val = getTokenStr();
8443 lex();
8444 return true;
8445 }
8446 if (!ErrMsg.empty())
8447 Error(getLoc(), ErrMsg);
8448 return false;
8449}
8450
8451AsmToken
8452AMDGPUAsmParser::getToken() const {
8453 return Parser.getTok();
8454}
8455
8456AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8457 return isToken(AsmToken::EndOfStatement)
8458 ? getToken()
8459 : getLexer().peekTok(ShouldSkipSpace);
8460}
8461
8462void
8463AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8464 auto TokCount = getLexer().peekTokens(Tokens);
8465
8466 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8467 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8468}
8469
8470AsmToken::TokenKind
8471AMDGPUAsmParser::getTokenKind() const {
8472 return getLexer().getKind();
8473}
8474
8475SMLoc
8476AMDGPUAsmParser::getLoc() const {
8477 return getToken().getLoc();
8478}
8479
8480StringRef
8481AMDGPUAsmParser::getTokenStr() const {
8482 return getToken().getString();
8483}
8484
8485void
8486AMDGPUAsmParser::lex() {
8487 Parser.Lex();
8488}
8489
8490SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8491 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8492}
8493
8494// Returns whichever of the given locations comes later in the source.
8495SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8496 return a.getPointer() < b.getPointer() ? b : a;
8497}
8498
8499SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
8500 int MCOpIdx) const {
8501 for (const auto &Op : Operands) {
8502 const auto TargetOp = static_cast<AMDGPUOperand &>(*Op);
8503 if (TargetOp.getMCOpIdx() == MCOpIdx)
8504 return TargetOp.getStartLoc();
8505 }
8506 llvm_unreachable("No such MC operand!");
8507}
8508
8509SMLoc
8510AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8511 const OperandVector &Operands) const {
8512 for (unsigned i = Operands.size() - 1; i > 0; --i) {
8513 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8514 if (Test(Op))
8515 return Op.getStartLoc();
8516 }
8517 return getInstLoc(Operands);
8518}
8519
8520SMLoc
8521AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8522 const OperandVector &Operands) const {
8523 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8524 return getOperandLoc(Test, Operands);
8525}
8526
8527ParseStatus
8528AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
8529 if (!trySkipToken(AsmToken::LCurly))
8530 return ParseStatus::NoMatch;
8531
8532 bool First = true;
8533 while (!trySkipToken(AsmToken::RCurly)) {
8534 if (!First &&
8535 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
8536 return ParseStatus::Failure;
8537
8538 StringRef Id = getTokenStr();
8539 SMLoc IdLoc = getLoc();
8540 if (!skipToken(AsmToken::Identifier, "field name expected") ||
8541 !skipToken(AsmToken::Colon, "colon expected"))
8542 return ParseStatus::Failure;
8543
8544 const auto *I =
8545 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
8546 if (I == Fields.end())
8547 return Error(IdLoc, "unknown field");
8548 if ((*I)->IsDefined)
8549 return Error(IdLoc, "duplicate field");
8550
8551 // TODO: Support symbolic values.
8552 (*I)->Loc = getLoc();
8553 if (!parseExpr((*I)->Val))
8554 return ParseStatus::Failure;
8555 (*I)->IsDefined = true;
8556
8557 First = false;
8558 }
8559 return ParseStatus::Success;
8560}
8561
8562bool AMDGPUAsmParser::validateStructuredOpFields(
8563 ArrayRef<const StructuredOpField *> Fields) {
8564 return all_of(Fields, [this](const StructuredOpField *F) {
8565 return F->validate(*this);
8566 });
8567}
8568
8569//===----------------------------------------------------------------------===//
8570// swizzle
8571//===----------------------------------------------------------------------===//
8572
8573LLVM_READNONE
8574static unsigned
8575encodeBitmaskPerm(const unsigned AndMask,
8576 const unsigned OrMask,
8577 const unsigned XorMask) {
8578 using namespace llvm::AMDGPU::Swizzle;
8579
8580 return BITMASK_PERM_ENC |
8581 (AndMask << BITMASK_AND_SHIFT) |
8582 (OrMask << BITMASK_OR_SHIFT) |
8583 (XorMask << BITMASK_XOR_SHIFT);
8584}
8585
8586bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8587 const unsigned MaxVal,
8588 const Twine &ErrMsg, SMLoc &Loc) {
8589 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8590 return false;
8591 }
8592 Loc = getLoc();
8593 if (!parseExpr(Op)) {
8594 return false;
8595 }
8596 if (Op < MinVal || Op > MaxVal) {
8597 Error(Loc, ErrMsg);
8598 return false;
8599 }
8600
8601 return true;
8602}
8603
8604bool
8605AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8606 const unsigned MinVal,
8607 const unsigned MaxVal,
8608 const StringRef ErrMsg) {
8609 SMLoc Loc;
8610 for (unsigned i = 0; i < OpNum; ++i) {
8611 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8612 return false;
8613 }
8614
8615 return true;
8616}
8617
8618bool
8619AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8620 using namespace llvm::AMDGPU::Swizzle;
8621
8622 int64_t Lane[LANE_NUM];
8623 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8624 "expected a 2-bit lane id")) {
8626 for (unsigned I = 0; I < LANE_NUM; ++I) {
8627 Imm |= Lane[I] << (LANE_SHIFT * I);
8628 }
8629 return true;
8630 }
8631 return false;
8632}
8633
8634bool
8635AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8636 using namespace llvm::AMDGPU::Swizzle;
8637
8638 SMLoc Loc;
8639 int64_t GroupSize;
8640 int64_t LaneIdx;
8641
8642 if (!parseSwizzleOperand(GroupSize,
8643 2, 32,
8644 "group size must be in the interval [2,32]",
8645 Loc)) {
8646 return false;
8647 }
8648 if (!isPowerOf2_64(GroupSize)) {
8649 Error(Loc, "group size must be a power of two");
8650 return false;
8651 }
8652 if (parseSwizzleOperand(LaneIdx,
8653 0, GroupSize - 1,
8654 "lane id must be in the interval [0,group size - 1]",
8655 Loc)) {
8656 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8657 return true;
8658 }
8659 return false;
8660}
8661
8662bool
8663AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8664 using namespace llvm::AMDGPU::Swizzle;
8665
8666 SMLoc Loc;
8667 int64_t GroupSize;
8668
8669 if (!parseSwizzleOperand(GroupSize,
8670 2, 32,
8671 "group size must be in the interval [2,32]",
8672 Loc)) {
8673 return false;
8674 }
8675 if (!isPowerOf2_64(GroupSize)) {
8676 Error(Loc, "group size must be a power of two");
8677 return false;
8678 }
8679
8680 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8681 return true;
8682}
8683
8684bool
8685AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8686 using namespace llvm::AMDGPU::Swizzle;
8687
8688 SMLoc Loc;
8689 int64_t GroupSize;
8690
8691 if (!parseSwizzleOperand(GroupSize,
8692 1, 16,
8693 "group size must be in the interval [1,16]",
8694 Loc)) {
8695 return false;
8696 }
8697 if (!isPowerOf2_64(GroupSize)) {
8698 Error(Loc, "group size must be a power of two");
8699 return false;
8700 }
8701
8702 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8703 return true;
8704}
8705
8706bool
8707AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8708 using namespace llvm::AMDGPU::Swizzle;
8709
8710 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8711 return false;
8712 }
8713
8714 StringRef Ctl;
8715 SMLoc StrLoc = getLoc();
8716 if (!parseString(Ctl)) {
8717 return false;
8718 }
8719 if (Ctl.size() != BITMASK_WIDTH) {
8720 Error(StrLoc, "expected a 5-character mask");
8721 return false;
8722 }
8723
8724 unsigned AndMask = 0;
8725 unsigned OrMask = 0;
8726 unsigned XorMask = 0;
8727
8728 for (size_t i = 0; i < Ctl.size(); ++i) {
8729 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8730 switch(Ctl[i]) {
8731 default:
8732 Error(StrLoc, "invalid mask");
8733 return false;
8734 case '0':
8735 break;
8736 case '1':
8737 OrMask |= Mask;
8738 break;
8739 case 'p':
8740 AndMask |= Mask;
8741 break;
8742 case 'i':
8743 AndMask |= Mask;
8744 XorMask |= Mask;
8745 break;
8746 }
8747 }
8748
8749 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8750 return true;
8751}
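// Worked example (a sketch): the five mask characters are taken MSB first,
// with '0' forcing a bit to zero, '1' forcing it to one, 'p' preserving it
// and 'i' inverting it. So swizzle(BITMASK_PERM, "00p11") produces
// AndMask = 0b00100, OrMask = 0b00011, XorMask = 0.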
8752
8753bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
8754 using namespace llvm::AMDGPU::Swizzle;
8755
8756 if (!AMDGPU::isGFX9Plus(getSTI())) {
8757 Error(getLoc(), "FFT mode swizzle not supported on this GPU");
8758 return false;
8759 }
8760
8761 int64_t Swizzle;
8762 SMLoc Loc;
8763 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8764 "FFT swizzle must be in the interval [0," +
8765 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
8766 Loc))
8767 return false;
8768
8769 Imm = FFT_MODE_ENC | Swizzle;
8770 return true;
8771}
8772
8773bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
8774 using namespace llvm::AMDGPU::Swizzle;
8775
8776 if (!AMDGPU::isGFX9Plus(getSTI())) {
8777 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
8778 return false;
8779 }
8780
8781 SMLoc Loc;
8782 int64_t Direction;
8783
8784 if (!parseSwizzleOperand(Direction, 0, 1,
8785 "direction must be 0 (left) or 1 (right)", Loc))
8786 return false;
8787
8788 int64_t RotateSize;
8789 if (!parseSwizzleOperand(
8790 RotateSize, 0, ROTATE_MAX_SIZE,
8791 "number of threads to rotate must be in the interval [0," +
8792 Twine(ROTATE_MAX_SIZE) + Twine(']'),
8793 Loc))
8794 return false;
8795
8796 Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) |
8797 (RotateSize << ROTATE_SIZE_SHIFT);
8798 return true;
8799}
8800
8801bool
8802AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8803
8804 SMLoc OffsetLoc = getLoc();
8805
8806 if (!parseExpr(Imm, "a swizzle macro")) {
8807 return false;
8808 }
8809 if (!isUInt<16>(Imm)) {
8810 Error(OffsetLoc, "expected a 16-bit offset");
8811 return false;
8812 }
8813 return true;
8814}
8815
8816bool
8817AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8818 using namespace llvm::AMDGPU::Swizzle;
8819
8820 if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
8821
8822 SMLoc ModeLoc = getLoc();
8823 bool Ok = false;
8824
8825 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8826 Ok = parseSwizzleQuadPerm(Imm);
8827 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8828 Ok = parseSwizzleBitmaskPerm(Imm);
8829 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8830 Ok = parseSwizzleBroadcast(Imm);
8831 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8832 Ok = parseSwizzleSwap(Imm);
8833 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8834 Ok = parseSwizzleReverse(Imm);
8835 } else if (trySkipId(IdSymbolic[ID_FFT])) {
8836 Ok = parseSwizzleFFT(Imm);
8837 } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
8838 Ok = parseSwizzleRotate(Imm);
8839 } else {
8840 Error(ModeLoc, "expected a swizzle mode");
8841 }
8842
8843 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
8844 }
8845
8846 return false;
8847}
8848
8849ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8850 SMLoc S = getLoc();
8851 int64_t Imm = 0;
8852
8853 if (trySkipId("offset")) {
8854
8855 bool Ok = false;
8856 if (skipToken(AsmToken::Colon, "expected a colon")) {
8857 if (trySkipId("swizzle")) {
8858 Ok = parseSwizzleMacro(Imm);
8859 } else {
8860 Ok = parseSwizzleOffset(Imm);
8861 }
8862 }
8863
8864 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8865
8866 return Ok ? ParseStatus::Success : ParseStatus::Failure;
8867 }
8868 return ParseStatus::NoMatch;
8869}
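// For illustration, the two accepted spellings of the DS swizzle operand
// (the instruction is only an example):
//   ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 16)   ; symbolic macro
//   ds_swizzle_b32 v0, v1 offset:0x8000              ; raw 16-bit offset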
8870
8871bool
8872AMDGPUOperand::isSwizzle() const {
8873 return isImmTy(ImmTySwizzle);
8874}
8875
8876//===----------------------------------------------------------------------===//
8877// VGPR Index Mode
8878//===----------------------------------------------------------------------===//
8879
8880int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8881
8882 using namespace llvm::AMDGPU::VGPRIndexMode;
8883
8884 if (trySkipToken(AsmToken::RParen)) {
8885 return OFF;
8886 }
8887
8888 int64_t Imm = 0;
8889
8890 while (true) {
8891 unsigned Mode = 0;
8892 SMLoc S = getLoc();
8893
8894 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8895 if (trySkipId(IdSymbolic[ModeId])) {
8896 Mode = 1 << ModeId;
8897 break;
8898 }
8899 }
8900
8901 if (Mode == 0) {
8902 Error(S, (Imm == 0)?
8903 "expected a VGPR index mode or a closing parenthesis" :
8904 "expected a VGPR index mode");
8905 return UNDEF;
8906 }
8907
8908 if (Imm & Mode) {
8909 Error(S, "duplicate VGPR index mode");
8910 return UNDEF;
8911 }
8912 Imm |= Mode;
8913
8914 if (trySkipToken(AsmToken::RParen))
8915 break;
8916 if (!skipToken(AsmToken::Comma,
8917 "expected a comma or a closing parenthesis"))
8918 return UNDEF;
8919 }
8920
8921 return Imm;
8922}
8923
8924ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8925
8926 using namespace llvm::AMDGPU::VGPRIndexMode;
8927
8928 int64_t Imm = 0;
8929 SMLoc S = getLoc();
8930
8931 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8932 Imm = parseGPRIdxMacro();
8933 if (Imm == UNDEF)
8934 return ParseStatus::Failure;
8935 } else {
8936 if (getParser().parseAbsoluteExpression(Imm))
8937 return ParseStatus::Failure;
8938 if (Imm < 0 || !isUInt<4>(Imm))
8939 return Error(S, "invalid immediate: only 4-bit values are legal");
8940 }
8941
8942 Operands.push_back(
8943 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8944 return ParseStatus::Success;
8945}
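// For illustration, assuming the usual SRC0/SRC1/SRC2/DST mode ordering:
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
// sets bits 0 and 3 of the mode mask, which is equivalent to writing the raw
// 4-bit immediate 9.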
8946
8947bool AMDGPUOperand::isGPRIdxMode() const {
8948 return isImmTy(ImmTyGprIdxMode);
8949}
8950
8951//===----------------------------------------------------------------------===//
8952// sopp branch targets
8953//===----------------------------------------------------------------------===//
8954
8955ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8956
8957 // Make sure we are not parsing something
8958 // that looks like a label or an expression but is not.
8959 // This will improve error messages.
8960 if (isRegister() || isModifier())
8961 return ParseStatus::NoMatch;
8962
8963 if (!parseExpr(Operands))
8964 return ParseStatus::Failure;
8965
8966 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8967 assert(Opr.isImm() || Opr.isExpr());
8968 SMLoc Loc = Opr.getStartLoc();
8969
8970 // Currently we do not support arbitrary expressions as branch targets.
8971 // Only labels and absolute expressions are accepted.
8972 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8973 Error(Loc, "expected an absolute expression or a label");
8974 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8975 Error(Loc, "expected a 16-bit signed jump offset");
8976 }
8977
8978 return ParseStatus::Success;
8979}
8980
8981//===----------------------------------------------------------------------===//
8982// Boolean holding registers
8983//===----------------------------------------------------------------------===//
8984
8985ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8986 return parseReg(Operands);
8987}
8988
8989//===----------------------------------------------------------------------===//
8990// mubuf
8991//===----------------------------------------------------------------------===//
8992
8993void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8994 const OperandVector &Operands,
8995 bool IsAtomic) {
8996 OptionalImmIndexMap OptionalIdx;
8997 unsigned FirstOperandIdx = 1;
8998 bool IsAtomicReturn = false;
8999
9000 if (IsAtomic) {
9001 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
9002 SIInstrFlags::IsAtomicRet;
9003 }
9004
9005 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
9006 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9007
9008 // Add the register arguments
9009 if (Op.isReg()) {
9010 Op.addRegOperands(Inst, 1);
9011 // Insert a tied src for atomic return dst.
9012 // This cannot be postponed as subsequent calls to
9013 // addImmOperands rely on the correct number of MC operands.
9014 if (IsAtomicReturn && i == FirstOperandIdx)
9015 Op.addRegOperands(Inst, 1);
9016 continue;
9017 }
9018
9019 // Handle the case where soffset is an immediate
9020 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9021 Op.addImmOperands(Inst, 1);
9022 continue;
9023 }
9024
9025 // Handle tokens like 'offen' which are sometimes hard-coded into the
9026 // asm string. There are no MCInst operands for these.
9027 if (Op.isToken()) {
9028 continue;
9029 }
9030 assert(Op.isImm());
9031
9032 // Handle optional arguments
9033 OptionalIdx[Op.getImmTy()] = i;
9034 }
9035
9036 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
9037 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
9038 // Parse a dummy operand as a placeholder for the SWZ operand. This enforces
9039 // agreement between MCInstrDesc.getNumOperands and MCInst.getNumOperands.
9040 Inst.addOperand(MCOperand::createImm(0));
9041}
9042
9043//===----------------------------------------------------------------------===//
9044// smrd
9045//===----------------------------------------------------------------------===//
9046
9047bool AMDGPUOperand::isSMRDOffset8() const {
9048 return isImmLiteral() && isUInt<8>(getImm());
9049}
9050
9051bool AMDGPUOperand::isSMEMOffset() const {
9052 // Offset range is checked later by validator.
9053 return isImmLiteral();
9054}
9055
9056bool AMDGPUOperand::isSMRDLiteralOffset() const {
9057 // 32-bit literals are only supported on CI and we only want to use them
9058 // when the offset is > 8-bits.
9059 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
9060}
9061
9062//===----------------------------------------------------------------------===//
9063// vop3
9064//===----------------------------------------------------------------------===//
9065
9066static bool ConvertOmodMul(int64_t &Mul) {
9067 if (Mul != 1 && Mul != 2 && Mul != 4)
9068 return false;
9069
9070 Mul >>= 1;
9071 return true;
9072}
9073
9074static bool ConvertOmodDiv(int64_t &Div) {
9075 if (Div == 1) {
9076 Div = 0;
9077 return true;
9078 }
9079
9080 if (Div == 2) {
9081 Div = 3;
9082 return true;
9083 }
9084
9085 return false;
9086}
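// Summary of the mapping implemented above, shown here for reference: the omod
// field encodes 0 = none, 1 = *2, 2 = *4, 3 = /2. Hence mul:1 -> 0, mul:2 -> 1 and
// mul:4 -> 2 (via Mul >>= 1), while div:1 -> 0 and div:2 -> 3; any other mul:/div:
// value is rejected by the converters.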
9087
9088// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9089// This is intentional and ensures compatibility with sp3.
9090// See bug 35397 for details.
9091bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9092 if (BoundCtrl == 0 || BoundCtrl == 1) {
9093 if (!isGFX11Plus())
9094 BoundCtrl = 1;
9095 return true;
9096 }
9097 return false;
9098}
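// Worked example, illustrative only: on a pre-gfx11 target both "bound_ctrl:0" and
// "bound_ctrl:1" end up with BoundCtrl == 1, matching sp3 behaviour as noted above;
// on gfx11+ the written value is kept as-is.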
9099
9100void AMDGPUAsmParser::onBeginOfFile() {
9101 if (!getParser().getStreamer().getTargetStreamer() ||
9102 getSTI().getTargetTriple().getArch() == Triple::r600)
9103 return;
9104
9105 if (!getTargetStreamer().getTargetID())
9106 getTargetStreamer().initializeTargetID(getSTI(),
9107 getSTI().getFeatureString());
9108
9109 if (isHsaAbi(getSTI()))
9110 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9111}
9112
9113/// Parse AMDGPU specific expressions.
9114///
9115/// expr ::= or(expr, ...) |
9116/// max(expr, ...)
9117///
9118bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9119 using AGVK = AMDGPUMCExpr::VariantKind;
9120
9121 if (isToken(AsmToken::Identifier)) {
9122 StringRef TokenId = getTokenStr();
9123 AGVK VK = StringSwitch<AGVK>(TokenId)
9124 .Case("max", AGVK::AGVK_Max)
9125 .Case("or", AGVK::AGVK_Or)
9126 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
9127 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9128 .Case("alignto", AGVK::AGVK_AlignTo)
9129 .Case("occupancy", AGVK::AGVK_Occupancy)
9130 .Default(AGVK::AGVK_None);
9131
9132 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
9133 SmallVector<const MCExpr *, 4> Exprs;
9134 uint64_t CommaCount = 0;
9135 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
9136 lex(); // Eat '('
9137 while (true) {
9138 if (trySkipToken(AsmToken::RParen)) {
9139 if (Exprs.empty()) {
9140 Error(getToken().getLoc(),
9141 "empty " + Twine(TokenId) + " expression");
9142 return true;
9143 }
9144 if (CommaCount + 1 != Exprs.size()) {
9145 Error(getToken().getLoc(),
9146 "mismatch of commas in " + Twine(TokenId) + " expression");
9147 return true;
9148 }
9149 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
9150 return false;
9151 }
9152 const MCExpr *Expr;
9153 if (getParser().parseExpression(Expr, EndLoc))
9154 return true;
9155 Exprs.push_back(Expr);
9156 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
9157 if (LastTokenWasComma)
9158 CommaCount++;
9159 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
9160 Error(getToken().getLoc(),
9161 "unexpected token in " + Twine(TokenId) + " expression");
9162 return true;
9163 }
9164 }
9165 }
9166 }
9167 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
9168}
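// Illustrative example; the symbol names are hypothetical: with this hook an
// expression such as "max(kernel_a_vgprs, kernel_b_vgprs)" or "or(1, 2, 4)" is
// folded into a single AMDGPUMCExpr of the matching variant kind, while any other
// identifier falls through to the generic MCAsmParser primary-expression handling.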
9169
9170ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9171 StringRef Name = getTokenStr();
9172 if (Name == "mul") {
9173 return parseIntWithPrefix("mul", Operands,
9174 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
9175 }
9176
9177 if (Name == "div") {
9178 return parseIntWithPrefix("div", Operands,
9179 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
9180 }
9181
9182 return ParseStatus::NoMatch;
9183}
9184
9185// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
9186// the number of src operands present, then copies that bit into src0_modifiers.
9187static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
9188 int Opc = Inst.getOpcode();
9189 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9190 if (OpSelIdx == -1)
9191 return;
9192
9193 int SrcNum;
9194 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9195 AMDGPU::OpName::src2};
9196 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
9197 ++SrcNum)
9198 ;
9199 assert(SrcNum > 0);
9200
9201 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9202
9203 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
9204 if (DstIdx == -1)
9205 return;
9206
9207 const MCOperand &DstOp = Inst.getOperand(DstIdx);
9208 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
9209 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9210 if (DstOp.isReg() &&
9211 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
9212 if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
9213 ModVal |= SISrcMods::DST_OP_SEL;
9214 } else {
9215 if ((OpSel & (1 << SrcNum)) != 0)
9216 ModVal |= SISrcMods::DST_OP_SEL;
9217 }
9218 Inst.getOperand(ModIdx).setImm(ModVal);
9219}
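// Note derived from the code above: for true16 destinations the DST_OP_SEL bit is
// taken from the register half itself (a dst that is the high half of a 16-bit
// VGPR sets the bit via isHi16Reg()), whereas for other destinations it is copied
// from bit SrcNum of the parsed op_sel value.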
9220
9221void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9222 const OperandVector &Operands) {
9223 cvtVOP3P(Inst, Operands);
9224 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9225}
9226
9227void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
9228 OptionalImmIndexMap &OptionalIdx) {
9229 cvtVOP3P(Inst, Operands, OptionalIdx);
9230 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9231}
9232
9233static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9234 return
9235 // 1. This operand is input modifiers
9236 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9237 // 2. This is not last operand
9238 && Desc.NumOperands > (OpNum + 1)
9239 // 3. Next operand is register class
9240 && Desc.operands()[OpNum + 1].RegClass != -1
9241 // 4. Next register is not tied to any other operand
9242 && Desc.getOperandConstraint(OpNum + 1,
9243 MCOI::OperandConstraint::TIED_TO) == -1;
9244}
9245
9246void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
9247 unsigned Opc = Inst.getOpcode();
9248 constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9249 AMDGPU::OpName::src2};
9250 constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9251 AMDGPU::OpName::src1_modifiers,
9252 AMDGPU::OpName::src2_modifiers};
9253 for (int J = 0; J < 3; ++J) {
9254 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9255 if (OpIdx == -1)
9256 // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but
9257 // no src1. So continue instead of break.
9258 continue;
9259
9260 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9261 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9262
9263 if ((OpSel & (1 << J)) != 0)
9264 ModVal |= SISrcMods::OP_SEL_0;
9265 // op_sel[3] is encoded in src0_modifiers.
9266 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
9267 ModVal |= SISrcMods::DST_OP_SEL;
9268
9269 Inst.getOperand(ModIdx).setImm(ModVal);
9270 }
9271}
9272
9273void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9274{
9275 OptionalImmIndexMap OptionalIdx;
9276 unsigned Opc = Inst.getOpcode();
9277
9278 unsigned I = 1;
9279 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9280 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9281 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9282 }
9283
9284 for (unsigned E = Operands.size(); I != E; ++I) {
9285 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9286 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9287 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9288 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
9289 Op.isInterpAttrChan()) {
9290 Inst.addOperand(MCOperand::createImm(Op.getImm()));
9291 } else if (Op.isImmModifier()) {
9292 OptionalIdx[Op.getImmTy()] = I;
9293 } else {
9294 llvm_unreachable("unhandled operand type");
9295 }
9296 }
9297
9298 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
9299 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9300 AMDGPUOperand::ImmTyHigh);
9301
9302 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9303 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9304 AMDGPUOperand::ImmTyClamp);
9305
9306 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9307 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9308 AMDGPUOperand::ImmTyOModSI);
9309
9310 // Some v_interp instructions use op_sel[3] for dst.
9311 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9312 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9313 AMDGPUOperand::ImmTyOpSel);
9314 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9315 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9316
9317 cvtOpSelHelper(Inst, OpSel);
9318 }
9319}
9320
9321void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9322{
9323 OptionalImmIndexMap OptionalIdx;
9324 unsigned Opc = Inst.getOpcode();
9325
9326 unsigned I = 1;
9327 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9328 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9329 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9330 }
9331
9332 for (unsigned E = Operands.size(); I != E; ++I) {
9333 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9334 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9335 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9336 } else if (Op.isImmModifier()) {
9337 OptionalIdx[Op.getImmTy()] = I;
9338 } else {
9339 llvm_unreachable("unhandled operand type");
9340 }
9341 }
9342
9343 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
9344
9345 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9346 if (OpSelIdx != -1)
9347 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9348
9349 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
9350
9351 if (OpSelIdx == -1)
9352 return;
9353
9354 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9355 cvtOpSelHelper(Inst, OpSel);
9356}
9357
9358void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9359 const OperandVector &Operands) {
9360 OptionalImmIndexMap OptionalIdx;
9361 unsigned Opc = Inst.getOpcode();
9362 unsigned I = 1;
9363 int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
9364
9365 const MCInstrDesc &Desc = MII.get(Opc);
9366
9367 for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
9368 static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
9369
9370 for (unsigned E = Operands.size(); I != E; ++I) {
9371 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
9372 int NumOperands = Inst.getNumOperands();
9373 // The order of operands in MCInst and in the parsed operands differs.
9374 // Add dummy cbsz and blgp operands at the corresponding MCInst operand
9375 // indices so that the scale values are parsed correctly.
9376 if (NumOperands == CbszOpIdx) {
9377 Inst.addOperand(MCOperand::createImm(0));
9378 Inst.addOperand(MCOperand::createImm(0));
9379 }
9380 if (isRegOrImmWithInputMods(Desc, NumOperands)) {
9381 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9382 } else if (Op.isImmModifier()) {
9383 OptionalIdx[Op.getImmTy()] = I;
9384 } else {
9385 Op.addRegOrImmOperands(Inst, 1);
9386 }
9387 }
9388
9389 // Insert CBSZ and BLGP operands for F8F6F4 variants
9390 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9391 if (CbszIdx != OptionalIdx.end()) {
9392 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
9393 Inst.getOperand(CbszOpIdx).setImm(CbszVal);
9394 }
9395
9396 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
9397 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9398 if (BlgpIdx != OptionalIdx.end()) {
9399 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
9400 Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
9401 }
9402
9403 // Add dummy src_modifiers
9404 Inst.addOperand(MCOperand::createImm(0));
9405 Inst.addOperand(MCOperand::createImm(0));
9406
9407 // Handle op_sel fields
9408
9409 unsigned OpSel = 0;
9410 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9411 if (OpselIdx != OptionalIdx.end()) {
9412 OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
9413 .getImm();
9414 }
9415
9416 unsigned OpSelHi = 0;
9417 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9418 if (OpselHiIdx != OptionalIdx.end()) {
9419 OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
9420 .getImm();
9421 }
9422 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9423 AMDGPU::OpName::src1_modifiers};
9424
9425 for (unsigned J = 0; J < 2; ++J) {
9426 unsigned ModVal = 0;
9427 if (OpSel & (1 << J))
9428 ModVal |= SISrcMods::OP_SEL_0;
9429 if (OpSelHi & (1 << J))
9430 ModVal |= SISrcMods::OP_SEL_1;
9431
9432 const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9433 Inst.getOperand(ModIdx).setImm(ModVal);
9434 }
9435}
9436
9437void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9438 OptionalImmIndexMap &OptionalIdx) {
9439 unsigned Opc = Inst.getOpcode();
9440
9441 unsigned I = 1;
9442 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9443 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9444 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9445 }
9446
9447 for (unsigned E = Operands.size(); I != E; ++I) {
9448 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9449 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9450 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9451 } else if (Op.isImmModifier()) {
9452 OptionalIdx[Op.getImmTy()] = I;
9453 } else {
9454 Op.addRegOrImmOperands(Inst, 1);
9455 }
9456 }
9457
9458 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
9459 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9460 AMDGPUOperand::ImmTyScaleSel);
9461
9462 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9463 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9464 AMDGPUOperand::ImmTyClamp);
9465
9466 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
9467 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
9468 Inst.addOperand(Inst.getOperand(0));
9469 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9470 AMDGPUOperand::ImmTyByteSel);
9471 }
9472
9473 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9474 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9475 AMDGPUOperand::ImmTyOModSI);
9476
9477 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9478 // they have a src2 register operand that is tied to the dst operand.
9479 // We don't allow modifiers for this operand in the assembler, so src2_modifiers
9480 // should be 0.
9481 if (isMAC(Opc)) {
9482 auto *it = Inst.begin();
9483 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
9484 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
9485 ++it;
9486 // Copy the operand to ensure it's not invalidated when Inst grows.
9487 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
9488 }
9489}
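// Note derived from the code above: for v_mac_*/v_fmac_* no src2 appears in the
// assembly string, so a zero src2_modifiers immediate and a copy of the dst
// operand are spliced in at the src2_modifiers position to keep the MCInst operand
// count in agreement with the instruction description.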
9490
9491void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9492 OptionalImmIndexMap OptionalIdx;
9493 cvtVOP3(Inst, Operands, OptionalIdx);
9494}
9495
9496void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
9497 OptionalImmIndexMap &OptIdx) {
9498 const int Opc = Inst.getOpcode();
9499 const MCInstrDesc &Desc = MII.get(Opc);
9500
9501 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
9502
9503 if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9504 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9505 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9506 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9507 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9508 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
9509 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
9510 Inst.addOperand(Inst.getOperand(0));
9511 }
9512
9513 // Adding vdst_in operand is already covered for these DPP instructions in
9514 // cvtVOP3DPP.
9515 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
9516 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
9517 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
9518 Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
9519 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
9520 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
9521 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
9522 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
9523 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
9524 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
9525 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9526 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp_gfx1250 ||
9527 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp8_gfx1250 ||
9528 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9529 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9530 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
9531 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
9532 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
9533 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
9534 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
9535 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
9536 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
9537 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
9538 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
9539 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
9540 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
9541 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {
9542 Inst.addOperand(Inst.getOperand(0));
9543 }
9544
9545 int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
9546 if (BitOp3Idx != -1) {
9547 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9548 }
9549
9550 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
9551 // instruction, and then figure out where to actually put the modifiers
9552
9553 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9554 if (OpSelIdx != -1) {
9555 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
9556 }
9557
9558 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
9559 if (OpSelHiIdx != -1) {
9560 int DefaultVal = IsPacked ? -1 : 0;
9561 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
9562 DefaultVal);
9563 }
9564
9565 int MatrixAFMTIdx =
9566 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
9567 if (MatrixAFMTIdx != -1) {
9568 addOptionalImmOperand(Inst, Operands, OptIdx,
9569 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9570 }
9571
9572 int MatrixBFMTIdx =
9573 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
9574 if (MatrixBFMTIdx != -1) {
9575 addOptionalImmOperand(Inst, Operands, OptIdx,
9576 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9577 }
9578
9579 int MatrixAScaleIdx =
9580 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
9581 if (MatrixAScaleIdx != -1) {
9582 addOptionalImmOperand(Inst, Operands, OptIdx,
9583 AMDGPUOperand::ImmTyMatrixAScale, 0);
9584 }
9585
9586 int MatrixBScaleIdx =
9587 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
9588 if (MatrixBScaleIdx != -1) {
9589 addOptionalImmOperand(Inst, Operands, OptIdx,
9590 AMDGPUOperand::ImmTyMatrixBScale, 0);
9591 }
9592
9593 int MatrixAScaleFmtIdx =
9594 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9595 if (MatrixAScaleFmtIdx != -1) {
9596 addOptionalImmOperand(Inst, Operands, OptIdx,
9597 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9598 }
9599
9600 int MatrixBScaleFmtIdx =
9601 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9602 if (MatrixBScaleFmtIdx != -1) {
9603 addOptionalImmOperand(Inst, Operands, OptIdx,
9604 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9605 }
9606
9607 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
9608 addOptionalImmOperand(Inst, Operands, OptIdx,
9609 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9610
9611 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
9612 addOptionalImmOperand(Inst, Operands, OptIdx,
9613 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9614
9615 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
9616 if (NegLoIdx != -1)
9617 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
9618
9619 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
9620 if (NegHiIdx != -1)
9621 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
9622
9623 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9624 AMDGPU::OpName::src2};
9625 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9626 AMDGPU::OpName::src1_modifiers,
9627 AMDGPU::OpName::src2_modifiers};
9628
9629 unsigned OpSel = 0;
9630 unsigned OpSelHi = 0;
9631 unsigned NegLo = 0;
9632 unsigned NegHi = 0;
9633
9634 if (OpSelIdx != -1)
9635 OpSel = Inst.getOperand(OpSelIdx).getImm();
9636
9637 if (OpSelHiIdx != -1)
9638 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
9639
9640 if (NegLoIdx != -1)
9641 NegLo = Inst.getOperand(NegLoIdx).getImm();
9642
9643 if (NegHiIdx != -1)
9644 NegHi = Inst.getOperand(NegHiIdx).getImm();
9645
9646 for (int J = 0; J < 3; ++J) {
9647 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9648 if (OpIdx == -1)
9649 break;
9650
9651 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9652
9653 if (ModIdx == -1)
9654 continue;
9655
9656 uint32_t ModVal = 0;
9657
9658 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
9659 if (SrcOp.isReg() && getMRI()
9660 ->getRegClass(AMDGPU::VGPR_16RegClassID)
9661 .contains(SrcOp.getReg())) {
9662 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
9663 if (VGPRSuffixIsHi)
9664 ModVal |= SISrcMods::OP_SEL_0;
9665 } else {
9666 if ((OpSel & (1 << J)) != 0)
9667 ModVal |= SISrcMods::OP_SEL_0;
9668 }
9669
9670 if ((OpSelHi & (1 << J)) != 0)
9671 ModVal |= SISrcMods::OP_SEL_1;
9672
9673 if ((NegLo & (1 << J)) != 0)
9674 ModVal |= SISrcMods::NEG;
9675
9676 if ((NegHi & (1 << J)) != 0)
9677 ModVal |= SISrcMods::NEG_HI;
9678
9679 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
9680 }
9681}
9682
9683void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9684 OptionalImmIndexMap OptIdx;
9685 cvtVOP3(Inst, Operands, OptIdx);
9686 cvtVOP3P(Inst, Operands, OptIdx);
9687}
9688
9689static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
9690 unsigned i, unsigned Opc,
9691 AMDGPU::OpName OpName) {
9692 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
9693 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9694 else
9695 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
9696}
9697
9698void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
9699 unsigned Opc = Inst.getOpcode();
9700
9701 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9702 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
9703 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
9704 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
9705 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
9706
9707 OptionalImmIndexMap OptIdx;
9708 for (unsigned i = 5; i < Operands.size(); ++i) {
9709 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9710 OptIdx[Op.getImmTy()] = i;
9711 }
9712
9713 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
9714 addOptionalImmOperand(Inst, Operands, OptIdx,
9715 AMDGPUOperand::ImmTyIndexKey8bit);
9716
9717 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
9718 addOptionalImmOperand(Inst, Operands, OptIdx,
9719 AMDGPUOperand::ImmTyIndexKey16bit);
9720
9721 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
9722 addOptionalImmOperand(Inst, Operands, OptIdx,
9723 AMDGPUOperand::ImmTyIndexKey32bit);
9724
9725 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9726 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
9727
9728 cvtVOP3P(Inst, Operands, OptIdx);
9729}
9730
9731//===----------------------------------------------------------------------===//
9732// VOPD
9733//===----------------------------------------------------------------------===//
9734
9735ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9736 if (!hasVOPD(getSTI()))
9737 return ParseStatus::NoMatch;
9738
9739 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
9740 SMLoc S = getLoc();
9741 lex();
9742 lex();
9743 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
9744 SMLoc OpYLoc = getLoc();
9745 StringRef OpYName;
9746 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
9747 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
9748 return ParseStatus::Success;
9749 }
9750 return Error(OpYLoc, "expected a VOPDY instruction after ::");
9751 }
9752 return ParseStatus::NoMatch;
9753}
9754
9755// Create VOPD MCInst operands using parsed assembler operands.
9756void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9757 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9758
9759 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
9760 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9761 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9762 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9763 return;
9764 }
9765 if (Op.isReg()) {
9766 Op.addRegOperands(Inst, 1);
9767 return;
9768 }
9769 if (Op.isImm()) {
9770 Op.addImmOperands(Inst, 1);
9771 return;
9772 }
9773 llvm_unreachable("Unhandled operand type in cvtVOPD");
9774 };
9775
9776 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
9777
9778 // MCInst operands are ordered as follows:
9779 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
9780
9781 for (auto CompIdx : VOPD::COMPONENTS) {
9782 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9783 }
9784
9785 for (auto CompIdx : VOPD::COMPONENTS) {
9786 const auto &CInfo = InstInfo[CompIdx];
9787 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9788 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9789 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
9790 if (CInfo.hasSrc2Acc())
9791 addOp(CInfo.getIndexOfDstInParsedOperands());
9792 }
9793
9794 int BitOp3Idx =
9795 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
9796 if (BitOp3Idx != -1) {
9797 OptionalImmIndexMap OptIdx;
9798 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
9799 if (Op.isImm())
9800 OptIdx[Op.getImmTy()] = Operands.size() - 1;
9801
9802 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9803 }
9804}
9805
9806//===----------------------------------------------------------------------===//
9807// dpp
9808//===----------------------------------------------------------------------===//
9809
9810bool AMDGPUOperand::isDPP8() const {
9811 return isImmTy(ImmTyDPP8);
9812}
9813
9814bool AMDGPUOperand::isDPPCtrl() const {
9815 using namespace AMDGPU::DPP;
9816
9817 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
9818 if (result) {
9819 int64_t Imm = getImm();
9820 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9821 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9822 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9823 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9824 (Imm == DppCtrl::WAVE_SHL1) ||
9825 (Imm == DppCtrl::WAVE_ROL1) ||
9826 (Imm == DppCtrl::WAVE_SHR1) ||
9827 (Imm == DppCtrl::WAVE_ROR1) ||
9828 (Imm == DppCtrl::ROW_MIRROR) ||
9829 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9830 (Imm == DppCtrl::BCAST15) ||
9831 (Imm == DppCtrl::BCAST31) ||
9832 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9833 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9834 }
9835 return false;
9836}
9837
9838//===----------------------------------------------------------------------===//
9839// mAI
9840//===----------------------------------------------------------------------===//
9841
9842bool AMDGPUOperand::isBLGP() const {
9843 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
9844}
9845
9846bool AMDGPUOperand::isS16Imm() const {
9847 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
9848}
9849
9850bool AMDGPUOperand::isU16Imm() const {
9851 return isImmLiteral() && isUInt<16>(getImm());
9852}
9853
9854//===----------------------------------------------------------------------===//
9855// dim
9856//===----------------------------------------------------------------------===//
9857
9858bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
9859 // We want to allow "dim:1D" etc.,
9860 // but the initial 1 is tokenized as an integer.
9861 std::string Token;
9862 if (isToken(AsmToken::Integer)) {
9863 SMLoc Loc = getToken().getEndLoc();
9864 Token = std::string(getTokenStr());
9865 lex();
9866 if (getLoc() != Loc)
9867 return false;
9868 }
9869
9870 StringRef Suffix;
9871 if (!parseId(Suffix))
9872 return false;
9873 Token += Suffix;
9874
9875 StringRef DimId = Token;
9876 DimId.consume_front("SQ_RSRC_IMG_");
9877
9878 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
9879 if (!DimInfo)
9880 return false;
9881
9882 Encoding = DimInfo->Encoding;
9883 return true;
9884}
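// Illustrative examples, assuming the usual MIMG dim assembler suffixes: "dim:2D"
// and "dim:SQ_RSRC_IMG_2D" resolve to the same encoding because the SQ_RSRC_IMG_
// prefix is stripped before the suffix lookup, and the leading digit of forms such
// as "1D" is re-joined with the following identifier since the lexer tokenizes it
// as an integer first.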
9885
9886ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
9887 if (!isGFX10Plus())
9888 return ParseStatus::NoMatch;
9889
9890 SMLoc S = getLoc();
9891
9892 if (!trySkipId("dim", AsmToken::Colon))
9893 return ParseStatus::NoMatch;
9894
9895 unsigned Encoding;
9896 SMLoc Loc = getLoc();
9897 if (!parseDimId(Encoding))
9898 return Error(Loc, "invalid dim value");
9899
9900 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9901 AMDGPUOperand::ImmTyDim));
9902 return ParseStatus::Success;
9903}
9904
9905//===----------------------------------------------------------------------===//
9906// dpp
9907//===----------------------------------------------------------------------===//
9908
9909ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9910 SMLoc S = getLoc();
9911
9912 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9913 return ParseStatus::NoMatch;
9914
9915 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9916
9917 int64_t Sels[8];
9918
9919 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9920 return ParseStatus::Failure;
9921
9922 for (size_t i = 0; i < 8; ++i) {
9923 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9924 return ParseStatus::Failure;
9925
9926 SMLoc Loc = getLoc();
9927 if (getParser().parseAbsoluteExpression(Sels[i]))
9928 return ParseStatus::Failure;
9929 if (0 > Sels[i] || 7 < Sels[i])
9930 return Error(Loc, "expected a 3-bit value");
9931 }
9932
9933 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9934 return ParseStatus::Failure;
9935
9936 unsigned DPP8 = 0;
9937 for (size_t i = 0; i < 8; ++i)
9938 DPP8 |= (Sels[i] << (i * 3));
9939
9940 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9941 return ParseStatus::Success;
9942}
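// Worked example, illustrative only: each of the eight selectors occupies 3 bits,
// so "dpp8:[0,1,2,3,4,5,6,7]" (the identity lane mapping) packs to
// 0 | 1<<3 | 2<<6 | 3<<9 | 4<<12 | 5<<15 | 6<<18 | 7<<21 = 0xFAC688.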
9943
9944bool
9945AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9946 const OperandVector &Operands) {
9947 if (Ctrl == "row_newbcast")
9948 return isGFX90A();
9949
9950 if (Ctrl == "row_share" ||
9951 Ctrl == "row_xmask")
9952 return isGFX10Plus();
9953
9954 if (Ctrl == "wave_shl" ||
9955 Ctrl == "wave_shr" ||
9956 Ctrl == "wave_rol" ||
9957 Ctrl == "wave_ror" ||
9958 Ctrl == "row_bcast")
9959 return isVI() || isGFX9();
9960
9961 return Ctrl == "row_mirror" ||
9962 Ctrl == "row_half_mirror" ||
9963 Ctrl == "quad_perm" ||
9964 Ctrl == "row_shl" ||
9965 Ctrl == "row_shr" ||
9966 Ctrl == "row_ror";
9967}
9968
9969int64_t
9970AMDGPUAsmParser::parseDPPCtrlPerm() {
9971 // quad_perm:[%d,%d,%d,%d]
9972
9973 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9974 return -1;
9975
9976 int64_t Val = 0;
9977 for (int i = 0; i < 4; ++i) {
9978 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9979 return -1;
9980
9981 int64_t Temp;
9982 SMLoc Loc = getLoc();
9983 if (getParser().parseAbsoluteExpression(Temp))
9984 return -1;
9985 if (Temp < 0 || Temp > 3) {
9986 Error(Loc, "expected a 2-bit value");
9987 return -1;
9988 }
9989
9990 Val += (Temp << i * 2);
9991 }
9992
9993 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9994 return -1;
9995
9996 return Val;
9997}
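// Worked example, illustrative only: "quad_perm:[0,1,2,3]" packs as
// 0 + (1 << 2) + (2 << 4) + (3 << 6) = 0xE4, which is also the default dpp_ctrl
// value used further below when no control operand is present.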
9998
9999int64_t
10000AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10001 using namespace AMDGPU::DPP;
10002
10003 // sel:%d
10004
10005 int64_t Val;
10006 SMLoc Loc = getLoc();
10007
10008 if (getParser().parseAbsoluteExpression(Val))
10009 return -1;
10010
10011 struct DppCtrlCheck {
10012 int64_t Ctrl;
10013 int Lo;
10014 int Hi;
10015 };
10016
10017 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
10018 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10019 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10020 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10021 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10022 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10023 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10024 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10025 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10026 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10027 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10028 .Default({-1, 0, 0});
10029
10030 bool Valid;
10031 if (Check.Ctrl == -1) {
10032 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
10033 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
10034 } else {
10035 Valid = Check.Lo <= Val && Val <= Check.Hi;
10036 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
10037 }
10038
10039 if (!Valid) {
10040 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
10041 return -1;
10042 }
10043
10044 return Val;
10045}
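// Illustrative mapping derived from the table above: "row_shl:3" becomes
// DppCtrl::ROW_SHL0 | 3, "wave_shl:1" collapses to the single WAVE_SHL1 code, and
// "row_bcast:" accepts only 15 or 31, mapping to BCAST15 or BCAST31 respectively.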
10046
10047ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10048 using namespace AMDGPU::DPP;
10049
10050 if (!isToken(AsmToken::Identifier) ||
10051 !isSupportedDPPCtrl(getTokenStr(), Operands))
10052 return ParseStatus::NoMatch;
10053
10054 SMLoc S = getLoc();
10055 int64_t Val = -1;
10056 StringRef Ctrl;
10057
10058 parseId(Ctrl);
10059
10060 if (Ctrl == "row_mirror") {
10061 Val = DppCtrl::ROW_MIRROR;
10062 } else if (Ctrl == "row_half_mirror") {
10063 Val = DppCtrl::ROW_HALF_MIRROR;
10064 } else {
10065 if (skipToken(AsmToken::Colon, "expected a colon")) {
10066 if (Ctrl == "quad_perm") {
10067 Val = parseDPPCtrlPerm();
10068 } else {
10069 Val = parseDPPCtrlSel(Ctrl);
10070 }
10071 }
10072 }
10073
10074 if (Val == -1)
10075 return ParseStatus::Failure;
10076
10077 Operands.push_back(
10078 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10079 return ParseStatus::Success;
10080}
10081
10082void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
10083 bool IsDPP8) {
10084 OptionalImmIndexMap OptionalIdx;
10085 unsigned Opc = Inst.getOpcode();
10086 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10087
10088 // MAC instructions are special because they have 'old'
10089 // operand which is not tied to dst (but assumed to be).
10090 // They also have dummy unused src2_modifiers.
10091 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
10092 int Src2ModIdx =
10093 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
10094 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10095 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
10096
10097 unsigned I = 1;
10098 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10099 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10100 }
10101
10102 int Fi = 0;
10103 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
10104 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10105 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10106 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10107 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
10108
10109 for (unsigned E = Operands.size(); I != E; ++I) {
10110
10111 if (IsMAC) {
10112 int NumOperands = Inst.getNumOperands();
10113 if (OldIdx == NumOperands) {
10114 // Handle old operand
10115 constexpr int DST_IDX = 0;
10116 Inst.addOperand(Inst.getOperand(DST_IDX));
10117 } else if (Src2ModIdx == NumOperands) {
10118 // Add unused dummy src2_modifiers
10119 Inst.addOperand(MCOperand::createImm(0));
10120 }
10121 }
10122
10123 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
10124 Inst.addOperand(Inst.getOperand(0));
10125 }
10126
10127 if (IsVOP3CvtSrDpp) {
10128 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
10129 Inst.addOperand(MCOperand::createImm(0));
10130 Inst.addOperand(MCOperand::createReg(MCRegister()));
10131 }
10132 }
10133
10134 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10135 MCOI::TIED_TO);
10136 if (TiedTo != -1) {
10137 assert((unsigned)TiedTo < Inst.getNumOperands());
10138 // handle tied old or src2 for MAC instructions
10139 Inst.addOperand(Inst.getOperand(TiedTo));
10140 }
10141 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10142 // Add the register arguments
10143 if (IsDPP8 && Op.isDppFI()) {
10144 Fi = Op.getImm();
10145 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10146 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10147 } else if (Op.isReg()) {
10148 Op.addRegOperands(Inst, 1);
10149 } else if (Op.isImm() &&
10150 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
10151 Op.addImmOperands(Inst, 1);
10152 } else if (Op.isImm()) {
10153 OptionalIdx[Op.getImmTy()] = I;
10154 } else {
10155 llvm_unreachable("unhandled operand type");
10156 }
10157 }
10158
10159 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
10160 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10161 AMDGPUOperand::ImmTyClamp);
10162
10163 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
10164 if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
10165 Inst.addOperand(Inst.getOperand(0));
10166 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10167 AMDGPUOperand::ImmTyByteSel);
10168 }
10169
10170 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10171 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
10172
10173 if (Desc.TSFlags & SIInstrFlags::VOP3P)
10174 cvtVOP3P(Inst, Operands, OptionalIdx);
10175 else if (Desc.TSFlags & SIInstrFlags::VOP3)
10176 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10177 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
10178 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
10179 }
10180
10181 if (IsDPP8) {
10182 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
10183 using namespace llvm::AMDGPU::DPP;
10184 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10185 } else {
10186 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
10187 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10188 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10189 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10190
10191 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
10192 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10193 AMDGPUOperand::ImmTyDppFI);
10194 }
10195}
10196
10197void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
10198 OptionalImmIndexMap OptionalIdx;
10199
10200 unsigned I = 1;
10201 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10202 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10203 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10204 }
10205
10206 int Fi = 0;
10207 for (unsigned E = Operands.size(); I != E; ++I) {
10208 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10209 MCOI::TIED_TO);
10210 if (TiedTo != -1) {
10211 assert((unsigned)TiedTo < Inst.getNumOperands());
10212 // handle tied old or src2 for MAC instructions
10213 Inst.addOperand(Inst.getOperand(TiedTo));
10214 }
10215 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10216 // Add the register arguments
10217 if (Op.isReg() && validateVccOperand(Op.getReg())) {
10218 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
10219 // Skip it.
10220 continue;
10221 }
10222
10223 if (IsDPP8) {
10224 if (Op.isDPP8()) {
10225 Op.addImmOperands(Inst, 1);
10226 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10227 Op.addRegWithFPInputModsOperands(Inst, 2);
10228 } else if (Op.isDppFI()) {
10229 Fi = Op.getImm();
10230 } else if (Op.isReg()) {
10231 Op.addRegOperands(Inst, 1);
10232 } else {
10233 llvm_unreachable("Invalid operand type");
10234 }
10235 } else {
10236 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10237 Op.addRegWithFPInputModsOperands(Inst, 2);
10238 } else if (Op.isReg()) {
10239 Op.addRegOperands(Inst, 1);
10240 } else if (Op.isDPPCtrl()) {
10241 Op.addImmOperands(Inst, 1);
10242 } else if (Op.isImm()) {
10243 // Handle optional arguments
10244 OptionalIdx[Op.getImmTy()] = I;
10245 } else {
10246 llvm_unreachable("Invalid operand type");
10247 }
10248 }
10249 }
10250
10251 if (IsDPP8) {
10252 using namespace llvm::AMDGPU::DPP;
10253 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10254 } else {
10255 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10256 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10257 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10258 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
10259 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10260 AMDGPUOperand::ImmTyDppFI);
10261 }
10262 }
10263}
10264
10265//===----------------------------------------------------------------------===//
10266// sdwa
10267//===----------------------------------------------------------------------===//
10268
10269ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
10270 StringRef Prefix,
10271 AMDGPUOperand::ImmTy Type) {
10272 return parseStringOrIntWithPrefix(
10273 Operands, Prefix,
10274 {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
10275 Type);
10276}
10277
10278ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
10279 return parseStringOrIntWithPrefix(
10280 Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
10281 AMDGPUOperand::ImmTySDWADstUnused);
10282}
10283
10284void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
10285 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
10286}
10287
10288void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
10289 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
10290}
10291
10292void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
10293 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
10294}
10295
10296void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
10297 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
10298}
10299
10300void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
10301 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
10302}
10303
10304void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
10305 uint64_t BasicInstType,
10306 bool SkipDstVcc,
10307 bool SkipSrcVcc) {
10308 using namespace llvm::AMDGPU::SDWA;
10309
10310 OptionalImmIndexMap OptionalIdx;
10311 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10312 bool SkippedVcc = false;
10313
10314 unsigned I = 1;
10315 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10316 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10317 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10318 }
10319
10320 for (unsigned E = Operands.size(); I != E; ++I) {
10321 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10322 if (SkipVcc && !SkippedVcc && Op.isReg() &&
10323 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
10324 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
10325 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
10326 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
10327 // Skip VCC only if we didn't skip it on previous iteration.
10328 // Note that src0 and src1 occupy 2 slots each because of modifiers.
10329 if (BasicInstType == SIInstrFlags::VOP2 &&
10330 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
10331 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
10332 SkippedVcc = true;
10333 continue;
10334 }
10335 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
10336 SkippedVcc = true;
10337 continue;
10338 }
10339 }
10340 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10341 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10342 } else if (Op.isImm()) {
10343 // Handle optional arguments
10344 OptionalIdx[Op.getImmTy()] = I;
10345 } else {
10346 llvm_unreachable("Invalid operand type");
10347 }
10348 SkippedVcc = false;
10349 }
10350
10351 const unsigned Opc = Inst.getOpcode();
10352 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10353 Opc != AMDGPU::V_NOP_sdwa_vi) {
10354 // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
10355 switch (BasicInstType) {
10356 case SIInstrFlags::VOP1:
10357 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10358 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10359 AMDGPUOperand::ImmTyClamp, 0);
10360
10361 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10362 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10363 AMDGPUOperand::ImmTyOModSI, 0);
10364
10365 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
10366 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10367 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10368
10369 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
10370 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10371 AMDGPUOperand::ImmTySDWADstUnused,
10372 DstUnused::UNUSED_PRESERVE);
10373
10374 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10375 break;
10376
10377 case SIInstrFlags::VOP2:
10378 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10379 AMDGPUOperand::ImmTyClamp, 0);
10380
10381 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
10382 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
10383
10384 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10385 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10386 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10387 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10388 break;
10389
10390 case SIInstrFlags::VOPC:
10391 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
10392 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10393 AMDGPUOperand::ImmTyClamp, 0);
10394 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10395 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10396 break;
10397
10398 default:
10399 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
10400 }
10401 }
10402
10403 // Special case v_mac_{f16, f32}:
10404 // it has a src2 register operand that is tied to the dst operand.
10405 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10406 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10407 auto *it = Inst.begin();
10408 std::advance(
10409 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
10410 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
10411 }
10412}
10413
10414/// Force static initialization.
10415extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
10416LLVMInitializeAMDGPUAsmParser() {
10417 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
10418 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
10419}
10420
10421#define GET_MATCHER_IMPLEMENTATION
10422#define GET_MNEMONIC_SPELL_CHECKER
10423#define GET_MNEMONIC_CHECKER
10424#include "AMDGPUGenAsmMatcher.inc"
10425
10426ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10427 unsigned MCK) {
10428 switch (MCK) {
10429 case MCK_addr64:
10430 return parseTokenOp("addr64", Operands);
10431 case MCK_done:
10432 return parseTokenOp("done", Operands);
10433 case MCK_idxen:
10434 return parseTokenOp("idxen", Operands);
10435 case MCK_lds:
10436 return parseTokenOp("lds", Operands);
10437 case MCK_offen:
10438 return parseTokenOp("offen", Operands);
10439 case MCK_off:
10440 return parseTokenOp("off", Operands);
10441 case MCK_row_95_en:
10442 return parseTokenOp("row_en", Operands);
10443 case MCK_gds:
10444 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10445 case MCK_tfe:
10446 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10447 }
10448 return tryCustomParseOperand(Operands, MCK);
10449}
10450
10451// This function should be defined after auto-generated include so that we have
10452// MatchClassKind enum defined
10453unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10454 unsigned Kind) {
10455 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10456 // But MatchInstructionImpl() expects a token and fails to validate the
10457 // operand. This method checks whether we were given an immediate operand but
10458 // expected the corresponding token.
10459 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10460 switch (Kind) {
10461 case MCK_addr64:
10462 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10463 case MCK_gds:
10464 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10465 case MCK_lds:
10466 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10467 case MCK_idxen:
10468 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10469 case MCK_offen:
10470 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10471 case MCK_tfe:
10472 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10473 case MCK_SSrc_b32:
10474 // When operands have expression values, they will return true for isToken,
10475 // because it is not possible to distinguish between a token and an
10476 // expression at parse time. MatchInstructionImpl() will always try to
10477 // match an operand as a token, when isToken returns true, and when the
10478 // name of the expression is not a valid token, the match will fail,
10479 // so we need to handle it here.
10480 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10481 case MCK_SSrc_f32:
10482 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10483 case MCK_SOPPBrTarget:
10484 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10485 case MCK_VReg32OrOff:
10486 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10487 case MCK_InterpSlot:
10488 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10489 case MCK_InterpAttr:
10490 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10491 case MCK_InterpAttrChan:
10492 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10493 case MCK_SReg_64:
10494 case MCK_SReg_64_XEXEC:
10495 // Null is defined as a 32-bit register but
10496 // it should also be enabled with 64-bit operands or larger.
10497 // The following code enables it for SReg_64 and larger operands
10498 // used as source and destination. Remaining source
10499 // operands are handled in isInlinableImm.
10500 case MCK_SReg_96:
10501 case MCK_SReg_128:
10502 case MCK_SReg_256:
10503 case MCK_SReg_512:
10504 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10505 default:
10506 return Match_InvalidOperand;
10507 }
10508}
10509
10510//===----------------------------------------------------------------------===//
10511// endpgm
10512//===----------------------------------------------------------------------===//
10513
10514ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10515 SMLoc S = getLoc();
10516 int64_t Imm = 0;
10517
10518 if (!parseExpr(Imm)) {
10519 // The operand is optional; if not present, default to 0.
10520 Imm = 0;
10521 }
10522
10523 if (!isUInt<16>(Imm))
10524 return Error(S, "expected a 16-bit value");
10525
10526 Operands.push_back(
10527 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10528 return ParseStatus::Success;
10529}
10530
10531bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10532
10533//===----------------------------------------------------------------------===//
10534// Split Barrier
10535//===----------------------------------------------------------------------===//
10536
10537bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & BFloat()
Definition APFloat.h:295
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6053
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition MCAsmMacro.h:103
bool is(TokenKind K) const
Definition MCAsmMacro.h:75
Register getReg() const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:353
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
SMLoc getLoc() const
Definition MCInst.h:208
void setLoc(SMLoc loc)
Definition MCInst.h:207
unsigned getOpcode() const
Definition MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition MCInst.h:232
void addOperand(const MCOperand Op)
Definition MCInst.h:215
iterator begin()
Definition MCInst.h:227
size_t size() const
Definition MCInst.h:226
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Definition MCInstrInfo.h:80
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
void setImm(int64_t Val)
Definition MCInst.h:89
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
bool isImm() const
Definition MCInst.h:66
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
const MCExpr * getExpr() const
Definition MCInst.h:118
bool isExpr() const
Definition MCInst.h:69
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr bool isValid() const
Definition MCRegister.h:84
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
void setRedefinable(bool Value)
Mark this symbol as redefinable.
Definition MCSymbol.h:210
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCTargetAsmParser - Generic interface to target specific assembly parsers.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Definition Register.h:100
Represents a location in source code.
Definition SMLoc.h:22
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:35
constexpr const char * getPointer() const
Definition SMLoc.h:33
constexpr bool isValid() const
Definition SMLoc.h:28
SMLoc Start
Definition SMLoc.h:49
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:854
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition StringRef.h:657
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:573
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:611
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
bool consume_front(StringRef Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:637
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:273
bool contains(StringRef key) const
Check if the set contains the given key.
Definition StringSet.h:60
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
static constexpr CustomOperand Operands[]
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlineValue(MCRegister Reg)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY bool isLitExpr(const MCExpr *Expr)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
LLVM_READONLY int64_t getLitValue(const MCExpr *Expr)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:231
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:202
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:209
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:219
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:224
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:206
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:201
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:218
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:216
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:211
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:205
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:236
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:237
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:212
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:204
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:221
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:217
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:223
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:213
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:238
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:220
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:203
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:228
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1430
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ OPERAND_IMMEDIATE
Definition MCInstrDesc.h:62
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
@ Valid
The data is already valid.
Context & getContext() const
Definition BasicBlock.h:99
bool isNull(StringRef S)
Definition YAMLTraits.h:570
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
Definition Error.h:1113
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
StringMapEntry< Value * > ValueName
Definition Value.h:56
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition Alignment.h:206
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
void PrintError(const Twine &Msg)
Definition Error.cpp:104
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
Definition InstrProf.h:267
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
Op::Description Desc
Target & getTheR600Target()
The target for R600 GPUs.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:27
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
#define N
RegisterKind Kind
StringLiteral Name
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
Instruction set architecture version.
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...