1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
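//
// This file implements the MC-layer assembly parser for AMDGPU: it turns
// textual AMDGPU (SI and later) instructions and operands into MCInst
// instances and handles the target-specific assembler directives
// (.amdhsa_kernel, .amd_kernel_code_t, HSA/PAL metadata, .amdgpu_lds, ...),
// as summarized from the declarations below.
//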
8
9#include "AMDKernelCodeT.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
38#include "llvm/MC/MCSymbol.h"
46#include <optional>
47
48using namespace llvm;
49using namespace llvm::AMDGPU;
50using namespace llvm::amdhsa;
51
52namespace {
53
54class AMDGPUAsmParser;
55
56enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
57
58//===----------------------------------------------------------------------===//
59// Operand
60//===----------------------------------------------------------------------===//
61
62class AMDGPUOperand : public MCParsedAsmOperand {
63 enum KindTy {
64 Token,
65 Immediate,
66 Register,
67 Expression
68 } Kind;
69
70 SMLoc StartLoc, EndLoc;
71 const AMDGPUAsmParser *AsmParser;
72
73public:
74 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
75 : Kind(Kind_), AsmParser(AsmParser_) {}
76
77 using Ptr = std::unique_ptr<AMDGPUOperand>;
78
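// Source-operand modifiers parsed from the assembly text. Abs and Neg are the
// floating-point modifiers (written e.g. as "-|v1|" or "neg(v1)"/"abs(v1)"),
// Sext is the integer sign-extension modifier (written "sext(v1)"), and Lit
// records an explicit literal-encoding request. FP and integer modifiers are
// mutually exclusive on a single operand (see the assert in
// getModifiersOperand below); the syntax shown here is illustrative.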
79 struct Modifiers {
80 bool Abs = false;
81 bool Neg = false;
82 bool Sext = false;
83 LitModifier Lit = LitModifier::None;
84
85 bool hasFPModifiers() const { return Abs || Neg; }
86 bool hasIntModifiers() const { return Sext; }
87 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
88
89 int64_t getFPModifiersOperand() const {
90 int64_t Operand = 0;
91 Operand |= Abs ? SISrcMods::ABS : 0u;
92 Operand |= Neg ? SISrcMods::NEG : 0u;
93 return Operand;
94 }
95
96 int64_t getIntModifiersOperand() const {
97 int64_t Operand = 0;
98 Operand |= Sext ? SISrcMods::SEXT : 0u;
99 return Operand;
100 }
101
102 int64_t getModifiersOperand() const {
103 assert(!(hasFPModifiers() && hasIntModifiers())
104 && "fp and int modifiers should not be used simultaneously");
105 if (hasFPModifiers())
106 return getFPModifiersOperand();
107 if (hasIntModifiers())
108 return getIntModifiersOperand();
109 return 0;
110 }
111
112 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
113 };
114
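// Tags for operands that are parsed as immediates but really encode named
// instruction modifiers, e.g. offset:16, gds, clamp, dmask:0xf, row_mask:0xf
// (illustrative spellings). ImmTyNone marks a plain literal value.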
115 enum ImmTy {
116 ImmTyNone,
117 ImmTyGDS,
118 ImmTyLDS,
119 ImmTyOffen,
120 ImmTyIdxen,
121 ImmTyAddr64,
122 ImmTyOffset,
123 ImmTyInstOffset,
124 ImmTyOffset0,
125 ImmTyOffset1,
126 ImmTySMEMOffsetMod,
127 ImmTyCPol,
128 ImmTyTFE,
129 ImmTyD16,
130 ImmTyClamp,
131 ImmTyOModSI,
132 ImmTySDWADstSel,
133 ImmTySDWASrc0Sel,
134 ImmTySDWASrc1Sel,
135 ImmTySDWADstUnused,
136 ImmTyDMask,
137 ImmTyDim,
138 ImmTyUNorm,
139 ImmTyDA,
140 ImmTyR128A16,
141 ImmTyA16,
142 ImmTyLWE,
143 ImmTyExpTgt,
144 ImmTyExpCompr,
145 ImmTyExpVM,
146 ImmTyFORMAT,
147 ImmTyHwreg,
148 ImmTyOff,
149 ImmTySendMsg,
150 ImmTyInterpSlot,
151 ImmTyInterpAttr,
152 ImmTyInterpAttrChan,
153 ImmTyOpSel,
154 ImmTyOpSelHi,
155 ImmTyNegLo,
156 ImmTyNegHi,
157 ImmTyIndexKey8bit,
158 ImmTyIndexKey16bit,
159 ImmTyIndexKey32bit,
160 ImmTyDPP8,
161 ImmTyDppCtrl,
162 ImmTyDppRowMask,
163 ImmTyDppBankMask,
164 ImmTyDppBoundCtrl,
165 ImmTyDppFI,
166 ImmTySwizzle,
167 ImmTyGprIdxMode,
168 ImmTyHigh,
169 ImmTyBLGP,
170 ImmTyCBSZ,
171 ImmTyABID,
172 ImmTyEndpgm,
173 ImmTyWaitVDST,
174 ImmTyWaitEXP,
175 ImmTyWaitVAVDst,
176 ImmTyWaitVMVSrc,
177 ImmTyBitOp3,
178 ImmTyMatrixAFMT,
179 ImmTyMatrixBFMT,
180 ImmTyMatrixAScale,
181 ImmTyMatrixBScale,
182 ImmTyMatrixAScaleFmt,
183 ImmTyMatrixBScaleFmt,
184 ImmTyMatrixAReuse,
185 ImmTyMatrixBReuse,
186 ImmTyScaleSel,
187 ImmTyByteSel,
188 };
189
190private:
191 struct TokOp {
192 const char *Data;
193 unsigned Length;
194 };
195
196 struct ImmOp {
197 int64_t Val;
198 ImmTy Type;
199 bool IsFPImm;
200 Modifiers Mods;
201 };
202
203 struct RegOp {
204 MCRegister RegNo;
205 Modifiers Mods;
206 };
207
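// Operand payload; the active member is selected by Kind above.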
208 union {
209 TokOp Tok;
210 ImmOp Imm;
211 RegOp Reg;
212 const MCExpr *Expr;
213 };
214
215 // The index of the associated MCInst operand.
216 mutable int MCOpIdx = -1;
217
218public:
219 bool isToken() const override { return Kind == Token; }
220
221 bool isSymbolRefExpr() const {
222 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
223 }
224
225 bool isImm() const override {
226 return Kind == Immediate;
227 }
228
229 bool isInlinableImm(MVT type) const;
230 bool isLiteralImm(MVT type) const;
231
232 bool isRegKind() const {
233 return Kind == Register;
234 }
235
236 bool isReg() const override {
237 return isRegKind() && !hasModifiers();
238 }
239
240 bool isRegOrInline(unsigned RCID, MVT type) const {
241 return isRegClass(RCID) || isInlinableImm(type);
242 }
243
244 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
245 return isRegOrInline(RCID, type) || isLiteralImm(type);
246 }
247
248 bool isRegOrImmWithInt16InputMods() const {
249 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
250 }
251
252 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
253 return isRegOrImmWithInputMods(
254 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
255 }
256
257 bool isRegOrImmWithInt32InputMods() const {
258 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
259 }
260
261 bool isRegOrInlineImmWithInt16InputMods() const {
262 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
263 }
264
265 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
266 return isRegOrInline(
267 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
268 }
269
270 bool isRegOrInlineImmWithInt32InputMods() const {
271 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
272 }
273
274 bool isRegOrImmWithInt64InputMods() const {
275 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
276 }
277
278 bool isRegOrImmWithFP16InputMods() const {
279 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
280 }
281
282 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
283 return isRegOrImmWithInputMods(
284 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
285 }
286
287 bool isRegOrImmWithFP32InputMods() const {
288 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
289 }
290
291 bool isRegOrImmWithFP64InputMods() const {
292 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
293 }
294
295 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
296 return isRegOrInline(
297 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
298 }
299
300 bool isRegOrInlineImmWithFP32InputMods() const {
301 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
302 }
303
304 bool isRegOrInlineImmWithFP64InputMods() const {
305 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
306 }
307
308 bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
309
310 bool isVRegWithFP32InputMods() const {
311 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
312 }
313
314 bool isVRegWithFP64InputMods() const {
315 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
316 }
317
318 bool isPackedFP16InputMods() const {
319 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
320 }
321
322 bool isPackedVGPRFP32InputMods() const {
323 return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
324 }
325
326 bool isVReg() const {
327 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
328 isRegClass(AMDGPU::VReg_64RegClassID) ||
329 isRegClass(AMDGPU::VReg_96RegClassID) ||
330 isRegClass(AMDGPU::VReg_128RegClassID) ||
331 isRegClass(AMDGPU::VReg_160RegClassID) ||
332 isRegClass(AMDGPU::VReg_192RegClassID) ||
333 isRegClass(AMDGPU::VReg_256RegClassID) ||
334 isRegClass(AMDGPU::VReg_512RegClassID) ||
335 isRegClass(AMDGPU::VReg_1024RegClassID);
336 }
337
338 bool isVReg32() const {
339 return isRegClass(AMDGPU::VGPR_32RegClassID);
340 }
341
342 bool isVReg32OrOff() const {
343 return isOff() || isVReg32();
344 }
345
346 bool isNull() const {
347 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
348 }
349
350 bool isVRegWithInputMods() const;
351 template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
352 template <bool IsFake16> bool isT16VRegWithInputMods() const;
353
354 bool isSDWAOperand(MVT type) const;
355 bool isSDWAFP16Operand() const;
356 bool isSDWAFP32Operand() const;
357 bool isSDWAInt16Operand() const;
358 bool isSDWAInt32Operand() const;
359
360 bool isImmTy(ImmTy ImmT) const {
361 return isImm() && Imm.Type == ImmT;
362 }
363
364 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
365
366 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
367
368 bool isImmModifier() const {
369 return isImm() && Imm.Type != ImmTyNone;
370 }
371
372 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
373 bool isDim() const { return isImmTy(ImmTyDim); }
374 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
375 bool isOff() const { return isImmTy(ImmTyOff); }
376 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
377 bool isOffen() const { return isImmTy(ImmTyOffen); }
378 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
379 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
380 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
381 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
382 bool isGDS() const { return isImmTy(ImmTyGDS); }
383 bool isLDS() const { return isImmTy(ImmTyLDS); }
384 bool isCPol() const { return isImmTy(ImmTyCPol); }
385 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
386 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
387 bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
388 bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
389 bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
390 bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
391 bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
392 bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
393 bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
394 bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
395 bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
396 bool isTFE() const { return isImmTy(ImmTyTFE); }
397 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
398 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
399 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
400 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
401 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
402 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
403 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
404 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
405 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
406 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
407 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
408 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
409 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
410 bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
411
412 bool isRegOrImm() const {
413 return isReg() || isImm();
414 }
415
416 bool isRegClass(unsigned RCID) const;
417
418 bool isInlineValue() const;
419
420 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
421 return isRegOrInline(RCID, type) && !hasModifiers();
422 }
423
424 bool isSCSrcB16() const {
425 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
426 }
427
428 bool isSCSrcV2B16() const {
429 return isSCSrcB16();
430 }
431
432 bool isSCSrc_b32() const {
433 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
434 }
435
436 bool isSCSrc_b64() const {
437 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
438 }
439
440 bool isBoolReg() const;
441
442 bool isSCSrcF16() const {
443 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
444 }
445
446 bool isSCSrcV2F16() const {
447 return isSCSrcF16();
448 }
449
450 bool isSCSrcF32() const {
451 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
452 }
453
454 bool isSCSrcF64() const {
455 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
456 }
457
458 bool isSSrc_b32() const {
459 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
460 }
461
462 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
463
464 bool isSSrcV2B16() const {
465 llvm_unreachable("cannot happen");
466 return isSSrc_b16();
467 }
468
469 bool isSSrc_b64() const {
470 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
471 // See isVSrc64().
472 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
473 (((const MCTargetAsmParser *)AsmParser)
474 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
475 isExpr());
476 }
477
478 bool isSSrc_f32() const {
479 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
480 }
481
482 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
483
484 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
485
486 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
487
488 bool isSSrcV2F16() const {
489 llvm_unreachable("cannot happen");
490 return isSSrc_f16();
491 }
492
493 bool isSSrcV2FP32() const {
494 llvm_unreachable("cannot happen");
495 return isSSrc_f32();
496 }
497
498 bool isSCSrcV2FP32() const {
499 llvm_unreachable("cannot happen");
500 return isSCSrcF32();
501 }
502
503 bool isSSrcV2INT32() const {
504 llvm_unreachable("cannot happen");
505 return isSSrc_b32();
506 }
507
508 bool isSCSrcV2INT32() const {
509 llvm_unreachable("cannot happen");
510 return isSCSrc_b32();
511 }
512
513 bool isSSrcOrLds_b32() const {
514 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
515 isLiteralImm(MVT::i32) || isExpr();
516 }
517
518 bool isVCSrc_b32() const {
519 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
520 }
521
522 bool isVCSrc_b32_Lo256() const {
523 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
524 }
525
526 bool isVCSrc_b64_Lo256() const {
527 return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
528 }
529
530 bool isVCSrc_b64() const {
531 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
532 }
533
534 bool isVCSrcT_b16() const {
535 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
536 }
537
538 bool isVCSrcTB16_Lo128() const {
539 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
540 }
541
542 bool isVCSrcFake16B16_Lo128() const {
543 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
544 }
545
546 bool isVCSrc_b16() const {
547 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
548 }
549
550 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
551
552 bool isVCSrc_f32() const {
553 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
554 }
555
556 bool isVCSrc_f64() const {
557 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
558 }
559
560 bool isVCSrcTBF16() const {
561 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
562 }
563
564 bool isVCSrcT_f16() const {
565 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
566 }
567
568 bool isVCSrcT_bf16() const {
569 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
570 }
571
572 bool isVCSrcTBF16_Lo128() const {
573 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
574 }
575
576 bool isVCSrcTF16_Lo128() const {
577 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
578 }
579
580 bool isVCSrcFake16BF16_Lo128() const {
581 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
582 }
583
584 bool isVCSrcFake16F16_Lo128() const {
585 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
586 }
587
588 bool isVCSrc_bf16() const {
589 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
590 }
591
592 bool isVCSrc_f16() const {
593 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
594 }
595
596 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
597
598 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
599
600 bool isVSrc_b32() const {
601 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
602 }
603
604 bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
605
606 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
607
608 bool isVSrcT_b16_Lo128() const {
609 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
610 }
611
612 bool isVSrcFake16_b16_Lo128() const {
613 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
614 }
615
616 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
617
618 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
619
620 bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
621
622 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
623
624 bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
625
626 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
627
628 bool isVSrc_f32() const {
629 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
630 }
631
632 bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
633
634 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
635
636 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
637
638 bool isVSrcT_bf16_Lo128() const {
639 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
640 }
641
642 bool isVSrcT_f16_Lo128() const {
643 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
644 }
645
646 bool isVSrcFake16_bf16_Lo128() const {
647 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
648 }
649
650 bool isVSrcFake16_f16_Lo128() const {
651 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
652 }
653
654 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
655
656 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
657
658 bool isVSrc_v2bf16() const {
659 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
660 }
661
662 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
663
664 bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
665
666 bool isVISrcB32() const {
667 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
668 }
669
670 bool isVISrcB16() const {
671 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
672 }
673
674 bool isVISrcV2B16() const {
675 return isVISrcB16();
676 }
677
678 bool isVISrcF32() const {
679 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
680 }
681
682 bool isVISrcF16() const {
683 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
684 }
685
686 bool isVISrcV2F16() const {
687 return isVISrcF16() || isVISrcB32();
688 }
689
690 bool isVISrc_64_bf16() const {
691 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
692 }
693
694 bool isVISrc_64_f16() const {
695 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
696 }
697
698 bool isVISrc_64_b32() const {
699 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
700 }
701
702 bool isVISrc_64B64() const {
703 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
704 }
705
706 bool isVISrc_64_f64() const {
707 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
708 }
709
710 bool isVISrc_64V2FP32() const {
711 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
712 }
713
714 bool isVISrc_64V2INT32() const {
715 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
716 }
717
718 bool isVISrc_256_b32() const {
719 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
720 }
721
722 bool isVISrc_256_f32() const {
723 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
724 }
725
726 bool isVISrc_256B64() const {
727 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
728 }
729
730 bool isVISrc_256_f64() const {
731 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
732 }
733
734 bool isVISrc_512_f64() const {
735 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
736 }
737
738 bool isVISrc_128B16() const {
739 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
740 }
741
742 bool isVISrc_128V2B16() const {
743 return isVISrc_128B16();
744 }
745
746 bool isVISrc_128_b32() const {
747 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
748 }
749
750 bool isVISrc_128_f32() const {
751 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
752 }
753
754 bool isVISrc_256V2FP32() const {
755 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
756 }
757
758 bool isVISrc_256V2INT32() const {
759 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
760 }
761
762 bool isVISrc_512_b32() const {
763 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
764 }
765
766 bool isVISrc_512B16() const {
767 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
768 }
769
770 bool isVISrc_512V2B16() const {
771 return isVISrc_512B16();
772 }
773
774 bool isVISrc_512_f32() const {
775 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
776 }
777
778 bool isVISrc_512F16() const {
779 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
780 }
781
782 bool isVISrc_512V2F16() const {
783 return isVISrc_512F16() || isVISrc_512_b32();
784 }
785
786 bool isVISrc_1024_b32() const {
787 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
788 }
789
790 bool isVISrc_1024B16() const {
791 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
792 }
793
794 bool isVISrc_1024V2B16() const {
795 return isVISrc_1024B16();
796 }
797
798 bool isVISrc_1024_f32() const {
799 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
800 }
801
802 bool isVISrc_1024F16() const {
803 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
804 }
805
806 bool isVISrc_1024V2F16() const {
807 return isVISrc_1024F16() || isVISrc_1024_b32();
808 }
809
810 bool isAISrcB32() const {
811 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
812 }
813
814 bool isAISrcB16() const {
815 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
816 }
817
818 bool isAISrcV2B16() const {
819 return isAISrcB16();
820 }
821
822 bool isAISrcF32() const {
823 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
824 }
825
826 bool isAISrcF16() const {
827 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
828 }
829
830 bool isAISrcV2F16() const {
831 return isAISrcF16() || isAISrcB32();
832 }
833
834 bool isAISrc_64B64() const {
835 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
836 }
837
838 bool isAISrc_64_f64() const {
839 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
840 }
841
842 bool isAISrc_128_b32() const {
843 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
844 }
845
846 bool isAISrc_128B16() const {
847 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
848 }
849
850 bool isAISrc_128V2B16() const {
851 return isAISrc_128B16();
852 }
853
854 bool isAISrc_128_f32() const {
855 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
856 }
857
858 bool isAISrc_128F16() const {
859 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
860 }
861
862 bool isAISrc_128V2F16() const {
863 return isAISrc_128F16() || isAISrc_128_b32();
864 }
865
866 bool isVISrc_128_bf16() const {
867 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
868 }
869
870 bool isVISrc_128_f16() const {
871 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
872 }
873
874 bool isVISrc_128V2F16() const {
875 return isVISrc_128_f16() || isVISrc_128_b32();
876 }
877
878 bool isAISrc_256B64() const {
879 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
880 }
881
882 bool isAISrc_256_f64() const {
883 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
884 }
885
886 bool isAISrc_512_b32() const {
887 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
888 }
889
890 bool isAISrc_512B16() const {
891 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
892 }
893
894 bool isAISrc_512V2B16() const {
895 return isAISrc_512B16();
896 }
897
898 bool isAISrc_512_f32() const {
899 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
900 }
901
902 bool isAISrc_512F16() const {
903 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
904 }
905
906 bool isAISrc_512V2F16() const {
907 return isAISrc_512F16() || isAISrc_512_b32();
908 }
909
910 bool isAISrc_1024_b32() const {
911 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
912 }
913
914 bool isAISrc_1024B16() const {
915 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
916 }
917
918 bool isAISrc_1024V2B16() const {
919 return isAISrc_1024B16();
920 }
921
922 bool isAISrc_1024_f32() const {
923 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
924 }
925
926 bool isAISrc_1024F16() const {
927 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
928 }
929
930 bool isAISrc_1024V2F16() const {
931 return isAISrc_1024F16() || isAISrc_1024_b32();
932 }
933
934 bool isKImmFP32() const {
935 return isLiteralImm(MVT::f32);
936 }
937
938 bool isKImmFP16() const {
939 return isLiteralImm(MVT::f16);
940 }
941
942 bool isKImmFP64() const { return isLiteralImm(MVT::f64); }
943
944 bool isMem() const override {
945 return false;
946 }
947
948 bool isExpr() const {
949 return Kind == Expression;
950 }
951
952 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
953
954 bool isSWaitCnt() const;
955 bool isDepCtr() const;
956 bool isSDelayALU() const;
957 bool isHwreg() const;
958 bool isSendMsg() const;
959 bool isSplitBarrier() const;
960 bool isSwizzle() const;
961 bool isSMRDOffset8() const;
962 bool isSMEMOffset() const;
963 bool isSMRDLiteralOffset() const;
964 bool isDPP8() const;
965 bool isDPPCtrl() const;
966 bool isBLGP() const;
967 bool isGPRIdxMode() const;
968 bool isS16Imm() const;
969 bool isU16Imm() const;
970 bool isEndpgm() const;
971
972 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
973 return [this, P]() { return P(*this); };
974 }
975
976 StringRef getToken() const {
977 assert(isToken());
978 return StringRef(Tok.Data, Tok.Length);
979 }
980
981 int64_t getImm() const {
982 assert(isImm());
983 return Imm.Val;
984 }
985
986 void setImm(int64_t Val) {
987 assert(isImm());
988 Imm.Val = Val;
989 }
990
991 ImmTy getImmTy() const {
992 assert(isImm());
993 return Imm.Type;
994 }
995
996 MCRegister getReg() const override {
997 assert(isRegKind());
998 return Reg.RegNo;
999 }
1000
1001 SMLoc getStartLoc() const override {
1002 return StartLoc;
1003 }
1004
1005 SMLoc getEndLoc() const override {
1006 return EndLoc;
1007 }
1008
1009 SMRange getLocRange() const {
1010 return SMRange(StartLoc, EndLoc);
1011 }
1012
1013 int getMCOpIdx() const { return MCOpIdx; }
1014
1015 Modifiers getModifiers() const {
1016 assert(isRegKind() || isImmTy(ImmTyNone));
1017 return isRegKind() ? Reg.Mods : Imm.Mods;
1018 }
1019
1020 void setModifiers(Modifiers Mods) {
1021 assert(isRegKind() || isImmTy(ImmTyNone));
1022 if (isRegKind())
1023 Reg.Mods = Mods;
1024 else
1025 Imm.Mods = Mods;
1026 }
1027
1028 bool hasModifiers() const {
1029 return getModifiers().hasModifiers();
1030 }
1031
1032 bool hasFPModifiers() const {
1033 return getModifiers().hasFPModifiers();
1034 }
1035
1036 bool hasIntModifiers() const {
1037 return getModifiers().hasIntModifiers();
1038 }
1039
1040 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1041
1042 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1043
1044 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1045
1046 void addRegOperands(MCInst &Inst, unsigned N) const;
1047
1048 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1049 if (isRegKind())
1050 addRegOperands(Inst, N);
1051 else
1052 addImmOperands(Inst, N);
1053 }
1054
1055 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1056 Modifiers Mods = getModifiers();
1057 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1058 if (isRegKind()) {
1059 addRegOperands(Inst, N);
1060 } else {
1061 addImmOperands(Inst, N, false);
1062 }
1063 }
1064
1065 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1066 assert(!hasIntModifiers());
1067 addRegOrImmWithInputModsOperands(Inst, N);
1068 }
1069
1070 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1071 assert(!hasFPModifiers());
1072 addRegOrImmWithInputModsOperands(Inst, N);
1073 }
1074
1075 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1076 Modifiers Mods = getModifiers();
1077 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1078 assert(isRegKind());
1079 addRegOperands(Inst, N);
1080 }
1081
1082 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1083 assert(!hasIntModifiers());
1084 addRegWithInputModsOperands(Inst, N);
1085 }
1086
1087 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1088 assert(!hasFPModifiers());
1089 addRegWithInputModsOperands(Inst, N);
1090 }
1091
1092 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1093 // clang-format off
1094 switch (Type) {
1095 case ImmTyNone: OS << "None"; break;
1096 case ImmTyGDS: OS << "GDS"; break;
1097 case ImmTyLDS: OS << "LDS"; break;
1098 case ImmTyOffen: OS << "Offen"; break;
1099 case ImmTyIdxen: OS << "Idxen"; break;
1100 case ImmTyAddr64: OS << "Addr64"; break;
1101 case ImmTyOffset: OS << "Offset"; break;
1102 case ImmTyInstOffset: OS << "InstOffset"; break;
1103 case ImmTyOffset0: OS << "Offset0"; break;
1104 case ImmTyOffset1: OS << "Offset1"; break;
1105 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1106 case ImmTyCPol: OS << "CPol"; break;
1107 case ImmTyIndexKey8bit: OS << "index_key"; break;
1108 case ImmTyIndexKey16bit: OS << "index_key"; break;
1109 case ImmTyIndexKey32bit: OS << "index_key"; break;
1110 case ImmTyTFE: OS << "TFE"; break;
1111 case ImmTyD16: OS << "D16"; break;
1112 case ImmTyFORMAT: OS << "FORMAT"; break;
1113 case ImmTyClamp: OS << "Clamp"; break;
1114 case ImmTyOModSI: OS << "OModSI"; break;
1115 case ImmTyDPP8: OS << "DPP8"; break;
1116 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1117 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1118 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1119 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1120 case ImmTyDppFI: OS << "DppFI"; break;
1121 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1122 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1123 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1124 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1125 case ImmTyDMask: OS << "DMask"; break;
1126 case ImmTyDim: OS << "Dim"; break;
1127 case ImmTyUNorm: OS << "UNorm"; break;
1128 case ImmTyDA: OS << "DA"; break;
1129 case ImmTyR128A16: OS << "R128A16"; break;
1130 case ImmTyA16: OS << "A16"; break;
1131 case ImmTyLWE: OS << "LWE"; break;
1132 case ImmTyOff: OS << "Off"; break;
1133 case ImmTyExpTgt: OS << "ExpTgt"; break;
1134 case ImmTyExpCompr: OS << "ExpCompr"; break;
1135 case ImmTyExpVM: OS << "ExpVM"; break;
1136 case ImmTyHwreg: OS << "Hwreg"; break;
1137 case ImmTySendMsg: OS << "SendMsg"; break;
1138 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1139 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1140 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1141 case ImmTyOpSel: OS << "OpSel"; break;
1142 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1143 case ImmTyNegLo: OS << "NegLo"; break;
1144 case ImmTyNegHi: OS << "NegHi"; break;
1145 case ImmTySwizzle: OS << "Swizzle"; break;
1146 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1147 case ImmTyHigh: OS << "High"; break;
1148 case ImmTyBLGP: OS << "BLGP"; break;
1149 case ImmTyCBSZ: OS << "CBSZ"; break;
1150 case ImmTyABID: OS << "ABID"; break;
1151 case ImmTyEndpgm: OS << "Endpgm"; break;
1152 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1153 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1154 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1155 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1156 case ImmTyBitOp3: OS << "BitOp3"; break;
1157 case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
1158 case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
1159 case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
1160 case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
1161 case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
1162 case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
1163 case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
1164 case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
1165 case ImmTyScaleSel: OS << "ScaleSel"; break;
1166 case ImmTyByteSel: OS << "ByteSel"; break;
1167 }
1168 // clang-format on
1169 }
1170
1171 void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
1172 switch (Kind) {
1173 case Register:
1174 OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
1175 << " mods: " << Reg.Mods << '>';
1176 break;
1177 case Immediate:
1178 OS << '<' << getImm();
1179 if (getImmTy() != ImmTyNone) {
1180 OS << " type: "; printImmTy(OS, getImmTy());
1181 }
1182 OS << " mods: " << Imm.Mods << '>';
1183 break;
1184 case Token:
1185 OS << '\'' << getToken() << '\'';
1186 break;
1187 case Expression:
1188 OS << "<expr ";
1189 MAI.printExpr(OS, *Expr);
1190 OS << '>';
1191 break;
1192 }
1193 }
1194
1195 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1196 int64_t Val, SMLoc Loc,
1197 ImmTy Type = ImmTyNone,
1198 bool IsFPImm = false) {
1199 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1200 Op->Imm.Val = Val;
1201 Op->Imm.IsFPImm = IsFPImm;
1202 Op->Imm.Type = Type;
1203 Op->Imm.Mods = Modifiers();
1204 Op->StartLoc = Loc;
1205 Op->EndLoc = Loc;
1206 return Op;
1207 }
1208
1209 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1210 StringRef Str, SMLoc Loc,
1211 bool HasExplicitEncodingSize = true) {
1212 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1213 Res->Tok.Data = Str.data();
1214 Res->Tok.Length = Str.size();
1215 Res->StartLoc = Loc;
1216 Res->EndLoc = Loc;
1217 return Res;
1218 }
1219
1220 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1221 MCRegister Reg, SMLoc S, SMLoc E) {
1222 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1223 Op->Reg.RegNo = Reg;
1224 Op->Reg.Mods = Modifiers();
1225 Op->StartLoc = S;
1226 Op->EndLoc = E;
1227 return Op;
1228 }
1229
1230 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1231 const class MCExpr *Expr, SMLoc S) {
1232 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1233 Op->Expr = Expr;
1234 Op->StartLoc = S;
1235 Op->EndLoc = S;
1236 return Op;
1237 }
1238};
1239
1240raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1241 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1242 return OS;
1243}
1244
1245//===----------------------------------------------------------------------===//
1246// AsmParser
1247//===----------------------------------------------------------------------===//
1248
1249// TODO: define GET_SUBTARGET_FEATURE_NAME
1250#define GET_REGISTER_MATCHER
1251#include "AMDGPUGenAsmMatcher.inc"
1252#undef GET_REGISTER_MATCHER
1253#undef GET_SUBTARGET_FEATURE_NAME
1254
1255// Holds info related to the current kernel, e.g. the count of SGPRs used.
1256// A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
1257// next .amdgpu_hsa_kernel directive or at EOF.
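//
// For example, after parsing
//   .amdgpu_hsa_kernel my_kernel
//   v_mov_b32 v7, s3
// the symbol .kernel.sgpr_count would be 4 and .kernel.vgpr_count is updated
// from the highest VGPR (and, with MAI instructions, AGPR) index seen so far
// (a sketch; the exact total comes from getTotalNumVGPRs below).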
1258class KernelScopeInfo {
1259 int SgprIndexUnusedMin = -1;
1260 int VgprIndexUnusedMin = -1;
1261 int AgprIndexUnusedMin = -1;
1262 MCContext *Ctx = nullptr;
1263 MCSubtargetInfo const *MSTI = nullptr;
1264
1265 void usesSgprAt(int i) {
1266 if (i >= SgprIndexUnusedMin) {
1267 SgprIndexUnusedMin = ++i;
1268 if (Ctx) {
1269 MCSymbol* const Sym =
1270 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1271 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1272 }
1273 }
1274 }
1275
1276 void usesVgprAt(int i) {
1277 if (i >= VgprIndexUnusedMin) {
1278 VgprIndexUnusedMin = ++i;
1279 if (Ctx) {
1280 MCSymbol* const Sym =
1281 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1282 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1283 VgprIndexUnusedMin);
1284 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1285 }
1286 }
1287 }
1288
1289 void usesAgprAt(int i) {
1290 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1291 if (!hasMAIInsts(*MSTI))
1292 return;
1293
1294 if (i >= AgprIndexUnusedMin) {
1295 AgprIndexUnusedMin = ++i;
1296 if (Ctx) {
1297 MCSymbol* const Sym =
1298 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1299 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1300
1301 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1302 MCSymbol* const vSym =
1303 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1304 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1305 VgprIndexUnusedMin);
1306 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1307 }
1308 }
1309 }
1310
1311public:
1312 KernelScopeInfo() = default;
1313
1314 void initialize(MCContext &Context) {
1315 Ctx = &Context;
1316 MSTI = Ctx->getSubtargetInfo();
1317
1318 usesSgprAt(SgprIndexUnusedMin = -1);
1319 usesVgprAt(VgprIndexUnusedMin = -1);
1320 if (hasMAIInsts(*MSTI)) {
1321 usesAgprAt(AgprIndexUnusedMin = -1);
1322 }
1323 }
1324
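// Record a register use: the last used DWORD index is DwordRegIndex plus the
// width in DWORDs minus one, e.g. a 64-bit use of s[4:5] gives
// 4 + ceil(64/32) - 1 = 5 and bumps the unused-SGPR watermark to 6.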
1325 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1326 unsigned RegWidth) {
1327 switch (RegKind) {
1328 case IS_SGPR:
1329 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1330 break;
1331 case IS_AGPR:
1332 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1333 break;
1334 case IS_VGPR:
1335 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1336 break;
1337 default:
1338 break;
1339 }
1340 }
1341};
1342
1343class AMDGPUAsmParser : public MCTargetAsmParser {
1344 MCAsmParser &Parser;
1345
1346 unsigned ForcedEncodingSize = 0;
1347 bool ForcedDPP = false;
1348 bool ForcedSDWA = false;
1349 KernelScopeInfo KernelScope;
1350 const unsigned HwMode;
1351
1352 /// @name Auto-generated Match Functions
1353 /// {
1354
1355#define GET_ASSEMBLER_HEADER
1356#include "AMDGPUGenAsmMatcher.inc"
1357
1358 /// }
1359
1360 /// Get the size of a register operand, in bytes.
1361 unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
1362 assert(OpNo < Desc.NumOperands);
1363 int16_t RCID = MII.getOpRegClassID(Desc.operands()[OpNo], HwMode);
1364 return getRegBitWidth(RCID) / 8;
1365 }
1366
1367private:
1368 void createConstantSymbol(StringRef Id, int64_t Val);
1369
1370 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1371 bool OutOfRangeError(SMRange Range);
1372 /// Calculate VGPR/SGPR blocks required for given target, reserved
1373 /// registers, and user-specified NextFreeXGPR values.
1374 ///
1375 /// \param Features [in] Target features, used for bug corrections.
1376 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1377 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1378 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1379 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1380 /// descriptor field, if valid.
1381 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1382 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1383 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1384 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1385 /// \param VGPRBlocks [out] Result VGPR block count.
1386 /// \param SGPRBlocks [out] Result SGPR block count.
1387 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1388 const MCExpr *FlatScrUsed, bool XNACKUsed,
1389 std::optional<bool> EnableWavefrontSize32,
1390 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1391 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1392 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
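// Illustrative math for calculateGPRBlocks (granule sizes are assumptions and
// vary by target and wavefront size): with a VGPR allocation granule of 4,
// NextFreeVGPR = 10 rounds up to 12 and is encoded as 12/4 - 1 = 2 granulated
// blocks; SGPRs are handled the same way after accounting for any VCC /
// FLAT_SCRATCH / XNACK_MASK reserved registers noted above.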
1393 bool ParseDirectiveAMDGCNTarget();
1394 bool ParseDirectiveAMDHSACodeObjectVersion();
1395 bool ParseDirectiveAMDHSAKernel();
1396 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1397 bool ParseDirectiveAMDKernelCodeT();
1398 // TODO: Possibly make subtargetHasRegister const.
1399 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1400 bool ParseDirectiveAMDGPUHsaKernel();
1401
1402 bool ParseDirectiveISAVersion();
1403 bool ParseDirectiveHSAMetadata();
1404 bool ParseDirectivePALMetadataBegin();
1405 bool ParseDirectivePALMetadata();
1406 bool ParseDirectiveAMDGPULDS();
1407
1408 /// Common code to parse out a block of text (typically YAML) between start and
1409 /// end directives.
1410 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1411 const char *AssemblerDirectiveEnd,
1412 std::string &CollectString);
1413
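// Register-operand parsing. Accepted forms include single registers (v0, s5,
// a3, ttmp7), ranges (s[2:3], v[4:7]), bracketed register lists ([v0, v1, v2]),
// and special registers (vcc, exec, m0, ...); this is an illustrative summary,
// the routines below define the exact grammar.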
1414 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1415 RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
1416 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1417 unsigned &RegNum, unsigned &RegWidth,
1418 bool RestoreOnFailure = false);
1419 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1420 unsigned &RegNum, unsigned &RegWidth,
1421 SmallVectorImpl<AsmToken> &Tokens);
1422 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1423 unsigned &RegWidth,
1424 SmallVectorImpl<AsmToken> &Tokens);
1425 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1426 unsigned &RegWidth,
1427 SmallVectorImpl<AsmToken> &Tokens);
1428 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1429 unsigned &RegWidth,
1430 SmallVectorImpl<AsmToken> &Tokens);
1431 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1432 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1433 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1434
1435 bool isRegister();
1436 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1437 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1438 void initializeGprCountSymbol(RegisterKind RegKind);
1439 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1440 unsigned RegWidth);
1441 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1442 bool IsAtomic);
1443
1444public:
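// OperandMode_NSA selects parsing of MIMG non-sequential-address operands,
// where the address is written as an explicit VGPR list such as [v4, v6, v7]
// rather than a contiguous range like v[4:6] (illustrative syntax).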
1445 enum OperandMode {
1446 OperandMode_Default,
1447 OperandMode_NSA,
1448 };
1449
1450 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1451
1452 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1453 const MCInstrInfo &MII, const MCTargetOptions &Options)
1454 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser),
1455 HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
1457
1458 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1459
1460 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1461 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1462 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1463 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1464 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1465 } else {
1466 createConstantSymbol(".option.machine_version_major", ISA.Major);
1467 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1468 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1469 }
1470 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1471 initializeGprCountSymbol(IS_VGPR);
1472 initializeGprCountSymbol(IS_SGPR);
1473 } else
1474 KernelScope.initialize(getContext());
1475
1476 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1477 createConstantSymbol(Symbol, Code);
1478
1479 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1480 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1481 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1482 }
1483
1484 bool hasMIMG_R128() const {
1485 return AMDGPU::hasMIMG_R128(getSTI());
1486 }
1487
1488 bool hasPackedD16() const {
1489 return AMDGPU::hasPackedD16(getSTI());
1490 }
1491
1492 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1493
1494 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1495
1496 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1497
1498 bool isSI() const {
1499 return AMDGPU::isSI(getSTI());
1500 }
1501
1502 bool isCI() const {
1503 return AMDGPU::isCI(getSTI());
1504 }
1505
1506 bool isVI() const {
1507 return AMDGPU::isVI(getSTI());
1508 }
1509
1510 bool isGFX9() const {
1511 return AMDGPU::isGFX9(getSTI());
1512 }
1513
1514 // TODO: isGFX90A is also true for GFX940. We need to clean it.
1515 bool isGFX90A() const {
1516 return AMDGPU::isGFX90A(getSTI());
1517 }
1518
1519 bool isGFX940() const {
1520 return AMDGPU::isGFX940(getSTI());
1521 }
1522
1523 bool isGFX9Plus() const {
1524 return AMDGPU::isGFX9Plus(getSTI());
1525 }
1526
1527 bool isGFX10() const {
1528 return AMDGPU::isGFX10(getSTI());
1529 }
1530
1531 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1532
1533 bool isGFX11() const {
1534 return AMDGPU::isGFX11(getSTI());
1535 }
1536
1537 bool isGFX11Plus() const {
1538 return AMDGPU::isGFX11Plus(getSTI());
1539 }
1540
1541 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1542
1543 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1544
1545 bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
1546
1547 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1548
1549 bool isGFX10_BEncoding() const {
1550 return AMDGPU::isGFX10_BEncoding(getSTI());
1551 }
1552
1553 bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }
1554
1555 bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }
1556
1557 bool hasInv2PiInlineImm() const {
1558 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1559 }
1560
1561 bool has64BitLiterals() const {
1562 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1563 }
1564
1565 bool hasFlatOffsets() const {
1566 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1567 }
1568
1569 bool hasTrue16Insts() const {
1570 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1571 }
1572
1573 bool hasArchitectedFlatScratch() const {
1574 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1575 }
1576
1577 bool hasSGPR102_SGPR103() const {
1578 return !isVI() && !isGFX9();
1579 }
1580
1581 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1582
1583 bool hasIntClamp() const {
1584 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1585 }
1586
1587 bool hasPartialNSAEncoding() const {
1588 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1589 }
1590
1591 bool hasGloballyAddressableScratch() const {
1592 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
1593 }
1594
1595 unsigned getNSAMaxSize(bool HasSampler = false) const {
1596 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1597 }
1598
1599 unsigned getMaxNumUserSGPRs() const {
1600 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1601 }
1602
1603 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1604
1605 AMDGPUTargetStreamer &getTargetStreamer() {
1606 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1607 return static_cast<AMDGPUTargetStreamer &>(TS);
1608 }
1609
1610 MCContext &getContext() const {
1611 // We need this const_cast because for some reason getContext() is not const
1612 // in MCAsmParser.
1613 return const_cast<AMDGPUAsmParser *>(this)->MCTargetAsmParser::getContext();
1614 }
1615
1616 const MCRegisterInfo *getMRI() const {
1617 return getContext().getRegisterInfo();
1618 }
1619
1620 const MCInstrInfo *getMII() const {
1621 return &MII;
1622 }
1623
1624 // FIXME: This should not be used. Instead, use queries derived from
1625 // getAvailableFeatures().
1626 const FeatureBitset &getFeatureBits() const {
1627 return getSTI().getFeatureBits();
1628 }
1629
1630 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1631 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1632 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1633
1634 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1635 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1636 bool isForcedDPP() const { return ForcedDPP; }
1637 bool isForcedSDWA() const { return ForcedSDWA; }
1638 ArrayRef<unsigned> getMatchedVariants() const;
1639 StringRef getMatchedVariantName() const;
1640
1641 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1642 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1643 bool RestoreOnFailure);
1644 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1645 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1646 SMLoc &EndLoc) override;
1647 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1648 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1649 unsigned Kind) override;
1650 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1651 OperandVector &Operands, MCStreamer &Out,
1652 uint64_t &ErrorInfo,
1653 bool MatchingInlineAsm) override;
1654 bool ParseDirective(AsmToken DirectiveID) override;
1655 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1656 OperandMode Mode = OperandMode_Default);
1657 StringRef parseMnemonicSuffix(StringRef Name);
1658 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1659 SMLoc NameLoc, OperandVector &Operands) override;
1660 //bool ProcessInstruction(MCInst &Inst);
1661
1662 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1663
1664 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1665
1666 ParseStatus
1667 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1668 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1669 std::function<bool(int64_t &)> ConvertResult = nullptr);
1670
1671 ParseStatus parseOperandArrayWithPrefix(
1672 const char *Prefix, OperandVector &Operands,
1673 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1674 bool (*ConvertResult)(int64_t &) = nullptr);
1675
1676 ParseStatus
1677 parseNamedBit(StringRef Name, OperandVector &Operands,
1678 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1679 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1680 ParseStatus parseCPol(OperandVector &Operands);
1681 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1682 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1683 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1684 SMLoc &StringLoc);
1685 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1686 StringRef Name,
1687 ArrayRef<const char *> Ids,
1688 int64_t &IntVal);
1689 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1690 StringRef Name,
1691 ArrayRef<const char *> Ids,
1692 AMDGPUOperand::ImmTy Type);
1693
1694 bool isModifier();
1695 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1696 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1697 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1698 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1699 bool parseSP3NegModifier();
1700 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1701 LitModifier Lit = LitModifier::None);
1702 ParseStatus parseReg(OperandVector &Operands);
1703 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1704 LitModifier Lit = LitModifier::None);
1705 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1706 bool AllowImm = true);
1707 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1708 bool AllowImm = true);
1709 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1710 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1711 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1712 ParseStatus tryParseIndexKey(OperandVector &Operands,
1713 AMDGPUOperand::ImmTy ImmTy);
1714 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1715 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1716 ParseStatus parseIndexKey32bit(OperandVector &Operands);
1717 ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
1718 AMDGPUOperand::ImmTy Type);
1719 ParseStatus parseMatrixAFMT(OperandVector &Operands);
1720 ParseStatus parseMatrixBFMT(OperandVector &Operands);
1721 ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
1722 AMDGPUOperand::ImmTy Type);
1723 ParseStatus parseMatrixAScale(OperandVector &Operands);
1724 ParseStatus parseMatrixBScale(OperandVector &Operands);
1725 ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
1726 AMDGPUOperand::ImmTy Type);
1727 ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
1728 ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
1729
1730 ParseStatus parseDfmtNfmt(int64_t &Format);
1731 ParseStatus parseUfmt(int64_t &Format);
1732 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1733 int64_t &Format);
1734 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1735 int64_t &Format);
1736 ParseStatus parseFORMAT(OperandVector &Operands);
1737 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1738 ParseStatus parseNumericFormat(int64_t &Format);
1739 ParseStatus parseFlatOffset(OperandVector &Operands);
1740 ParseStatus parseR128A16(OperandVector &Operands);
1741 ParseStatus parseBLGP(OperandVector &Operands);
1742 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1743 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1744
1745 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1746
1747 bool parseCnt(int64_t &IntVal);
1748 ParseStatus parseSWaitCnt(OperandVector &Operands);
1749
1750 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1751 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1752 ParseStatus parseDepCtr(OperandVector &Operands);
1753
1754 bool parseDelay(int64_t &Delay);
1755 ParseStatus parseSDelayALU(OperandVector &Operands);
1756
1757 ParseStatus parseHwreg(OperandVector &Operands);
1758
1759private:
1760 struct OperandInfoTy {
1761 SMLoc Loc;
1762 int64_t Val;
1763 bool IsSymbolic = false;
1764 bool IsDefined = false;
1765
1766 OperandInfoTy(int64_t Val) : Val(Val) {}
1767 };
1768
1769 struct StructuredOpField : OperandInfoTy {
1770 StringLiteral Id;
1771 StringLiteral Desc;
1772 unsigned Width;
1773 bool IsDefined = false;
1774
1775 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1776 int64_t Default)
1777 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1778 virtual ~StructuredOpField() = default;
1779
1780 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1781 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1782 return false;
1783 }
1784
1785 virtual bool validate(AMDGPUAsmParser &Parser) const {
1786 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1787 return Error(Parser, "not supported on this GPU");
1788 if (!isUIntN(Width, Val))
1789 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1790 return true;
1791 }
1792 };
1793
1794 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1795 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1796
1797 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1798 bool validateSendMsg(const OperandInfoTy &Msg,
1799 const OperandInfoTy &Op,
1800 const OperandInfoTy &Stream);
1801
1802 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1803 OperandInfoTy &Width);
1804
1805 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1806
1807 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1808 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1809 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1810
1811 SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
1812 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1813 const OperandVector &Operands) const;
1814 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
1815 const OperandVector &Operands) const;
1816 SMLoc getInstLoc(const OperandVector &Operands) const;
1817
1818 bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
1819 const OperandVector &Operands);
1820 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1821 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1822 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1823 bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1824 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1825 std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1826 bool AsVOPD3);
1827 bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1828 bool tryVOPD(const MCInst &Inst);
1829 bool tryVOPD3(const MCInst &Inst);
1830 bool tryAnotherVOPDEncoding(const MCInst &Inst);
1831
1832 bool validateIntClampSupported(const MCInst &Inst);
1833 bool validateMIMGAtomicDMask(const MCInst &Inst);
1834 bool validateMIMGGatherDMask(const MCInst &Inst);
1835 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1836 bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
1837 bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
1838 bool validateMIMGD16(const MCInst &Inst);
1839 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1840 bool validateTensorR128(const MCInst &Inst);
1841 bool validateMIMGMSAA(const MCInst &Inst);
1842 bool validateOpSel(const MCInst &Inst);
1843 bool validateTrue16OpSel(const MCInst &Inst);
1844 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1845 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1846 bool validateVccOperand(MCRegister Reg) const;
1847 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1848 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1849 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1850 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1851 bool validateAGPRLdSt(const MCInst &Inst) const;
1852 bool validateVGPRAlign(const MCInst &Inst) const;
1853 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1854 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1855 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1856 bool validateDivScale(const MCInst &Inst);
1857 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1858 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1859 SMLoc IDLoc);
1860 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1861 const unsigned CPol);
1862 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1863 bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
1864 bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1865 unsigned getConstantBusLimit(unsigned Opcode) const;
1866 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1867 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1868 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1869
1870 bool isSupportedMnemo(StringRef Mnemo,
1871 const FeatureBitset &FBS);
1872 bool isSupportedMnemo(StringRef Mnemo,
1873 const FeatureBitset &FBS,
1874 ArrayRef<unsigned> Variants);
1875 bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
1876
1877 bool isId(const StringRef Id) const;
1878 bool isId(const AsmToken &Token, const StringRef Id) const;
1879 bool isToken(const AsmToken::TokenKind Kind) const;
1880 StringRef getId() const;
1881 bool trySkipId(const StringRef Id);
1882 bool trySkipId(const StringRef Pref, const StringRef Id);
1883 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1884 bool trySkipToken(const AsmToken::TokenKind Kind);
1885 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1886 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1887 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1888
1889 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1890 AsmToken::TokenKind getTokenKind() const;
1891 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1892 bool parseExpr(OperandVector &Operands);
1893 StringRef getTokenStr() const;
1894 AsmToken peekToken(bool ShouldSkipSpace = true);
1895 AsmToken getToken() const;
1896 SMLoc getLoc() const;
1897 void lex();
1898
1899public:
1900 void onBeginOfFile() override;
1901 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1902
1903 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1904
1905 ParseStatus parseExpTgt(OperandVector &Operands);
1906 ParseStatus parseSendMsg(OperandVector &Operands);
1907 ParseStatus parseInterpSlot(OperandVector &Operands);
1908 ParseStatus parseInterpAttr(OperandVector &Operands);
1909 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1910 ParseStatus parseBoolReg(OperandVector &Operands);
1911
1912 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1913 const unsigned MaxVal, const Twine &ErrMsg,
1914 SMLoc &Loc);
1915 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1916 const unsigned MinVal,
1917 const unsigned MaxVal,
1918 const StringRef ErrMsg);
1919 ParseStatus parseSwizzle(OperandVector &Operands);
1920 bool parseSwizzleOffset(int64_t &Imm);
1921 bool parseSwizzleMacro(int64_t &Imm);
1922 bool parseSwizzleQuadPerm(int64_t &Imm);
1923 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1924 bool parseSwizzleBroadcast(int64_t &Imm);
1925 bool parseSwizzleSwap(int64_t &Imm);
1926 bool parseSwizzleReverse(int64_t &Imm);
1927 bool parseSwizzleFFT(int64_t &Imm);
1928 bool parseSwizzleRotate(int64_t &Imm);
1929
1930 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1931 int64_t parseGPRIdxMacro();
1932
1933 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1934 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1935
1936 ParseStatus parseOModSI(OperandVector &Operands);
1937
1938 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1939 OptionalImmIndexMap &OptionalIdx);
1940 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1941 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1942 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1943 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1944 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1945
1946 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1947 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1948 OptionalImmIndexMap &OptionalIdx);
1949 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1950 OptionalImmIndexMap &OptionalIdx);
1951
1952 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1953 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1954 void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);
1955
1956 bool parseDimId(unsigned &Encoding);
1957 ParseStatus parseDim(OperandVector &Operands);
1958 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1959 ParseStatus parseDPP8(OperandVector &Operands);
1960 ParseStatus parseDPPCtrl(OperandVector &Operands);
1961 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1962 int64_t parseDPPCtrlSel(StringRef Ctrl);
1963 int64_t parseDPPCtrlPerm();
1964 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1965 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1966 cvtDPP(Inst, Operands, true);
1967 }
1968 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1969 bool IsDPP8 = false);
1970 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1971 cvtVOP3DPP(Inst, Operands, true);
1972 }
1973
1974 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1975 AMDGPUOperand::ImmTy Type);
1976 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1977 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1978 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1979 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1980 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1981 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1982 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1983 uint64_t BasicInstType,
1984 bool SkipDstVcc = false,
1985 bool SkipSrcVcc = false);
1986
1987 ParseStatus parseEndpgm(OperandVector &Operands);
1988
1989 ParseStatus parseVOPD(OperandVector &Operands);
1990};
1991
1992} // end anonymous namespace
1993
1994// May be called with an integer type of equivalent bitwidth.
1995static const fltSemantics *getFltSemantics(unsigned Size) {
1996 switch (Size) {
1997 case 4:
1998 return &APFloat::IEEEsingle();
1999 case 8:
2000 return &APFloat::IEEEdouble();
2001 case 2:
2002 return &APFloat::IEEEhalf();
2003 default:
2004 llvm_unreachable("unsupported fp type");
2005 }
2006}
2007
2008static const fltSemantics *getFltSemantics(MVT VT) {
2009 return getFltSemantics(VT.getSizeInBits() / 8);
2010}
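// For illustration: only the bit width matters here, so an integer MVT picks up
// the floating-point semantics of the same size (a sketch of the mapping, using
// the cases of the switch above):
//   MVT::f32, MVT::i32 -> &APFloat::IEEEsingle()
//   MVT::f64, MVT::i64 -> &APFloat::IEEEdouble()
//   MVT::f16, MVT::i16 -> &APFloat::IEEEhalf()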
2011
2012static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
2013 switch (OperandType) {
2014 // When floating-point immediate is used as operand of type i16, the 32-bit
2015 // representation of the constant truncated to the 16 LSBs should be used.
2030 return &APFloat::IEEEsingle();
2037 return &APFloat::IEEEdouble();
2044 return &APFloat::IEEEhalf();
2049 return &APFloat::BFloat();
2050 default:
2051 llvm_unreachable("unsupported fp type");
2052 }
2053}
2054
2055//===----------------------------------------------------------------------===//
2056// Operand
2057//===----------------------------------------------------------------------===//
2058
2059static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2060 bool Lost;
2061
2062 // Convert literal to single precision
2063 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
2064 APFloat::rmNearestTiesToEven,
2065 &Lost);
2066 // We allow precision loss but not overflow or underflow
2067 if (Status != APFloat::opOK &&
2068 Lost &&
2069 ((Status & APFloat::opOverflow) != 0 ||
2070 (Status & APFloat::opUnderflow) != 0)) {
2071 return false;
2072 }
2073
2074 return true;
2075}
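// Illustration (hypothetical inputs): despite its name, the helper above
// tolerates rounding and only rejects values that overflow or underflow the
// target format. For example:
//   0.1    converted to f16: accepted, only precision is lost
//   1.0e10 converted to f16: rejected, the conversion overflows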
2076
2077static bool isSafeTruncation(int64_t Val, unsigned Size) {
2078 return isUIntN(Size, Val) || isIntN(Size, Val);
2079}
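// Worked example: a value can be safely truncated if it fits the target width
// as either an unsigned or a signed integer:
//   isSafeTruncation(0xFFFF, 16)  -> true  (fits as unsigned)
//   isSafeTruncation(-1, 16)      -> true  (fits as signed)
//   isSafeTruncation(0x1FFFF, 16) -> false (fits as neither)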
2080
2081static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2082 if (VT.getScalarType() == MVT::i16)
2083 return isInlinableLiteral32(Val, HasInv2Pi);
2084
2085 if (VT.getScalarType() == MVT::f16)
2086 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2087
2088 assert(VT.getScalarType() == MVT::bf16);
2089
2090 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2091}
2092
2093bool AMDGPUOperand::isInlinableImm(MVT type) const {
2094
2095 // This is a hack to enable named inline values like
2096 // shared_base with both 32-bit and 64-bit operands.
2097 // Note that these values are defined as
2098 // 32-bit operands only.
2099 if (isInlineValue()) {
2100 return true;
2101 }
2102
2103 if (!isImmTy(ImmTyNone)) {
2104 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2105 return false;
2106 }
2107
2108 if (getModifiers().Lit != LitModifier::None)
2109 return false;
2110
2111 // TODO: We should avoid using host float here. It would be better to
2112 // check the float bit values which is what a few other places do.
2113 // We've had bot failures before due to weird NaN support on mips hosts.
2114
2115 APInt Literal(64, Imm.Val);
2116
2117 if (Imm.IsFPImm) { // We got fp literal token
2118 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2119 return AMDGPU::isInlinableLiteral64(Imm.Val,
2120 AsmParser->hasInv2PiInlineImm());
2121 }
2122
2123 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2124 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2125 return false;
2126
2127 if (type.getScalarSizeInBits() == 16) {
2128 bool Lost = false;
2129 switch (type.getScalarType().SimpleTy) {
2130 default:
2131 llvm_unreachable("unknown 16-bit type");
2132 case MVT::bf16:
2133 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2134 &Lost);
2135 break;
2136 case MVT::f16:
2137 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2138 &Lost);
2139 break;
2140 case MVT::i16:
2141 FPLiteral.convert(APFloatBase::IEEEsingle(),
2142 APFloat::rmNearestTiesToEven, &Lost);
2143 break;
2144 }
2145 // We need to use the 32-bit representation here because when a
2146 // floating-point inline constant is used as an i16 operand, its 32-bit
2147 // representation will be used. We will need the 32-bit value to check if
2148 // it is an FP inline constant.
2149 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2150 return isInlineableLiteralOp16(ImmVal, type,
2151 AsmParser->hasInv2PiInlineImm());
2152 }
2153
2154 // Check if single precision literal is inlinable
2155 return AMDGPU::isInlinableLiteral32(
2156 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2157 AsmParser->hasInv2PiInlineImm());
2158 }
2159
2160 // We got int literal token.
2161 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2162 return AMDGPU::isInlinableLiteral64(Imm.Val,
2163 AsmParser->hasInv2PiInlineImm());
2164 }
2165
2166 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2167 return false;
2168 }
2169
2170 if (type.getScalarSizeInBits() == 16) {
2171 return isInlineableLiteralOp16(
2172 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2173 type, AsmParser->hasInv2PiInlineImm());
2174 }
2175
2176 return AMDGPU::isInlinableLiteral32(
2177 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2178 AsmParser->hasInv2PiInlineImm());
2179}
2180
2181bool AMDGPUOperand::isLiteralImm(MVT type) const {
2182 // Check that this immediate can be added as literal
2183 if (!isImmTy(ImmTyNone)) {
2184 return false;
2185 }
2186
2187 bool Allow64Bit =
2188 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2189
2190 if (!Imm.IsFPImm) {
2191 // We got int literal token.
2192
2193 if (type == MVT::f64 && hasFPModifiers()) {
2194 // Cannot apply fp modifiers to int literals preserving the same semantics
2195 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2196 // disable these cases.
2197 return false;
2198 }
2199
2200 unsigned Size = type.getSizeInBits();
2201 if (Size == 64) {
2202 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2203 return true;
2204 Size = 32;
2205 }
2206
2207 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2208 // types.
2209 return isSafeTruncation(Imm.Val, Size);
2210 }
2211
2212 // We got fp literal token
2213 if (type == MVT::f64) { // Expected 64-bit fp operand
2214 // The low 32 bits of the literal would be set to zeroes, but we accept such literals
2215 return true;
2216 }
2217
2218 if (type == MVT::i64) { // Expected 64-bit int operand
2219 // We don't allow fp literals in 64-bit integer instructions. It is
2220 // unclear how we should encode them.
2221 return false;
2222 }
2223
2224 // We allow fp literals with f16x2 operands assuming that the specified
2225 // literal goes into the lower half and the upper half is zero. We also
2226 // require that the literal may be losslessly converted to f16.
2227 //
2228 // For i16x2 operands, we assume that the specified literal is encoded as a
2229 // single-precision float. This is pretty odd, but it matches SP3 and what
2230 // happens in hardware.
2231 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2232 : (type == MVT::v2i16) ? MVT::f32
2233 : (type == MVT::v2f32) ? MVT::f32
2234 : type;
2235
2236 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2237 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2238}
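// Illustration of the packed-operand rules above (example values only): for a
// v2f16 operand a literal such as 1.0 is accepted because it converts to f16
// without overflow and occupies the lower half; a value like 1.0e10 overflows
// f16 and is rejected. For a v2i16 operand the same literal is instead checked
// against f32, matching the SP3 behaviour described above.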
2239
2240bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2241 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2242}
2243
2244bool AMDGPUOperand::isVRegWithInputMods() const {
2245 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2246 // GFX90A allows DPP on 64-bit operands.
2247 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2248 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2249}
2250
2251template <bool IsFake16>
2252bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2253 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2254 : AMDGPU::VGPR_16_Lo128RegClassID);
2255}
2256
2257template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2258 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2259 : AMDGPU::VGPR_16RegClassID);
2260}
2261
2262bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2263 if (AsmParser->isVI())
2264 return isVReg32();
2265 if (AsmParser->isGFX9Plus())
2266 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2267 return false;
2268}
2269
2270bool AMDGPUOperand::isSDWAFP16Operand() const {
2271 return isSDWAOperand(MVT::f16);
2272}
2273
2274bool AMDGPUOperand::isSDWAFP32Operand() const {
2275 return isSDWAOperand(MVT::f32);
2276}
2277
2278bool AMDGPUOperand::isSDWAInt16Operand() const {
2279 return isSDWAOperand(MVT::i16);
2280}
2281
2282bool AMDGPUOperand::isSDWAInt32Operand() const {
2283 return isSDWAOperand(MVT::i32);
2284}
2285
2286bool AMDGPUOperand::isBoolReg() const {
2287 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2288 (AsmParser->isWave32() && isSCSrc_b32()));
2289}
2290
2291uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2292{
2293 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2294 assert(Size == 2 || Size == 4 || Size == 8);
2295
2296 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2297
2298 if (Imm.Mods.Abs) {
2299 Val &= ~FpSignMask;
2300 }
2301 if (Imm.Mods.Neg) {
2302 Val ^= FpSignMask;
2303 }
2304
2305 return Val;
2306}
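// Worked example for a 32-bit operand (Size == 4, FpSignMask == 0x80000000):
//   Val = 0xBF800000 (-1.0f), abs -> 0x3F800000 (+1.0f)
//   Val = 0x3F800000 (+1.0f), neg -> 0xBF800000 (-1.0f)
// Since abs clears the sign bit before neg flips it, abs+neg produces -|x|.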
2307
2308void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2309 MCOpIdx = Inst.getNumOperands();
2310
2311 if (isExpr()) {
2312 Inst.addOperand(MCOperand::createExpr(Expr));
2313 return;
2314 }
2315
2316 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2317 Inst.getNumOperands())) {
2318 addLiteralImmOperand(Inst, Imm.Val,
2319 ApplyModifiers &
2320 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2321 } else {
2322 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2323 Inst.addOperand(MCOperand::createImm(Imm.Val));
2324 }
2325}
2326
2327void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2328 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2329 auto OpNum = Inst.getNumOperands();
2330 // Check that this operand accepts literals
2331 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2332
2333 if (ApplyModifiers) {
2334 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2335 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2336 Val = applyInputFPModifiers(Val, Size);
2337 }
2338
2339 APInt Literal(64, Val);
2340 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2341
2342 bool CanUse64BitLiterals =
2343 AsmParser->has64BitLiterals() &&
2344 !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
2345 LitModifier Lit = getModifiers().Lit;
2346 MCContext &Ctx = AsmParser->getContext();
2347
2348 if (Imm.IsFPImm) { // We got fp literal token
2349 switch (OpTy) {
2355 if (Lit == LitModifier::None &&
2356 AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2357 AsmParser->hasInv2PiInlineImm())) {
2358 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2359 return;
2360 }
2361
2362 // Non-inlineable
2363 if (AMDGPU::isSISrcFPOperand(InstDesc,
2364 OpNum)) { // Expected 64-bit fp operand
2365 bool HasMandatoryLiteral =
2366 AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
2367 // For fp operands we check if low 32 bits are zeros
2368 if (Literal.getLoBits(32) != 0 &&
2369 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2370 !HasMandatoryLiteral) {
2371 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
2372 Inst.getLoc(),
2373 "Can't encode literal as exact 64-bit floating-point operand. "
2374 "Low 32-bits will be set to zero");
2375 Val &= 0xffffffff00000000u;
2376 }
2377
2378 if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 ||
2381 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2382 (isInt<32>(Val) || isUInt<32>(Val))) {
2383 // The floating-point operand will be verbalized as an
2384 // integer one. If that integer happens to fit 32 bits, on
2385 // re-assembling it will be interpreted as the high half of
2386 // the actual value, so we have to wrap it into lit64().
2387 Lit = LitModifier::Lit64;
2388 } else if (Lit == LitModifier::Lit) {
2389 // For FP64 operands lit() specifies the high half of the value.
2390 Val = Hi_32(Val);
2391 }
2392 }
2393 break;
2394 }
2395
2396 // We don't allow fp literals in 64-bit integer instructions. It is
2397 // unclear how we should encode them. This case should be checked earlier
2398 // in predicate methods (isLiteralImm())
2399 llvm_unreachable("fp literal in 64-bit integer instruction.");
2400
2402 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2403 (isInt<32>(Val) || isUInt<32>(Val)))
2404 Lit = LitModifier::Lit64;
2405 break;
2406
2411 if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
2412 Literal == 0x3fc45f306725feed) {
2413 // This is the 1/(2*pi) which is going to be truncated to bf16 with the
2414 // loss of precision. The constant represents the idiomatic fp32 value of
2415 // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
2416 // bits. Prevent rounding below.
2417 Inst.addOperand(MCOperand::createImm(0x3e22));
2418 return;
2419 }
2420 [[fallthrough]];
2421
2442 bool lost;
2443 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2444 // Convert literal to single precision
2445 FPLiteral.convert(*getOpFltSemantics(OpTy),
2446 APFloat::rmNearestTiesToEven, &lost);
2447 // We allow precision loss but not overflow or underflow. This should be
2448 // checked earlier in isLiteralImm()
2449
2450 Val = FPLiteral.bitcastToAPInt().getZExtValue();
2451 break;
2452 }
2453 default:
2454 llvm_unreachable("invalid operand size");
2455 }
2456
2457 if (Lit != LitModifier::None) {
2458 Inst.addOperand(
2460 } else {
2462 }
2463 return;
2464 }
2465
2466 // We got int literal token.
2467 // Only sign extend inline immediates.
2468 switch (OpTy) {
2482 break;
2483
2486 if (Lit == LitModifier::None &&
2487 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2488 Inst.addOperand(MCOperand::createImm(Val));
2489 return;
2490 }
2491
2492 // When the 32 MSBs are not zero (effectively means it can't be safely
2493 // truncated to uint32_t), if the target doesn't support 64-bit literals, or
2494 // the lit modifier is explicitly used, we need to truncate it to the 32
2495 // LSBs.
2496 if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit)
2497 Val = Lo_32(Val);
2498 break;
2499
2503 if (Lit == LitModifier::None &&
2504 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2505 Inst.addOperand(MCOperand::createImm(Val));
2506 return;
2507 }
2508
2509 // If the target doesn't support 64-bit literals, we need to use the
2510 // constant as the high 32 MSBs of a double-precision floating point value.
2511 if (!AsmParser->has64BitLiterals()) {
2512 Val = static_cast<uint64_t>(Val) << 32;
2513 } else {
2514 // Now that the target supports 64-bit literals, there are two cases
2515 // where we still want to use src_literal encoding:
2516 // 1) explicitly forced by using lit modifier;
2517 // 2) the value is a valid 32-bit representation (signed or unsigned),
2518 // meanwhile not forced by lit64 modifier.
2519 if (Lit == LitModifier::Lit ||
2520 (Lit != LitModifier::Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
2521 Val = static_cast<uint64_t>(Val) << 32;
2522 }
2523
2524 // For FP64 operands lit() specifies the high half of the value.
2525 if (Lit == LitModifier::Lit)
2526 Val = Hi_32(Val);
2527 break;
2528
2540 break;
2541
2543 if ((isInt<32>(Val) || isUInt<32>(Val)) && Lit != LitModifier::Lit64)
2544 Val <<= 32;
2545 break;
2546
2547 default:
2548 llvm_unreachable("invalid operand type");
2549 }
2550
2551 if (Lit != LitModifier::None) {
2552 Inst.addOperand(
2554 } else {
2556 }
2557}
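// Illustration of the FP64 literal handling above (example bit patterns): the
// instruction stream carries only a 32-bit literal for a 64-bit fp operand,
// which hardware places in the high half. So 1.0 (0x3FF0000000000000) is
// emitted as 0x3FF00000 with the low 32 bits implicitly zero, while a value
// such as 1.1, whose low 32 bits are non-zero, loses those bits and triggers
// the "Low 32-bits will be set to zero" warning unless 64-bit literals are
// available.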
2558
2559void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2560 MCOpIdx = Inst.getNumOperands();
2561 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2562}
2563
2564bool AMDGPUOperand::isInlineValue() const {
2565 return isRegKind() && ::isInlineValue(getReg());
2566}
2567
2568//===----------------------------------------------------------------------===//
2569// AsmParser
2570//===----------------------------------------------------------------------===//
2571
2572void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2573 // TODO: make these pre-defined variables read-only.
2574 // Currently there is no suitable machinery in core llvm-mc for this.
2575 // MCSymbol::isRedefinable is intended for another purpose, and
2576 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
2577 MCContext &Ctx = getContext();
2578 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2579 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2580}
2581
2582static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2583 if (Is == IS_VGPR) {
2584 switch (RegWidth) {
2585 default: return -1;
2586 case 32:
2587 return AMDGPU::VGPR_32RegClassID;
2588 case 64:
2589 return AMDGPU::VReg_64RegClassID;
2590 case 96:
2591 return AMDGPU::VReg_96RegClassID;
2592 case 128:
2593 return AMDGPU::VReg_128RegClassID;
2594 case 160:
2595 return AMDGPU::VReg_160RegClassID;
2596 case 192:
2597 return AMDGPU::VReg_192RegClassID;
2598 case 224:
2599 return AMDGPU::VReg_224RegClassID;
2600 case 256:
2601 return AMDGPU::VReg_256RegClassID;
2602 case 288:
2603 return AMDGPU::VReg_288RegClassID;
2604 case 320:
2605 return AMDGPU::VReg_320RegClassID;
2606 case 352:
2607 return AMDGPU::VReg_352RegClassID;
2608 case 384:
2609 return AMDGPU::VReg_384RegClassID;
2610 case 512:
2611 return AMDGPU::VReg_512RegClassID;
2612 case 1024:
2613 return AMDGPU::VReg_1024RegClassID;
2614 }
2615 } else if (Is == IS_TTMP) {
2616 switch (RegWidth) {
2617 default: return -1;
2618 case 32:
2619 return AMDGPU::TTMP_32RegClassID;
2620 case 64:
2621 return AMDGPU::TTMP_64RegClassID;
2622 case 128:
2623 return AMDGPU::TTMP_128RegClassID;
2624 case 256:
2625 return AMDGPU::TTMP_256RegClassID;
2626 case 512:
2627 return AMDGPU::TTMP_512RegClassID;
2628 }
2629 } else if (Is == IS_SGPR) {
2630 switch (RegWidth) {
2631 default: return -1;
2632 case 32:
2633 return AMDGPU::SGPR_32RegClassID;
2634 case 64:
2635 return AMDGPU::SGPR_64RegClassID;
2636 case 96:
2637 return AMDGPU::SGPR_96RegClassID;
2638 case 128:
2639 return AMDGPU::SGPR_128RegClassID;
2640 case 160:
2641 return AMDGPU::SGPR_160RegClassID;
2642 case 192:
2643 return AMDGPU::SGPR_192RegClassID;
2644 case 224:
2645 return AMDGPU::SGPR_224RegClassID;
2646 case 256:
2647 return AMDGPU::SGPR_256RegClassID;
2648 case 288:
2649 return AMDGPU::SGPR_288RegClassID;
2650 case 320:
2651 return AMDGPU::SGPR_320RegClassID;
2652 case 352:
2653 return AMDGPU::SGPR_352RegClassID;
2654 case 384:
2655 return AMDGPU::SGPR_384RegClassID;
2656 case 512:
2657 return AMDGPU::SGPR_512RegClassID;
2658 }
2659 } else if (Is == IS_AGPR) {
2660 switch (RegWidth) {
2661 default: return -1;
2662 case 32:
2663 return AMDGPU::AGPR_32RegClassID;
2664 case 64:
2665 return AMDGPU::AReg_64RegClassID;
2666 case 96:
2667 return AMDGPU::AReg_96RegClassID;
2668 case 128:
2669 return AMDGPU::AReg_128RegClassID;
2670 case 160:
2671 return AMDGPU::AReg_160RegClassID;
2672 case 192:
2673 return AMDGPU::AReg_192RegClassID;
2674 case 224:
2675 return AMDGPU::AReg_224RegClassID;
2676 case 256:
2677 return AMDGPU::AReg_256RegClassID;
2678 case 288:
2679 return AMDGPU::AReg_288RegClassID;
2680 case 320:
2681 return AMDGPU::AReg_320RegClassID;
2682 case 352:
2683 return AMDGPU::AReg_352RegClassID;
2684 case 384:
2685 return AMDGPU::AReg_384RegClassID;
2686 case 512:
2687 return AMDGPU::AReg_512RegClassID;
2688 case 1024:
2689 return AMDGPU::AReg_1024RegClassID;
2690 }
2691 }
2692 return -1;
2693}
2694
2697 .Case("exec", AMDGPU::EXEC)
2698 .Case("vcc", AMDGPU::VCC)
2699 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2700 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2701 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2702 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2703 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2704 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2705 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2706 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2707 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2708 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2709 .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2710 .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2711 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2712 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2713 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2714 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2715 .Case("m0", AMDGPU::M0)
2716 .Case("vccz", AMDGPU::SRC_VCCZ)
2717 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2718 .Case("execz", AMDGPU::SRC_EXECZ)
2719 .Case("src_execz", AMDGPU::SRC_EXECZ)
2720 .Case("scc", AMDGPU::SRC_SCC)
2721 .Case("src_scc", AMDGPU::SRC_SCC)
2722 .Case("tba", AMDGPU::TBA)
2723 .Case("tma", AMDGPU::TMA)
2724 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2725 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2726 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2727 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2728 .Case("vcc_lo", AMDGPU::VCC_LO)
2729 .Case("vcc_hi", AMDGPU::VCC_HI)
2730 .Case("exec_lo", AMDGPU::EXEC_LO)
2731 .Case("exec_hi", AMDGPU::EXEC_HI)
2732 .Case("tma_lo", AMDGPU::TMA_LO)
2733 .Case("tma_hi", AMDGPU::TMA_HI)
2734 .Case("tba_lo", AMDGPU::TBA_LO)
2735 .Case("tba_hi", AMDGPU::TBA_HI)
2736 .Case("pc", AMDGPU::PC_REG)
2737 .Case("null", AMDGPU::SGPR_NULL)
2738 .Default(AMDGPU::NoRegister);
2739}
2740
2741bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2742 SMLoc &EndLoc, bool RestoreOnFailure) {
2743 auto R = parseRegister();
2744 if (!R) return true;
2745 assert(R->isReg());
2746 RegNo = R->getReg();
2747 StartLoc = R->getStartLoc();
2748 EndLoc = R->getEndLoc();
2749 return false;
2750}
2751
2752bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2753 SMLoc &EndLoc) {
2754 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2755}
2756
2757ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2758 SMLoc &EndLoc) {
2759 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2760 bool PendingErrors = getParser().hasPendingError();
2761 getParser().clearPendingErrors();
2762 if (PendingErrors)
2763 return ParseStatus::Failure;
2764 if (Result)
2765 return ParseStatus::NoMatch;
2766 return ParseStatus::Success;
2767}
2768
2769bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2770 RegisterKind RegKind,
2771 MCRegister Reg1, SMLoc Loc) {
2772 switch (RegKind) {
2773 case IS_SPECIAL:
2774 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2775 Reg = AMDGPU::EXEC;
2776 RegWidth = 64;
2777 return true;
2778 }
2779 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2780 Reg = AMDGPU::FLAT_SCR;
2781 RegWidth = 64;
2782 return true;
2783 }
2784 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2785 Reg = AMDGPU::XNACK_MASK;
2786 RegWidth = 64;
2787 return true;
2788 }
2789 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2790 Reg = AMDGPU::VCC;
2791 RegWidth = 64;
2792 return true;
2793 }
2794 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2795 Reg = AMDGPU::TBA;
2796 RegWidth = 64;
2797 return true;
2798 }
2799 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2800 Reg = AMDGPU::TMA;
2801 RegWidth = 64;
2802 return true;
2803 }
2804 Error(Loc, "register does not fit in the list");
2805 return false;
2806 case IS_VGPR:
2807 case IS_SGPR:
2808 case IS_AGPR:
2809 case IS_TTMP:
2810 if (Reg1 != Reg + RegWidth / 32) {
2811 Error(Loc, "registers in a list must have consecutive indices");
2812 return false;
2813 }
2814 RegWidth += 32;
2815 return true;
2816 default:
2817 llvm_unreachable("unexpected register kind");
2818 }
2819}
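// Sketch of how a register list grows (hypothetical input): [v4, v5, v6, v7]
// starts as v4 with RegWidth == 32; each following entry must have index
// Reg + RegWidth / 32, and every match widens the range by 32 bits, so the list
// ends up equivalent to v[4:7] (RegWidth == 128). A gap such as [v4, v6] is
// rejected with "registers in a list must have consecutive indices".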
2820
2821struct RegInfo {
2822 StringLiteral Name;
2823 RegisterKind Kind;
2824};
2825
2826static constexpr RegInfo RegularRegisters[] = {
2827 {{"v"}, IS_VGPR},
2828 {{"s"}, IS_SGPR},
2829 {{"ttmp"}, IS_TTMP},
2830 {{"acc"}, IS_AGPR},
2831 {{"a"}, IS_AGPR},
2832};
2833
2834static bool isRegularReg(RegisterKind Kind) {
2835 return Kind == IS_VGPR ||
2836 Kind == IS_SGPR ||
2837 Kind == IS_TTMP ||
2838 Kind == IS_AGPR;
2839}
2840
2841static const RegInfo *getRegularRegInfo(StringRef Str) {
2842 for (const RegInfo &Reg : RegularRegisters)
2843 if (Str.starts_with(Reg.Name))
2844 return &Reg;
2845 return nullptr;
2846}
2847
2848static bool getRegNum(StringRef Str, unsigned& Num) {
2849 return !Str.getAsInteger(10, Num);
2850}
2851
2852bool
2853AMDGPUAsmParser::isRegister(const AsmToken &Token,
2854 const AsmToken &NextToken) const {
2855
2856 // A list of consecutive registers: [s0,s1,s2,s3]
2857 if (Token.is(AsmToken::LBrac))
2858 return true;
2859
2860 if (!Token.is(AsmToken::Identifier))
2861 return false;
2862
2863 // A single register like s0 or a range of registers like s[0:1]
2864
2865 StringRef Str = Token.getString();
2866 const RegInfo *Reg = getRegularRegInfo(Str);
2867 if (Reg) {
2868 StringRef RegName = Reg->Name;
2869 StringRef RegSuffix = Str.substr(RegName.size());
2870 if (!RegSuffix.empty()) {
2871 RegSuffix.consume_back(".l");
2872 RegSuffix.consume_back(".h");
2873 unsigned Num;
2874 // A single register with an index: rXX
2875 if (getRegNum(RegSuffix, Num))
2876 return true;
2877 } else {
2878 // A range of registers: r[XX:YY].
2879 if (NextToken.is(AsmToken::LBrac))
2880 return true;
2881 }
2882 }
2883
2884 return getSpecialRegForName(Str).isValid();
2885}
2886
2887bool
2888AMDGPUAsmParser::isRegister()
2889{
2890 return isRegister(getToken(), peekToken());
2891}
2892
2893MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2894 unsigned SubReg, unsigned RegWidth,
2895 SMLoc Loc) {
2896 assert(isRegularReg(RegKind));
2897
2898 unsigned AlignSize = 1;
2899 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2900 // SGPR and TTMP registers must be aligned.
2901 // Max required alignment is 4 dwords.
2902 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2903 }
2904
2905 if (RegNum % AlignSize != 0) {
2906 Error(Loc, "invalid register alignment");
2907 return MCRegister();
2908 }
2909
2910 unsigned RegIdx = RegNum / AlignSize;
2911 int RCID = getRegClass(RegKind, RegWidth);
2912 if (RCID == -1) {
2913 Error(Loc, "invalid or unsupported register size");
2914 return MCRegister();
2915 }
2916
2917 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2918 const MCRegisterClass RC = TRI->getRegClass(RCID);
2919 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
2920 Error(Loc, "register index is out of range");
2921 return AMDGPU::NoRegister;
2922 }
2923
2924 if (RegKind == IS_VGPR && !isGFX1250() && RegIdx + RegWidth / 32 > 256) {
2925 Error(Loc, "register index is out of range");
2926 return MCRegister();
2927 }
2928
2929 MCRegister Reg = RC.getRegister(RegIdx);
2930
2931 if (SubReg) {
2932 Reg = TRI->getSubReg(Reg, SubReg);
2933
2934 // Currently all regular registers have their .l and .h subregisters, so
2935 // we should never need to generate an error here.
2936 assert(Reg && "Invalid subregister!");
2937 }
2938
2939 return Reg;
2940}
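// Alignment example (hypothetical operands): SGPR and TTMP tuples must start on
// an aligned index, capped at 4 dwords. s[4:7] (RegWidth 128, AlignSize 4) is
// accepted, while s[2:5] fails with "invalid register alignment". VGPR tuples
// such as v[2:5] are unconstrained since AlignSize stays 1.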
2941
2942bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
2943 unsigned &SubReg) {
2944 int64_t RegLo, RegHi;
2945 if (!skipToken(AsmToken::LBrac, "missing register index"))
2946 return false;
2947
2948 SMLoc FirstIdxLoc = getLoc();
2949 SMLoc SecondIdxLoc;
2950
2951 if (!parseExpr(RegLo))
2952 return false;
2953
2954 if (trySkipToken(AsmToken::Colon)) {
2955 SecondIdxLoc = getLoc();
2956 if (!parseExpr(RegHi))
2957 return false;
2958 } else {
2959 RegHi = RegLo;
2960 }
2961
2962 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2963 return false;
2964
2965 if (!isUInt<32>(RegLo)) {
2966 Error(FirstIdxLoc, "invalid register index");
2967 return false;
2968 }
2969
2970 if (!isUInt<32>(RegHi)) {
2971 Error(SecondIdxLoc, "invalid register index");
2972 return false;
2973 }
2974
2975 if (RegLo > RegHi) {
2976 Error(FirstIdxLoc, "first register index should not exceed second index");
2977 return false;
2978 }
2979
2980 if (RegHi == RegLo) {
2981 StringRef RegSuffix = getTokenStr();
2982 if (RegSuffix == ".l") {
2983 SubReg = AMDGPU::lo16;
2984 lex();
2985 } else if (RegSuffix == ".h") {
2986 SubReg = AMDGPU::hi16;
2987 lex();
2988 }
2989 }
2990
2991 Num = static_cast<unsigned>(RegLo);
2992 RegWidth = 32 * ((RegHi - RegLo) + 1);
2993
2994 return true;
2995}
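// Parsing examples (illustrative only): "[4:7]" yields Num == 4 and
// RegWidth == 128; "[5]" yields Num == 5 and RegWidth == 32. For a
// single-register range a trailing ".l" or ".h" selects the 16-bit low or high
// subregister.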
2996
2997MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2998 unsigned &RegNum,
2999 unsigned &RegWidth,
3000 SmallVectorImpl<AsmToken> &Tokens) {
3001 assert(isToken(AsmToken::Identifier));
3002 MCRegister Reg = getSpecialRegForName(getTokenStr());
3003 if (Reg) {
3004 RegNum = 0;
3005 RegWidth = 32;
3006 RegKind = IS_SPECIAL;
3007 Tokens.push_back(getToken());
3008 lex(); // skip register name
3009 }
3010 return Reg;
3011}
3012
3013MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3014 unsigned &RegNum,
3015 unsigned &RegWidth,
3016 SmallVectorImpl<AsmToken> &Tokens) {
3017 assert(isToken(AsmToken::Identifier));
3018 StringRef RegName = getTokenStr();
3019 auto Loc = getLoc();
3020
3021 const RegInfo *RI = getRegularRegInfo(RegName);
3022 if (!RI) {
3023 Error(Loc, "invalid register name");
3024 return MCRegister();
3025 }
3026
3027 Tokens.push_back(getToken());
3028 lex(); // skip register name
3029
3030 RegKind = RI->Kind;
3031 StringRef RegSuffix = RegName.substr(RI->Name.size());
3032 unsigned SubReg = NoSubRegister;
3033 if (!RegSuffix.empty()) {
3034 if (RegSuffix.consume_back(".l"))
3035 SubReg = AMDGPU::lo16;
3036 else if (RegSuffix.consume_back(".h"))
3037 SubReg = AMDGPU::hi16;
3038
3039 // Single 32-bit register: vXX.
3040 if (!getRegNum(RegSuffix, RegNum)) {
3041 Error(Loc, "invalid register index");
3042 return MCRegister();
3043 }
3044 RegWidth = 32;
3045 } else {
3046 // Range of registers: v[XX:YY]. ":YY" is optional.
3047 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3048 return MCRegister();
3049 }
3050
3051 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3052}
3053
3054MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3055 unsigned &RegNum, unsigned &RegWidth,
3056 SmallVectorImpl<AsmToken> &Tokens) {
3057 MCRegister Reg;
3058 auto ListLoc = getLoc();
3059
3060 if (!skipToken(AsmToken::LBrac,
3061 "expected a register or a list of registers")) {
3062 return MCRegister();
3063 }
3064
3065 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3066
3067 auto Loc = getLoc();
3068 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3069 return MCRegister();
3070 if (RegWidth != 32) {
3071 Error(Loc, "expected a single 32-bit register");
3072 return MCRegister();
3073 }
3074
3075 for (; trySkipToken(AsmToken::Comma); ) {
3076 RegisterKind NextRegKind;
3077 MCRegister NextReg;
3078 unsigned NextRegNum, NextRegWidth;
3079 Loc = getLoc();
3080
3081 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3082 NextRegNum, NextRegWidth,
3083 Tokens)) {
3084 return MCRegister();
3085 }
3086 if (NextRegWidth != 32) {
3087 Error(Loc, "expected a single 32-bit register");
3088 return MCRegister();
3089 }
3090 if (NextRegKind != RegKind) {
3091 Error(Loc, "registers in a list must be of the same kind");
3092 return MCRegister();
3093 }
3094 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3095 return MCRegister();
3096 }
3097
3098 if (!skipToken(AsmToken::RBrac,
3099 "expected a comma or a closing square bracket")) {
3100 return MCRegister();
3101 }
3102
3103 if (isRegularReg(RegKind))
3104 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3105
3106 return Reg;
3107}
3108
3109bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3110 MCRegister &Reg, unsigned &RegNum,
3111 unsigned &RegWidth,
3112 SmallVectorImpl<AsmToken> &Tokens) {
3113 auto Loc = getLoc();
3114 Reg = MCRegister();
3115
3116 if (isToken(AsmToken::Identifier)) {
3117 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3118 if (!Reg)
3119 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3120 } else {
3121 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3122 }
3123
3124 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3125 if (!Reg) {
3126 assert(Parser.hasPendingError());
3127 return false;
3128 }
3129
3130 if (!subtargetHasRegister(*TRI, Reg)) {
3131 if (Reg == AMDGPU::SGPR_NULL) {
3132 Error(Loc, "'null' operand is not supported on this GPU");
3133 } else {
3135 " register not available on this GPU");
3136 }
3137 return false;
3138 }
3139
3140 return true;
3141}
3142
3143bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3144 MCRegister &Reg, unsigned &RegNum,
3145 unsigned &RegWidth,
3146 bool RestoreOnFailure /*=false*/) {
3147 Reg = MCRegister();
3148
3149 SmallVector<AsmToken, 1> Tokens;
3150 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3151 if (RestoreOnFailure) {
3152 while (!Tokens.empty()) {
3153 getLexer().UnLex(Tokens.pop_back_val());
3154 }
3155 }
3156 return true;
3157 }
3158 return false;
3159}
3160
3161std::optional<StringRef>
3162AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3163 switch (RegKind) {
3164 case IS_VGPR:
3165 return StringRef(".amdgcn.next_free_vgpr");
3166 case IS_SGPR:
3167 return StringRef(".amdgcn.next_free_sgpr");
3168 default:
3169 return std::nullopt;
3170 }
3171}
3172
3173void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3174 auto SymbolName = getGprCountSymbolName(RegKind);
3175 assert(SymbolName && "initializing invalid register kind");
3176 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3178 Sym->setRedefinable(true);
3179}
3180
3181bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3182 unsigned DwordRegIndex,
3183 unsigned RegWidth) {
3184 // Symbols are only defined for GCN targets
3185 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3186 return true;
3187
3188 auto SymbolName = getGprCountSymbolName(RegKind);
3189 if (!SymbolName)
3190 return true;
3191 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3192
3193 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3194 int64_t OldCount;
3195
3196 if (!Sym->isVariable())
3197 return !Error(getLoc(),
3198 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3199 if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
3200 return !Error(
3201 getLoc(),
3202 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3203
3204 if (OldCount <= NewMax)
3205 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3206
3207 return true;
3208}
3209
3210std::unique_ptr<AMDGPUOperand>
3211AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3212 const auto &Tok = getToken();
3213 SMLoc StartLoc = Tok.getLoc();
3214 SMLoc EndLoc = Tok.getEndLoc();
3215 RegisterKind RegKind;
3216 MCRegister Reg;
3217 unsigned RegNum, RegWidth;
3218
3219 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3220 return nullptr;
3221 }
3222 if (isHsaAbi(getSTI())) {
3223 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3224 return nullptr;
3225 } else
3226 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3227 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3228}
3229
3230ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3231 bool HasSP3AbsModifier, LitModifier Lit) {
3232 // TODO: add syntactic sugar for 1/(2*PI)
3233
3234 if (isRegister() || isModifier())
3235 return ParseStatus::NoMatch;
3236
3237 if (Lit == LitModifier::None) {
3238 if (trySkipId("lit"))
3239 Lit = LitModifier::Lit;
3240 else if (trySkipId("lit64"))
3241 Lit = LitModifier::Lit64;
3242
3243 if (Lit != LitModifier::None) {
3244 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3245 return ParseStatus::Failure;
3246 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
3247 if (S.isSuccess() &&
3248 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3249 return ParseStatus::Failure;
3250 return S;
3251 }
3252 }
3253
3254 const auto& Tok = getToken();
3255 const auto& NextTok = peekToken();
3256 bool IsReal = Tok.is(AsmToken::Real);
3257 SMLoc S = getLoc();
3258 bool Negate = false;
3259
3260 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3261 lex();
3262 IsReal = true;
3263 Negate = true;
3264 }
3265
3266 AMDGPUOperand::Modifiers Mods;
3267 Mods.Lit = Lit;
3268
3269 if (IsReal) {
3270 // Floating-point expressions are not supported.
3271 // Can only allow floating-point literals with an
3272 // optional sign.
3273
3274 StringRef Num = getTokenStr();
3275 lex();
3276
3277 APFloat RealVal(APFloat::IEEEdouble());
3278 auto roundMode = APFloat::rmNearestTiesToEven;
3279 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3280 return ParseStatus::Failure;
3281 if (Negate)
3282 RealVal.changeSign();
3283
3284 Operands.push_back(
3285 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3286 AMDGPUOperand::ImmTyNone, true));
3287 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3288 Op.setModifiers(Mods);
3289
3290 return ParseStatus::Success;
3291
3292 } else {
3293 int64_t IntVal;
3294 const MCExpr *Expr;
3295 SMLoc S = getLoc();
3296
3297 if (HasSP3AbsModifier) {
3298 // This is a workaround for handling expressions
3299 // as arguments of SP3 'abs' modifier, for example:
3300 // |1.0|
3301 // |-1|
3302 // |1+x|
3303 // This syntax is not compatible with the syntax of standard
3304 // MC expressions (due to the trailing '|').
3305 SMLoc EndLoc;
3306 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3307 return ParseStatus::Failure;
3308 } else {
3309 if (Parser.parseExpression(Expr))
3310 return ParseStatus::Failure;
3311 }
3312
3313 if (Expr->evaluateAsAbsolute(IntVal)) {
3314 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3315 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3316 Op.setModifiers(Mods);
3317 } else {
3318 if (Lit != LitModifier::None)
3319 return ParseStatus::NoMatch;
3320 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3321 }
3322
3323 return ParseStatus::Success;
3324 }
3325
3326 return ParseStatus::NoMatch;
3327}
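// Usage sketch (assembly fragment is illustrative): besides plain integer and
// floating-point literals, this accepts the lit()/lit64() wrappers handled
// above, which force literal encoding even for values that would otherwise be
// emitted as inline constants, e.g. something like
//   v_add_f32 v0, lit(1.0), v1
// lit64() is only meaningful on targets with 64-bit literal support.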
3328
3329ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3330 if (!isRegister())
3331 return ParseStatus::NoMatch;
3332
3333 if (auto R = parseRegister()) {
3334 assert(R->isReg());
3335 Operands.push_back(std::move(R));
3336 return ParseStatus::Success;
3337 }
3338 return ParseStatus::Failure;
3339}
3340
3341ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3342 bool HasSP3AbsMod, LitModifier Lit) {
3343 ParseStatus Res = parseReg(Operands);
3344 if (!Res.isNoMatch())
3345 return Res;
3346 if (isModifier())
3347 return ParseStatus::NoMatch;
3348 return parseImm(Operands, HasSP3AbsMod, Lit);
3349}
3350
3351bool
3352AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3353 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3354 const auto &str = Token.getString();
3355 return str == "abs" || str == "neg" || str == "sext";
3356 }
3357 return false;
3358}
3359
3360bool
3361AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3362 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3363}
3364
3365bool
3366AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3367 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3368}
3369
3370bool
3371AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3372 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3373}
3374
3375// Check if this is an operand modifier or an opcode modifier
3376 // which may look like an expression but is not. We should
3377// avoid parsing these modifiers as expressions. Currently
3378// recognized sequences are:
3379// |...|
3380// abs(...)
3381// neg(...)
3382// sext(...)
3383// -reg
3384// -|...|
3385// -abs(...)
3386// name:...
3387//
3388bool
3389AMDGPUAsmParser::isModifier() {
3390
3391 AsmToken Tok = getToken();
3392 AsmToken NextToken[2];
3393 peekTokens(NextToken);
3394
3395 return isOperandModifier(Tok, NextToken[0]) ||
3396 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3397 isOpcodeModifierWithVal(Tok, NextToken[0]);
3398}
3399
3400// Check if the current token is an SP3 'neg' modifier.
3401// Currently this modifier is allowed in the following context:
3402//
3403// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3404// 2. Before an 'abs' modifier: -abs(...)
3405// 3. Before an SP3 'abs' modifier: -|...|
3406//
3407// In all other cases "-" is handled as a part
3408// of an expression that follows the sign.
3409//
3410// Note: When "-" is followed by an integer literal,
3411// this is interpreted as integer negation rather
3412// than a floating-point NEG modifier applied to N.
3413 // Besides being counter-intuitive, such use of the floating-point
3414 // NEG modifier would have resulted in a different meaning
3415// of integer literals used with VOP1/2/C and VOP3,
3416// for example:
3417// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3418// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3419// Negative fp literals with preceding "-" are
3420// handled likewise for uniformity
3421//
3422bool
3423AMDGPUAsmParser::parseSP3NegModifier() {
3424
3425 AsmToken NextToken[2];
3426 peekTokens(NextToken);
3427
3428 if (isToken(AsmToken::Minus) &&
3429 (isRegister(NextToken[0], NextToken[1]) ||
3430 NextToken[0].is(AsmToken::Pipe) ||
3431 isId(NextToken[0], "abs"))) {
3432 lex();
3433 return true;
3434 }
3435
3436 return false;
3437}
3438
3439ParseStatus
3440AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3441 bool AllowImm) {
3442 bool Neg, SP3Neg;
3443 bool Abs, SP3Abs;
3444 SMLoc Loc;
3445
3446 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3447 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3448 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3449
3450 SP3Neg = parseSP3NegModifier();
3451
3452 Loc = getLoc();
3453 Neg = trySkipId("neg");
3454 if (Neg && SP3Neg)
3455 return Error(Loc, "expected register or immediate");
3456 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3457 return ParseStatus::Failure;
3458
3459 Abs = trySkipId("abs");
3460 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3461 return ParseStatus::Failure;
3462
3463 LitModifier Lit = LitModifier::None;
3464 if (trySkipId("lit")) {
3465 Lit = LitModifier::Lit;
3466 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3467 return ParseStatus::Failure;
3468 } else if (trySkipId("lit64")) {
3469 Lit = LitModifier::Lit64;
3470 if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
3471 return ParseStatus::Failure;
3472 if (!has64BitLiterals())
3473 return Error(Loc, "lit64 is not supported on this GPU");
3474 }
3475
3476 Loc = getLoc();
3477 SP3Abs = trySkipToken(AsmToken::Pipe);
3478 if (Abs && SP3Abs)
3479 return Error(Loc, "expected register or immediate");
3480
3481 ParseStatus Res;
3482 if (AllowImm) {
3483 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3484 } else {
3485 Res = parseReg(Operands);
3486 }
3487 if (!Res.isSuccess())
3488 return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
3490 : Res;
3491
3492 if (Lit != LitModifier::None && !Operands.back()->isImm())
3493 Error(Loc, "expected immediate with lit modifier");
3494
3495 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3496 return ParseStatus::Failure;
3497 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3498 return ParseStatus::Failure;
3499 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3500 return ParseStatus::Failure;
3501 if (Lit != LitModifier::None &&
3502 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3503 return ParseStatus::Failure;
3504
3505 AMDGPUOperand::Modifiers Mods;
3506 Mods.Abs = Abs || SP3Abs;
3507 Mods.Neg = Neg || SP3Neg;
3508 Mods.Lit = Lit;
3509
3510 if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
3511 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3512 if (Op.isExpr())
3513 return Error(Op.getStartLoc(), "expected an absolute expression");
3514 Op.setModifiers(Mods);
3515 }
3516 return ParseStatus::Success;
3517}
3518
3519ParseStatus
3520AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3521 bool AllowImm) {
3522 bool Sext = trySkipId("sext");
3523 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3524 return ParseStatus::Failure;
3525
3526 ParseStatus Res;
3527 if (AllowImm) {
3528 Res = parseRegOrImm(Operands);
3529 } else {
3530 Res = parseReg(Operands);
3531 }
3532 if (!Res.isSuccess())
3533 return Sext ? ParseStatus::Failure : Res;
3534
3535 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3536 return ParseStatus::Failure;
3537
3538 AMDGPUOperand::Modifiers Mods;
3539 Mods.Sext = Sext;
3540
3541 if (Mods.hasIntModifiers()) {
3542 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3543 if (Op.isExpr())
3544 return Error(Op.getStartLoc(), "expected an absolute expression");
3545 Op.setModifiers(Mods);
3546 }
3547
3548 return ParseStatus::Success;
3549}
3550
3551ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3552 return parseRegOrImmWithFPInputMods(Operands, false);
3553}
3554
3555ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3556 return parseRegOrImmWithIntInputMods(Operands, false);
3557}
3558
3559ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3560 auto Loc = getLoc();
3561 if (trySkipId("off")) {
3562 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3563 AMDGPUOperand::ImmTyOff, false));
3564 return ParseStatus::Success;
3565 }
3566
3567 if (!isRegister())
3568 return ParseStatus::NoMatch;
3569
3570 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3571 if (Reg) {
3572 Operands.push_back(std::move(Reg));
3573 return ParseStatus::Success;
3574 }
3575
3576 return ParseStatus::Failure;
3577}
3578
3579unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3580 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3581
3582 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3583 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3584 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3585 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3586 return Match_InvalidOperand;
3587
3588 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3589 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3590 // v_mac_f32/16 allow only dst_sel == DWORD;
3591 auto OpNum =
3592 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3593 const auto &Op = Inst.getOperand(OpNum);
3594 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3595 return Match_InvalidOperand;
3596 }
3597 }
3598
3599 // Asm can first try to match VOPD or VOPD3. By failing early here with
3600 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3601 // Checking later during validateInstruction does not give a chance to retry
3602 // parsing as a different encoding.
3603 if (tryAnotherVOPDEncoding(Inst))
3604 return Match_InvalidOperand;
3605
3606 return Match_Success;
3607}
3608
3618
3619// What asm variants we should check
3620ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3621 if (isForcedDPP() && isForcedVOP3()) {
3622 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3623 return ArrayRef(Variants);
3624 }
3625 if (getForcedEncodingSize() == 32) {
3626 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3627 return ArrayRef(Variants);
3628 }
3629
3630 if (isForcedVOP3()) {
3631 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3632 return ArrayRef(Variants);
3633 }
3634
3635 if (isForcedSDWA()) {
3636 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3637 AMDGPUAsmVariants::SDWA9};
3638 return ArrayRef(Variants);
3639 }
3640
3641 if (isForcedDPP()) {
3642 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3643 return ArrayRef(Variants);
3644 }
3645
3646 return getAllVariants();
3647}
3648
3649StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3650 if (isForcedDPP() && isForcedVOP3())
3651 return "e64_dpp";
3652
3653 if (getForcedEncodingSize() == 32)
3654 return "e32";
3655
3656 if (isForcedVOP3())
3657 return "e64";
3658
3659 if (isForcedSDWA())
3660 return "sdwa";
3661
3662 if (isForcedDPP())
3663 return "dpp";
3664
3665 return "";
3666}
3667
3668unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3669 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3670 for (MCPhysReg Reg : Desc.implicit_uses()) {
3671 switch (Reg) {
3672 case AMDGPU::FLAT_SCR:
3673 case AMDGPU::VCC:
3674 case AMDGPU::VCC_LO:
3675 case AMDGPU::VCC_HI:
3676 case AMDGPU::M0:
3677 return Reg;
3678 default:
3679 break;
3680 }
3681 }
3682 return AMDGPU::NoRegister;
3683}
3684
3685// NB: This code is correct only when used to check constant
3686// bus limitations because GFX7 supports no f16 inline constants.
3687// Note that there are no cases when a GFX7 opcode violates
3688// constant bus limitations due to the use of an f16 constant.
3689bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3690 unsigned OpIdx) const {
3691 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3692
3695 return false;
3696 }
3697
3698 const MCOperand &MO = Inst.getOperand(OpIdx);
3699
3700 int64_t Val = MO.isImm() ? MO.getImm() : getLitValue(MO.getExpr());
3701 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3702
3703 switch (OpSize) { // expected operand size
3704 case 8:
3705 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3706 case 4:
3707 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3708 case 2: {
3709 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3712 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3713
3717
3721
3725
3728 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3729
3732 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3733
3735 return false;
3736
3737 llvm_unreachable("invalid operand type");
3738 }
3739 default:
3740 llvm_unreachable("invalid operand size");
3741 }
3742}
3743
3744unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3745 if (!isGFX10Plus())
3746 return 1;
3747
3748 switch (Opcode) {
3749 // 64-bit shift instructions can use only one scalar value input
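 // For example (illustrative asm), "v_lshlrev_b64 v[0:1], s4, s[6:7]" would need
 // two scalar reads and is rejected, while "v_lshlrev_b64 v[0:1], s4, v[6:7]"
 // stays within the limit of one.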
3750 case AMDGPU::V_LSHLREV_B64_e64:
3751 case AMDGPU::V_LSHLREV_B64_gfx10:
3752 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3753 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3754 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3755 case AMDGPU::V_LSHRREV_B64_e64:
3756 case AMDGPU::V_LSHRREV_B64_gfx10:
3757 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3758 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3759 case AMDGPU::V_ASHRREV_I64_e64:
3760 case AMDGPU::V_ASHRREV_I64_gfx10:
3761 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3762 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3763 case AMDGPU::V_LSHL_B64_e64:
3764 case AMDGPU::V_LSHR_B64_e64:
3765 case AMDGPU::V_ASHR_I64_e64:
3766 return 1;
3767 default:
3768 return 2;
3769 }
3770}
3771
3772constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3773using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3774
3775// Get regular operand indices in the same order as specified
3776// in the instruction (but append mandatory literals to the end).
3777static OperandIndices getSrcOperandIndices(unsigned Opcode,
3778 bool AddMandatoryLiterals = false) {
3779
3780 int16_t ImmIdx =
3781 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3782
3783 if (isVOPD(Opcode)) {
3784 int16_t ImmXIdx =
3785 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3786
3787 return {getNamedOperandIdx(Opcode, OpName::src0X),
3788 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3789 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3790 getNamedOperandIdx(Opcode, OpName::src0Y),
3791 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3792 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3793 ImmXIdx,
3794 ImmIdx};
3795 }
3796
3797 return {getNamedOperandIdx(Opcode, OpName::src0),
3798 getNamedOperandIdx(Opcode, OpName::src1),
3799 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3800}
3801
3802bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3803 const MCOperand &MO = Inst.getOperand(OpIdx);
3804 if (MO.isImm())
3805 return !isInlineConstant(Inst, OpIdx);
3806 if (MO.isReg()) {
3807 auto Reg = MO.getReg();
3808 if (!Reg)
3809 return false;
3810 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3811 auto PReg = mc2PseudoReg(Reg);
3812 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3813 }
3814 return true;
3815}
3816
3817// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3818// Writelane is special in that it can use SGPR and M0 (which would normally
3819// count as using the constant bus twice - but in this case it is allowed since
3820// the lane selector doesn't count as a use of the constant bus). However, it is
3821// still required to abide by the 1 SGPR rule.
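// For example (illustrative asm), "v_writelane_b32 v1, s2, m0" names both an
// SGPR and M0 but is accepted, because the M0 lane selector is not counted
// against the constant bus here.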
3822static bool checkWriteLane(const MCInst &Inst) {
3823 const unsigned Opcode = Inst.getOpcode();
3824 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3825 return false;
3826 const MCOperand &LaneSelOp = Inst.getOperand(2);
3827 if (!LaneSelOp.isReg())
3828 return false;
3829 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3830 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3831}
3832
3833bool AMDGPUAsmParser::validateConstantBusLimitations(
3834 const MCInst &Inst, const OperandVector &Operands) {
3835 const unsigned Opcode = Inst.getOpcode();
3836 const MCInstrDesc &Desc = MII.get(Opcode);
3837 MCRegister LastSGPR;
3838 unsigned ConstantBusUseCount = 0;
3839 unsigned NumLiterals = 0;
3840 unsigned LiteralSize;
3841
3842 if (!(Desc.TSFlags &
3845 !isVOPD(Opcode))
3846 return true;
3847
3848 if (checkWriteLane(Inst))
3849 return true;
3850
3851 // Check special imm operands (used by madmk, etc)
3852 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3853 ++NumLiterals;
3854 LiteralSize = 4;
3855 }
3856
3857 SmallDenseSet<unsigned> SGPRsUsed;
3858 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3859 if (SGPRUsed != AMDGPU::NoRegister) {
3860 SGPRsUsed.insert(SGPRUsed);
3861 ++ConstantBusUseCount;
3862 }
3863
3864 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3865
3866 unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
3867
3868 for (int OpIdx : OpIndices) {
3869 if (OpIdx == -1)
3870 continue;
3871
3872 const MCOperand &MO = Inst.getOperand(OpIdx);
3873 if (usesConstantBus(Inst, OpIdx)) {
3874 if (MO.isReg()) {
3875 LastSGPR = mc2PseudoReg(MO.getReg());
3876 // Pairs of registers with a partial intersection like these
3877 // s0, s[0:1]
3878 // flat_scratch_lo, flat_scratch
3879 // flat_scratch_lo, flat_scratch_hi
3880 // are theoretically valid but they are disabled anyway.
3881 // Note that this code mimics SIInstrInfo::verifyInstruction
3882 if (SGPRsUsed.insert(LastSGPR).second) {
3883 ++ConstantBusUseCount;
3884 }
3885 } else { // Expression or a literal
3886
3887 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3888 continue; // special operand like VINTERP attr_chan
3889
3890 // An instruction may use only one literal.
3891 // This has been validated on the previous step.
3892 // See validateVOPLiteral.
3893 // This literal may be used as more than one operand.
3894 // If all these operands are of the same size,
3895 // this literal counts as one scalar value.
3896 // Otherwise it counts as 2 scalar values.
3897 // See "GFX10 Shader Programming", section 3.6.2.3.
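 // For example, one literal feeding both a 64-bit and a 32-bit operand has
 // mismatched operand sizes and counts as two scalar values; if both uses
 // are 32-bit, it counts as one.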
3898
3899 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3900 if (Size < 4)
3901 Size = 4;
3902
3903 if (NumLiterals == 0) {
3904 NumLiterals = 1;
3905 LiteralSize = Size;
3906 } else if (LiteralSize != Size) {
3907 NumLiterals = 2;
3908 }
3909 }
3910 }
3911
3912 if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
3913 Error(getOperandLoc(Operands, OpIdx),
3914 "invalid operand (violates constant bus restrictions)");
3915 return false;
3916 }
3917 }
3918 return true;
3919}
3920
3921std::optional<unsigned>
3922AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
3923
3924 const unsigned Opcode = Inst.getOpcode();
3925 if (!isVOPD(Opcode))
3926 return {};
3927
3928 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3929
3930 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3931 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3932 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3933 ? Opr.getReg()
3934 : MCRegister();
3935 };
3936
3937 // On GFX12+, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2
3938 // source-cache.
3939 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
3940 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
3941 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
3942 bool AllowSameVGPR = isGFX1250();
3943
3944 if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
3945 for (auto OpName : {OpName::src0X, OpName::src0Y}) {
3946 int I = getNamedOperandIdx(Opcode, OpName);
3947 const MCOperand &Op = Inst.getOperand(I);
3948 if (!Op.isImm())
3949 continue;
3950 int64_t Imm = Op.getImm();
3951 if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
3952 !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
3953 return (unsigned)I;
3954 }
3955
3956 for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
3957 OpName::vsrc2Y, OpName::imm}) {
3958 int I = getNamedOperandIdx(Opcode, OpName);
3959 if (I == -1)
3960 continue;
3961 const MCOperand &Op = Inst.getOperand(I);
3962 if (Op.isImm())
3963 return (unsigned)I;
3964 }
3965 }
3966
3967 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3968 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
3969 getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
3970
3971 return InvalidCompOprIdx;
3972}
3973
3974bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
3975 const OperandVector &Operands) {
3976
3977 unsigned Opcode = Inst.getOpcode();
3978 bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
3979
3980 if (AsVOPD3) {
3981 for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
3982 AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
3983 if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
3984 (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
3985 Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
3986 }
3987 }
3988
3989 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
3990 if (!InvalidCompOprIdx.has_value())
3991 return true;
3992
3993 auto CompOprIdx = *InvalidCompOprIdx;
3994 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3995 auto ParsedIdx =
3996 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3997 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3998 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3999
4000 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4001 if (CompOprIdx == VOPD::Component::DST) {
4002 if (AsVOPD3)
4003 Error(Loc, "dst registers must be distinct");
4004 else
4005 Error(Loc, "one dst register must be even and the other odd");
4006 } else {
4007 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4008 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
4009 " operands must use different VGPR banks");
4010 }
4011
4012 return false;
4013}
4014
4015// \returns true if \p Inst does not satisfy VOPD constraints, but can be
4016// potentially used as VOPD3 with the same operands.
4017bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4018 // First check if it fits VOPD
4019 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
4020 if (!InvalidCompOprIdx.has_value())
4021 return false;
4022
4023 // Then if it fits VOPD3
4024 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
4025 if (InvalidCompOprIdx.has_value()) {
4026 // If the failing operand is the dst, it is better to report the error
4027 // against the VOPD3 instruction, as it has more capabilities and the
4028 // message will be more informative. If the dst is not legal for VOPD3,
4029 // then it is not legal for VOPD either.
4030 if (*InvalidCompOprIdx == VOPD::Component::DST)
4031 return true;
4032
4033 // Otherwise prefer VOPD as we may find ourselves in an awkward situation
4034 // with a conflict in the tied implicit src2 of fmac and no asm operand
4035 // to point to.
4036 return false;
4037 }
4038 return true;
4039}
4040
4041// \returns true if a VOPD3 instruction can also be represented as a shorter
4042// VOPD encoding.
4043bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
4044 const unsigned Opcode = Inst.getOpcode();
4045 const auto &II = getVOPDInstInfo(Opcode, &MII);
4046 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
4047 if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
4048 !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
4049 return false;
4050
4051 // This is an awkward exception: the VOPD3 variant of V_DUAL_CNDMASK_B32 has
4052 // an explicit src2 even if it is vcc_lo. If it was parsed as VOPD3, it cannot
4053 // be parsed as VOPD, which does not accept src2.
4054 if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4055 II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4056 return false;
4057
4058 // If any modifiers are set this cannot be VOPD.
4059 for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4060 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4061 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4062 int I = getNamedOperandIdx(Opcode, OpName);
4063 if (I == -1)
4064 continue;
4065 if (Inst.getOperand(I).getImm())
4066 return false;
4067 }
4068
4069 return !tryVOPD3(Inst);
4070}
4071
4072// VOPD3 has more relaxed register constraints than VOPD. We prefer the
4073// shorter VOPD form but switch to VOPD3 otherwise.
4074bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4075 const unsigned Opcode = Inst.getOpcode();
4076 if (!isGFX1250() || !isVOPD(Opcode))
4077 return false;
4078
4079 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4080 return tryVOPD(Inst);
4081 return tryVOPD3(Inst);
4082}
4083
4084bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4085
4086 const unsigned Opc = Inst.getOpcode();
4087 const MCInstrDesc &Desc = MII.get(Opc);
4088
4089 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4090 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4091 assert(ClampIdx != -1);
4092 return Inst.getOperand(ClampIdx).getImm() == 0;
4093 }
4094
4095 return true;
4096}
4097
4098static constexpr uint64_t MIMGFlags =
4099 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
4100
4101bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) {
4102
4103 const unsigned Opc = Inst.getOpcode();
4104 const MCInstrDesc &Desc = MII.get(Opc);
4105
4106 if ((Desc.TSFlags & MIMGFlags) == 0)
4107 return true;
4108
4109 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
4110 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4111 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
4112
4113 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
4114 return true;
4115
4116 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
4117 return true;
4118
4119 unsigned VDataSize = getRegOperandSize(Desc, VDataIdx);
4120 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
4121 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4122 if (DMask == 0)
4123 DMask = 1;
4124
4125 bool IsPackedD16 = false;
4126 unsigned DataSize =
4127 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
4128 if (hasPackedD16()) {
4129 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4130 IsPackedD16 = D16Idx >= 0;
4131 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
4132 DataSize = (DataSize + 1) / 2;
4133 }
4134
4135 if ((VDataSize / 4) == DataSize + TFESize)
4136 return true;
4137
4138 StringRef Modifiers;
4139 if (isGFX90A())
4140 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
4141 else
4142 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
4143
4144 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
4145 return false;
4146}
4147
4148bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
4149 const unsigned Opc = Inst.getOpcode();
4150 const MCInstrDesc &Desc = MII.get(Opc);
4151
4152 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
4153 return true;
4154
4155 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4156
4157 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4158 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4159 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
4160 AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
4161 ? AMDGPU::OpName::srsrc
4162 : AMDGPU::OpName::rsrc;
4163 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
4164 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4165 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
4166
4167 assert(VAddr0Idx != -1);
4168 assert(SrsrcIdx != -1);
4169 assert(SrsrcIdx > VAddr0Idx);
4170
4171 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
4172 if (BaseOpcode->BVH) {
4173 if (IsA16 == BaseOpcode->A16)
4174 return true;
4175 Error(IDLoc, "image address size does not match a16");
4176 return false;
4177 }
4178
4179 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4180 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4181 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4182 unsigned ActualAddrSize =
4183 IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(Desc, VAddr0Idx) / 4;
4184
4185 unsigned ExpectedAddrSize =
4186 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
4187
4188 if (IsNSA) {
4189 if (hasPartialNSAEncoding() &&
4190 ExpectedAddrSize >
4192 int VAddrLastIdx = SrsrcIdx - 1;
4193 unsigned VAddrLastSize = getRegOperandSize(Desc, VAddrLastIdx) / 4;
4194
4195 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4196 }
4197 } else {
4198 if (ExpectedAddrSize > 12)
4199 ExpectedAddrSize = 16;
4200
4201 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
4202 // This provides backward compatibility for assembly created
4203 // before 160b/192b/224b types were directly supported.
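 // For example (illustrative), an image instruction that needs only 5 address
 // VGPRs may still be written with an 8-VGPR vaddr tuple such as v[0:7] and is
 // accepted here.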
4204 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4205 return true;
4206 }
4207
4208 if (ActualAddrSize == ExpectedAddrSize)
4209 return true;
4210
4211 Error(IDLoc, "image address size does not match dim and a16");
4212 return false;
4213}
4214
4215bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4216
4217 const unsigned Opc = Inst.getOpcode();
4218 const MCInstrDesc &Desc = MII.get(Opc);
4219
4220 if ((Desc.TSFlags & MIMGFlags) == 0)
4221 return true;
4222 if (!Desc.mayLoad() || !Desc.mayStore())
4223 return true; // Not atomic
4224
4225 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4226 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4227
4228 // This is an incomplete check because image_atomic_cmpswap
4229 // may only use 0x3 and 0xf while other atomic operations
4230 // may use 0x1 and 0x3. However these limitations are
4231 // verified when we check that dmask matches dst size.
4232 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4233}
4234
4235bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4236
4237 const unsigned Opc = Inst.getOpcode();
4238 const MCInstrDesc &Desc = MII.get(Opc);
4239
4240 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4241 return true;
4242
4243 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4244 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4245
4246 // GATHER4 instructions use dmask in a different fashion compared to
4247 // other MIMG instructions. The only useful DMASK values are
4248 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4249 // (red,red,red,red) etc.) The ISA document doesn't mention
4250 // this.
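 // For example, dmask:0x2 returns (green,green,green,green), while a value
 // such as dmask:0x5 selects more than one component and is rejected here.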
4251 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4252}
4253
4254bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4255 const OperandVector &Operands) {
4256 if (!isGFX10Plus())
4257 return true;
4258
4259 const unsigned Opc = Inst.getOpcode();
4260 const MCInstrDesc &Desc = MII.get(Opc);
4261
4262 if ((Desc.TSFlags & MIMGFlags) == 0)
4263 return true;
4264
4265 // image_bvh_intersect_ray instructions do not have dim
4267 return true;
4268
4269 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4270 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4271 if (Op.isDim())
4272 return true;
4273 }
4274 return false;
4275}
4276
4277bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4278 const unsigned Opc = Inst.getOpcode();
4279 const MCInstrDesc &Desc = MII.get(Opc);
4280
4281 if ((Desc.TSFlags & MIMGFlags) == 0)
4282 return true;
4283
4284 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4285 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4286 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4287
4288 if (!BaseOpcode->MSAA)
4289 return true;
4290
4291 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4292 assert(DimIdx != -1);
4293
4294 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4295 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4296
4297 return DimInfo->MSAA;
4298}
4299
4300static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4301{
4302 switch (Opcode) {
4303 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4304 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4305 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4306 return true;
4307 default:
4308 return false;
4309 }
4310}
4311
4312// movrels* opcodes should only allow VGPRs as src0.
4313// This is specified in .td description for vop1/vop3,
4314// but sdwa is handled differently. See isSDWAOperand.
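// For example (illustrative asm), "v_movrels_b32_sdwa v0, s1" would be rejected
// below, while a VGPR src0 such as v1 is accepted.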
4315bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4316 const OperandVector &Operands) {
4317
4318 const unsigned Opc = Inst.getOpcode();
4319 const MCInstrDesc &Desc = MII.get(Opc);
4320
4321 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4322 return true;
4323
4324 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4325 assert(Src0Idx != -1);
4326
4327 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4328 if (Src0.isReg()) {
4329 auto Reg = mc2PseudoReg(Src0.getReg());
4330 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4331 if (!isSGPR(Reg, TRI))
4332 return true;
4333 }
4334
4335 Error(getOperandLoc(Operands, Src0Idx), "source operand must be a VGPR");
4336 return false;
4337}
4338
4339bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4340 const OperandVector &Operands) {
4341
4342 const unsigned Opc = Inst.getOpcode();
4343
4344 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4345 return true;
4346
4347 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4348 assert(Src0Idx != -1);
4349
4350 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4351 if (!Src0.isReg())
4352 return true;
4353
4354 auto Reg = mc2PseudoReg(Src0.getReg());
4355 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4356 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4357 Error(getOperandLoc(Operands, Src0Idx),
4358 "source operand must be either a VGPR or an inline constant");
4359 return false;
4360 }
4361
4362 return true;
4363}
4364
4365bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4366 const OperandVector &Operands) {
4367 unsigned Opcode = Inst.getOpcode();
4368 const MCInstrDesc &Desc = MII.get(Opcode);
4369
4370 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4371 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4372 return true;
4373
4374 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4375 if (Src2Idx == -1)
4376 return true;
4377
4378 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4379 Error(getOperandLoc(Operands, Src2Idx),
4380 "inline constants are not allowed for this operand");
4381 return false;
4382 }
4383
4384 return true;
4385}
4386
4387bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4388 const OperandVector &Operands) {
4389 const unsigned Opc = Inst.getOpcode();
4390 const MCInstrDesc &Desc = MII.get(Opc);
4391
4392 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4393 return true;
4394
4395 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4396 if (BlgpIdx != -1) {
4397 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
4398 int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
4399
4400 unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
4401 unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();
4402
4403 // Validate the correct register size was used for the floating point
4404 // format operands
4405
4406 bool Success = true;
4407 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
4408 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4409 Error(getOperandLoc(Operands, Src0Idx),
4410 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4411 Success = false;
4412 }
4413
4414 if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
4415 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4416 Error(getOperandLoc(Operands, Src1Idx),
4417 "wrong register tuple size for blgp value " + Twine(BLGP));
4418 Success = false;
4419 }
4420
4421 return Success;
4422 }
4423 }
4424
4425 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4426 if (Src2Idx == -1)
4427 return true;
4428
4429 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4430 if (!Src2.isReg())
4431 return true;
4432
4433 MCRegister Src2Reg = Src2.getReg();
4434 MCRegister DstReg = Inst.getOperand(0).getReg();
4435 if (Src2Reg == DstReg)
4436 return true;
4437
4438 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4439 if (TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[0], HwMode))
4440 .getSizeInBits() <= 128)
4441 return true;
4442
4443 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4444 Error(getOperandLoc(Operands, Src2Idx),
4445 "source 2 operand must not partially overlap with dst");
4446 return false;
4447 }
4448
4449 return true;
4450}
4451
4452bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4453 switch (Inst.getOpcode()) {
4454 default:
4455 return true;
4456 case V_DIV_SCALE_F32_gfx6_gfx7:
4457 case V_DIV_SCALE_F32_vi:
4458 case V_DIV_SCALE_F32_gfx10:
4459 case V_DIV_SCALE_F64_gfx6_gfx7:
4460 case V_DIV_SCALE_F64_vi:
4461 case V_DIV_SCALE_F64_gfx10:
4462 break;
4463 }
4464
4465 // TODO: Check that src0 = src1 or src2.
4466
4467 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4468 AMDGPU::OpName::src2_modifiers,
4469 AMDGPU::OpName::src2_modifiers}) {
4470 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4471 .getImm() &
4473 return false;
4474 }
4475 }
4476
4477 return true;
4478}
4479
4480bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4481
4482 const unsigned Opc = Inst.getOpcode();
4483 const MCInstrDesc &Desc = MII.get(Opc);
4484
4485 if ((Desc.TSFlags & MIMGFlags) == 0)
4486 return true;
4487
4488 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4489 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4490 if (isCI() || isSI())
4491 return false;
4492 }
4493
4494 return true;
4495}
4496
4497bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4498 const unsigned Opc = Inst.getOpcode();
4499 const MCInstrDesc &Desc = MII.get(Opc);
4500
4501 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4502 return true;
4503
4504 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4505
4506 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4507}
4508
4509static bool IsRevOpcode(const unsigned Opcode)
4510{
4511 switch (Opcode) {
4512 case AMDGPU::V_SUBREV_F32_e32:
4513 case AMDGPU::V_SUBREV_F32_e64:
4514 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4515 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4516 case AMDGPU::V_SUBREV_F32_e32_vi:
4517 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4518 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4519 case AMDGPU::V_SUBREV_F32_e64_vi:
4520
4521 case AMDGPU::V_SUBREV_CO_U32_e32:
4522 case AMDGPU::V_SUBREV_CO_U32_e64:
4523 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4524 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4525
4526 case AMDGPU::V_SUBBREV_U32_e32:
4527 case AMDGPU::V_SUBBREV_U32_e64:
4528 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4529 case AMDGPU::V_SUBBREV_U32_e32_vi:
4530 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4531 case AMDGPU::V_SUBBREV_U32_e64_vi:
4532
4533 case AMDGPU::V_SUBREV_U32_e32:
4534 case AMDGPU::V_SUBREV_U32_e64:
4535 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4536 case AMDGPU::V_SUBREV_U32_e32_vi:
4537 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4538 case AMDGPU::V_SUBREV_U32_e64_vi:
4539
4540 case AMDGPU::V_SUBREV_F16_e32:
4541 case AMDGPU::V_SUBREV_F16_e64:
4542 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4543 case AMDGPU::V_SUBREV_F16_e32_vi:
4544 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4545 case AMDGPU::V_SUBREV_F16_e64_vi:
4546
4547 case AMDGPU::V_SUBREV_U16_e32:
4548 case AMDGPU::V_SUBREV_U16_e64:
4549 case AMDGPU::V_SUBREV_U16_e32_vi:
4550 case AMDGPU::V_SUBREV_U16_e64_vi:
4551
4552 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4553 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4554 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4555
4556 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4557 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4558
4559 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4560 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4561
4562 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4563 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4564
4565 case AMDGPU::V_LSHRREV_B32_e32:
4566 case AMDGPU::V_LSHRREV_B32_e64:
4567 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4568 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4569 case AMDGPU::V_LSHRREV_B32_e32_vi:
4570 case AMDGPU::V_LSHRREV_B32_e64_vi:
4571 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4572 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4573
4574 case AMDGPU::V_ASHRREV_I32_e32:
4575 case AMDGPU::V_ASHRREV_I32_e64:
4576 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4577 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4578 case AMDGPU::V_ASHRREV_I32_e32_vi:
4579 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4580 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4581 case AMDGPU::V_ASHRREV_I32_e64_vi:
4582
4583 case AMDGPU::V_LSHLREV_B32_e32:
4584 case AMDGPU::V_LSHLREV_B32_e64:
4585 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4586 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4587 case AMDGPU::V_LSHLREV_B32_e32_vi:
4588 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4589 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4590 case AMDGPU::V_LSHLREV_B32_e64_vi:
4591
4592 case AMDGPU::V_LSHLREV_B16_e32:
4593 case AMDGPU::V_LSHLREV_B16_e64:
4594 case AMDGPU::V_LSHLREV_B16_e32_vi:
4595 case AMDGPU::V_LSHLREV_B16_e64_vi:
4596 case AMDGPU::V_LSHLREV_B16_gfx10:
4597
4598 case AMDGPU::V_LSHRREV_B16_e32:
4599 case AMDGPU::V_LSHRREV_B16_e64:
4600 case AMDGPU::V_LSHRREV_B16_e32_vi:
4601 case AMDGPU::V_LSHRREV_B16_e64_vi:
4602 case AMDGPU::V_LSHRREV_B16_gfx10:
4603
4604 case AMDGPU::V_ASHRREV_I16_e32:
4605 case AMDGPU::V_ASHRREV_I16_e64:
4606 case AMDGPU::V_ASHRREV_I16_e32_vi:
4607 case AMDGPU::V_ASHRREV_I16_e64_vi:
4608 case AMDGPU::V_ASHRREV_I16_gfx10:
4609
4610 case AMDGPU::V_LSHLREV_B64_e64:
4611 case AMDGPU::V_LSHLREV_B64_gfx10:
4612 case AMDGPU::V_LSHLREV_B64_vi:
4613
4614 case AMDGPU::V_LSHRREV_B64_e64:
4615 case AMDGPU::V_LSHRREV_B64_gfx10:
4616 case AMDGPU::V_LSHRREV_B64_vi:
4617
4618 case AMDGPU::V_ASHRREV_I64_e64:
4619 case AMDGPU::V_ASHRREV_I64_gfx10:
4620 case AMDGPU::V_ASHRREV_I64_vi:
4621
4622 case AMDGPU::V_PK_LSHLREV_B16:
4623 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4624 case AMDGPU::V_PK_LSHLREV_B16_vi:
4625
4626 case AMDGPU::V_PK_LSHRREV_B16:
4627 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4628 case AMDGPU::V_PK_LSHRREV_B16_vi:
4629 case AMDGPU::V_PK_ASHRREV_I16:
4630 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4631 case AMDGPU::V_PK_ASHRREV_I16_vi:
4632 return true;
4633 default:
4634 return false;
4635 }
4636}
4637
4638bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
4639 const OperandVector &Operands) {
4640 using namespace SIInstrFlags;
4641 const unsigned Opcode = Inst.getOpcode();
4642 const MCInstrDesc &Desc = MII.get(Opcode);
4643
4644 // lds_direct register is defined so that it can be used
4645 // with 9-bit operands only. Ignore encodings which do not accept these.
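// For example (illustrative asm), "v_mov_b32 v0, lds_direct" is accepted on
// targets that support it, while using lds_direct as src1, or with an SDWA or
// *rev opcode, is rejected below.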
4646 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4647 if ((Desc.TSFlags & Enc) == 0)
4648 return true;
4649
4650 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4651 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4652 if (SrcIdx == -1)
4653 break;
4654 const auto &Src = Inst.getOperand(SrcIdx);
4655 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4656
4657 if (isGFX90A() || isGFX11Plus()) {
4658 Error(getOperandLoc(Operands, SrcIdx),
4659 "lds_direct is not supported on this GPU");
4660 return false;
4661 }
4662
4663 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) {
4664 Error(getOperandLoc(Operands, SrcIdx),
4665 "lds_direct cannot be used with this instruction");
4666 return false;
4667 }
4668
4669 if (SrcName != OpName::src0) {
4670 Error(getOperandLoc(Operands, SrcIdx),
4671 "lds_direct may be used as src0 only");
4672 return false;
4673 }
4674 }
4675 }
4676
4677 return true;
4678}
4679
4680SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4681 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4682 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4683 if (Op.isFlatOffset())
4684 return Op.getStartLoc();
4685 }
4686 return getLoc();
4687}
4688
4689bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4690 const OperandVector &Operands) {
4691 auto Opcode = Inst.getOpcode();
4692 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4693 if (OpNum == -1)
4694 return true;
4695
4696 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4697 if ((TSFlags & SIInstrFlags::FLAT))
4698 return validateFlatOffset(Inst, Operands);
4699
4700 if ((TSFlags & SIInstrFlags::SMRD))
4701 return validateSMEMOffset(Inst, Operands);
4702
4703 const auto &Op = Inst.getOperand(OpNum);
4704 // GFX12+ buffer ops: InstOffset is a signed 24-bit field, but must not be negative.
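 // For example (illustrative), offset:-16 on a GFX12 buffer instruction is
 // rejected here, while offset:16 is accepted.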
4705 if (isGFX12Plus() &&
4706 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4707 const unsigned OffsetSize = 24;
4708 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
4709 Error(getFlatOffsetLoc(Operands),
4710 Twine("expected a ") + Twine(OffsetSize - 1) +
4711 "-bit unsigned offset for buffer ops");
4712 return false;
4713 }
4714 } else {
4715 const unsigned OffsetSize = 16;
4716 if (!isUIntN(OffsetSize, Op.getImm())) {
4717 Error(getFlatOffsetLoc(Operands),
4718 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4719 return false;
4720 }
4721 }
4722 return true;
4723}
4724
4725bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4726 const OperandVector &Operands) {
4727 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4728 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4729 return true;
4730
4731 auto Opcode = Inst.getOpcode();
4732 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4733 assert(OpNum != -1);
4734
4735 const auto &Op = Inst.getOperand(OpNum);
4736 if (!hasFlatOffsets() && Op.getImm() != 0) {
4737 Error(getFlatOffsetLoc(Operands),
4738 "flat offset modifier is not supported on this GPU");
4739 return false;
4740 }
4741
4742 // For pre-GFX12 FLAT instructions the offset must be positive;
4743 // MSB is ignored and forced to zero.
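 // For example (illustrative), a plain flat load with offset:-8 is rejected on
 // such targets, while a small positive offset is accepted.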
4744 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4745 bool AllowNegative =
4746 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4747 isGFX12Plus();
4748 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4749 Error(getFlatOffsetLoc(Operands),
4750 Twine("expected a ") +
4751 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4752 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4753 return false;
4754 }
4755
4756 return true;
4757}
4758
4759SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4760 // Start with second operand because SMEM Offset cannot be dst or src0.
4761 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4762 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4763 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4764 return Op.getStartLoc();
4765 }
4766 return getLoc();
4767}
4768
4769bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4770 const OperandVector &Operands) {
4771 if (isCI() || isSI())
4772 return true;
4773
4774 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4775 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4776 return true;
4777
4778 auto Opcode = Inst.getOpcode();
4779 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4780 if (OpNum == -1)
4781 return true;
4782
4783 const auto &Op = Inst.getOperand(OpNum);
4784 if (!Op.isImm())
4785 return true;
4786
4787 uint64_t Offset = Op.getImm();
4788 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4791 return true;
4792
4793 Error(getSMEMOffsetLoc(Operands),
4794 isGFX12Plus() && IsBuffer
4795 ? "expected a 23-bit unsigned offset for buffer ops"
4796 : isGFX12Plus() ? "expected a 24-bit signed offset"
4797 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4798 : "expected a 21-bit signed offset");
4799
4800 return false;
4801}
4802
4803bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
4804 const OperandVector &Operands) {
4805 unsigned Opcode = Inst.getOpcode();
4806 const MCInstrDesc &Desc = MII.get(Opcode);
4807 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4808 return true;
4809
4810 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4811 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4812
4813 const int OpIndices[] = { Src0Idx, Src1Idx };
4814
4815 unsigned NumExprs = 0;
4816 unsigned NumLiterals = 0;
4817 int64_t LiteralValue;
4818
4819 for (int OpIdx : OpIndices) {
4820 if (OpIdx == -1) break;
4821
4822 const MCOperand &MO = Inst.getOperand(OpIdx);
4823 // Exclude special imm operands (like the one used by s_set_gpr_idx_on).
4824 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4825 bool IsLit = false;
4826 std::optional<int64_t> Imm;
4827 if (MO.isImm()) {
4828 Imm = MO.getImm();
4829 } else if (MO.isExpr()) {
4830 if (isLitExpr(MO.getExpr())) {
4831 IsLit = true;
4832 Imm = getLitValue(MO.getExpr());
4833 }
4834 } else {
4835 continue;
4836 }
4837
4838 if (!Imm.has_value()) {
4839 ++NumExprs;
4840 } else if (!isInlineConstant(Inst, OpIdx)) {
4841 auto OpType = static_cast<AMDGPU::OperandType>(
4842 Desc.operands()[OpIdx].OperandType);
4843 int64_t Value = encode32BitLiteral(*Imm, OpType, IsLit);
4844 if (NumLiterals == 0 || LiteralValue != Value) {
4845 LiteralValue = Value;
4846 ++NumLiterals;
4847 }
4848 }
4849 }
4850 }
4851
4852 if (NumLiterals + NumExprs <= 1)
4853 return true;
4854
4855 Error(getOperandLoc(Operands, Src1Idx),
4856 "only one unique literal operand is allowed");
4857 return false;
4858}
4859
4860bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4861 const unsigned Opc = Inst.getOpcode();
4862 if (isPermlane16(Opc)) {
4863 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4864 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4865
4866 if (OpSel & ~3)
4867 return false;
4868 }
4869
4870 uint64_t TSFlags = MII.get(Opc).TSFlags;
4871
4872 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4873 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4874 if (OpSelIdx != -1) {
4875 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4876 return false;
4877 }
4878 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4879 if (OpSelHiIdx != -1) {
4880 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4881 return false;
4882 }
4883 }
4884
4885 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4886 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4887 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4888 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4889 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4890 if (OpSel & 3)
4891 return false;
4892 }
4893
4894 // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
4895 // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
4896 // the first SGPR and use it for both the low and high operations.
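 // For example (illustrative), v_pk_add_f32 with an SGPR pair as src0 and
 // op_sel:[1,0] would be rejected on gfx12+, since the high half of the SGPR
 // source cannot be selected.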
4897 if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
4898 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4899 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4900 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4901 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4902
4903 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4904 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4905 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4906 unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
4907
4908 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4909
4910 auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
4911 unsigned Mask = 1U << Index;
4912 return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
4913 };
4914
4915 if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
4916 !VerifyOneSGPR(/*Index=*/0))
4917 return false;
4918 if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
4919 !VerifyOneSGPR(/*Index=*/1))
4920 return false;
4921
4922 int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4923 if (Src2Idx != -1) {
4924 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4925 if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
4926 !VerifyOneSGPR(/*Index=*/2))
4927 return false;
4928 }
4929 }
4930
4931 return true;
4932}
4933
4934bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
4935 if (!hasTrue16Insts())
4936 return true;
4937 const MCRegisterInfo *MRI = getMRI();
4938 const unsigned Opc = Inst.getOpcode();
4939 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4940 if (OpSelIdx == -1)
4941 return true;
4942 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
4943 // If the value is 0 we could have a default OpSel Operand, so conservatively
4944 // allow it.
4945 if (OpSelOpValue == 0)
4946 return true;
4947 unsigned OpCount = 0;
4948 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
4949 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
4950 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
4951 if (OpIdx == -1)
4952 continue;
4953 const MCOperand &Op = Inst.getOperand(OpIdx);
4954 if (Op.isReg() &&
4955 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
4956 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
4957 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
4958 if (OpSelOpIsHi != VGPRSuffixIsHi)
4959 return false;
4960 }
4961 ++OpCount;
4962 }
4963
4964 return true;
4965}
4966
4967bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
4968 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4969
4970 const unsigned Opc = Inst.getOpcode();
4971 uint64_t TSFlags = MII.get(Opc).TSFlags;
4972
4973 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4974 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4975 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4976 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4977 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4978 !(TSFlags & SIInstrFlags::IsSWMMAC))
4979 return true;
4980
4981 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4982 if (NegIdx == -1)
4983 return true;
4984
4985 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4986
4987 // Some instructions have a neg_lo or neg_hi operand, but the neg modifier is
4988 // allowed only on some of the src operands and not on others.
4989 // Conveniently, such instructions don't have a src_modifiers operand for the
4990 // src operands that don't allow neg, because those operands also don't allow opsel.
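 // For example (illustrative), a v_wmma iu8 variant written with neg_lo:[0,0,1]
 // is rejected below because src2 has no src_modifiers operand.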
4991
4992 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4993 AMDGPU::OpName::src1_modifiers,
4994 AMDGPU::OpName::src2_modifiers};
4995
4996 for (unsigned i = 0; i < 3; ++i) {
4997 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4998 if (Neg & (1 << i))
4999 return false;
5000 }
5001 }
5002
5003 return true;
5004}
5005
5006bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
5007 const OperandVector &Operands) {
5008 const unsigned Opc = Inst.getOpcode();
5009 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
5010 if (DppCtrlIdx >= 0) {
5011 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
5012
5013 if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
5014 AMDGPU::isDPALU_DPP(MII.get(Opc), MII, getSTI())) {
5015 // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
5016 // only on GFX12.
5017 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5018 Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
5019 : "DP ALU dpp only supports row_newbcast");
5020 return false;
5021 }
5022 }
5023
5024 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
5025 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5026
5027 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
5028 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5029 if (Src1Idx >= 0) {
5030 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5031 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5032 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
5033 Error(getOperandLoc(Operands, Src1Idx),
5034 "invalid operand for instruction");
5035 return false;
5036 }
5037 if (Src1.isImm()) {
5038 Error(getInstLoc(Operands),
5039 "src1 immediate operand invalid for instruction");
5040 return false;
5041 }
5042 }
5043 }
5044
5045 return true;
5046}
5047
5048// Check if VCC register matches wavefront size
5049bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5050 return (Reg == AMDGPU::VCC && isWave64()) ||
5051 (Reg == AMDGPU::VCC_LO && isWave32());
5052}
5053
5054// Only one unique literal can be used. A VOP3 literal is only allowed on GFX10+.
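// For example, an instruction naming two different literals (say 0x12345678 and
// 0x9abcdef0, illustrative values) is rejected here, while reusing the same
// literal value in two operands is allowed.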
5055bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
5056 const OperandVector &Operands) {
5057 unsigned Opcode = Inst.getOpcode();
5058 const MCInstrDesc &Desc = MII.get(Opcode);
5059 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
5060 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
5061 !HasMandatoryLiteral && !isVOPD(Opcode))
5062 return true;
5063
5064 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
5065
5066 std::optional<unsigned> LiteralOpIdx;
5067 std::optional<uint64_t> LiteralValue;
5068
5069 for (int OpIdx : OpIndices) {
5070 if (OpIdx == -1)
5071 continue;
5072
5073 const MCOperand &MO = Inst.getOperand(OpIdx);
5074 if (!MO.isImm() && !MO.isExpr())
5075 continue;
5076 if (!isSISrcOperand(Desc, OpIdx))
5077 continue;
5078
5079 std::optional<int64_t> Imm;
5080 if (MO.isImm())
5081 Imm = MO.getImm();
5082 else if (MO.isExpr() && isLitExpr(MO.getExpr()))
5083 Imm = getLitValue(MO.getExpr());
5084
5085 bool IsAnotherLiteral = false;
5086 if (!Imm.has_value()) {
5087 // Literal value not known, so we conservatively assume it's different.
5088 IsAnotherLiteral = true;
5089 } else if (!isInlineConstant(Inst, OpIdx)) {
5090 uint64_t Value = *Imm;
5091 bool IsForcedFP64 =
5092 Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
5094 HasMandatoryLiteral);
5095 bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
5096 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
5097 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
5098
5099 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
5100 !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
5101 Error(getOperandLoc(Operands, OpIdx),
5102 "invalid operand for instruction");
5103 return false;
5104 }
5105
5106 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5107 Value = Hi_32(Value);
5108
5109 IsAnotherLiteral = !LiteralValue || *LiteralValue != Value;
5110 LiteralValue = Value;
5111 }
5112
5113 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5114 !getFeatureBits()[FeatureVOP3Literal]) {
5115 Error(getOperandLoc(Operands, OpIdx),
5116 "literal operands are not supported");
5117 return false;
5118 }
5119
5120 if (LiteralOpIdx && IsAnotherLiteral) {
5121 Error(getLaterLoc(getOperandLoc(Operands, OpIdx),
5122 getOperandLoc(Operands, *LiteralOpIdx)),
5123 "only one unique literal operand is allowed");
5124 return false;
5125 }
5126
5127 if (IsAnotherLiteral)
5128 LiteralOpIdx = OpIdx;
5129 }
5130
5131 return true;
5132}
5133
5134// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5135static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5136 const MCRegisterInfo *MRI) {
5137 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5138 if (OpIdx < 0)
5139 return -1;
5140
5141 const MCOperand &Op = Inst.getOperand(OpIdx);
5142 if (!Op.isReg())
5143 return -1;
5144
5145 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5146 auto Reg = Sub ? Sub : Op.getReg();
5147 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5148 return AGPR32.contains(Reg) ? 1 : 0;
5149}
5150
5151bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5152 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5153 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
5154 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5155 SIInstrFlags::DS)) == 0)
5156 return true;
5157
5158 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5159 ? AMDGPU::OpName::data0
5160 : AMDGPU::OpName::vdata;
5161
5162 const MCRegisterInfo *MRI = getMRI();
5163 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5164 int DataAreg = IsAGPROperand(Inst, DataName, MRI);
5165
5166 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
5167 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5168 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5169 return false;
5170 }
5171
5172 auto FB = getFeatureBits();
5173 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5174 if (DataAreg < 0 || DstAreg < 0)
5175 return true;
5176 return DstAreg == DataAreg;
5177 }
5178
5179 return DstAreg < 1 && DataAreg < 1;
5180}
5181
5182bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5183 auto FB = getFeatureBits();
5184 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5185 return true;
5186
5187 unsigned Opc = Inst.getOpcode();
5188 const MCRegisterInfo *MRI = getMRI();
5189 // DS_READ_B96_TR_B6 is the only DS instruction in GFX950 that allows an
5190 // unaligned VGPR. All others only allow even-aligned VGPRs.
5191 if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5192 return true;
5193
5194 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5195 switch (Opc) {
5196 default:
5197 break;
5198 case AMDGPU::DS_LOAD_TR6_B96:
5199 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5200 // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250 that
5201 // allows an unaligned VGPR. All others only allow even-aligned VGPRs.
5202 return true;
5203 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5204 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5205 // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250 that
5206 // allows an unaligned VGPR for vdst, but other operands still only allow
5207 // even-aligned VGPRs.
5208 int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
5209 if (VAddrIdx != -1) {
5210 const MCOperand &Op = Inst.getOperand(VAddrIdx);
5211 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5212 if ((Sub - AMDGPU::VGPR0) & 1)
5213 return false;
5214 }
5215 return true;
5216 }
5217 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5218 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5219 return true;
5220 }
5221 }
5222
5223 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5224 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5225 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
5226 const MCOperand &Op = Inst.getOperand(I);
5227 if (!Op.isReg())
5228 continue;
5229
5230 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5231 if (!Sub)
5232 continue;
5233
5234 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
5235 return false;
5236 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
5237 return false;
5238 }
5239
5240 return true;
5241}
5242
5243SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5244 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5245 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5246 if (Op.isBLGP())
5247 return Op.getStartLoc();
5248 }
5249 return SMLoc();
5250}
5251
5252bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5253 const OperandVector &Operands) {
5254 unsigned Opc = Inst.getOpcode();
5255 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5256 if (BlgpIdx == -1)
5257 return true;
5258 SMLoc BLGPLoc = getBLGPLoc(Operands);
5259 if (!BLGPLoc.isValid())
5260 return true;
5261 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5262 auto FB = getFeatureBits();
5263 bool UsesNeg = false;
5264 if (FB[AMDGPU::FeatureGFX940Insts]) {
5265 switch (Opc) {
5266 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5267 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5268 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5269 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5270 UsesNeg = true;
5271 }
5272 }
5273
5274 if (IsNeg == UsesNeg)
5275 return true;
5276
5277 Error(BLGPLoc,
5278 UsesNeg ? "invalid modifier: blgp is not supported"
5279 : "invalid modifier: neg is not supported");
5280
5281 return false;
5282}
5283
5284bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5285 const OperandVector &Operands) {
5286 if (!isGFX11Plus())
5287 return true;
5288
5289 unsigned Opc = Inst.getOpcode();
5290 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5291 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5292 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5293 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5294 return true;
5295
5296 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5297 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5298 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5299 if (Reg == AMDGPU::SGPR_NULL)
5300 return true;
5301
5302 Error(getOperandLoc(Operands, Src0Idx), "src0 must be null");
5303 return false;
5304}
5305
5306bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5307 const OperandVector &Operands) {
5308 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5309 if ((TSFlags & SIInstrFlags::DS) == 0)
5310 return true;
5311 if (TSFlags & SIInstrFlags::GWS)
5312 return validateGWS(Inst, Operands);
5313 // Only validate GDS for non-GWS instructions.
5314 if (hasGDS())
5315 return true;
5316 int GDSIdx =
5317 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5318 if (GDSIdx < 0)
5319 return true;
5320 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5321 if (GDS) {
5322 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5323 Error(S, "gds modifier is not supported on this GPU");
5324 return false;
5325 }
5326 return true;
5327}
5328
5329// gfx90a has an undocumented limitation:
5330// DS_GWS opcodes must use even aligned registers.
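// For example (illustrative asm), "ds_gws_init v1 gds" would be rejected on
// gfx90a because v1 is odd-aligned, while v2 is accepted.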
5331bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5332 const OperandVector &Operands) {
5333 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5334 return true;
5335
5336 int Opc = Inst.getOpcode();
5337 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5338 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5339 return true;
5340
5341 const MCRegisterInfo *MRI = getMRI();
5342 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5343 int Data0Pos =
5344 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5345 assert(Data0Pos != -1);
5346 auto Reg = Inst.getOperand(Data0Pos).getReg();
5347 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5348 if (RegIdx & 1) {
5349 Error(getOperandLoc(Operands, Data0Pos), "vgpr must be even aligned");
5350 return false;
5351 }
5352
5353 return true;
5354}
5355
5356bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
5357 const OperandVector &Operands,
5358 SMLoc IDLoc) {
5359 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
5360 AMDGPU::OpName::cpol);
5361 if (CPolPos == -1)
5362 return true;
5363
5364 unsigned CPol = Inst.getOperand(CPolPos).getImm();
5365
5366 if (!isGFX1250()) {
5367 if (CPol & CPol::SCAL) {
5368 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5369 StringRef CStr(S.getPointer());
5370 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5371 Error(S, "scale_offset is not supported on this GPU");
5372 }
5373 if (CPol & CPol::NV) {
5374 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5375 StringRef CStr(S.getPointer());
5376 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
5377 Error(S, "nv is not supported on this GPU");
5378 }
5379 }
5380
5381 if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
5382 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5383 StringRef CStr(S.getPointer());
5384 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5385 Error(S, "scale_offset is not supported for this instruction");
5386 }
5387
5388 if (isGFX12Plus())
5389 return validateTHAndScopeBits(Inst, Operands, CPol);
5390
5391 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5392 if (TSFlags & SIInstrFlags::SMRD) {
5393 if (CPol && (isSI() || isCI())) {
5394 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5395 Error(S, "cache policy is not supported for SMRD instructions");
5396 return false;
5397 }
5398 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
5399 Error(IDLoc, "invalid cache policy for SMEM instruction");
5400 return false;
5401 }
5402 }
5403
5404 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
5406 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
5407 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | SIInstrFlags::FLAT;
5408 if (!(TSFlags & AllowSCCModifier)) {
5409 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5410 StringRef CStr(S.getPointer());
5411 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
5412 Error(S,
5413 "scc modifier is not supported for this instruction on this GPU");
5414 return false;
5415 }
5416 }
5417
5418 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
5419 return true;
5420
5421 if (TSFlags & SIInstrFlags::IsAtomicRet) {
5422 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5423 Error(IDLoc, isGFX940() ? "instruction must use sc0"
5424 : "instruction must use glc");
5425 return false;
5426 }
5427 } else {
5428 if (CPol & CPol::GLC) {
5429 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5430 StringRef CStr(S.getPointer());
5431 S = SMLoc::getFromPointer(
5432 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
5433 Error(S, isGFX940() ? "instruction must not use sc0"
5434 : "instruction must not use glc");
5435 return false;
5436 }
5437 }
5438
5439 return true;
5440}
5441
5442bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5443 const OperandVector &Operands,
5444 const unsigned CPol) {
5445 const unsigned TH = CPol & AMDGPU::CPol::TH;
5446 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5447
5448 const unsigned Opcode = Inst.getOpcode();
5449 const MCInstrDesc &TID = MII.get(Opcode);
5450
5451 auto PrintError = [&](StringRef Msg) {
5452 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5453 Error(S, Msg);
5454 return false;
5455 };
5456
5457 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5460 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5461
5462 if (TH == 0)
5463 return true;
5464
5465 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5466 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5467 (TH == AMDGPU::CPol::TH_NT_HT)))
5468 return PrintError("invalid th value for SMEM instruction");
5469
5470 if (TH == AMDGPU::CPol::TH_BYPASS) {
5471 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5473 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5475 return PrintError("scope and th combination is not valid");
5476 }
5477
5478 unsigned THType = AMDGPU::getTemporalHintType(TID);
5479 if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
5480 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5481 return PrintError("invalid th value for atomic instructions");
5482 } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
5483 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5484 return PrintError("invalid th value for store instructions");
5485 } else {
5486 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5487 return PrintError("invalid th value for load instructions");
5488 }
5489
5490 return true;
5491}
5492
5493bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5494 const OperandVector &Operands) {
5495 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5496 if (Desc.mayStore() &&
5497 Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF)) {
5498 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5499 if (Loc != getInstLoc(Operands)) {
5500 Error(Loc, "TFE modifier has no meaning for store instructions");
5501 return false;
5502 }
5503 }
5504
5505 return true;
5506}
5507
5508bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
5509 const OperandVector &Operands) {
5510 unsigned Opc = Inst.getOpcode();
5511 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5512 const MCInstrDesc &Desc = MII.get(Opc);
5513
5514 auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
5515 int FmtIdx = AMDGPU::getNamedOperandIdx(Opc, FmtOp);
5516 if (FmtIdx == -1)
5517 return true;
5518 unsigned Fmt = Inst.getOperand(FmtIdx).getImm();
5519 int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
5520 unsigned RegSize =
5521 TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[SrcIdx], HwMode))
5522 .getSizeInBits();
5523
5525 return true;
5526
5527 static const char *FmtNames[] = {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
5528 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
5529 "MATRIX_FMT_FP4"};
5530
5531 Error(getOperandLoc(Operands, SrcIdx),
5532 "wrong register tuple size for " + Twine(FmtNames[Fmt]));
5533 return false;
5534 };
5535
5536 return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
5537 validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
5538}
5539
5540bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc,
5541 const OperandVector &Operands) {
5542 if (!validateLdsDirect(Inst, Operands))
5543 return false;
5544 if (!validateTrue16OpSel(Inst)) {
5545 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5546 "op_sel operand conflicts with 16-bit operand suffix");
5547 return false;
5548 }
5549 if (!validateSOPLiteral(Inst, Operands))
5550 return false;
5551 if (!validateVOPLiteral(Inst, Operands)) {
5552 return false;
5553 }
5554 if (!validateConstantBusLimitations(Inst, Operands)) {
5555 return false;
5556 }
5557 if (!validateVOPD(Inst, Operands)) {
5558 return false;
5559 }
5560 if (!validateIntClampSupported(Inst)) {
5561 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5562 "integer clamping is not supported on this GPU");
5563 return false;
5564 }
5565 if (!validateOpSel(Inst)) {
5566 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5567 "invalid op_sel operand");
5568 return false;
5569 }
5570 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5571 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5572 "invalid neg_lo operand");
5573 return false;
5574 }
5575 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5576 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5577 "invalid neg_hi operand");
5578 return false;
5579 }
5580 if (!validateDPP(Inst, Operands)) {
5581 return false;
5582 }
5583 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5584 if (!validateMIMGD16(Inst)) {
5585 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5586 "d16 modifier is not supported on this GPU");
5587 return false;
5588 }
5589 if (!validateMIMGDim(Inst, Operands)) {
5590 Error(IDLoc, "missing dim operand");
5591 return false;
5592 }
5593 if (!validateTensorR128(Inst)) {
5594 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5595 "instruction must set modifier r128=0");
5596 return false;
5597 }
5598 if (!validateMIMGMSAA(Inst)) {
5599 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5600 "invalid dim; must be MSAA type");
5601 return false;
5602 }
5603 if (!validateMIMGDataSize(Inst, IDLoc)) {
5604 return false;
5605 }
5606 if (!validateMIMGAddrSize(Inst, IDLoc))
5607 return false;
5608 if (!validateMIMGAtomicDMask(Inst)) {
5609 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5610 "invalid atomic image dmask");
5611 return false;
5612 }
5613 if (!validateMIMGGatherDMask(Inst)) {
5614 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5615 "invalid image_gather dmask: only one bit must be set");
5616 return false;
5617 }
5618 if (!validateMovrels(Inst, Operands)) {
5619 return false;
5620 }
5621 if (!validateOffset(Inst, Operands)) {
5622 return false;
5623 }
5624 if (!validateMAIAccWrite(Inst, Operands)) {
5625 return false;
5626 }
5627 if (!validateMAISrc2(Inst, Operands)) {
5628 return false;
5629 }
5630 if (!validateMFMA(Inst, Operands)) {
5631 return false;
5632 }
5633 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5634 return false;
5635 }
5636
5637 if (!validateAGPRLdSt(Inst)) {
5638 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5639 ? "invalid register class: data and dst should be all VGPR or AGPR"
5640 : "invalid register class: agpr loads and stores not supported on this GPU"
5641 );
5642 return false;
5643 }
5644 if (!validateVGPRAlign(Inst)) {
5645 Error(IDLoc,
5646 "invalid register class: vgpr tuples must be 64 bit aligned");
5647 return false;
5648 }
5649 if (!validateDS(Inst, Operands)) {
5650 return false;
5651 }
5652
5653 if (!validateBLGP(Inst, Operands)) {
5654 return false;
5655 }
5656
5657 if (!validateDivScale(Inst)) {
5658 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5659 return false;
5660 }
5661 if (!validateWaitCnt(Inst, Operands)) {
5662 return false;
5663 }
5664 if (!validateTFE(Inst, Operands)) {
5665 return false;
5666 }
5667 if (!validateWMMA(Inst, Operands)) {
5668 return false;
5669 }
5670
5671 return true;
5672}
5673
5674 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5675 const FeatureBitset &FBS,
5676 unsigned VariantID = 0);
5677
5678static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5679 const FeatureBitset &AvailableFeatures,
5680 unsigned VariantID);
5681
5682bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5683 const FeatureBitset &FBS) {
5684 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5685}
5686
5687bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5688 const FeatureBitset &FBS,
5689 ArrayRef<unsigned> Variants) {
5690 for (auto Variant : Variants) {
5691 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5692 return true;
5693 }
5694
5695 return false;
5696}
5697
5698bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5699 SMLoc IDLoc) {
5700 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5701
5702 // Check if requested instruction variant is supported.
5703 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5704 return false;
5705
5706 // This instruction is not supported.
5707 // Clear any other pending errors because they are no longer relevant.
5708 getParser().clearPendingErrors();
5709
5710 // Requested instruction variant is not supported.
5711 // Check if any other variants are supported.
5712 StringRef VariantName = getMatchedVariantName();
5713 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5714 return Error(IDLoc,
5715 Twine(VariantName,
5716 " variant of this instruction is not supported"));
5717 }
5718
5719 // Check if this instruction may be used with a different wavesize.
5720 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5721 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5722 // FIXME: Use getAvailableFeatures, and do not manually recompute
5723 FeatureBitset FeaturesWS32 = getFeatureBits();
5724 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5725 .flip(AMDGPU::FeatureWavefrontSize32);
5726 FeatureBitset AvailableFeaturesWS32 =
5727 ComputeAvailableFeatures(FeaturesWS32);
5728
5729 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5730 return Error(IDLoc, "instruction requires wavesize=32");
5731 }
5732
5733 // Finally check if this instruction is supported on any other GPU.
5734 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5735 return Error(IDLoc, "instruction not supported on this GPU");
5736 }
5737
5738 // Instruction not supported on any GPU. Probably a typo.
5739 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5740 return Error(IDLoc, "invalid instruction" + Suggestion);
5741}
5742
5743static bool isInvalidVOPDY(const OperandVector &Operands,
5744 uint64_t InvalidOprIdx) {
5745 assert(InvalidOprIdx < Operands.size());
5746 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5747 if (Op.isToken() && InvalidOprIdx > 1) {
5748 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5749 return PrevOp.isToken() && PrevOp.getToken() == "::";
5750 }
5751 return false;
5752}
5753
5754bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5755 OperandVector &Operands,
5756 MCStreamer &Out,
5757 uint64_t &ErrorInfo,
5758 bool MatchingInlineAsm) {
5759 MCInst Inst;
5760 Inst.setLoc(IDLoc);
5761 unsigned Result = Match_Success;
5762 for (auto Variant : getMatchedVariants()) {
5763 uint64_t EI;
5764 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5765 Variant);
5766 // We order match statuses from least to most specific and use the most
5767 // specific status as the result:
5768 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
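// Illustrative example: if one encoding variant fails with Match_MnemonicFail
// and another fails with Match_InvalidOperand, the more specific
// Match_InvalidOperand result (and its ErrorInfo) is the one reported below.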
5769 if (R == Match_Success || R == Match_MissingFeature ||
5770 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5771 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5772 Result != Match_MissingFeature)) {
5773 Result = R;
5774 ErrorInfo = EI;
5775 }
5776 if (R == Match_Success)
5777 break;
5778 }
5779
5780 if (Result == Match_Success) {
5781 if (!validateInstruction(Inst, IDLoc, Operands)) {
5782 return true;
5783 }
5784 Out.emitInstruction(Inst, getSTI());
5785 return false;
5786 }
5787
5788 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5789 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5790 return true;
5791 }
5792
5793 switch (Result) {
5794 default: break;
5795 case Match_MissingFeature:
5796 // It has been verified that the specified instruction
5797 // mnemonic is valid. A match was found but it requires
5798 // features which are not supported on this GPU.
5799 return Error(IDLoc, "operands are not valid for this GPU or mode");
5800
5801 case Match_InvalidOperand: {
5802 SMLoc ErrorLoc = IDLoc;
5803 if (ErrorInfo != ~0ULL) {
5804 if (ErrorInfo >= Operands.size()) {
5805 return Error(IDLoc, "too few operands for instruction");
5806 }
5807 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5808 if (ErrorLoc == SMLoc())
5809 ErrorLoc = IDLoc;
5810
5811 if (isInvalidVOPDY(Operands, ErrorInfo))
5812 return Error(ErrorLoc, "invalid VOPDY instruction");
5813 }
5814 return Error(ErrorLoc, "invalid operand for instruction");
5815 }
5816
5817 case Match_MnemonicFail:
5818 llvm_unreachable("Invalid instructions should have been handled already");
5819 }
5820 llvm_unreachable("Implement any new match types added!");
5821}
5822
5823bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5824 int64_t Tmp = -1;
5825 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5826 return true;
5827 }
5828 if (getParser().parseAbsoluteExpression(Tmp)) {
5829 return true;
5830 }
5831 Ret = static_cast<uint32_t>(Tmp);
5832 return false;
5833}
5834
5835bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5836 if (!getSTI().getTargetTriple().isAMDGCN())
5837 return TokError("directive only supported for amdgcn architecture");
5838
5839 std::string TargetIDDirective;
5840 SMLoc TargetStart = getTok().getLoc();
5841 if (getParser().parseEscapedString(TargetIDDirective))
5842 return true;
5843
5844 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5845 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5846 return getParser().Error(TargetRange.Start,
5847 (Twine(".amdgcn_target directive's target id ") +
5848 Twine(TargetIDDirective) +
5849 Twine(" does not match the specified target id ") +
5850 Twine(getTargetStreamer().getTargetID()->toString())).str());
5851
5852 return false;
5853}
5854
5855bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5856 return Error(Range.Start, "value out of range", Range);
5857}
5858
5859bool AMDGPUAsmParser::calculateGPRBlocks(
5860 const FeatureBitset &Features, const MCExpr *VCCUsed,
5861 const MCExpr *FlatScrUsed, bool XNACKUsed,
5862 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5863 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5864 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5865 // TODO(scott.linder): These calculations are duplicated from
5866 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5867 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5868 MCContext &Ctx = getContext();
5869
5870 const MCExpr *NumSGPRs = NextFreeSGPR;
5871 int64_t EvaluatedSGPRs;
5872
5873 if (Version.Major >= 10)
5874 NumSGPRs = MCConstantExpr::create(0, Ctx);
5875 else {
5876 unsigned MaxAddressableNumSGPRs =
5877 IsaInfo::getAddressableNumSGPRs(&getSTI());
5878
5879 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5880 !Features.test(FeatureSGPRInitBug) &&
5881 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5882 return OutOfRangeError(SGPRRange);
5883
5884 const MCExpr *ExtraSGPRs =
5885 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5886 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5887
5888 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5889 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5890 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5891 return OutOfRangeError(SGPRRange);
5892
5893 if (Features.test(FeatureSGPRInitBug))
5894 NumSGPRs =
5895 MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5896 }
5897
5898 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5899 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
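// Worked example (illustrative): with NumGPR = 37 and an encoding granule of
// 8, alignTo(max(1, 37), 8) = 40 and 40 / 8 - 1 = 4, so 4 is the encoded
// block count.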
5900 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5901 unsigned Granule) -> const MCExpr * {
5902 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5903 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5904 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5905 const MCExpr *AlignToGPR =
5906 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5907 const MCExpr *DivGPR =
5908 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5909 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5910 return SubGPR;
5911 };
5912
5913 VGPRBlocks = GetNumGPRBlocks(
5914 NextFreeVGPR,
5915 IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
5916 SGPRBlocks =
5917 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
5918
5919 return false;
5920}
5921
5922bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5923 if (!getSTI().getTargetTriple().isAMDGCN())
5924 return TokError("directive only supported for amdgcn architecture");
5925
5926 if (!isHsaAbi(getSTI()))
5927 return TokError("directive only supported for amdhsa OS");
5928
5929 StringRef KernelName;
5930 if (getParser().parseIdentifier(KernelName))
5931 return true;
5932
5933 AMDGPU::MCKernelDescriptor KD =
5934 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5935 &getSTI(), getContext());
5936
5937 StringSet<> Seen;
5938
5939 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5940
5941 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
5942 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
5943
5944 SMRange VGPRRange;
5945 const MCExpr *NextFreeVGPR = ZeroExpr;
5946 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
5947 const MCExpr *NamedBarCnt = ZeroExpr;
5948 uint64_t SharedVGPRCount = 0;
5949 uint64_t PreloadLength = 0;
5950 uint64_t PreloadOffset = 0;
5951 SMRange SGPRRange;
5952 const MCExpr *NextFreeSGPR = ZeroExpr;
5953
5954 // Count the number of user SGPRs implied from the enabled feature bits.
5955 unsigned ImpliedUserSGPRCount = 0;
5956
5957 // Track if the asm explicitly contains the directive for the user SGPR
5958 // count.
5959 std::optional<unsigned> ExplicitUserSGPRCount;
5960 const MCExpr *ReserveVCC = OneExpr;
5961 const MCExpr *ReserveFlatScr = OneExpr;
5962 std::optional<bool> EnableWavefrontSize32;
5963
5964 while (true) {
5965 while (trySkipToken(AsmToken::EndOfStatement));
5966
5967 StringRef ID;
5968 SMRange IDRange = getTok().getLocRange();
5969 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5970 return true;
5971
5972 if (ID == ".end_amdhsa_kernel")
5973 break;
5974
5975 if (!Seen.insert(ID).second)
5976 return TokError(".amdhsa_ directives cannot be repeated");
5977
5978 SMLoc ValStart = getLoc();
5979 const MCExpr *ExprVal;
5980 if (getParser().parseExpression(ExprVal))
5981 return true;
5982 SMLoc ValEnd = getLoc();
5983 SMRange ValRange = SMRange(ValStart, ValEnd);
5984
5985 int64_t IVal = 0;
5986 uint64_t Val = IVal;
5987 bool EvaluatableExpr;
5988 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5989 if (IVal < 0)
5990 return OutOfRangeError(ValRange);
5991 Val = IVal;
5992 }
5993
5994#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5995 if (!isUInt<ENTRY##_WIDTH>(Val)) \
5996 return OutOfRangeError(RANGE); \
5997 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
5998 getContext());
5999
6000// Some fields use the parsed value immediately, which requires the expression
6001// to be resolvable.
6002#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6003 if (!(RESOLVED)) \
6004 return Error(IDRange.Start, "directive should have resolvable expression", \
6005 IDRange);
6006
6007 if (ID == ".amdhsa_group_segment_fixed_size") {
6008 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
6009 CHAR_BIT>(Val))
6010 return OutOfRangeError(ValRange);
6011 KD.group_segment_fixed_size = ExprVal;
6012 } else if (ID == ".amdhsa_private_segment_fixed_size") {
6013 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
6014 CHAR_BIT>(Val))
6015 return OutOfRangeError(ValRange);
6016 KD.private_segment_fixed_size = ExprVal;
6017 } else if (ID == ".amdhsa_kernarg_size") {
6018 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
6019 return OutOfRangeError(ValRange);
6020 KD.kernarg_size = ExprVal;
6021 } else if (ID == ".amdhsa_user_sgpr_count") {
6022 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6023 ExplicitUserSGPRCount = Val;
6024 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
6025 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6027 return Error(IDRange.Start,
6028 "directive is not supported with architected flat scratch",
6029 IDRange);
6031 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6032 ExprVal, ValRange);
6033 if (Val)
6034 ImpliedUserSGPRCount += 4;
6035 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
6036 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6037 if (!hasKernargPreload())
6038 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6039
6040 if (Val > getMaxNumUserSGPRs())
6041 return OutOfRangeError(ValRange);
6042 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
6043 ValRange);
6044 if (Val) {
6045 ImpliedUserSGPRCount += Val;
6046 PreloadLength = Val;
6047 }
6048 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
6049 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6050 if (!hasKernargPreload())
6051 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6052
6053 if (Val >= 1024)
6054 return OutOfRangeError(ValRange);
6055 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6056 ValRange);
6057 if (Val)
6058 PreloadOffset = Val;
6059 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6060 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6062 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6063 ValRange);
6064 if (Val)
6065 ImpliedUserSGPRCount += 2;
6066 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6067 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6069 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6070 ValRange);
6071 if (Val)
6072 ImpliedUserSGPRCount += 2;
6073 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6074 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6076 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6077 ExprVal, ValRange);
6078 if (Val)
6079 ImpliedUserSGPRCount += 2;
6080 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6081 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6083 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6084 ValRange);
6085 if (Val)
6086 ImpliedUserSGPRCount += 2;
6087 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6089 return Error(IDRange.Start,
6090 "directive is not supported with architected flat scratch",
6091 IDRange);
6092 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6094 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6095 ExprVal, ValRange);
6096 if (Val)
6097 ImpliedUserSGPRCount += 2;
6098 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6099 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6101 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6102 ExprVal, ValRange);
6103 if (Val)
6104 ImpliedUserSGPRCount += 1;
6105 } else if (ID == ".amdhsa_wavefront_size32") {
6106 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6107 if (IVersion.Major < 10)
6108 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6109 EnableWavefrontSize32 = Val;
6111 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6112 ValRange);
6113 } else if (ID == ".amdhsa_uses_dynamic_stack") {
6115 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6116 ValRange);
6117 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6119 return Error(IDRange.Start,
6120 "directive is not supported with architected flat scratch",
6121 IDRange);
6123 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6124 ValRange);
6125 } else if (ID == ".amdhsa_enable_private_segment") {
6127 return Error(
6128 IDRange.Start,
6129 "directive is not supported without architected flat scratch",
6130 IDRange);
6132 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6133 ValRange);
6134 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6136 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6137 ValRange);
6138 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6140 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6141 ValRange);
6142 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6144 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6145 ValRange);
6146 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6148 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6149 ValRange);
6150 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6152 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6153 ValRange);
6154 } else if (ID == ".amdhsa_next_free_vgpr") {
6155 VGPRRange = ValRange;
6156 NextFreeVGPR = ExprVal;
6157 } else if (ID == ".amdhsa_next_free_sgpr") {
6158 SGPRRange = ValRange;
6159 NextFreeSGPR = ExprVal;
6160 } else if (ID == ".amdhsa_accum_offset") {
6161 if (!isGFX90A())
6162 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6163 AccumOffset = ExprVal;
6164 } else if (ID == ".amdhsa_named_barrier_count") {
6165 if (!isGFX1250())
6166 return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
6167 NamedBarCnt = ExprVal;
6168 } else if (ID == ".amdhsa_reserve_vcc") {
6169 if (EvaluatableExpr && !isUInt<1>(Val))
6170 return OutOfRangeError(ValRange);
6171 ReserveVCC = ExprVal;
6172 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6173 if (IVersion.Major < 7)
6174 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
6176 return Error(IDRange.Start,
6177 "directive is not supported with architected flat scratch",
6178 IDRange);
6179 if (EvaluatableExpr && !isUInt<1>(Val))
6180 return OutOfRangeError(ValRange);
6181 ReserveFlatScr = ExprVal;
6182 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6183 if (IVersion.Major < 8)
6184 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
6185 if (!isUInt<1>(Val))
6186 return OutOfRangeError(ValRange);
6187 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6188 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
6189 IDRange);
6190 } else if (ID == ".amdhsa_float_round_mode_32") {
6192 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6193 ValRange);
6194 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6196 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6197 ValRange);
6198 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6200 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6201 ValRange);
6202 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6204 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6205 ValRange);
6206 } else if (ID == ".amdhsa_dx10_clamp") {
6207 if (IVersion.Major >= 12)
6208 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6210 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6211 ValRange);
6212 } else if (ID == ".amdhsa_ieee_mode") {
6213 if (IVersion.Major >= 12)
6214 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6216 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6217 ValRange);
6218 } else if (ID == ".amdhsa_fp16_overflow") {
6219 if (IVersion.Major < 9)
6220 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
6222 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6223 ValRange);
6224 } else if (ID == ".amdhsa_tg_split") {
6225 if (!isGFX90A())
6226 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6227 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6228 ExprVal, ValRange);
6229 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6230 if (!supportsWGP(getSTI()))
6231 return Error(IDRange.Start,
6232 "directive unsupported on " + getSTI().getCPU(), IDRange);
6234 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6235 ValRange);
6236 } else if (ID == ".amdhsa_memory_ordered") {
6237 if (IVersion.Major < 10)
6238 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6240 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6241 ValRange);
6242 } else if (ID == ".amdhsa_forward_progress") {
6243 if (IVersion.Major < 10)
6244 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6246 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6247 ValRange);
6248 } else if (ID == ".amdhsa_shared_vgpr_count") {
6249 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6250 if (IVersion.Major < 10 || IVersion.Major >= 12)
6251 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
6252 IDRange);
6253 SharedVGPRCount = Val;
6255 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6256 ValRange);
6257 } else if (ID == ".amdhsa_inst_pref_size") {
6258 if (IVersion.Major < 11)
6259 return Error(IDRange.Start, "directive requires gfx11+", IDRange);
6260 if (IVersion.Major == 11) {
6262 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6263 ValRange);
6264 } else {
6266 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6267 ValRange);
6268 }
6269 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6272 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6273 ExprVal, ValRange);
6274 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6276 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6277 ExprVal, ValRange);
6278 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6281 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6282 ExprVal, ValRange);
6283 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6285 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6286 ExprVal, ValRange);
6287 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6289 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6290 ExprVal, ValRange);
6291 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6293 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6294 ExprVal, ValRange);
6295 } else if (ID == ".amdhsa_exception_int_div_zero") {
6297 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6298 ExprVal, ValRange);
6299 } else if (ID == ".amdhsa_round_robin_scheduling") {
6300 if (IVersion.Major < 12)
6301 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
6303 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6304 ValRange);
6305 } else {
6306 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
6307 }
6308
6309#undef PARSE_BITS_ENTRY
6310 }
6311
6312 if (!Seen.contains(".amdhsa_next_free_vgpr"))
6313 return TokError(".amdhsa_next_free_vgpr directive is required");
6314
6315 if (!Seen.contains(".amdhsa_next_free_sgpr"))
6316 return TokError(".amdhsa_next_free_sgpr directive is required");
6317
6318 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6319
6320 // Consider the case where the total number of UserSGPRs, including trailing
6321 // allocated preload SGPRs, is greater than the number of explicitly
6322 // referenced SGPRs.
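// Illustrative example: if 10 user SGPRs are enabled (including preloaded
// kernarg SGPRs) but .amdhsa_next_free_sgpr only accounts for s0..s5,
// NextFreeSGPR is raised from 6 to 10 by the max() below.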
6323 if (PreloadLength) {
6324 MCContext &Ctx = getContext();
6325 NextFreeSGPR = AMDGPUMCExpr::createMax(
6326 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6327 }
6328
6329 const MCExpr *VGPRBlocks;
6330 const MCExpr *SGPRBlocks;
6331 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6332 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6333 EnableWavefrontSize32, NextFreeVGPR,
6334 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6335 SGPRBlocks))
6336 return true;
6337
6338 int64_t EvaluatedVGPRBlocks;
6339 bool VGPRBlocksEvaluatable =
6340 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6341 if (VGPRBlocksEvaluatable &&
6343 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6344 return OutOfRangeError(VGPRRange);
6345 }
6346 AMDGPU::MCKernelDescriptor::bits_set(
6347 KD.compute_pgm_rsrc1, VGPRBlocks,
6348 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6349 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6350
6351 int64_t EvaluatedSGPRBlocks;
6352 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6354 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6355 return OutOfRangeError(SGPRRange);
6356 AMDGPU::MCKernelDescriptor::bits_set(
6357 KD.compute_pgm_rsrc1, SGPRBlocks,
6358 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6359 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6360
6361 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6362 return TokError("amdgpu_user_sgpr_count smaller than implied by "
6363 "enabled user SGPRs");
6364
6365 if (isGFX1250()) {
6367 return TokError("too many user SGPRs enabled");
6370 MCConstantExpr::create(UserSGPRCount, getContext()),
6371 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6372 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
6373 } else {
6375 UserSGPRCount))
6376 return TokError("too many user SGPRs enabled");
6379 MCConstantExpr::create(UserSGPRCount, getContext()),
6380 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6381 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
6382 }
6383
6384 int64_t IVal = 0;
6385 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6386 return TokError("Kernarg size should be resolvable");
6387 uint64_t kernarg_size = IVal;
6388 if (PreloadLength && kernarg_size &&
6389 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6390 return TokError("Kernarg preload length + offset is larger than the "
6391 "kernarg segment size");
6392
6393 if (isGFX90A()) {
6394 if (!Seen.contains(".amdhsa_accum_offset"))
6395 return TokError(".amdhsa_accum_offset directive is required");
6396 int64_t EvaluatedAccum;
6397 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6398 uint64_t UEvaluatedAccum = EvaluatedAccum;
6399 if (AccumEvaluatable &&
6400 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6401 return TokError("accum_offset should be in range [4..256] in "
6402 "increments of 4");
6403
6404 int64_t EvaluatedNumVGPR;
6405 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6406 AccumEvaluatable &&
6407 UEvaluatedAccum >
6408 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6409 return TokError("accum_offset exceeds total VGPR allocation");
6410 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6412 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
6415 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6416 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6417 getContext());
6418 }
6419
6420 if (isGFX1250())
6422 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6423 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6424 getContext());
6425
6426 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6427 // SharedVGPRCount < 16 is checked by PARSE_BITS_ENTRY
6428 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6429 return TokError("shared_vgpr_count directive not valid on "
6430 "wavefront size 32");
6431 }
6432
6433 if (VGPRBlocksEvaluatable &&
6434 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6435 63)) {
6436 return TokError("shared_vgpr_count*2 + "
6437 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6438 "exceed 63\n");
6439 }
6440 }
6441
6442 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6443 NextFreeVGPR, NextFreeSGPR,
6444 ReserveVCC, ReserveFlatScr);
6445 return false;
6446}
6447
6448bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6449 uint32_t Version;
6450 if (ParseAsAbsoluteExpression(Version))
6451 return true;
6452
6453 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6454 return false;
6455}
6456
6457bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6458 AMDGPUMCKernelCodeT &C) {
6459 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6460 // assembly for backwards compatibility.
6461 if (ID == "max_scratch_backing_memory_byte_size") {
6462 Parser.eatToEndOfStatement();
6463 return false;
6464 }
6465
6466 SmallString<40> ErrStr;
6467 raw_svector_ostream Err(ErrStr);
6468 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6469 return TokError(Err.str());
6470 }
6471 Lex();
6472
6473 if (ID == "enable_wavefront_size32") {
6474 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6475 if (!isGFX10Plus())
6476 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6477 if (!isWave32())
6478 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6479 } else {
6480 if (!isWave64())
6481 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6482 }
6483 }
6484
6485 if (ID == "wavefront_size") {
6486 if (C.wavefront_size == 5) {
6487 if (!isGFX10Plus())
6488 return TokError("wavefront_size=5 is only allowed on GFX10+");
6489 if (!isWave32())
6490 return TokError("wavefront_size=5 requires +WavefrontSize32");
6491 } else if (C.wavefront_size == 6) {
6492 if (!isWave64())
6493 return TokError("wavefront_size=6 requires +WavefrontSize64");
6494 }
6495 }
6496
6497 return false;
6498}
6499
6500bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6501 AMDGPUMCKernelCodeT KernelCode;
6502 KernelCode.initDefault(&getSTI(), getContext());
6503
6504 while (true) {
6505 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6506 // will set the current token to EndOfStatement.
6507 while(trySkipToken(AsmToken::EndOfStatement));
6508
6509 StringRef ID;
6510 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6511 return true;
6512
6513 if (ID == ".end_amd_kernel_code_t")
6514 break;
6515
6516 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6517 return true;
6518 }
6519
6520 KernelCode.validate(&getSTI(), getContext());
6521 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6522
6523 return false;
6524}
6525
6526bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6527 StringRef KernelName;
6528 if (!parseId(KernelName, "expected symbol name"))
6529 return true;
6530
6531 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6532 ELF::STT_AMDGPU_HSA_KERNEL);
6533
6534 KernelScope.initialize(getContext());
6535 return false;
6536}
6537
6538bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6539 if (!getSTI().getTargetTriple().isAMDGCN()) {
6540 return Error(getLoc(),
6541 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6542 "architectures");
6543 }
6544
6545 auto TargetIDDirective = getLexer().getTok().getStringContents();
6546 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6547 return Error(getParser().getTok().getLoc(), "target id must match options");
6548
6549 getTargetStreamer().EmitISAVersion();
6550 Lex();
6551
6552 return false;
6553}
6554
6555bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6556 assert(isHsaAbi(getSTI()));
6557
6558 std::string HSAMetadataString;
6559 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6560 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6561 return true;
6562
6563 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6564 return Error(getLoc(), "invalid HSA metadata");
6565
6566 return false;
6567}
6568
6569/// Common code to parse out a block of text (typically YAML) between start and
6570/// end directives.
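/// A sketch of the expected layout (directive names are placeholders):
///   <AssemblerDirectiveBegin>
///     ...text collected verbatim, typically YAML...
///   <AssemblerDirectiveEnd>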
6571bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6572 const char *AssemblerDirectiveEnd,
6573 std::string &CollectString) {
6574
6575 raw_string_ostream CollectStream(CollectString);
6576
6577 getLexer().setSkipSpace(false);
6578
6579 bool FoundEnd = false;
6580 while (!isToken(AsmToken::Eof)) {
6581 while (isToken(AsmToken::Space)) {
6582 CollectStream << getTokenStr();
6583 Lex();
6584 }
6585
6586 if (trySkipId(AssemblerDirectiveEnd)) {
6587 FoundEnd = true;
6588 break;
6589 }
6590
6591 CollectStream << Parser.parseStringToEndOfStatement()
6592 << getContext().getAsmInfo()->getSeparatorString();
6593
6594 Parser.eatToEndOfStatement();
6595 }
6596
6597 getLexer().setSkipSpace(true);
6598
6599 if (isToken(AsmToken::Eof) && !FoundEnd) {
6600 return TokError(Twine("expected directive ") +
6601 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6602 }
6603
6604 return false;
6605}
6606
6607/// Parse the assembler directive for new MsgPack-format PAL metadata.
6608bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6609 std::string String;
6610 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6611 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6612 return true;
6613
6614 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6615 if (!PALMetadata->setFromString(String))
6616 return Error(getLoc(), "invalid PAL metadata");
6617 return false;
6618}
6619
6620/// Parse the assembler directive for old linear-format PAL metadata.
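/// The body is parsed below as comma-separated register/value pairs, e.g.
/// (illustrative values): 0x2c0a, 0x0, 0x2c0b, 0x42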
6621bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6622 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6623 return Error(getLoc(),
6624 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6625 "not available on non-amdpal OSes")).str());
6626 }
6627
6628 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6629 PALMetadata->setLegacy();
6630 for (;;) {
6631 uint32_t Key, Value;
6632 if (ParseAsAbsoluteExpression(Key)) {
6633 return TokError(Twine("invalid value in ") +
6634 Twine(PALMD::AssemblerDirective));
6635 }
6636 if (!trySkipToken(AsmToken::Comma)) {
6637 return TokError(Twine("expected an even number of values in ") +
6638 Twine(PALMD::AssemblerDirective));
6639 }
6640 if (ParseAsAbsoluteExpression(Value)) {
6641 return TokError(Twine("invalid value in ") +
6642 Twine(PALMD::AssemblerDirective));
6643 }
6644 PALMetadata->setRegister(Key, Value);
6645 if (!trySkipToken(AsmToken::Comma))
6646 break;
6647 }
6648 return false;
6649}
6650
6651/// ParseDirectiveAMDGPULDS
6652/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
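/// For example (hypothetical symbol name):
///   .amdgpu_lds my_lds_buffer, 4096, 16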
6653bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6654 if (getParser().checkForValidSection())
6655 return true;
6656
6657 StringRef Name;
6658 SMLoc NameLoc = getLoc();
6659 if (getParser().parseIdentifier(Name))
6660 return TokError("expected identifier in directive");
6661
6662 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6663 if (getParser().parseComma())
6664 return true;
6665
6666 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6667
6668 int64_t Size;
6669 SMLoc SizeLoc = getLoc();
6670 if (getParser().parseAbsoluteExpression(Size))
6671 return true;
6672 if (Size < 0)
6673 return Error(SizeLoc, "size must be non-negative");
6674 if (Size > LocalMemorySize)
6675 return Error(SizeLoc, "size is too large");
6676
6677 int64_t Alignment = 4;
6678 if (trySkipToken(AsmToken::Comma)) {
6679 SMLoc AlignLoc = getLoc();
6680 if (getParser().parseAbsoluteExpression(Alignment))
6681 return true;
6682 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6683 return Error(AlignLoc, "alignment must be a power of two");
6684
6685 // Alignment larger than the size of LDS is possible in theory, as long
6686 // as the linker manages to place the symbol at address 0, but we do want
6687 // to make sure the alignment fits nicely into a 32-bit integer.
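// Illustrative: an alignment of 1u << 31 (2147483648) would not pass the check
// below and is rejected with "alignment is too large".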
6688 if (Alignment >= 1u << 31)
6689 return Error(AlignLoc, "alignment is too large");
6690 }
6691
6692 if (parseEOL())
6693 return true;
6694
6695 Symbol->redefineIfPossible();
6696 if (!Symbol->isUndefined())
6697 return Error(NameLoc, "invalid symbol redefinition");
6698
6699 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6700 return false;
6701}
6702
6703bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6704 StringRef IDVal = DirectiveID.getString();
6705
6706 if (isHsaAbi(getSTI())) {
6707 if (IDVal == ".amdhsa_kernel")
6708 return ParseDirectiveAMDHSAKernel();
6709
6710 if (IDVal == ".amdhsa_code_object_version")
6711 return ParseDirectiveAMDHSACodeObjectVersion();
6712
6713 // TODO: Restructure/combine with PAL metadata directive.
6714 if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6715 return ParseDirectiveHSAMetadata();
6716 } else {
6717 if (IDVal == ".amd_kernel_code_t")
6718 return ParseDirectiveAMDKernelCodeT();
6719
6720 if (IDVal == ".amdgpu_hsa_kernel")
6721 return ParseDirectiveAMDGPUHsaKernel();
6722
6723 if (IDVal == ".amd_amdgpu_isa")
6724 return ParseDirectiveISAVersion();
6725
6726 if (IDVal == HSAMD::AssemblerDirectiveBegin) {
6727 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6728 Twine(" directive is "
6729 "not available on non-amdhsa OSes"))
6730 .str());
6731 }
6732 }
6733
6734 if (IDVal == ".amdgcn_target")
6735 return ParseDirectiveAMDGCNTarget();
6736
6737 if (IDVal == ".amdgpu_lds")
6738 return ParseDirectiveAMDGPULDS();
6739
6740 if (IDVal == PALMD::AssemblerDirectiveBegin)
6741 return ParseDirectivePALMetadataBegin();
6742
6743 if (IDVal == PALMD::AssemblerDirective)
6744 return ParseDirectivePALMetadata();
6745
6746 return true;
6747}
6748
6749bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6750 MCRegister Reg) {
6751 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
6752 return isGFX9Plus();
6753
6754 // GFX10+ has 2 more SGPRs, 104 and 105.
6755 if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
6756 return hasSGPR104_SGPR105();
6757
6758 switch (Reg.id()) {
6759 case SRC_SHARED_BASE_LO:
6760 case SRC_SHARED_BASE:
6761 case SRC_SHARED_LIMIT_LO:
6762 case SRC_SHARED_LIMIT:
6763 case SRC_PRIVATE_BASE_LO:
6764 case SRC_PRIVATE_BASE:
6765 case SRC_PRIVATE_LIMIT_LO:
6766 case SRC_PRIVATE_LIMIT:
6767 return isGFX9Plus();
6768 case SRC_FLAT_SCRATCH_BASE_LO:
6769 case SRC_FLAT_SCRATCH_BASE_HI:
6770 return hasGloballyAddressableScratch();
6771 case SRC_POPS_EXITING_WAVE_ID:
6772 return isGFX9Plus() && !isGFX11Plus();
6773 case TBA:
6774 case TBA_LO:
6775 case TBA_HI:
6776 case TMA:
6777 case TMA_LO:
6778 case TMA_HI:
6779 return !isGFX9Plus();
6780 case XNACK_MASK:
6781 case XNACK_MASK_LO:
6782 case XNACK_MASK_HI:
6783 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6784 case SGPR_NULL:
6785 return isGFX10Plus();
6786 case SRC_EXECZ:
6787 case SRC_VCCZ:
6788 return !isGFX11Plus();
6789 default:
6790 break;
6791 }
6792
6793 if (isCI())
6794 return true;
6795
6796 if (isSI() || isGFX10Plus()) {
6797 // No flat_scr on SI.
6798 // On GFX10Plus flat scratch is not a valid register operand and can only be
6799 // accessed with s_setreg/s_getreg.
6800 switch (Reg.id()) {
6801 case FLAT_SCR:
6802 case FLAT_SCR_LO:
6803 case FLAT_SCR_HI:
6804 return false;
6805 default:
6806 return true;
6807 }
6808 }
6809
6810 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6811 // SI/CI have.
6812 if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
6813 return hasSGPR102_SGPR103();
6814
6815 return true;
6816}
6817
6818ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6819 StringRef Mnemonic,
6820 OperandMode Mode) {
6821 ParseStatus Res = parseVOPD(Operands);
6822 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6823 return Res;
6824
6825 // Try to parse with a custom parser
6826 Res = MatchOperandParserImpl(Operands, Mnemonic);
6827
6828 // If we successfully parsed the operand or if there was an error parsing,
6829 // we are done.
6830 //
6831 // If we are parsing after we reach EndOfStatement then this means we
6832 // are appending default values to the Operands list. This is only done
6833 // by custom parser, so we shouldn't continue on to the generic parsing.
6834 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6835 return Res;
6836
6837 SMLoc RBraceLoc;
6838 SMLoc LBraceLoc = getLoc();
6839 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6840 unsigned Prefix = Operands.size();
6841
6842 for (;;) {
6843 auto Loc = getLoc();
6844 Res = parseReg(Operands);
6845 if (Res.isNoMatch())
6846 Error(Loc, "expected a register");
6847 if (!Res.isSuccess())
6848 return ParseStatus::Failure;
6849
6850 RBraceLoc = getLoc();
6851 if (trySkipToken(AsmToken::RBrac))
6852 break;
6853
6854 if (!skipToken(AsmToken::Comma,
6855 "expected a comma or a closing square bracket"))
6856 return ParseStatus::Failure;
6857 }
6858
6859 if (Operands.size() - Prefix > 1) {
6860 Operands.insert(Operands.begin() + Prefix,
6861 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6862 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6863 }
6864
6865 return ParseStatus::Success;
6866 }
6867
6868 return parseRegOrImm(Operands);
6869}
6870
6871StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6872 // Clear any forced encodings from the previous instruction.
6873 setForcedEncodingSize(0);
6874 setForcedDPP(false);
6875 setForcedSDWA(false);
6876
6877 if (Name.consume_back("_e64_dpp")) {
6878 setForcedDPP(true);
6879 setForcedEncodingSize(64);
6880 return Name;
6881 }
6882 if (Name.consume_back("_e64")) {
6883 setForcedEncodingSize(64);
6884 return Name;
6885 }
6886 if (Name.consume_back("_e32")) {
6887 setForcedEncodingSize(32);
6888 return Name;
6889 }
6890 if (Name.consume_back("_dpp")) {
6891 setForcedDPP(true);
6892 return Name;
6893 }
6894 if (Name.consume_back("_sdwa")) {
6895 setForcedSDWA(true);
6896 return Name;
6897 }
6898 return Name;
6899}
6900
6901static void applyMnemonicAliases(StringRef &Mnemonic,
6902 const FeatureBitset &Features,
6903 unsigned VariantID);
6904
6905bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
6906 StringRef Name, SMLoc NameLoc,
6907 OperandVector &Operands) {
6908 // Add the instruction mnemonic
6909 Name = parseMnemonicSuffix(Name);
6910
6911 // If the target architecture uses MnemonicAlias, call it here to parse
6912 // operands correctly.
6913 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6914
6915 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6916
6917 bool IsMIMG = Name.starts_with("image_");
6918
6919 while (!trySkipToken(AsmToken::EndOfStatement)) {
6920 OperandMode Mode = OperandMode_Default;
6921 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6922 Mode = OperandMode_NSA;
6923 ParseStatus Res = parseOperand(Operands, Name, Mode);
6924
6925 if (!Res.isSuccess()) {
6926 checkUnsupportedInstruction(Name, NameLoc);
6927 if (!Parser.hasPendingError()) {
6928 // FIXME: use real operand location rather than the current location.
6929 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6930 : "not a valid operand.";
6931 Error(getLoc(), Msg);
6932 }
6933 while (!trySkipToken(AsmToken::EndOfStatement)) {
6934 lex();
6935 }
6936 return true;
6937 }
6938
6939 // Eat the comma or space if there is one.
6940 trySkipToken(AsmToken::Comma);
6941 }
6942
6943 return false;
6944}
6945
6946//===----------------------------------------------------------------------===//
6947// Utility functions
6948//===----------------------------------------------------------------------===//
6949
6950ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6951 OperandVector &Operands) {
6952 SMLoc S = getLoc();
6953 if (!trySkipId(Name))
6954 return ParseStatus::NoMatch;
6955
6956 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6957 return ParseStatus::Success;
6958}
6959
6960ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6961 int64_t &IntVal) {
6962
6963 if (!trySkipId(Prefix, AsmToken::Colon))
6964 return ParseStatus::NoMatch;
6965
6966 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6967}
6968
6969ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6970 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6971 std::function<bool(int64_t &)> ConvertResult) {
6972 SMLoc S = getLoc();
6973 int64_t Value = 0;
6974
6975 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6976 if (!Res.isSuccess())
6977 return Res;
6978
6979 if (ConvertResult && !ConvertResult(Value)) {
6980 Error(S, "invalid " + StringRef(Prefix) + " value.");
6981 }
6982
6983 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6984 return ParseStatus::Success;
6985}
6986
6987ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6988 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6989 bool (*ConvertResult)(int64_t &)) {
6990 SMLoc S = getLoc();
6991 if (!trySkipId(Prefix, AsmToken::Colon))
6992 return ParseStatus::NoMatch;
6993
6994 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6995 return ParseStatus::Failure;
6996
6997 unsigned Val = 0;
6998 const unsigned MaxSize = 4;
6999
7000 // FIXME: How to verify the number of elements matches the number of src
7001 // operands?
7002 for (int I = 0; ; ++I) {
7003 int64_t Op;
7004 SMLoc Loc = getLoc();
7005 if (!parseExpr(Op))
7006 return ParseStatus::Failure;
7007
7008 if (Op != 0 && Op != 1)
7009 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
7010
7011 Val |= (Op << I);
7012
7013 if (trySkipToken(AsmToken::RBrac))
7014 break;
7015
7016 if (I + 1 == MaxSize)
7017 return Error(getLoc(), "expected a closing square bracket");
7018
7019 if (!skipToken(AsmToken::Comma, "expected a comma"))
7020 return ParseStatus::Failure;
7021 }
7022
7023 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
7024 return ParseStatus::Success;
7025}
7026
7027ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7028 OperandVector &Operands,
7029 AMDGPUOperand::ImmTy ImmTy) {
7030 int64_t Bit;
7031 SMLoc S = getLoc();
7032
7033 if (trySkipId(Name)) {
7034 Bit = 1;
7035 } else if (trySkipId("no", Name)) {
7036 Bit = 0;
7037 } else {
7038 return ParseStatus::NoMatch;
7039 }
7040
7041 if (Name == "r128" && !hasMIMG_R128())
7042 return Error(S, "r128 modifier is not supported on this GPU");
7043 if (Name == "a16" && !hasA16())
7044 return Error(S, "a16 modifier is not supported on this GPU");
7045
7046 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7047 ImmTy = AMDGPUOperand::ImmTyR128A16;
7048
7049 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
7050 return ParseStatus::Success;
7051}
7052
7053unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7054 bool &Disabling) const {
7055 Disabling = Id.consume_front("no");
7056
7057 if (isGFX940() && !Mnemo.starts_with("s_")) {
7058 return StringSwitch<unsigned>(Id)
7059 .Case("nt", AMDGPU::CPol::NT)
7060 .Case("sc0", AMDGPU::CPol::SC0)
7061 .Case("sc1", AMDGPU::CPol::SC1)
7062 .Default(0);
7063 }
7064
7065 return StringSwitch<unsigned>(Id)
7066 .Case("dlc", AMDGPU::CPol::DLC)
7067 .Case("glc", AMDGPU::CPol::GLC)
7068 .Case("scc", AMDGPU::CPol::SCC)
7069 .Case("slc", AMDGPU::CPol::SLC)
7070 .Default(0);
7071}
7072
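// Cache-policy parsing is generation dependent. On GFX12+ the policy is
// assembled from th:..., scope:..., nv and scale_offset specifiers, e.g.
// (illustrative) "global_load_b32 v0, v[2:3], off th:TH_LOAD_NT scope:SCOPE_SYS",
// while older targets use standalone flags such as glc, slc, dlc and scc,
// each optionally negated with a "no" prefix (noglc, noslc, ...).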
7073ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
7074 if (isGFX12Plus()) {
7075 SMLoc StringLoc = getLoc();
7076
7077 int64_t CPolVal = 0;
7078 ParseStatus ResTH = ParseStatus::NoMatch;
7079 ParseStatus ResScope = ParseStatus::NoMatch;
7080 ParseStatus ResNV = ParseStatus::NoMatch;
7081 ParseStatus ResScal = ParseStatus::NoMatch;
7082
7083 for (;;) {
7084 if (ResTH.isNoMatch()) {
7085 int64_t TH;
7086 ResTH = parseTH(Operands, TH);
7087 if (ResTH.isFailure())
7088 return ResTH;
7089 if (ResTH.isSuccess()) {
7090 CPolVal |= TH;
7091 continue;
7092 }
7093 }
7094
7095 if (ResScope.isNoMatch()) {
7096 int64_t Scope;
7097 ResScope = parseScope(Operands, Scope);
7098 if (ResScope.isFailure())
7099 return ResScope;
7100 if (ResScope.isSuccess()) {
7101 CPolVal |= Scope;
7102 continue;
7103 }
7104 }
7105
7106 // The NV bit exists on GFX12+, but only takes effect starting from GFX1250.
7107 // Allow parsing on all GFX12 targets and reject it during validation for
7108 // better diagnostics.
7109 if (ResNV.isNoMatch()) {
7110 if (trySkipId("nv")) {
7111 ResNV = ParseStatus::Success;
7112 CPolVal |= CPol::NV;
7113 continue;
7114 } else if (trySkipId("no", "nv")) {
7115 ResNV = ParseStatus::Success;
7116 continue;
7117 }
7118 }
7119
7120 if (ResScal.isNoMatch()) {
7121 if (trySkipId("scale_offset")) {
7122 ResScal = ParseStatus::Success;
7123 CPolVal |= CPol::SCAL;
7124 continue;
7125 } else if (trySkipId("no", "scale_offset")) {
7126 ResScal = ParseStatus::Success;
7127 continue;
7128 }
7129 }
7130
7131 break;
7132 }
7133
7134 if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
7135 ResScal.isNoMatch())
7136 return ParseStatus::NoMatch;
7137
7138 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
7139 AMDGPUOperand::ImmTyCPol));
7140 return ParseStatus::Success;
7141 }
7142
7143 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7144 SMLoc OpLoc = getLoc();
7145 unsigned Enabled = 0, Seen = 0;
7146 for (;;) {
7147 SMLoc S = getLoc();
7148 bool Disabling;
7149 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7150 if (!CPol)
7151 break;
7152
7153 lex();
7154
7155 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
7156 return Error(S, "dlc modifier is not supported on this GPU");
7157
7158 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
7159 return Error(S, "scc modifier is not supported on this GPU");
7160
7161 if (Seen & CPol)
7162 return Error(S, "duplicate cache policy modifier");
7163
7164 if (!Disabling)
7165 Enabled |= CPol;
7166
7167 Seen |= CPol;
7168 }
7169
7170 if (!Seen)
7171 return ParseStatus::NoMatch;
7172
7173 Operands.push_back(
7174 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7175 return ParseStatus::Success;
7176}
7177
7178ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7179 int64_t &Scope) {
7180 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7181 CPol::SCOPE_DEV, CPol::SCOPE_SYS};
7182
7183 ParseStatus Res = parseStringOrIntWithPrefix(
7184 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7185 Scope);
7186
7187 if (Res.isSuccess())
7188 Scope = Scopes[Scope];
7189
7190 return Res;
7191}
7192
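// The th: value is matched by stripping a TH_ATOMIC_/TH_LOAD_/TH_STORE_
// prefix to select the policy type and mapping the remaining suffix (RT, NT,
// HT, LU, WB, RETURN, CASCADE_*, ...) onto the encoding, e.g. (illustrative)
// th:TH_LOAD_NT or th:TH_ATOMIC_RETURN; a few spellings such as TH_STORE_LU
// are explicitly rejected here.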
7193ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
7194 TH = AMDGPU::CPol::TH_RT; // default
7195
7196 StringRef Value;
7197 SMLoc StringLoc;
7198 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
7199 if (!Res.isSuccess())
7200 return Res;
7201
7202 if (Value == "TH_DEFAULT")
7203 TH = AMDGPU::CPol::TH_RT;
7204 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
7205 Value == "TH_LOAD_NT_WB") {
7206 return Error(StringLoc, "invalid th value");
7207 } else if (Value.consume_front("TH_ATOMIC_")) {
7208 TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
7209 } else if (Value.consume_front("TH_LOAD_")) {
7210 TH = AMDGPU::CPol::TH_TYPE_LOAD;
7211 } else if (Value.consume_front("TH_STORE_")) {
7212 TH = AMDGPU::CPol::TH_TYPE_STORE;
7213 } else {
7214 return Error(StringLoc, "invalid th value");
7215 }
7216
7217 if (Value == "BYPASS")
7218 TH |= AMDGPU::CPol::TH_REAL_BYPASS;
7219
7220 if (TH != 0) {
7221 if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
7222 TH |= StringSwitch<int64_t>(Value)
7223 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7224 .Case("RT", AMDGPU::CPol::TH_RT)
7225 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7226 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
7227 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
7228 AMDGPU::CPol::TH_ATOMIC_RETURN)
7229 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
7230 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
7231 AMDGPU::CPol::TH_ATOMIC_NT)
7232 .Default(0xffffffff);
7233 else
7234 TH |= StringSwitch<int64_t>(Value)
7235 .Case("RT", AMDGPU::CPol::TH_RT)
7236 .Case("NT", AMDGPU::CPol::TH_NT)
7237 .Case("HT", AMDGPU::CPol::TH_HT)
7238 .Case("LU", AMDGPU::CPol::TH_LU)
7239 .Case("WB", AMDGPU::CPol::TH_WB)
7240 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
7241 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
7242 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
7243 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
7244 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
7245 .Default(0xffffffff);
7246 }
7247
7248 if (TH == 0xffffffff)
7249 return Error(StringLoc, "invalid th value");
7250
7251 return ParseStatus::Success;
7252}
7253
7254static void
7255addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands,
7256 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7257 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7258 std::optional<unsigned> InsertAt = std::nullopt) {
7259 auto i = OptionalIdx.find(ImmT);
7260 if (i != OptionalIdx.end()) {
7261 unsigned Idx = i->second;
7262 const AMDGPUOperand &Op =
7263 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7264 if (InsertAt)
7265 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
7266 else
7267 Op.addImmOperands(Inst, 1);
7268 } else {
7269 if (InsertAt.has_value())
7270 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
7271 else
7272 Inst.addOperand(MCOperand::createImm(Default));
7273 }
7274}
7275
7276ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7277 StringRef &Value,
7278 SMLoc &StringLoc) {
7279 if (!trySkipId(Prefix, AsmToken::Colon))
7280 return ParseStatus::NoMatch;
7281
7282 StringLoc = getLoc();
7283 return parseId(Value, "expected an identifier") ? ParseStatus::Success
7284 : ParseStatus::Failure;
7285}
7286
7287ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7288 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7289 int64_t &IntVal) {
7290 if (!trySkipId(Name, AsmToken::Colon))
7291 return ParseStatus::NoMatch;
7292
7293 SMLoc StringLoc = getLoc();
7294
7295 StringRef Value;
7296 if (isToken(AsmToken::Identifier)) {
7297 Value = getTokenStr();
7298 lex();
7299
7300 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7301 if (Value == Ids[IntVal])
7302 break;
7303 } else if (!parseExpr(IntVal))
7304 return ParseStatus::Failure;
7305
7306 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7307 return Error(StringLoc, "invalid " + Twine(Name) + " value");
7308
7309 return ParseStatus::Success;
7310}
7311
7312ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7313 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7314 AMDGPUOperand::ImmTy Type) {
7315 SMLoc S = getLoc();
7316 int64_t IntVal;
7317
7318 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7319 if (Res.isSuccess())
7320 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7321
7322 return Res;
7323}
7324
7325//===----------------------------------------------------------------------===//
7326// MTBUF format
7327//===----------------------------------------------------------------------===//
7328
7329bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7330 int64_t MaxVal,
7331 int64_t &Fmt) {
7332 int64_t Val;
7333 SMLoc Loc = getLoc();
7334
7335 auto Res = parseIntWithPrefix(Pref, Val);
7336 if (Res.isFailure())
7337 return false;
7338 if (Res.isNoMatch())
7339 return true;
7340
7341 if (Val < 0 || Val > MaxVal) {
7342 Error(Loc, Twine("out of range ", StringRef(Pref)));
7343 return false;
7344 }
7345
7346 Fmt = Val;
7347 return true;
7348}
7349
7350ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7351 AMDGPUOperand::ImmTy ImmTy) {
7352 const char *Pref = "index_key";
7353 int64_t ImmVal = 0;
7354 SMLoc Loc = getLoc();
7355 auto Res = parseIntWithPrefix(Pref, ImmVal);
7356 if (!Res.isSuccess())
7357 return Res;
7358
7359 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7360 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7361 (ImmVal < 0 || ImmVal > 1))
7362 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7363
7364 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7365 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7366
7367 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7368 return ParseStatus::Success;
7369}
7370
7371ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
7372 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7373}
7374
7375ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
7376 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7377}
7378
7379ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
7380 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7381}
7382
7383ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
7384 StringRef Name,
7385 AMDGPUOperand::ImmTy Type) {
7386 return parseStringOrIntWithPrefix(Operands, Name,
7387 {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
7388 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
7389 "MATRIX_FMT_FP4"},
7390 Type);
7391}
7392
7393ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
7394 return tryParseMatrixFMT(Operands, "matrix_a_fmt",
7395 AMDGPUOperand::ImmTyMatrixAFMT);
7396}
7397
7398ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
7399 return tryParseMatrixFMT(Operands, "matrix_b_fmt",
7400 AMDGPUOperand::ImmTyMatrixBFMT);
7401}
7402
7403ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
7404 StringRef Name,
7405 AMDGPUOperand::ImmTy Type) {
7406 return parseStringOrIntWithPrefix(
7407 Operands, Name, {"MATRIX_SCALE_ROW0", "MATRIX_SCALE_ROW1"}, Type);
7408}
7409
7410ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
7411 return tryParseMatrixScale(Operands, "matrix_a_scale",
7412 AMDGPUOperand::ImmTyMatrixAScale);
7413}
7414
7415ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
7416 return tryParseMatrixScale(Operands, "matrix_b_scale",
7417 AMDGPUOperand::ImmTyMatrixBScale);
7418}
7419
7420ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
7421 StringRef Name,
7422 AMDGPUOperand::ImmTy Type) {
7423 return parseStringOrIntWithPrefix(
7424 Operands, Name,
7425 {"MATRIX_SCALE_FMT_E8", "MATRIX_SCALE_FMT_E5M3", "MATRIX_SCALE_FMT_E4M3"},
7426 Type);
7427}
7428
7429ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
7430 return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
7431 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7432}
7433
7434ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
7435 return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
7436 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7437}
7438
7439// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7440// values to live in a joint format operand in the MCInst encoding.
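// For example (illustrative), "dfmt:4, nfmt:7" is accepted here, with either
// half optional and in either order; the symbolic format:[...] spelling is
// handled separately by parseSymbolicSplitFormat below.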
7441ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
7442 using namespace llvm::AMDGPU::MTBUFFormat;
7443
7444 int64_t Dfmt = DFMT_UNDEF;
7445 int64_t Nfmt = NFMT_UNDEF;
7446
7447 // dfmt and nfmt can appear in either order, and each is optional.
7448 for (int I = 0; I < 2; ++I) {
7449 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
7450 return ParseStatus::Failure;
7451
7452 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
7453 return ParseStatus::Failure;
7454
7455 // Skip optional comma between dfmt/nfmt
7456 // but guard against 2 commas following each other.
7457 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7458 !peekToken().is(AsmToken::Comma)) {
7459 trySkipToken(AsmToken::Comma);
7460 }
7461 }
7462
7463 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7464 return ParseStatus::NoMatch;
7465
7466 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7467 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7468
7469 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7470 return ParseStatus::Success;
7471}
7472
7473ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7474 using namespace llvm::AMDGPU::MTBUFFormat;
7475
7476 int64_t Fmt = UFMT_UNDEF;
7477
7478 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7479 return ParseStatus::Failure;
7480
7481 if (Fmt == UFMT_UNDEF)
7482 return ParseStatus::NoMatch;
7483
7484 Format = Fmt;
7485 return ParseStatus::Success;
7486}
7487
7488bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7489 int64_t &Nfmt,
7490 StringRef FormatStr,
7491 SMLoc Loc) {
7492 using namespace llvm::AMDGPU::MTBUFFormat;
7493 int64_t Format;
7494
7495 Format = getDfmt(FormatStr);
7496 if (Format != DFMT_UNDEF) {
7497 Dfmt = Format;
7498 return true;
7499 }
7500
7501 Format = getNfmt(FormatStr, getSTI());
7502 if (Format != NFMT_UNDEF) {
7503 Nfmt = Format;
7504 return true;
7505 }
7506
7507 Error(Loc, "unsupported format");
7508 return false;
7509}
7510
7511ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7512 SMLoc FormatLoc,
7513 int64_t &Format) {
7514 using namespace llvm::AMDGPU::MTBUFFormat;
7515
7516 int64_t Dfmt = DFMT_UNDEF;
7517 int64_t Nfmt = NFMT_UNDEF;
7518 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7519 return ParseStatus::Failure;
7520
7521 if (trySkipToken(AsmToken::Comma)) {
7522 StringRef Str;
7523 SMLoc Loc = getLoc();
7524 if (!parseId(Str, "expected a format string") ||
7525 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7526 return ParseStatus::Failure;
7527 if (Dfmt == DFMT_UNDEF)
7528 return Error(Loc, "duplicate numeric format");
7529 if (Nfmt == NFMT_UNDEF)
7530 return Error(Loc, "duplicate data format");
7531 }
7532
7533 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7534 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7535
7536 if (isGFX10Plus()) {
7537 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
7538 if (Ufmt == UFMT_UNDEF)
7539 return Error(FormatLoc, "unsupported format");
7540 Format = Ufmt;
7541 } else {
7542 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7543 }
7544
7545 return ParseStatus::Success;
7546}
7547
7548ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7549 SMLoc Loc,
7550 int64_t &Format) {
7551 using namespace llvm::AMDGPU::MTBUFFormat;
7552
7553 auto Id = getUnifiedFormat(FormatStr, getSTI());
7554 if (Id == UFMT_UNDEF)
7555 return ParseStatus::NoMatch;
7556
7557 if (!isGFX10Plus())
7558 return Error(Loc, "unified format is not supported on this GPU");
7559
7560 Format = Id;
7561 return ParseStatus::Success;
7562}
7563
7564ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7565 using namespace llvm::AMDGPU::MTBUFFormat;
7566 SMLoc Loc = getLoc();
7567
7568 if (!parseExpr(Format))
7569 return ParseStatus::Failure;
7570 if (!isValidFormatEncoding(Format, getSTI()))
7571 return Error(Loc, "out of range format");
7572
7573 return ParseStatus::Success;
7574}
7575
7576ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7577 using namespace llvm::AMDGPU::MTBUFFormat;
7578
7579 if (!trySkipId("format", AsmToken::Colon))
7580 return ParseStatus::NoMatch;
7581
7582 if (trySkipToken(AsmToken::LBrac)) {
7583 StringRef FormatStr;
7584 SMLoc Loc = getLoc();
7585 if (!parseId(FormatStr, "expected a format string"))
7586 return ParseStatus::Failure;
7587
7588 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7589 if (Res.isNoMatch())
7590 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7591 if (!Res.isSuccess())
7592 return Res;
7593
7594 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7595 return ParseStatus::Failure;
7596
7597 return ParseStatus::Success;
7598 }
7599
7600 return parseNumericFormat(Format);
7601}
7602
7603ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
7604 using namespace llvm::AMDGPU::MTBUFFormat;
7605
7606 int64_t Format = getDefaultFormatEncoding(getSTI());
7607 ParseStatus Res;
7608 SMLoc Loc = getLoc();
7609
7610 // Parse legacy format syntax.
7611 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
7612 if (Res.isFailure())
7613 return Res;
7614
7615 bool FormatFound = Res.isSuccess();
7616
7617 Operands.push_back(
7618 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7619
7620 if (FormatFound)
7621 trySkipToken(AsmToken::Comma);
7622
7623 if (isToken(AsmToken::EndOfStatement)) {
7624 // We are expecting an soffset operand,
7625 // but let matcher handle the error.
7626 return ParseStatus::Success;
7627 }
7628
7629 // Parse soffset.
7630 Res = parseRegOrImm(Operands);
7631 if (!Res.isSuccess())
7632 return Res;
7633
7634 trySkipToken(AsmToken::Comma);
7635
7636 if (!FormatFound) {
7637 Res = parseSymbolicOrNumericFormat(Format);
7638 if (Res.isFailure())
7639 return Res;
7640 if (Res.isSuccess()) {
7641 auto Size = Operands.size();
7642 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7643 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7644 Op.setImm(Format);
7645 }
7646 return ParseStatus::Success;
7647 }
7648
7649 if (isId("format") && peekToken().is(AsmToken::Colon))
7650 return Error(getLoc(), "duplicate format");
7651 return ParseStatus::Success;
7652}
7653
7654ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7655 ParseStatus Res =
7656 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7657 if (Res.isNoMatch()) {
7658 Res = parseIntWithPrefix("inst_offset", Operands,
7659 AMDGPUOperand::ImmTyInstOffset);
7660 }
7661 return Res;
7662}
7663
7664ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7665 ParseStatus Res =
7666 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7667 if (Res.isNoMatch())
7668 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7669 return Res;
7670}
7671
7672ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7673 ParseStatus Res =
7674 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7675 if (Res.isNoMatch()) {
7676 Res =
7677 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7678 }
7679 return Res;
7680}
7681
7682//===----------------------------------------------------------------------===//
7683// Exp
7684//===----------------------------------------------------------------------===//
7685
7686void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7687 OptionalImmIndexMap OptionalIdx;
7688
7689 unsigned OperandIdx[4];
7690 unsigned EnMask = 0;
7691 int SrcIdx = 0;
7692
7693 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7694 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7695
7696 // Add the register arguments
7697 if (Op.isReg()) {
7698 assert(SrcIdx < 4);
7699 OperandIdx[SrcIdx] = Inst.size();
7700 Op.addRegOperands(Inst, 1);
7701 ++SrcIdx;
7702 continue;
7703 }
7704
7705 if (Op.isOff()) {
7706 assert(SrcIdx < 4);
7707 OperandIdx[SrcIdx] = Inst.size();
7708 Inst.addOperand(MCOperand::createReg(MCRegister()));
7709 ++SrcIdx;
7710 continue;
7711 }
7712
7713 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7714 Op.addImmOperands(Inst, 1);
7715 continue;
7716 }
7717
7718 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7719 continue;
7720
7721 // Handle optional arguments
7722 OptionalIdx[Op.getImmTy()] = i;
7723 }
7724
7725 assert(SrcIdx == 4);
7726
7727 bool Compr = false;
7728 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7729 Compr = true;
7730 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7731 Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
7732 Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
7733 }
7734
7735 for (auto i = 0; i < SrcIdx; ++i) {
7736 if (Inst.getOperand(OperandIdx[i]).getReg()) {
7737 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7738 }
7739 }
7740
7741 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7742 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7743
7744 Inst.addOperand(MCOperand::createImm(EnMask));
7745}
7746
7747//===----------------------------------------------------------------------===//
7748// s_waitcnt
7749//===----------------------------------------------------------------------===//
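// s_waitcnt accepts either a raw immediate/expression or a list of named
// counters, e.g. (illustrative) "s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)".
// The *_sat counter names clamp an out-of-range value to the counter's
// maximum instead of reporting an error.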
7750
7751static bool
7752encodeCnt(
7753 const AMDGPU::IsaVersion ISA,
7754 int64_t &IntVal,
7755 int64_t CntVal,
7756 bool Saturate,
7757 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7758 unsigned (*decode)(const IsaVersion &Version, unsigned))
7759{
7760 bool Failed = false;
7761
7762 IntVal = encode(ISA, IntVal, CntVal);
7763 if (CntVal != decode(ISA, IntVal)) {
7764 if (Saturate) {
7765 IntVal = encode(ISA, IntVal, -1);
7766 } else {
7767 Failed = true;
7768 }
7769 }
7770 return Failed;
7771}
7772
7773bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7774
7775 SMLoc CntLoc = getLoc();
7776 StringRef CntName = getTokenStr();
7777
7778 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7779 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7780 return false;
7781
7782 int64_t CntVal;
7783 SMLoc ValLoc = getLoc();
7784 if (!parseExpr(CntVal))
7785 return false;
7786
7787 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7788
7789 bool Failed = true;
7790 bool Sat = CntName.ends_with("_sat");
7791
7792 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7793 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7794 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7795 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7796 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7797 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7798 } else {
7799 Error(CntLoc, "invalid counter name " + CntName);
7800 return false;
7801 }
7802
7803 if (Failed) {
7804 Error(ValLoc, "too large value for " + CntName);
7805 return false;
7806 }
7807
7808 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7809 return false;
7810
7811 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7812 if (isToken(AsmToken::EndOfStatement)) {
7813 Error(getLoc(), "expected a counter name");
7814 return false;
7815 }
7816 }
7817
7818 return true;
7819}
7820
7821ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7822 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7823 int64_t Waitcnt = getWaitcntBitMask(ISA);
7824 SMLoc S = getLoc();
7825
7826 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7827 while (!isToken(AsmToken::EndOfStatement)) {
7828 if (!parseCnt(Waitcnt))
7829 return ParseStatus::Failure;
7830 }
7831 } else {
7832 if (!parseExpr(Waitcnt))
7833 return ParseStatus::Failure;
7834 }
7835
7836 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7837 return ParseStatus::Success;
7838}
7839
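// s_delay_alu operands are written as named fields joined by '|', e.g.
// (illustrative) "s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) |
// instid1(VALU_DEP_3)"; each field is shifted into its bit position within
// the combined immediate.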
7840bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7841 SMLoc FieldLoc = getLoc();
7842 StringRef FieldName = getTokenStr();
7843 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7844 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7845 return false;
7846
7847 SMLoc ValueLoc = getLoc();
7848 StringRef ValueName = getTokenStr();
7849 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7850 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7851 return false;
7852
7853 unsigned Shift;
7854 if (FieldName == "instid0") {
7855 Shift = 0;
7856 } else if (FieldName == "instskip") {
7857 Shift = 4;
7858 } else if (FieldName == "instid1") {
7859 Shift = 7;
7860 } else {
7861 Error(FieldLoc, "invalid field name " + FieldName);
7862 return false;
7863 }
7864
7865 int Value;
7866 if (Shift == 4) {
7867 // Parse values for instskip.
7868 Value = StringSwitch<int>(ValueName)
7869 .Case("SAME", 0)
7870 .Case("NEXT", 1)
7871 .Case("SKIP_1", 2)
7872 .Case("SKIP_2", 3)
7873 .Case("SKIP_3", 4)
7874 .Case("SKIP_4", 5)
7875 .Default(-1);
7876 } else {
7877 // Parse values for instid0 and instid1.
7878 Value = StringSwitch<int>(ValueName)
7879 .Case("NO_DEP", 0)
7880 .Case("VALU_DEP_1", 1)
7881 .Case("VALU_DEP_2", 2)
7882 .Case("VALU_DEP_3", 3)
7883 .Case("VALU_DEP_4", 4)
7884 .Case("TRANS32_DEP_1", 5)
7885 .Case("TRANS32_DEP_2", 6)
7886 .Case("TRANS32_DEP_3", 7)
7887 .Case("FMA_ACCUM_CYCLE_1", 8)
7888 .Case("SALU_CYCLE_1", 9)
7889 .Case("SALU_CYCLE_2", 10)
7890 .Case("SALU_CYCLE_3", 11)
7891 .Default(-1);
7892 }
7893 if (Value < 0) {
7894 Error(ValueLoc, "invalid value name " + ValueName);
7895 return false;
7896 }
7897
7898 Delay |= Value << Shift;
7899 return true;
7900}
7901
7902ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7903 int64_t Delay = 0;
7904 SMLoc S = getLoc();
7905
7906 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7907 do {
7908 if (!parseDelay(Delay))
7909 return ParseStatus::Failure;
7910 } while (trySkipToken(AsmToken::Pipe));
7911 } else {
7912 if (!parseExpr(Delay))
7913 return ParseStatus::Failure;
7914 }
7915
7916 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7917 return ParseStatus::Success;
7918}
7919
7920bool
7921AMDGPUOperand::isSWaitCnt() const {
7922 return isImm();
7923}
7924
7925bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7926
7927//===----------------------------------------------------------------------===//
7928// DepCtr
7929//===----------------------------------------------------------------------===//
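// s_waitcnt_depctr takes either a plain immediate or named counters joined
// by '&' or ',', e.g. (illustrative) "depctr_va_vdst(0) depctr_sa_sdst(0)";
// encodeDepCtr reports unknown, unsupported, duplicate or out-of-range
// counters through the error ids handled below.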
7930
7931void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7932 StringRef DepCtrName) {
7933 switch (ErrorId) {
7934 case OPR_ID_UNKNOWN:
7935 Error(Loc, Twine("invalid counter name ", DepCtrName));
7936 return;
7937 case OPR_ID_UNSUPPORTED:
7938 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7939 return;
7940 case OPR_ID_DUPLICATE:
7941 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7942 return;
7943 case OPR_VAL_INVALID:
7944 Error(Loc, Twine("invalid value for ", DepCtrName));
7945 return;
7946 default:
7947 assert(false);
7948 }
7949}
7950
7951bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7952
7953 using namespace llvm::AMDGPU::DepCtr;
7954
7955 SMLoc DepCtrLoc = getLoc();
7956 StringRef DepCtrName = getTokenStr();
7957
7958 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7959 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7960 return false;
7961
7962 int64_t ExprVal;
7963 if (!parseExpr(ExprVal))
7964 return false;
7965
7966 unsigned PrevOprMask = UsedOprMask;
7967 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7968
7969 if (CntVal < 0) {
7970 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7971 return false;
7972 }
7973
7974 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7975 return false;
7976
7977 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7978 if (isToken(AsmToken::EndOfStatement)) {
7979 Error(getLoc(), "expected a counter name");
7980 return false;
7981 }
7982 }
7983
7984 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7985 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7986 return true;
7987}
7988
7989ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7990 using namespace llvm::AMDGPU::DepCtr;
7991
7992 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7993 SMLoc Loc = getLoc();
7994
7995 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7996 unsigned UsedOprMask = 0;
7997 while (!isToken(AsmToken::EndOfStatement)) {
7998 if (!parseDepCtr(DepCtr, UsedOprMask))
7999 return ParseStatus::Failure;
8000 }
8001 } else {
8002 if (!parseExpr(DepCtr))
8003 return ParseStatus::Failure;
8004 }
8005
8006 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
8007 return ParseStatus::Success;
8008}
8009
8010bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8011
8012//===----------------------------------------------------------------------===//
8013// hwreg
8014//===----------------------------------------------------------------------===//
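// A hardware register operand may be a plain 16-bit immediate, the
// hwreg(&lt;name|id&gt;[, &lt;offset&gt;, &lt;size&gt;]) macro, or a structured immediate with
// named fields, e.g. (illustrative):
//   s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
//   s_getreg_b32 s2, {id: 1, offset: 0, size: 4}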
8015
8016ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8017 OperandInfoTy &Offset,
8018 OperandInfoTy &Width) {
8019 using namespace llvm::AMDGPU::Hwreg;
8020
8021 if (!trySkipId("hwreg", AsmToken::LParen))
8022 return ParseStatus::NoMatch;
8023
8024 // The register may be specified by name or using a numeric code
8025 HwReg.Loc = getLoc();
8026 if (isToken(AsmToken::Identifier) &&
8027 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8028 HwReg.IsSymbolic = true;
8029 lex(); // skip register name
8030 } else if (!parseExpr(HwReg.Val, "a register name")) {
8031 return ParseStatus::Failure;
8032 }
8033
8034 if (trySkipToken(AsmToken::RParen))
8035 return ParseStatus::Success;
8036
8037 // parse optional params
8038 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
8039 return ParseStatus::Failure;
8040
8041 Offset.Loc = getLoc();
8042 if (!parseExpr(Offset.Val))
8043 return ParseStatus::Failure;
8044
8045 if (!skipToken(AsmToken::Comma, "expected a comma"))
8046 return ParseStatus::Failure;
8047
8048 Width.Loc = getLoc();
8049 if (!parseExpr(Width.Val) ||
8050 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8051 return ParseStatus::Failure;
8052
8053 return ParseStatus::Success;
8054}
8055
8056ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8057 using namespace llvm::AMDGPU::Hwreg;
8058
8059 int64_t ImmVal = 0;
8060 SMLoc Loc = getLoc();
8061
8062 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8063 HwregId::Default);
8064 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8065 HwregOffset::Default);
8066 struct : StructuredOpField {
8067 using StructuredOpField::StructuredOpField;
8068 bool validate(AMDGPUAsmParser &Parser) const override {
8069 if (!isUIntN(Width, Val - 1))
8070 return Error(Parser, "only values from 1 to 32 are legal");
8071 return true;
8072 }
8073 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8074 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
8075
8076 if (Res.isNoMatch())
8077 Res = parseHwregFunc(HwReg, Offset, Width);
8078
8079 if (Res.isSuccess()) {
8080 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8081 return ParseStatus::Failure;
8082 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8083 }
8084
8085 if (Res.isNoMatch() &&
8086 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8087 Res = ParseStatus::Success;
8088
8089 if (!Res.isSuccess())
8090 return ParseStatus::Failure;
8091
8092 if (!isUInt<16>(ImmVal))
8093 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8094 Operands.push_back(
8095 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8096 return ParseStatus::Success;
8097}
8098
8099bool AMDGPUOperand::isHwreg() const {
8100 return isImmTy(ImmTyHwreg);
8101}
8102
8103//===----------------------------------------------------------------------===//
8104// sendmsg
8105//===----------------------------------------------------------------------===//
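// A message operand is a plain 16-bit immediate or the sendmsg() macro, e.g.
// (illustrative) "s_sendmsg sendmsg(MSG_INTERRUPT)" or
// "s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)". Symbolic names are validated
// strictly; numeric ids only need to be encodable.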
8106
8107bool
8108AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8109 OperandInfoTy &Op,
8110 OperandInfoTy &Stream) {
8111 using namespace llvm::AMDGPU::SendMsg;
8112
8113 Msg.Loc = getLoc();
8114 if (isToken(AsmToken::Identifier) &&
8115 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8116 Msg.IsSymbolic = true;
8117 lex(); // skip message name
8118 } else if (!parseExpr(Msg.Val, "a message name")) {
8119 return false;
8120 }
8121
8122 if (trySkipToken(AsmToken::Comma)) {
8123 Op.IsDefined = true;
8124 Op.Loc = getLoc();
8125 if (isToken(AsmToken::Identifier) &&
8126 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8127 OPR_ID_UNKNOWN) {
8128 lex(); // skip operation name
8129 } else if (!parseExpr(Op.Val, "an operation name")) {
8130 return false;
8131 }
8132
8133 if (trySkipToken(AsmToken::Comma)) {
8134 Stream.IsDefined = true;
8135 Stream.Loc = getLoc();
8136 if (!parseExpr(Stream.Val))
8137 return false;
8138 }
8139 }
8140
8141 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
8142}
8143
8144bool
8145AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
8146 const OperandInfoTy &Op,
8147 const OperandInfoTy &Stream) {
8148 using namespace llvm::AMDGPU::SendMsg;
8149
8150 // Validation strictness depends on whether the message is specified
8151 // in a symbolic or in a numeric form. In the latter case,
8152 // only the possibility of encoding the value is checked.
8153 bool Strict = Msg.IsSymbolic;
8154
8155 if (Strict) {
8156 if (Msg.Val == OPR_ID_UNSUPPORTED) {
8157 Error(Msg.Loc, "specified message id is not supported on this GPU");
8158 return false;
8159 }
8160 } else {
8161 if (!isValidMsgId(Msg.Val, getSTI())) {
8162 Error(Msg.Loc, "invalid message id");
8163 return false;
8164 }
8165 }
8166 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
8167 if (Op.IsDefined) {
8168 Error(Op.Loc, "message does not support operations");
8169 } else {
8170 Error(Msg.Loc, "missing message operation");
8171 }
8172 return false;
8173 }
8174 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
8175 if (Op.Val == OPR_ID_UNSUPPORTED)
8176 Error(Op.Loc, "specified operation id is not supported on this GPU");
8177 else
8178 Error(Op.Loc, "invalid operation id");
8179 return false;
8180 }
8181 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
8182 Stream.IsDefined) {
8183 Error(Stream.Loc, "message operation does not support streams");
8184 return false;
8185 }
8186 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
8187 Error(Stream.Loc, "invalid message stream id");
8188 return false;
8189 }
8190 return true;
8191}
8192
8193ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
8194 using namespace llvm::AMDGPU::SendMsg;
8195
8196 int64_t ImmVal = 0;
8197 SMLoc Loc = getLoc();
8198
8199 if (trySkipId("sendmsg", AsmToken::LParen)) {
8200 OperandInfoTy Msg(OPR_ID_UNKNOWN);
8201 OperandInfoTy Op(OP_NONE_);
8202 OperandInfoTy Stream(STREAM_ID_NONE_);
8203 if (parseSendMsgBody(Msg, Op, Stream) &&
8204 validateSendMsg(Msg, Op, Stream)) {
8205 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
8206 } else {
8207 return ParseStatus::Failure;
8208 }
8209 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
8210 if (ImmVal < 0 || !isUInt<16>(ImmVal))
8211 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8212 } else {
8213 return ParseStatus::Failure;
8214 }
8215
8216 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8217 return ParseStatus::Success;
8218}
8219
8220bool AMDGPUOperand::isSendMsg() const {
8221 return isImmTy(ImmTySendMsg);
8222}
8223
8224//===----------------------------------------------------------------------===//
8225// v_interp
8226//===----------------------------------------------------------------------===//
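// Interpolation operands use the p10/p20/p0 slot names and the attrN.{x,y,z,w}
// attribute syntax, e.g. (illustrative) "v_interp_p1_f32 v0, v1, attr0.x";
// parseInterpAttr splits "attrN.c" into an attribute number and a channel.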
8227
8228ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8229 StringRef Str;
8230 SMLoc S = getLoc();
8231
8232 if (!parseId(Str))
8233 return ParseStatus::NoMatch;
8234
8235 int Slot = StringSwitch<int>(Str)
8236 .Case("p10", 0)
8237 .Case("p20", 1)
8238 .Case("p0", 2)
8239 .Default(-1);
8240
8241 if (Slot == -1)
8242 return Error(S, "invalid interpolation slot");
8243
8244 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8245 AMDGPUOperand::ImmTyInterpSlot));
8246 return ParseStatus::Success;
8247}
8248
8249ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8250 StringRef Str;
8251 SMLoc S = getLoc();
8252
8253 if (!parseId(Str))
8254 return ParseStatus::NoMatch;
8255
8256 if (!Str.starts_with("attr"))
8257 return Error(S, "invalid interpolation attribute");
8258
8259 StringRef Chan = Str.take_back(2);
8260 int AttrChan = StringSwitch<int>(Chan)
8261 .Case(".x", 0)
8262 .Case(".y", 1)
8263 .Case(".z", 2)
8264 .Case(".w", 3)
8265 .Default(-1);
8266 if (AttrChan == -1)
8267 return Error(S, "invalid or missing interpolation attribute channel");
8268
8269 Str = Str.drop_back(2).drop_front(4);
8270
8271 uint8_t Attr;
8272 if (Str.getAsInteger(10, Attr))
8273 return Error(S, "invalid or missing interpolation attribute number");
8274
8275 if (Attr > 32)
8276 return Error(S, "out of bounds interpolation attribute number");
8277
8278 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
8279
8280 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
8281 AMDGPUOperand::ImmTyInterpAttr));
8282 Operands.push_back(AMDGPUOperand::CreateImm(
8283 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8284 return ParseStatus::Success;
8285}
8286
8287//===----------------------------------------------------------------------===//
8288// exp
8289//===----------------------------------------------------------------------===//
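// Export targets are parsed by name, e.g. (illustrative)
// "exp mrt0 v0, v0, v0, v0 done vm", with names such as mrt0..mrt7, mrtz,
// null, pos0..pos3 and paramN; names unknown to getTgtId or unsupported on
// the current GPU are rejected.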
8290
8291ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8292 using namespace llvm::AMDGPU::Exp;
8293
8294 StringRef Str;
8295 SMLoc S = getLoc();
8296
8297 if (!parseId(Str))
8298 return ParseStatus::NoMatch;
8299
8300 unsigned Id = getTgtId(Str);
8301 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
8302 return Error(S, (Id == ET_INVALID)
8303 ? "invalid exp target"
8304 : "exp target is not supported on this GPU");
8305
8306 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
8307 AMDGPUOperand::ImmTyExpTgt));
8308 return ParseStatus::Success;
8309}
8310
8311//===----------------------------------------------------------------------===//
8312// parser helpers
8313//===----------------------------------------------------------------------===//
8314
8315bool
8316AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8317 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8318}
8319
8320bool
8321AMDGPUAsmParser::isId(const StringRef Id) const {
8322 return isId(getToken(), Id);
8323}
8324
8325bool
8326AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8327 return getTokenKind() == Kind;
8328}
8329
8330StringRef AMDGPUAsmParser::getId() const {
8331 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8332}
8333
8334bool
8335AMDGPUAsmParser::trySkipId(const StringRef Id) {
8336 if (isId(Id)) {
8337 lex();
8338 return true;
8339 }
8340 return false;
8341}
8342
8343bool
8344AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8345 if (isToken(AsmToken::Identifier)) {
8346 StringRef Tok = getTokenStr();
8347 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8348 lex();
8349 return true;
8350 }
8351 }
8352 return false;
8353}
8354
8355bool
8356AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8357 if (isId(Id) && peekToken().is(Kind)) {
8358 lex();
8359 lex();
8360 return true;
8361 }
8362 return false;
8363}
8364
8365bool
8366AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8367 if (isToken(Kind)) {
8368 lex();
8369 return true;
8370 }
8371 return false;
8372}
8373
8374bool
8375AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8376 const StringRef ErrMsg) {
8377 if (!trySkipToken(Kind)) {
8378 Error(getLoc(), ErrMsg);
8379 return false;
8380 }
8381 return true;
8382}
8383
8384bool
8385AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8386 SMLoc S = getLoc();
8387
8388 const MCExpr *Expr;
8389 if (Parser.parseExpression(Expr))
8390 return false;
8391
8392 if (Expr->evaluateAsAbsolute(Imm))
8393 return true;
8394
8395 if (Expected.empty()) {
8396 Error(S, "expected absolute expression");
8397 } else {
8398 Error(S, Twine("expected ", Expected) +
8399 Twine(" or an absolute expression"));
8400 }
8401 return false;
8402}
8403
8404bool
8405AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8406 SMLoc S = getLoc();
8407
8408 const MCExpr *Expr;
8409 if (Parser.parseExpression(Expr))
8410 return false;
8411
8412 int64_t IntVal;
8413 if (Expr->evaluateAsAbsolute(IntVal)) {
8414 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8415 } else {
8416 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8417 }
8418 return true;
8419}
8420
8421bool
8422AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8423 if (isToken(AsmToken::String)) {
8424 Val = getToken().getStringContents();
8425 lex();
8426 return true;
8427 }
8428 Error(getLoc(), ErrMsg);
8429 return false;
8430}
8431
8432bool
8433AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8434 if (isToken(AsmToken::Identifier)) {
8435 Val = getTokenStr();
8436 lex();
8437 return true;
8438 }
8439 if (!ErrMsg.empty())
8440 Error(getLoc(), ErrMsg);
8441 return false;
8442}
8443
8444AsmToken
8445AMDGPUAsmParser::getToken() const {
8446 return Parser.getTok();
8447}
8448
8449AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8450 return isToken(AsmToken::EndOfStatement)
8451 ? getToken()
8452 : getLexer().peekTok(ShouldSkipSpace);
8453}
8454
8455void
8456AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8457 auto TokCount = getLexer().peekTokens(Tokens);
8458
8459 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8460 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8461}
8462
8463AsmToken::TokenKind
8464AMDGPUAsmParser::getTokenKind() const {
8465 return getLexer().getKind();
8466}
8467
8468SMLoc
8469AMDGPUAsmParser::getLoc() const {
8470 return getToken().getLoc();
8471}
8472
8473StringRef
8474AMDGPUAsmParser::getTokenStr() const {
8475 return getToken().getString();
8476}
8477
8478void
8479AMDGPUAsmParser::lex() {
8480 Parser.Lex();
8481}
8482
8483SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8484 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8485}
8486
8487// Returns whichever of the given locations comes later in the source.
8488SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8489 return a.getPointer() < b.getPointer() ? b : a;
8490}
8491
8492SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
8493 int MCOpIdx) const {
8494 for (const auto &Op : Operands) {
8495 const auto TargetOp = static_cast<AMDGPUOperand &>(*Op);
8496 if (TargetOp.getMCOpIdx() == MCOpIdx)
8497 return TargetOp.getStartLoc();
8498 }
8499 llvm_unreachable("No such MC operand!");
8500}
8501
8502SMLoc
8503AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8504 const OperandVector &Operands) const {
8505 for (unsigned i = Operands.size() - 1; i > 0; --i) {
8506 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8507 if (Test(Op))
8508 return Op.getStartLoc();
8509 }
8510 return getInstLoc(Operands);
8511}
8512
8513SMLoc
8514AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8515 const OperandVector &Operands) const {
8516 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8517 return getOperandLoc(Test, Operands);
8518}
8519
8520ParseStatus
8521AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
8522 if (!trySkipToken(AsmToken::LCurly))
8523 return ParseStatus::NoMatch;
8524
8525 bool First = true;
8526 while (!trySkipToken(AsmToken::RCurly)) {
8527 if (!First &&
8528 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
8529 return ParseStatus::Failure;
8530
8531 StringRef Id = getTokenStr();
8532 SMLoc IdLoc = getLoc();
8533 if (!skipToken(AsmToken::Identifier, "field name expected") ||
8534 !skipToken(AsmToken::Colon, "colon expected"))
8535 return ParseStatus::Failure;
8536
8537 const auto *I =
8538 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
8539 if (I == Fields.end())
8540 return Error(IdLoc, "unknown field");
8541 if ((*I)->IsDefined)
8542 return Error(IdLoc, "duplicate field");
8543
8544 // TODO: Support symbolic values.
8545 (*I)->Loc = getLoc();
8546 if (!parseExpr((*I)->Val))
8547 return ParseStatus::Failure;
8548 (*I)->IsDefined = true;
8549
8550 First = false;
8551 }
8552 return ParseStatus::Success;
8553}
8554
8555bool AMDGPUAsmParser::validateStructuredOpFields(
8556 ArrayRef<const StructuredOpField *> Fields) {
8557 return all_of(Fields, [this](const StructuredOpField *F) {
8558 return F->validate(*this);
8559 });
8560}
8561
8562//===----------------------------------------------------------------------===//
8563// swizzle
8564//===----------------------------------------------------------------------===//
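// The swizzle operand is either a raw 16-bit offset or the swizzle() macro,
// e.g. (illustrative):
//   ds_swizzle_b32 v5, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v5, v1 offset:swizzle(BITMASK_PERM, "01pi0")
// Each mode below encodes its arguments into the same 16-bit immediate.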
8565
8566LLVM_READNONE
8567static unsigned
8568encodeBitmaskPerm(const unsigned AndMask,
8569 const unsigned OrMask,
8570 const unsigned XorMask) {
8571 using namespace llvm::AMDGPU::Swizzle;
8572
8573 return BITMASK_PERM_ENC |
8574 (AndMask << BITMASK_AND_SHIFT) |
8575 (OrMask << BITMASK_OR_SHIFT) |
8576 (XorMask << BITMASK_XOR_SHIFT);
8577}
8578
8579bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8580 const unsigned MaxVal,
8581 const Twine &ErrMsg, SMLoc &Loc) {
8582 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8583 return false;
8584 }
8585 Loc = getLoc();
8586 if (!parseExpr(Op)) {
8587 return false;
8588 }
8589 if (Op < MinVal || Op > MaxVal) {
8590 Error(Loc, ErrMsg);
8591 return false;
8592 }
8593
8594 return true;
8595}
8596
8597bool
8598AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8599 const unsigned MinVal,
8600 const unsigned MaxVal,
8601 const StringRef ErrMsg) {
8602 SMLoc Loc;
8603 for (unsigned i = 0; i < OpNum; ++i) {
8604 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8605 return false;
8606 }
8607
8608 return true;
8609}
8610
8611bool
8612AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8613 using namespace llvm::AMDGPU::Swizzle;
8614
8615 int64_t Lane[LANE_NUM];
8616 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8617 "expected a 2-bit lane id")) {
8618 Imm = QUAD_PERM_ENC;
8619 for (unsigned I = 0; I < LANE_NUM; ++I) {
8620 Imm |= Lane[I] << (LANE_SHIFT * I);
8621 }
8622 return true;
8623 }
8624 return false;
8625}
8626
8627bool
8628AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8629 using namespace llvm::AMDGPU::Swizzle;
8630
8631 SMLoc Loc;
8632 int64_t GroupSize;
8633 int64_t LaneIdx;
8634
8635 if (!parseSwizzleOperand(GroupSize,
8636 2, 32,
8637 "group size must be in the interval [2,32]",
8638 Loc)) {
8639 return false;
8640 }
8641 if (!isPowerOf2_64(GroupSize)) {
8642 Error(Loc, "group size must be a power of two");
8643 return false;
8644 }
8645 if (parseSwizzleOperand(LaneIdx,
8646 0, GroupSize - 1,
8647 "lane id must be in the interval [0,group size - 1]",
8648 Loc)) {
8649 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8650 return true;
8651 }
8652 return false;
8653}
8654
8655bool
8656AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8657 using namespace llvm::AMDGPU::Swizzle;
8658
8659 SMLoc Loc;
8660 int64_t GroupSize;
8661
8662 if (!parseSwizzleOperand(GroupSize,
8663 2, 32,
8664 "group size must be in the interval [2,32]",
8665 Loc)) {
8666 return false;
8667 }
8668 if (!isPowerOf2_64(GroupSize)) {
8669 Error(Loc, "group size must be a power of two");
8670 return false;
8671 }
8672
8673 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8674 return true;
8675}
8676
8677bool
8678AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8679 using namespace llvm::AMDGPU::Swizzle;
8680
8681 SMLoc Loc;
8682 int64_t GroupSize;
8683
8684 if (!parseSwizzleOperand(GroupSize,
8685 1, 16,
8686 "group size must be in the interval [1,16]",
8687 Loc)) {
8688 return false;
8689 }
8690 if (!isPowerOf2_64(GroupSize)) {
8691 Error(Loc, "group size must be a power of two");
8692 return false;
8693 }
8694
8695 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8696 return true;
8697}
8698
8699bool
8700AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8701 using namespace llvm::AMDGPU::Swizzle;
8702
8703 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8704 return false;
8705 }
8706
8707 StringRef Ctl;
8708 SMLoc StrLoc = getLoc();
8709 if (!parseString(Ctl)) {
8710 return false;
8711 }
8712 if (Ctl.size() != BITMASK_WIDTH) {
8713 Error(StrLoc, "expected a 5-character mask");
8714 return false;
8715 }
8716
8717 unsigned AndMask = 0;
8718 unsigned OrMask = 0;
8719 unsigned XorMask = 0;
8720
8721 for (size_t i = 0; i < Ctl.size(); ++i) {
8722 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8723 switch(Ctl[i]) {
8724 default:
8725 Error(StrLoc, "invalid mask");
8726 return false;
8727 case '0':
8728 break;
8729 case '1':
8730 OrMask |= Mask;
8731 break;
8732 case 'p':
8733 AndMask |= Mask;
8734 break;
8735 case 'i':
8736 AndMask |= Mask;
8737 XorMask |= Mask;
8738 break;
8739 }
8740 }
8741
8742 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8743 return true;
8744}
8745
8746bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
8747 using namespace llvm::AMDGPU::Swizzle;
8748
8749 if (!AMDGPU::isGFX9Plus(getSTI())) {
8750 Error(getLoc(), "FFT mode swizzle not supported on this GPU");
8751 return false;
8752 }
8753
8754 int64_t Swizzle;
8755 SMLoc Loc;
8756 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8757 "FFT swizzle must be in the interval [0," +
8758 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
8759 Loc))
8760 return false;
8761
8762 Imm = FFT_MODE_ENC | Swizzle;
8763 return true;
8764}
8765
8766bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
8767 using namespace llvm::AMDGPU::Swizzle;
8768
8769 if (!AMDGPU::isGFX9Plus(getSTI())) {
8770 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
8771 return false;
8772 }
8773
8774 SMLoc Loc;
8775 int64_t Direction;
8776
8777 if (!parseSwizzleOperand(Direction, 0, 1,
8778 "direction must be 0 (left) or 1 (right)", Loc))
8779 return false;
8780
8781 int64_t RotateSize;
8782 if (!parseSwizzleOperand(
8783 RotateSize, 0, ROTATE_MAX_SIZE,
8784 "number of threads to rotate must be in the interval [0," +
8785 Twine(ROTATE_MAX_SIZE) + Twine(']'),
8786 Loc))
8787 return false;
8788
8789 Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) |
8790 (RotateSize << ROTATE_SIZE_SHIFT);
8791 return true;
8792}
8793
8794bool
8795AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8796
8797 SMLoc OffsetLoc = getLoc();
8798
8799 if (!parseExpr(Imm, "a swizzle macro")) {
8800 return false;
8801 }
8802 if (!isUInt<16>(Imm)) {
8803 Error(OffsetLoc, "expected a 16-bit offset");
8804 return false;
8805 }
8806 return true;
8807}
8808
8809bool
8810AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8811 using namespace llvm::AMDGPU::Swizzle;
8812
8813 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
8814
8815 SMLoc ModeLoc = getLoc();
8816 bool Ok = false;
8817
8818 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8819 Ok = parseSwizzleQuadPerm(Imm);
8820 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8821 Ok = parseSwizzleBitmaskPerm(Imm);
8822 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8823 Ok = parseSwizzleBroadcast(Imm);
8824 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8825 Ok = parseSwizzleSwap(Imm);
8826 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8827 Ok = parseSwizzleReverse(Imm);
8828 } else if (trySkipId(IdSymbolic[ID_FFT])) {
8829 Ok = parseSwizzleFFT(Imm);
8830 } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
8831 Ok = parseSwizzleRotate(Imm);
8832 } else {
8833 Error(ModeLoc, "expected a swizzle mode");
8834 }
8835
8836 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
8837 }
8838
8839 return false;
8840}
8841
8842ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8843 SMLoc S = getLoc();
8844 int64_t Imm = 0;
8845
8846 if (trySkipId("offset")) {
8847
8848 bool Ok = false;
8849 if (skipToken(AsmToken::Colon, "expected a colon")) {
8850 if (trySkipId("swizzle")) {
8851 Ok = parseSwizzleMacro(Imm);
8852 } else {
8853 Ok = parseSwizzleOffset(Imm);
8854 }
8855 }
8856
8857 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8858
8859 return Ok ? ParseStatus::Success : ParseStatus::Failure;
8860 }
8861 return ParseStatus::NoMatch;
8862}
8863
8864bool
8865AMDGPUOperand::isSwizzle() const {
8866 return isImmTy(ImmTySwizzle);
8867}
8868
8869//===----------------------------------------------------------------------===//
8870// VGPR Index Mode
8871//===----------------------------------------------------------------------===//
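// The VGPR index mode operand is either a 4-bit immediate or the gpr_idx()
// macro listing the modes to enable, e.g. (illustrative)
// "s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)"; duplicate modes and unknown
// identifiers are rejected by parseGPRIdxMacro.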
8872
8873int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8874
8875 using namespace llvm::AMDGPU::VGPRIndexMode;
8876
8877 if (trySkipToken(AsmToken::RParen)) {
8878 return OFF;
8879 }
8880
8881 int64_t Imm = 0;
8882
8883 while (true) {
8884 unsigned Mode = 0;
8885 SMLoc S = getLoc();
8886
8887 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8888 if (trySkipId(IdSymbolic[ModeId])) {
8889 Mode = 1 << ModeId;
8890 break;
8891 }
8892 }
8893
8894 if (Mode == 0) {
8895 Error(S, (Imm == 0)?
8896 "expected a VGPR index mode or a closing parenthesis" :
8897 "expected a VGPR index mode");
8898 return UNDEF;
8899 }
8900
8901 if (Imm & Mode) {
8902 Error(S, "duplicate VGPR index mode");
8903 return UNDEF;
8904 }
8905 Imm |= Mode;
8906
8907 if (trySkipToken(AsmToken::RParen))
8908 break;
8909 if (!skipToken(AsmToken::Comma,
8910 "expected a comma or a closing parenthesis"))
8911 return UNDEF;
8912 }
8913
8914 return Imm;
8915}
8916
8917ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8918
8919 using namespace llvm::AMDGPU::VGPRIndexMode;
8920
8921 int64_t Imm = 0;
8922 SMLoc S = getLoc();
8923
8924 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8925 Imm = parseGPRIdxMacro();
8926 if (Imm == UNDEF)
8927 return ParseStatus::Failure;
8928 } else {
8929 if (getParser().parseAbsoluteExpression(Imm))
8930 return ParseStatus::Failure;
8931 if (Imm < 0 || !isUInt<4>(Imm))
8932 return Error(S, "invalid immediate: only 4-bit values are legal");
8933 }
8934
8935 Operands.push_back(
8936 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8937 return ParseStatus::Success;
8938}
8939
8940bool AMDGPUOperand::isGPRIdxMode() const {
8941 return isImmTy(ImmTyGprIdxMode);
8942}
8943
8944//===----------------------------------------------------------------------===//
8945// sopp branch targets
8946//===----------------------------------------------------------------------===//
8947
8948ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8949
8950 // Make sure we are not parsing something
8951 // that looks like a label or an expression but is not.
8952 // This will improve error messages.
8953 if (isRegister() || isModifier())
8954 return ParseStatus::NoMatch;
8955
8956 if (!parseExpr(Operands))
8957 return ParseStatus::Failure;
8958
8959 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8960 assert(Opr.isImm() || Opr.isExpr());
8961 SMLoc Loc = Opr.getStartLoc();
8962
8963 // Currently we do not support arbitrary expressions as branch targets.
8964 // Only labels and absolute expressions are accepted.
8965 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8966 Error(Loc, "expected an absolute expression or a label");
8967 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8968 Error(Loc, "expected a 16-bit signed jump offset");
8969 }
8970
8971 return ParseStatus::Success;
8972}
8973
8974//===----------------------------------------------------------------------===//
8975// Boolean holding registers
8976//===----------------------------------------------------------------------===//
8977
8978ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8979 return parseReg(Operands);
8980}
8981
8982//===----------------------------------------------------------------------===//
8983// mubuf
8984//===----------------------------------------------------------------------===//
8985
8986void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8987 const OperandVector &Operands,
8988 bool IsAtomic) {
8989 OptionalImmIndexMap OptionalIdx;
8990 unsigned FirstOperandIdx = 1;
8991 bool IsAtomicReturn = false;
8992
8993 if (IsAtomic) {
8994 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
8995 SIInstrFlags::IsAtomicRet;
8996 }
8997
8998 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8999 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9000
9001 // Add the register arguments
9002 if (Op.isReg()) {
9003 Op.addRegOperands(Inst, 1);
9004 // Insert a tied src for atomic return dst.
9005 // This cannot be postponed as subsequent calls to
9006 // addImmOperands rely on correct number of MC operands.
9007 if (IsAtomicReturn && i == FirstOperandIdx)
9008 Op.addRegOperands(Inst, 1);
9009 continue;
9010 }
9011
9012 // Handle the case where soffset is an immediate
9013 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9014 Op.addImmOperands(Inst, 1);
9015 continue;
9016 }
9017
9018 // Handle tokens like 'offen' which are sometimes hard-coded into the
9019 // asm string. There are no MCInst operands for these.
9020 if (Op.isToken()) {
9021 continue;
9022 }
9023 assert(Op.isImm());
9024
9025 // Handle optional arguments
9026 OptionalIdx[Op.getImmTy()] = i;
9027 }
9028
9029 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
9030 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
9031 // Parse a dummy operand as a placeholder for the SWZ operand. This enforces
9032 // agreement between MCInstrDesc.getNumOperands and MCInst.getNumOperands.
9033 Inst.addOperand(MCOperand::createImm(0));
9034}
9035
9036//===----------------------------------------------------------------------===//
9037// smrd
9038//===----------------------------------------------------------------------===//
9039
9040bool AMDGPUOperand::isSMRDOffset8() const {
9041 return isImmLiteral() && isUInt<8>(getImm());
9042}
9043
9044bool AMDGPUOperand::isSMEMOffset() const {
9045 // Offset range is checked later by validator.
9046 return isImmLiteral();
9047}
9048
9049bool AMDGPUOperand::isSMRDLiteralOffset() const {
9050 // 32-bit literals are only supported on CI and we only want to use them
9051 // when the offset does not fit in 8 bits.
9052 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
9053}
9054
9055//===----------------------------------------------------------------------===//
9056// vop3
9057//===----------------------------------------------------------------------===//
9058
9059static bool ConvertOmodMul(int64_t &Mul) {
9060 if (Mul != 1 && Mul != 2 && Mul != 4)
9061 return false;
9062
9063 Mul >>= 1;
9064 return true;
9065}
9066
9067static bool ConvertOmodDiv(int64_t &Div) {
9068 if (Div == 1) {
9069 Div = 0;
9070 return true;
9071 }
9072
9073 if (Div == 2) {
9074 Div = 3;
9075 return true;
9076 }
9077
9078 return false;
9079}
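// Summary of the two converters above: the omod field is a 2-bit output
// modifier where, as encoded by the hardware, 0 means no modifier, 1 means
// multiply by 2, 2 means multiply by 4 and 3 means divide by 2. Hence
// mul:1 -> 0, mul:2 -> 1, mul:4 -> 2 (via Mul >>= 1), div:1 -> 0, div:2 -> 3.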
9080
9081// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9082// This is intentional and ensures compatibility with sp3.
9083// See bug 35397 for details.
9084bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9085 if (BoundCtrl == 0 || BoundCtrl == 1) {
9086 if (!isGFX11Plus())
9087 BoundCtrl = 1;
9088 return true;
9089 }
9090 return false;
9091}
9092
9093void AMDGPUAsmParser::onBeginOfFile() {
9094 if (!getParser().getStreamer().getTargetStreamer() ||
9095 getSTI().getTargetTriple().getArch() == Triple::r600)
9096 return;
9097
9098 if (!getTargetStreamer().getTargetID())
9099 getTargetStreamer().initializeTargetID(getSTI(),
9100 getSTI().getFeatureString());
9101
9102 if (isHsaAbi(getSTI()))
9103 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9104}
9105
9106/// Parse AMDGPU specific expressions.
9107///
9108/// expr ::= or(expr, ...) |
9109/// max(expr, ...)
9110///
9111bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9112 using AGVK = AMDGPUMCExpr::VariantKind;
9113
9114 if (isToken(AsmToken::Identifier)) {
9115 StringRef TokenId = getTokenStr();
9116 AGVK VK = StringSwitch<AGVK>(TokenId)
9117 .Case("max", AGVK::AGVK_Max)
9118 .Case("or", AGVK::AGVK_Or)
9119 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
9120 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9121 .Case("alignto", AGVK::AGVK_AlignTo)
9122 .Case("occupancy", AGVK::AGVK_Occupancy)
9123 .Default(AGVK::AGVK_None);
9124
9125 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
9126 SmallVector<const MCExpr *, 4> Exprs;
9127 uint64_t CommaCount = 0;
9128 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
9129 lex(); // Eat '('
9130 while (true) {
9131 if (trySkipToken(AsmToken::RParen)) {
9132 if (Exprs.empty()) {
9133 Error(getToken().getLoc(),
9134 "empty " + Twine(TokenId) + " expression");
9135 return true;
9136 }
9137 if (CommaCount + 1 != Exprs.size()) {
9138 Error(getToken().getLoc(),
9139 "mismatch of commas in " + Twine(TokenId) + " expression");
9140 return true;
9141 }
9142 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
9143 return false;
9144 }
9145 const MCExpr *Expr;
9146 if (getParser().parseExpression(Expr, EndLoc))
9147 return true;
9148 Exprs.push_back(Expr);
9149 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
9150 if (LastTokenWasComma)
9151 CommaCount++;
9152 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
9153 Error(getToken().getLoc(),
9154 "unexpected token in " + Twine(TokenId) + " expression");
9155 return true;
9156 }
9157 }
9158 }
9159 }
9160 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
9161}
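// Hypothetical illustration of the expression forms recognized above (symbol
// names are invented): such expressions typically appear in generated
// register-count directives, e.g.
//   .set module.vgprs, max(kernel_a.num_vgpr, kernel_b.num_vgpr)
//   .set module.total, totalnumvgprs(module.agprs, module.vgprs)
// Anything that is not one of the known callee names falls through to the
// generic MCAsmParser primary-expression parser.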
9162
9163ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9164 StringRef Name = getTokenStr();
9165 if (Name == "mul") {
9166 return parseIntWithPrefix("mul", Operands,
9167 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
9168 }
9169
9170 if (Name == "div") {
9171 return parseIntWithPrefix("div", Operands,
9172 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
9173 }
9174
9175 return ParseStatus::NoMatch;
9176}
9177
9178// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
9179// the number of src operands present, then copies that bit into src0_modifiers.
9180static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
9181 int Opc = Inst.getOpcode();
9182 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9183 if (OpSelIdx == -1)
9184 return;
9185
9186 int SrcNum;
9187 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9188 AMDGPU::OpName::src2};
9189 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
9190 ++SrcNum)
9191 ;
9192 assert(SrcNum > 0);
9193
9194 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9195
9196 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
9197 if (DstIdx == -1)
9198 return;
9199
9200 const MCOperand &DstOp = Inst.getOperand(DstIdx);
9201 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
9202 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9203 if (DstOp.isReg() &&
9204 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
9205 if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
9206 ModVal |= SISrcMods::DST_OP_SEL;
9207 } else {
9208 if ((OpSel & (1 << SrcNum)) != 0)
9209 ModVal |= SISrcMods::DST_OP_SEL;
9210 }
9211 Inst.getOperand(ModIdx).setImm(ModVal);
9212}
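// Worked example for the helper above: on a two-source VOP3 opcode SrcNum is
// 2, so the destination half-select is op_sel bit 2; writing op_sel:[0,0,1]
// therefore sets SISrcMods::DST_OP_SEL in src0_modifiers (unless the dst is a
// true16 VGPR, in which case its high/low register suffix decides instead).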
9213
9214void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9215 const OperandVector &Operands) {
9216 cvtVOP3P(Inst, Operands);
9217 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9218}
9219
9220void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
9221 OptionalImmIndexMap &OptionalIdx) {
9222 cvtVOP3P(Inst, Operands, OptionalIdx);
9223 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9224}
9225
9226static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9227 return
9228 // 1. This operand is input modifiers
9229 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9230 // 2. This is not last operand
9231 && Desc.NumOperands > (OpNum + 1)
9232 // 3. Next operand is register class
9233 && Desc.operands()[OpNum + 1].RegClass != -1
9234 // 4. Next register is not tied to any other operand
9235 && Desc.getOperandConstraint(OpNum + 1,
9236 MCOI::OperandConstraint::TIED_TO) == -1;
9237}
9238
9239void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
9240 unsigned Opc = Inst.getOpcode();
9241 constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9242 AMDGPU::OpName::src2};
9243 constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9244 AMDGPU::OpName::src1_modifiers,
9245 AMDGPU::OpName::src2_modifiers};
9246 for (int J = 0; J < 3; ++J) {
9247 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9248 if (OpIdx == -1)
9249 // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but
9250 // no src1. So continue instead of break.
9251 continue;
9252
9253 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9254 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9255
9256 if ((OpSel & (1 << J)) != 0)
9257 ModVal |= SISrcMods::OP_SEL_0;
9258 // op_sel[3] is encoded in src0_modifiers.
9259 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
9260 ModVal |= SISrcMods::DST_OP_SEL;
9261
9262 Inst.getOperand(ModIdx).setImm(ModVal);
9263 }
9264}
9265
9266void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9267{
9268 OptionalImmIndexMap OptionalIdx;
9269 unsigned Opc = Inst.getOpcode();
9270
9271 unsigned I = 1;
9272 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9273 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9274 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9275 }
9276
9277 for (unsigned E = Operands.size(); I != E; ++I) {
9278 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9279 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9280 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9281 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
9282 Op.isInterpAttrChan()) {
9283 Inst.addOperand(MCOperand::createImm(Op.getImm()));
9284 } else if (Op.isImmModifier()) {
9285 OptionalIdx[Op.getImmTy()] = I;
9286 } else {
9287 llvm_unreachable("unhandled operand type");
9288 }
9289 }
9290
9291 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
9292 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9293 AMDGPUOperand::ImmTyHigh);
9294
9295 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9296 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9297 AMDGPUOperand::ImmTyClamp);
9298
9299 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9300 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9301 AMDGPUOperand::ImmTyOModSI);
9302
9303 // Some v_interp instructions use op_sel[3] for dst.
9304 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9305 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9306 AMDGPUOperand::ImmTyOpSel);
9307 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9308 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9309
9310 cvtOpSelHelper(Inst, OpSel);
9311 }
9312}
9313
9314void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9315{
9316 OptionalImmIndexMap OptionalIdx;
9317 unsigned Opc = Inst.getOpcode();
9318
9319 unsigned I = 1;
9320 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9321 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9322 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9323 }
9324
9325 for (unsigned E = Operands.size(); I != E; ++I) {
9326 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9327 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9328 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9329 } else if (Op.isImmModifier()) {
9330 OptionalIdx[Op.getImmTy()] = I;
9331 } else {
9332 llvm_unreachable("unhandled operand type");
9333 }
9334 }
9335
9336 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
9337
9338 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9339 if (OpSelIdx != -1)
9340 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9341
9342 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
9343
9344 if (OpSelIdx == -1)
9345 return;
9346
9347 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9348 cvtOpSelHelper(Inst, OpSel);
9349}
9350
9351void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9352 const OperandVector &Operands) {
9353 OptionalImmIndexMap OptionalIdx;
9354 unsigned Opc = Inst.getOpcode();
9355 unsigned I = 1;
9356 int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
9357
9358 const MCInstrDesc &Desc = MII.get(Opc);
9359
9360 for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
9361 static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
9362
9363 for (unsigned E = Operands.size(); I != E; ++I) {
9364 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
9365 int NumOperands = Inst.getNumOperands();
9366 // The order of operands in MCInst and parsed operands are different.
9367 // Adding dummy cbsz and blgp operands at corresponding MCInst operand
9368 // indices for parsing scale values correctly.
9369 if (NumOperands == CbszOpIdx) {
9370 Inst.addOperand(MCOperand::createImm(0)); // dummy cbsz
9371 Inst.addOperand(MCOperand::createImm(0)); // dummy blgp
9372 }
9373 if (isRegOrImmWithInputMods(Desc, NumOperands)) {
9374 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9375 } else if (Op.isImmModifier()) {
9376 OptionalIdx[Op.getImmTy()] = I;
9377 } else {
9378 Op.addRegOrImmOperands(Inst, 1);
9379 }
9380 }
9381
9382 // Insert CBSZ and BLGP operands for F8F6F4 variants
9383 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9384 if (CbszIdx != OptionalIdx.end()) {
9385 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
9386 Inst.getOperand(CbszOpIdx).setImm(CbszVal);
9387 }
9388
9389 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
9390 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9391 if (BlgpIdx != OptionalIdx.end()) {
9392 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
9393 Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
9394 }
9395
9396 // Add dummy src_modifiers
9397 Inst.addOperand(MCOperand::createImm(0));
9398 Inst.addOperand(MCOperand::createImm(0));
9399
9400 // Handle op_sel fields
9401
9402 unsigned OpSel = 0;
9403 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9404 if (OpselIdx != OptionalIdx.end()) {
9405 OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
9406 .getImm();
9407 }
9408
9409 unsigned OpSelHi = 0;
9410 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9411 if (OpselHiIdx != OptionalIdx.end()) {
9412 OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
9413 .getImm();
9414 }
9415 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9416 AMDGPU::OpName::src1_modifiers};
9417
9418 for (unsigned J = 0; J < 2; ++J) {
9419 unsigned ModVal = 0;
9420 if (OpSel & (1 << J))
9421 ModVal |= SISrcMods::OP_SEL_0;
9422 if (OpSelHi & (1 << J))
9423 ModVal |= SISrcMods::OP_SEL_1;
9424
9425 const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9426 Inst.getOperand(ModIdx).setImm(ModVal);
9427 }
9428}
9429
9430void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9431 OptionalImmIndexMap &OptionalIdx) {
9432 unsigned Opc = Inst.getOpcode();
9433
9434 unsigned I = 1;
9435 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9436 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9437 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9438 }
9439
9440 for (unsigned E = Operands.size(); I != E; ++I) {
9441 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9442 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9443 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9444 } else if (Op.isImmModifier()) {
9445 OptionalIdx[Op.getImmTy()] = I;
9446 } else {
9447 Op.addRegOrImmOperands(Inst, 1);
9448 }
9449 }
9450
9451 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
9452 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9453 AMDGPUOperand::ImmTyScaleSel);
9454
9455 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9456 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9457 AMDGPUOperand::ImmTyClamp);
9458
9459 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
9460 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
9461 Inst.addOperand(Inst.getOperand(0));
9462 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9463 AMDGPUOperand::ImmTyByteSel);
9464 }
9465
9466 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9467 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9468 AMDGPUOperand::ImmTyOModSI);
9469
9470 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9471 // it has src2 register operand that is tied to dst operand
9472 // we don't allow modifiers for this operand in assembler so src2_modifiers
9473 // should be 0.
9474 if (isMAC(Opc)) {
9475 auto *it = Inst.begin();
9476 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
9477 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
9478 ++it;
9479 // Copy the operand to ensure it's not invalidated when Inst grows.
9480 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
9481 }
9482}
9483
9484void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9485 OptionalImmIndexMap OptionalIdx;
9486 cvtVOP3(Inst, Operands, OptionalIdx);
9487}
9488
9489void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
9490 OptionalImmIndexMap &OptIdx) {
9491 const int Opc = Inst.getOpcode();
9492 const MCInstrDesc &Desc = MII.get(Opc);
9493
9494 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
9495
9496 if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9497 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9498 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9499 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9500 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9501 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
9502 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
9503 Inst.addOperand(Inst.getOperand(0));
9504 }
9505
9506 // Adding vdst_in operand is already covered for these DPP instructions in
9507 // cvtVOP3DPP.
9508 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
9509 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
9510 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
9511 Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
9512 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
9513 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
9514 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
9515 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
9516 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
9517 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
9518 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9519 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp_gfx1250 ||
9520 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp8_gfx1250 ||
9521 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9522 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9523 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
9524 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
9525 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
9526 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
9527 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
9528 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
9529 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
9530 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
9531 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
9532 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
9533 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
9534 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {
9535 Inst.addOperand(Inst.getOperand(0));
9536 }
9537
9538 int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
9539 if (BitOp3Idx != -1) {
9540 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9541 }
9542
9543 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
9544 // instruction, and then figure out where to actually put the modifiers
9545
9546 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9547 if (OpSelIdx != -1) {
9548 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
9549 }
9550
9551 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
9552 if (OpSelHiIdx != -1) {
9553 int DefaultVal = IsPacked ? -1 : 0;
9554 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
9555 DefaultVal);
9556 }
9557
9558 int MatrixAFMTIdx =
9559 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
9560 if (MatrixAFMTIdx != -1) {
9561 addOptionalImmOperand(Inst, Operands, OptIdx,
9562 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9563 }
9564
9565 int MatrixBFMTIdx =
9566 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
9567 if (MatrixBFMTIdx != -1) {
9568 addOptionalImmOperand(Inst, Operands, OptIdx,
9569 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9570 }
9571
9572 int MatrixAScaleIdx =
9573 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
9574 if (MatrixAScaleIdx != -1) {
9575 addOptionalImmOperand(Inst, Operands, OptIdx,
9576 AMDGPUOperand::ImmTyMatrixAScale, 0);
9577 }
9578
9579 int MatrixBScaleIdx =
9580 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
9581 if (MatrixBScaleIdx != -1) {
9582 addOptionalImmOperand(Inst, Operands, OptIdx,
9583 AMDGPUOperand::ImmTyMatrixBScale, 0);
9584 }
9585
9586 int MatrixAScaleFmtIdx =
9587 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9588 if (MatrixAScaleFmtIdx != -1) {
9589 addOptionalImmOperand(Inst, Operands, OptIdx,
9590 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9591 }
9592
9593 int MatrixBScaleFmtIdx =
9594 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9595 if (MatrixBScaleFmtIdx != -1) {
9596 addOptionalImmOperand(Inst, Operands, OptIdx,
9597 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9598 }
9599
9600 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
9601 addOptionalImmOperand(Inst, Operands, OptIdx,
9602 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9603
9604 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
9605 addOptionalImmOperand(Inst, Operands, OptIdx,
9606 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9607
9608 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
9609 if (NegLoIdx != -1)
9610 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
9611
9612 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
9613 if (NegHiIdx != -1)
9614 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
9615
9616 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9617 AMDGPU::OpName::src2};
9618 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9619 AMDGPU::OpName::src1_modifiers,
9620 AMDGPU::OpName::src2_modifiers};
9621
9622 unsigned OpSel = 0;
9623 unsigned OpSelHi = 0;
9624 unsigned NegLo = 0;
9625 unsigned NegHi = 0;
9626
9627 if (OpSelIdx != -1)
9628 OpSel = Inst.getOperand(OpSelIdx).getImm();
9629
9630 if (OpSelHiIdx != -1)
9631 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
9632
9633 if (NegLoIdx != -1)
9634 NegLo = Inst.getOperand(NegLoIdx).getImm();
9635
9636 if (NegHiIdx != -1)
9637 NegHi = Inst.getOperand(NegHiIdx).getImm();
9638
9639 for (int J = 0; J < 3; ++J) {
9640 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9641 if (OpIdx == -1)
9642 break;
9643
9644 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9645
9646 if (ModIdx == -1)
9647 continue;
9648
9649 uint32_t ModVal = 0;
9650
9651 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
9652 if (SrcOp.isReg() && getMRI()
9653 ->getRegClass(AMDGPU::VGPR_16RegClassID)
9654 .contains(SrcOp.getReg())) {
9655 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
9656 if (VGPRSuffixIsHi)
9657 ModVal |= SISrcMods::OP_SEL_0;
9658 } else {
9659 if ((OpSel & (1 << J)) != 0)
9660 ModVal |= SISrcMods::OP_SEL_0;
9661 }
9662
9663 if ((OpSelHi & (1 << J)) != 0)
9664 ModVal |= SISrcMods::OP_SEL_1;
9665
9666 if ((NegLo & (1 << J)) != 0)
9667 ModVal |= SISrcMods::NEG;
9668
9669 if ((NegHi & (1 << J)) != 0)
9670 ModVal |= SISrcMods::NEG_HI;
9671
9672 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
9673 }
9674}
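// Summary of the final loop above: the per-source bits of op_sel, op_sel_hi,
// neg_lo and neg_hi are folded into srcN_modifiers. For instance,
// neg_lo:[1,0,0] ORs SISrcMods::NEG into src0_modifiers and op_sel_hi:[0,1,0]
// ORs SISrcMods::OP_SEL_1 into src1_modifiers; true16 VGPR_16 sources take
// OP_SEL_0 from their high/low register suffix rather than from op_sel.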
9675
9676void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9677 OptionalImmIndexMap OptIdx;
9678 cvtVOP3(Inst, Operands, OptIdx);
9679 cvtVOP3P(Inst, Operands, OptIdx);
9680}
9681
9682static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
9683 unsigned i, unsigned Opc,
9684 AMDGPU::OpName OpName) {
9685 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
9686 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9687 else
9688 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
9689}
9690
9691void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
9692 unsigned Opc = Inst.getOpcode();
9693
9694 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9695 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
9696 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
9697 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
9698 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
9699
9700 OptionalImmIndexMap OptIdx;
9701 for (unsigned i = 5; i < Operands.size(); ++i) {
9702 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9703 OptIdx[Op.getImmTy()] = i;
9704 }
9705
9706 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
9707 addOptionalImmOperand(Inst, Operands, OptIdx,
9708 AMDGPUOperand::ImmTyIndexKey8bit);
9709
9710 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
9711 addOptionalImmOperand(Inst, Operands, OptIdx,
9712 AMDGPUOperand::ImmTyIndexKey16bit);
9713
9714 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
9715 addOptionalImmOperand(Inst, Operands, OptIdx,
9716 AMDGPUOperand::ImmTyIndexKey32bit);
9717
9718 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9719 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
9720
9721 cvtVOP3P(Inst, Operands, OptIdx);
9722}
9723
9724//===----------------------------------------------------------------------===//
9725// VOPD
9726//===----------------------------------------------------------------------===//
9727
9728ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9729 if (!hasVOPD(getSTI()))
9730 return ParseStatus::NoMatch;
9731
9732 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
9733 SMLoc S = getLoc();
9734 lex();
9735 lex();
9736 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
9737 SMLoc OpYLoc = getLoc();
9738 StringRef OpYName;
9739 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
9740 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
9741 return ParseStatus::Success;
9742 }
9743 return Error(OpYLoc, "expected a VOPDY instruction after ::");
9744 }
9745 return ParseStatus::NoMatch;
9746}
9747
9748// Create VOPD MCInst operands using parsed assembler operands.
9749void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9750 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9751
9752 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
9753 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9754 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9755 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9756 return;
9757 }
9758 if (Op.isReg()) {
9759 Op.addRegOperands(Inst, 1);
9760 return;
9761 }
9762 if (Op.isImm()) {
9763 Op.addImmOperands(Inst, 1);
9764 return;
9765 }
9766 llvm_unreachable("Unhandled operand type in cvtVOPD");
9767 };
9768
9769 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
9770
9771 // MCInst operands are ordered as follows:
9772 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
9773
9774 for (auto CompIdx : VOPD::COMPONENTS) {
9775 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9776 }
9777
9778 for (auto CompIdx : VOPD::COMPONENTS) {
9779 const auto &CInfo = InstInfo[CompIdx];
9780 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9781 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9782 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
9783 if (CInfo.hasSrc2Acc())
9784 addOp(CInfo.getIndexOfDstInParsedOperands());
9785 }
9786
9787 int BitOp3Idx =
9788 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
9789 if (BitOp3Idx != -1) {
9790 OptionalImmIndexMap OptIdx;
9791 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
9792 if (Op.isImm())
9793 OptIdx[Op.getImmTy()] = Operands.size() - 1;
9794
9795 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9796 }
9797}
9798
9799//===----------------------------------------------------------------------===//
9800// dpp
9801//===----------------------------------------------------------------------===//
9802
9803bool AMDGPUOperand::isDPP8() const {
9804 return isImmTy(ImmTyDPP8);
9805}
9806
9807bool AMDGPUOperand::isDPPCtrl() const {
9808 using namespace AMDGPU::DPP;
9809
9810 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
9811 if (result) {
9812 int64_t Imm = getImm();
9813 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9814 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9815 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9816 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9817 (Imm == DppCtrl::WAVE_SHL1) ||
9818 (Imm == DppCtrl::WAVE_ROL1) ||
9819 (Imm == DppCtrl::WAVE_SHR1) ||
9820 (Imm == DppCtrl::WAVE_ROR1) ||
9821 (Imm == DppCtrl::ROW_MIRROR) ||
9822 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9823 (Imm == DppCtrl::BCAST15) ||
9824 (Imm == DppCtrl::BCAST31) ||
9825 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9826 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9827 }
9828 return false;
9829}
9830
9831//===----------------------------------------------------------------------===//
9832// mAI
9833//===----------------------------------------------------------------------===//
9834
9835bool AMDGPUOperand::isBLGP() const {
9836 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
9837}
9838
9839bool AMDGPUOperand::isS16Imm() const {
9840 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
9841}
9842
9843bool AMDGPUOperand::isU16Imm() const {
9844 return isImmLiteral() && isUInt<16>(getImm());
9845}
9846
9847//===----------------------------------------------------------------------===//
9848// dim
9849//===----------------------------------------------------------------------===//
9850
9851bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
9852 // We want to allow "dim:1D" etc.,
9853 // but the initial 1 is tokenized as an integer.
9854 std::string Token;
9855 if (isToken(AsmToken::Integer)) {
9856 SMLoc Loc = getToken().getEndLoc();
9857 Token = std::string(getTokenStr());
9858 lex();
9859 if (getLoc() != Loc)
9860 return false;
9861 }
9862
9863 StringRef Suffix;
9864 if (!parseId(Suffix))
9865 return false;
9866 Token += Suffix;
9867
9868 StringRef DimId = Token;
9869 DimId.consume_front("SQ_RSRC_IMG_");
9870
9871 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
9872 if (!DimInfo)
9873 return false;
9874
9875 Encoding = DimInfo->Encoding;
9876 return true;
9877}
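// Example spellings accepted by the logic above: "dim:SQ_RSRC_IMG_1D" and
// "dim:1D" resolve to the same encoding, since the SQ_RSRC_IMG_ prefix is
// optional and a leading integer token (the "1" in "1D") is glued back onto
// the identifier suffix before the lookup.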
9878
9879ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
9880 if (!isGFX10Plus())
9881 return ParseStatus::NoMatch;
9882
9883 SMLoc S = getLoc();
9884
9885 if (!trySkipId("dim", AsmToken::Colon))
9886 return ParseStatus::NoMatch;
9887
9888 unsigned Encoding;
9889 SMLoc Loc = getLoc();
9890 if (!parseDimId(Encoding))
9891 return Error(Loc, "invalid dim value");
9892
9893 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9894 AMDGPUOperand::ImmTyDim));
9895 return ParseStatus::Success;
9896}
9897
9898//===----------------------------------------------------------------------===//
9899// dpp
9900//===----------------------------------------------------------------------===//
9901
9902ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9903 SMLoc S = getLoc();
9904
9905 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9906 return ParseStatus::NoMatch;
9907
9908 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9909
9910 int64_t Sels[8];
9911
9912 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9913 return ParseStatus::Failure;
9914
9915 for (size_t i = 0; i < 8; ++i) {
9916 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9917 return ParseStatus::Failure;
9918
9919 SMLoc Loc = getLoc();
9920 if (getParser().parseAbsoluteExpression(Sels[i]))
9921 return ParseStatus::Failure;
9922 if (0 > Sels[i] || 7 < Sels[i])
9923 return Error(Loc, "expected a 3-bit value");
9924 }
9925
9926 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9927 return ParseStatus::Failure;
9928
9929 unsigned DPP8 = 0;
9930 for (size_t i = 0; i < 8; ++i)
9931 DPP8 |= (Sels[i] << (i * 3));
9932
9933 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9934 return ParseStatus::Success;
9935}
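// Worked example of the packing above: selector i occupies bits [3*i+2:3*i],
// so dpp8:[1,0,0,0,0,0,0,0] encodes as 1 and the identity permutation
// dpp8:[0,1,2,3,4,5,6,7] encodes as 0xFAC688.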
9936
9937bool
9938AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9939 const OperandVector &Operands) {
9940 if (Ctrl == "row_newbcast")
9941 return isGFX90A();
9942
9943 if (Ctrl == "row_share" ||
9944 Ctrl == "row_xmask")
9945 return isGFX10Plus();
9946
9947 if (Ctrl == "wave_shl" ||
9948 Ctrl == "wave_shr" ||
9949 Ctrl == "wave_rol" ||
9950 Ctrl == "wave_ror" ||
9951 Ctrl == "row_bcast")
9952 return isVI() || isGFX9();
9953
9954 return Ctrl == "row_mirror" ||
9955 Ctrl == "row_half_mirror" ||
9956 Ctrl == "quad_perm" ||
9957 Ctrl == "row_shl" ||
9958 Ctrl == "row_shr" ||
9959 Ctrl == "row_ror";
9960}
9961
9962int64_t
9963AMDGPUAsmParser::parseDPPCtrlPerm() {
9964 // quad_perm:[%d,%d,%d,%d]
9965
9966 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9967 return -1;
9968
9969 int64_t Val = 0;
9970 for (int i = 0; i < 4; ++i) {
9971 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9972 return -1;
9973
9974 int64_t Temp;
9975 SMLoc Loc = getLoc();
9976 if (getParser().parseAbsoluteExpression(Temp))
9977 return -1;
9978 if (Temp < 0 || Temp > 3) {
9979 Error(Loc, "expected a 2-bit value");
9980 return -1;
9981 }
9982
9983 Val += (Temp << i * 2);
9984 }
9985
9986 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9987 return -1;
9988
9989 return Val;
9990}
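// Worked example: quad_perm:[0,1,2,3] is the identity permutation and packs
// to 0 + (1<<2) + (2<<4) + (3<<6) = 0xE4, which is also the dpp_ctrl default
// used by cvtVOP3DPP below.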
9991
9992int64_t
9993AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
9994 using namespace AMDGPU::DPP;
9995
9996 // sel:%d
9997
9998 int64_t Val;
9999 SMLoc Loc = getLoc();
10000
10001 if (getParser().parseAbsoluteExpression(Val))
10002 return -1;
10003
10004 struct DppCtrlCheck {
10005 int64_t Ctrl;
10006 int Lo;
10007 int Hi;
10008 };
10009
10010 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
10011 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10012 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10013 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10014 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10015 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10016 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10017 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10018 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10019 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10020 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10021 .Default({-1, 0, 0});
10022
10023 bool Valid;
10024 if (Check.Ctrl == -1) {
10025 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
10026 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
10027 } else {
10028 Valid = Check.Lo <= Val && Val <= Check.Hi;
10029 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
10030 }
10031
10032 if (!Valid) {
10033 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
10034 return -1;
10035 }
10036
10037 return Val;
10038}
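// Worked examples: row_shl:1 encodes as DppCtrl::ROW_SHL0 | 1, row_bcast:15
// selects the dedicated BCAST15 control, and a value outside the table's
// Lo/Hi bounds (e.g. row_shl:0 or row_shl:16) is diagnosed as invalid.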
10039
10040ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10041 using namespace AMDGPU::DPP;
10042
10043 if (!isToken(AsmToken::Identifier) ||
10044 !isSupportedDPPCtrl(getTokenStr(), Operands))
10045 return ParseStatus::NoMatch;
10046
10047 SMLoc S = getLoc();
10048 int64_t Val = -1;
10049 StringRef Ctrl;
10050
10051 parseId(Ctrl);
10052
10053 if (Ctrl == "row_mirror") {
10054 Val = DppCtrl::ROW_MIRROR;
10055 } else if (Ctrl == "row_half_mirror") {
10056 Val = DppCtrl::ROW_HALF_MIRROR;
10057 } else {
10058 if (skipToken(AsmToken::Colon, "expected a colon")) {
10059 if (Ctrl == "quad_perm") {
10060 Val = parseDPPCtrlPerm();
10061 } else {
10062 Val = parseDPPCtrlSel(Ctrl);
10063 }
10064 }
10065 }
10066
10067 if (Val == -1)
10068 return ParseStatus::Failure;
10069
10070 Operands.push_back(
10071 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10072 return ParseStatus::Success;
10073}
10074
10075void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
10076 bool IsDPP8) {
10077 OptionalImmIndexMap OptionalIdx;
10078 unsigned Opc = Inst.getOpcode();
10079 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10080
10081 // MAC instructions are special because they have 'old'
10082 // operand which is not tied to dst (but assumed to be).
10083 // They also have dummy unused src2_modifiers.
10084 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
10085 int Src2ModIdx =
10086 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
10087 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10088 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
10089
10090 unsigned I = 1;
10091 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10092 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10093 }
10094
10095 int Fi = 0;
10096 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
10097 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10098 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10099 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10100 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
10101
10102 for (unsigned E = Operands.size(); I != E; ++I) {
10103
10104 if (IsMAC) {
10105 int NumOperands = Inst.getNumOperands();
10106 if (OldIdx == NumOperands) {
10107 // Handle old operand
10108 constexpr int DST_IDX = 0;
10109 Inst.addOperand(Inst.getOperand(DST_IDX));
10110 } else if (Src2ModIdx == NumOperands) {
10111 // Add unused dummy src2_modifiers
10112 Inst.addOperand(MCOperand::createImm(0));
10113 }
10114 }
10115
10116 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
10117 Inst.addOperand(Inst.getOperand(0));
10118 }
10119
10120 if (IsVOP3CvtSrDpp) {
10121 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
10122 Inst.addOperand(MCOperand::createImm(0)); // dummy src2_modifiers
10123 Inst.addOperand(MCOperand::createReg(MCRegister()));
10124 }
10125 }
10126
10127 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10128 MCOI::TIED_TO);
10129 if (TiedTo != -1) {
10130 assert((unsigned)TiedTo < Inst.getNumOperands());
10131 // handle tied old or src2 for MAC instructions
10132 Inst.addOperand(Inst.getOperand(TiedTo));
10133 }
10134 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10135 // Add the register arguments
10136 if (IsDPP8 && Op.isDppFI()) {
10137 Fi = Op.getImm();
10138 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10139 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10140 } else if (Op.isReg()) {
10141 Op.addRegOperands(Inst, 1);
10142 } else if (Op.isImm() &&
10143 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
10144 Op.addImmOperands(Inst, 1);
10145 } else if (Op.isImm()) {
10146 OptionalIdx[Op.getImmTy()] = I;
10147 } else {
10148 llvm_unreachable("unhandled operand type");
10149 }
10150 }
10151
10152 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
10153 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10154 AMDGPUOperand::ImmTyClamp);
10155
10156 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
10157 if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
10158 Inst.addOperand(Inst.getOperand(0));
10159 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10160 AMDGPUOperand::ImmTyByteSel);
10161 }
10162
10163 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10164 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
10165
10166 if (Desc.TSFlags & SIInstrFlags::VOP3P)
10167 cvtVOP3P(Inst, Operands, OptionalIdx);
10168 else if (Desc.TSFlags & SIInstrFlags::VOP3)
10169 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10170 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
10171 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
10172 }
10173
10174 if (IsDPP8) {
10175 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
10176 using namespace llvm::AMDGPU::DPP;
10177 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10178 } else {
10179 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
10180 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10181 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10182 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10183
10184 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
10185 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10186 AMDGPUOperand::ImmTyDppFI);
10187 }
10188}
10189
10190void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
10191 OptionalImmIndexMap OptionalIdx;
10192
10193 unsigned I = 1;
10194 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10195 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10196 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10197 }
10198
10199 int Fi = 0;
10200 for (unsigned E = Operands.size(); I != E; ++I) {
10201 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10202 MCOI::TIED_TO);
10203 if (TiedTo != -1) {
10204 assert((unsigned)TiedTo < Inst.getNumOperands());
10205 // handle tied old or src2 for MAC instructions
10206 Inst.addOperand(Inst.getOperand(TiedTo));
10207 }
10208 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10209 // Add the register arguments
10210 if (Op.isReg() && validateVccOperand(Op.getReg())) {
10211 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
10212 // Skip it.
10213 continue;
10214 }
10215
10216 if (IsDPP8) {
10217 if (Op.isDPP8()) {
10218 Op.addImmOperands(Inst, 1);
10219 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10220 Op.addRegWithFPInputModsOperands(Inst, 2);
10221 } else if (Op.isDppFI()) {
10222 Fi = Op.getImm();
10223 } else if (Op.isReg()) {
10224 Op.addRegOperands(Inst, 1);
10225 } else {
10226 llvm_unreachable("Invalid operand type");
10227 }
10228 } else {
10229 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10230 Op.addRegWithFPInputModsOperands(Inst, 2);
10231 } else if (Op.isReg()) {
10232 Op.addRegOperands(Inst, 1);
10233 } else if (Op.isDPPCtrl()) {
10234 Op.addImmOperands(Inst, 1);
10235 } else if (Op.isImm()) {
10236 // Handle optional arguments
10237 OptionalIdx[Op.getImmTy()] = I;
10238 } else {
10239 llvm_unreachable("Invalid operand type");
10240 }
10241 }
10242 }
10243
10244 if (IsDPP8) {
10245 using namespace llvm::AMDGPU::DPP;
10246 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10247 } else {
10248 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10249 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10250 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10251 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
10252 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10253 AMDGPUOperand::ImmTyDppFI);
10254 }
10255 }
10256}
10257
10258//===----------------------------------------------------------------------===//
10259// sdwa
10260//===----------------------------------------------------------------------===//
10261
10262ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
10263 StringRef Prefix,
10264 AMDGPUOperand::ImmTy Type) {
10265 return parseStringOrIntWithPrefix(
10266 Operands, Prefix,
10267 {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
10268 Type);
10269}
10270
10271ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
10272 return parseStringOrIntWithPrefix(
10273 Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
10274 AMDGPUOperand::ImmTySDWADstUnused);
10275}
10276
10277void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
10278 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
10279}
10280
10281void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
10282 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
10283}
10284
10285void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
10286 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
10287}
10288
10289void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
10290 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
10291}
10292
10293void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
10294 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
10295}
10296
10297void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
10298 uint64_t BasicInstType,
10299 bool SkipDstVcc,
10300 bool SkipSrcVcc) {
10301 using namespace llvm::AMDGPU::SDWA;
10302
10303 OptionalImmIndexMap OptionalIdx;
10304 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10305 bool SkippedVcc = false;
10306
10307 unsigned I = 1;
10308 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10309 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10310 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10311 }
10312
10313 for (unsigned E = Operands.size(); I != E; ++I) {
10314 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10315 if (SkipVcc && !SkippedVcc && Op.isReg() &&
10316 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
10317 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
10318 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
10319 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
10320 // Skip VCC only if we didn't skip it on previous iteration.
10321 // Note that src0 and src1 occupy 2 slots each because of modifiers.
10322 if (BasicInstType == SIInstrFlags::VOP2 &&
10323 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
10324 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
10325 SkippedVcc = true;
10326 continue;
10327 }
10328 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
10329 SkippedVcc = true;
10330 continue;
10331 }
10332 }
10333 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10334 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10335 } else if (Op.isImm()) {
10336 // Handle optional arguments
10337 OptionalIdx[Op.getImmTy()] = I;
10338 } else {
10339 llvm_unreachable("Invalid operand type");
10340 }
10341 SkippedVcc = false;
10342 }
10343
10344 const unsigned Opc = Inst.getOpcode();
10345 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10346 Opc != AMDGPU::V_NOP_sdwa_vi) {
10347 // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments
10348 switch (BasicInstType) {
10349 case SIInstrFlags::VOP1:
10350 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10351 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10352 AMDGPUOperand::ImmTyClamp, 0);
10353
10354 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10355 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10356 AMDGPUOperand::ImmTyOModSI, 0);
10357
10358 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
10359 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10360 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10361
10362 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
10363 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10364 AMDGPUOperand::ImmTySDWADstUnused,
10365 DstUnused::UNUSED_PRESERVE);
10366
10367 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10368 break;
10369
10370 case SIInstrFlags::VOP2:
10371 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10372 AMDGPUOperand::ImmTyClamp, 0);
10373
10374 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
10375 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
10376
10377 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10378 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10379 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10380 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10381 break;
10382
10383 case SIInstrFlags::VOPC:
10384 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
10385 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10386 AMDGPUOperand::ImmTyClamp, 0);
10387 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10388 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10389 break;
10390
10391 default:
10392 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
10393 }
10394 }
10395
10396 // special case v_mac_{f16, f32}:
10397 // it has src2 register operand that is tied to dst operand
10398 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10399 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10400 auto *it = Inst.begin();
10401 std::advance(
10402 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
10403 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
10404 }
10405}
10406
10407/// Force static initialization.
10408extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
10409LLVMInitializeAMDGPUAsmParser() {
10410 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
10411 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
10412}
10413
10414#define GET_MATCHER_IMPLEMENTATION
10415#define GET_MNEMONIC_SPELL_CHECKER
10416#define GET_MNEMONIC_CHECKER
10417#include "AMDGPUGenAsmMatcher.inc"
10418
10419ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10420 unsigned MCK) {
10421 switch (MCK) {
10422 case MCK_addr64:
10423 return parseTokenOp("addr64", Operands);
10424 case MCK_done:
10425 return parseTokenOp("done", Operands);
10426 case MCK_idxen:
10427 return parseTokenOp("idxen", Operands);
10428 case MCK_lds:
10429 return parseTokenOp("lds", Operands);
10430 case MCK_offen:
10431 return parseTokenOp("offen", Operands);
10432 case MCK_off:
10433 return parseTokenOp("off", Operands);
10434 case MCK_row_95_en:
10435 return parseTokenOp("row_en", Operands);
10436 case MCK_gds:
10437 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10438 case MCK_tfe:
10439 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10440 }
10441 return tryCustomParseOperand(Operands, MCK);
10442}
10443
10444// This function should be defined after auto-generated include so that we have
10445// MatchClassKind enum defined
10446unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10447 unsigned Kind) {
10448 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10449 // But MatchInstructionImpl() expects to meet token and fails to validate
10450 // operand. This method checks if we are given immediate operand but expect to
10451 // get corresponding token.
10452 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10453 switch (Kind) {
10454 case MCK_addr64:
10455 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10456 case MCK_gds:
10457 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10458 case MCK_lds:
10459 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10460 case MCK_idxen:
10461 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10462 case MCK_offen:
10463 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10464 case MCK_tfe:
10465 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10466 case MCK_SSrc_b32:
10467 // When operands have expression values, they will return true for isToken,
10468 // because it is not possible to distinguish between a token and an
10469 // expression at parse time. MatchInstructionImpl() will always try to
10470 // match an operand as a token, when isToken returns true, and when the
10471 // name of the expression is not a valid token, the match will fail,
10472 // so we need to handle it here.
10473 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10474 case MCK_SSrc_f32:
10475 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10476 case MCK_SOPPBrTarget:
10477 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10478 case MCK_VReg32OrOff:
10479 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10480 case MCK_InterpSlot:
10481 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10482 case MCK_InterpAttr:
10483 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10484 case MCK_InterpAttrChan:
10485 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10486 case MCK_SReg_64:
10487 case MCK_SReg_64_XEXEC:
10488 // Null is defined as a 32-bit register but
10489 // it should also be enabled with 64-bit operands or larger.
10490 // The following code enables it for SReg_64 and larger operands
10491 // used as source and destination. Remaining source
10492 // operands are handled in isInlinableImm.
10493 case MCK_SReg_96:
10494 case MCK_SReg_128:
10495 case MCK_SReg_256:
10496 case MCK_SReg_512:
10497 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10498 default:
10499 return Match_InvalidOperand;
10500 }
10501}
10502
10503//===----------------------------------------------------------------------===//
10504// endpgm
10505//===----------------------------------------------------------------------===//
10506
10507ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10508 SMLoc S = getLoc();
10509 int64_t Imm = 0;
10510
10511 if (!parseExpr(Imm)) {
10512 // The operand is optional, if not present default to 0
10513 Imm = 0;
10514 }
10515
10516 if (!isUInt<16>(Imm))
10517 return Error(S, "expected a 16-bit value");
10518
10519 Operands.push_back(
10520 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10521 return ParseStatus::Success;
10522}
10523
10524bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10525
10526//===----------------------------------------------------------------------===//
10527// Split Barrier
10528//===----------------------------------------------------------------------===//
10529
10530bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & BFloat()
Definition APFloat.h:295
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6053
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:132
size_t size() const
size - Get the array size.
Definition ArrayRef.h:143
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition MCAsmMacro.h:103
bool is(TokenKind K) const
Definition MCAsmMacro.h:75
Register getReg() const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:353
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
SMLoc getLoc() const
Definition MCInst.h:208
void setLoc(SMLoc loc)
Definition MCInst.h:207
unsigned getOpcode() const
Definition MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition MCInst.h:232
void addOperand(const MCOperand Op)
Definition MCInst.h:215
iterator begin()
Definition MCInst.h:227
size_t size() const
Definition MCInst.h:226
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Definition MCInstrInfo.h:80
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
void setImm(int64_t Val)
Definition MCInst.h:89
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
bool isImm() const
Definition MCInst.h:66
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
const MCExpr * getExpr() const
Definition MCInst.h:118
bool isExpr() const
Definition MCInst.h:69
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
constexpr bool isValid() const
Definition MCRegister.h:76
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
void setRedefinable(bool Value)
Mark this symbol as redefinable.
Definition MCSymbol.h:210
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCTargetAsmParser - Generic interface to target specific assembly parsers.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Definition Register.h:95
Represents a location in source code.
Definition SMLoc.h:22
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:35
constexpr const char * getPointer() const
Definition SMLoc.h:33
constexpr bool isValid() const
Definition SMLoc.h:28
SMLoc Start
Definition SMLoc.h:49
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:854
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition StringRef.h:657
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:573
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:611
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
bool consume_front(StringRef Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:637
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:273
bool contains(StringRef key) const
Check if the set contains the given key.
Definition StringSet.h:60
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
static constexpr CustomOperand Operands[]
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY bool isLitExpr(const MCExpr *Expr)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
LLVM_READONLY int64_t getLitValue(const MCExpr *Expr)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isInlineValue(unsigned Reg)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:231
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:202
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:209
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:219
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:224
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:206
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:201
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:218
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:216
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:211
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:205
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:236
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:237
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:212
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:204
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:221
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:217
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:223
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:213
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:238
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:220
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:203
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:228
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1430
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ OPERAND_IMMEDIATE
Definition MCInstrDesc.h:62
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Context & getContext() const
Definition BasicBlock.h:99
bool isNull(StringRef S)
Definition YAMLTraits.h:570
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
Definition Error.h:1113
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
StringMapEntry< Value * > ValueName
Definition Value.h:56
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition Alignment.h:206
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
void PrintError(const Twine &Msg)
Definition Error.cpp:104
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
Definition InstrProf.h:267
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
Op::Description Desc
Target & getTheR600Target()
The target for R600 GPUs.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
#define N
RegisterKind Kind
StringLiteral Name
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
Instruction set architecture version.
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...