LLVM 23.0.0git
AMDGPUAsmParser.cpp
Go to the documentation of this file.
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
38#include "llvm/MC/MCSymbol.h"
46#include <optional>
47
48using namespace llvm;
49using namespace llvm::AMDGPU;
50using namespace llvm::amdhsa;
51
52namespace {
53
54class AMDGPUAsmParser;
55
// Categories of registers the parser distinguishes while resolving operands.
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
57
58//===----------------------------------------------------------------------===//
59// Operand
60//===----------------------------------------------------------------------===//
61
62class AMDGPUOperand : public MCParsedAsmOperand {
63 enum KindTy {
64 Token,
65 Immediate,
66 Register,
67 Expression
68 } Kind;
69
70 SMLoc StartLoc, EndLoc;
71 const AMDGPUAsmParser *AsmParser;
72
73public:
74 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
75 : Kind(Kind_), AsmParser(AsmParser_) {}
76
77 using Ptr = std::unique_ptr<AMDGPUOperand>;
78
79 struct Modifiers {
80 bool Abs = false;
81 bool Neg = false;
82 bool Sext = false;
83 LitModifier Lit = LitModifier::None;
84
85 bool hasFPModifiers() const { return Abs || Neg; }
86 bool hasIntModifiers() const { return Sext; }
87 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
88
89 int64_t getFPModifiersOperand() const {
90 int64_t Operand = 0;
91 Operand |= Abs ? SISrcMods::ABS : 0u;
92 Operand |= Neg ? SISrcMods::NEG : 0u;
93 return Operand;
94 }
95
96 int64_t getIntModifiersOperand() const {
97 int64_t Operand = 0;
98 Operand |= Sext ? SISrcMods::SEXT : 0u;
99 return Operand;
100 }
101
102 int64_t getModifiersOperand() const {
103 assert(!(hasFPModifiers() && hasIntModifiers())
104 && "fp and int modifiers should not be used simultaneously");
105 if (hasFPModifiers())
106 return getFPModifiersOperand();
107 if (hasIntModifiers())
108 return getIntModifiersOperand();
109 return 0;
110 }
111
112 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
113 };
114
115 enum ImmTy {
116 ImmTyNone,
117 ImmTyGDS,
118 ImmTyLDS,
119 ImmTyOffen,
120 ImmTyIdxen,
121 ImmTyAddr64,
122 ImmTyOffset,
123 ImmTyInstOffset,
124 ImmTyOffset0,
125 ImmTyOffset1,
126 ImmTySMEMOffsetMod,
127 ImmTyCPol,
128 ImmTyTFE,
129 ImmTyIsAsync,
130 ImmTyD16,
131 ImmTyClamp,
132 ImmTyOModSI,
133 ImmTySDWADstSel,
134 ImmTySDWASrc0Sel,
135 ImmTySDWASrc1Sel,
136 ImmTySDWADstUnused,
137 ImmTyDMask,
138 ImmTyDim,
139 ImmTyUNorm,
140 ImmTyDA,
141 ImmTyR128A16,
142 ImmTyA16,
143 ImmTyLWE,
144 ImmTyExpTgt,
145 ImmTyExpCompr,
146 ImmTyExpVM,
147 ImmTyDone,
148 ImmTyRowEn,
149 ImmTyFORMAT,
150 ImmTyHwreg,
151 ImmTyOff,
152 ImmTySendMsg,
153 ImmTyWaitEvent,
154 ImmTyInterpSlot,
155 ImmTyInterpAttr,
156 ImmTyInterpAttrChan,
157 ImmTyOpSel,
158 ImmTyOpSelHi,
159 ImmTyNegLo,
160 ImmTyNegHi,
161 ImmTyIndexKey8bit,
162 ImmTyIndexKey16bit,
163 ImmTyIndexKey32bit,
164 ImmTyDPP8,
165 ImmTyDppCtrl,
166 ImmTyDppRowMask,
167 ImmTyDppBankMask,
168 ImmTyDppBoundCtrl,
169 ImmTyDppFI,
170 ImmTySwizzle,
171 ImmTyGprIdxMode,
172 ImmTyHigh,
173 ImmTyBLGP,
174 ImmTyCBSZ,
175 ImmTyABID,
176 ImmTyEndpgm,
177 ImmTyWaitVDST,
178 ImmTyWaitEXP,
179 ImmTyWaitVAVDst,
180 ImmTyWaitVMVSrc,
181 ImmTyBitOp3,
182 ImmTyMatrixAFMT,
183 ImmTyMatrixBFMT,
184 ImmTyMatrixAScale,
185 ImmTyMatrixBScale,
186 ImmTyMatrixAScaleFmt,
187 ImmTyMatrixBScaleFmt,
188 ImmTyMatrixAReuse,
189 ImmTyMatrixBReuse,
190 ImmTyScaleSel,
191 ImmTyByteSel,
192 };
193
194private:
195 struct TokOp {
196 const char *Data;
197 unsigned Length;
198 };
199
200 struct ImmOp {
201 int64_t Val;
202 ImmTy Type;
203 bool IsFPImm;
204 Modifiers Mods;
205 };
206
207 struct RegOp {
208 MCRegister RegNo;
209 Modifiers Mods;
210 };
211
212 union {
213 TokOp Tok;
214 ImmOp Imm;
215 RegOp Reg;
216 const MCExpr *Expr;
217 };
218
219 // The index of the associated MCInst operand.
220 mutable int MCOpIdx = -1;
221
222public:
223 bool isToken() const override { return Kind == Token; }
224
225 bool isSymbolRefExpr() const {
226 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
227 }
228
229 bool isImm() const override {
230 return Kind == Immediate;
231 }
232
233 bool isInlinableImm(MVT type) const;
234 bool isLiteralImm(MVT type) const;
235
236 bool isRegKind() const {
237 return Kind == Register;
238 }
239
240 bool isReg() const override {
241 return isRegKind() && !hasModifiers();
242 }
243
244 bool isRegOrInline(unsigned RCID, MVT type) const {
245 return isRegClass(RCID) || isInlinableImm(type);
246 }
247
248 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
249 return isRegOrInline(RCID, type) || isLiteralImm(type);
250 }
251
252 bool isRegOrImmWithInt16InputMods() const {
253 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
254 }
255
256 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
258 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
259 }
260
261 bool isRegOrImmWithInt32InputMods() const {
262 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
263 }
264
265 bool isRegOrInlineImmWithInt16InputMods() const {
266 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
267 }
268
269 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
270 return isRegOrInline(
271 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
272 }
273
274 bool isRegOrInlineImmWithInt32InputMods() const {
275 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
276 }
277
278 bool isRegOrImmWithInt64InputMods() const {
279 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
280 }
281
282 bool isRegOrImmWithFP16InputMods() const {
283 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
284 }
285
286 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
288 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
289 }
290
291 bool isRegOrImmWithFP32InputMods() const {
292 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
293 }
294
295 bool isRegOrImmWithFP64InputMods() const {
296 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
297 }
298
299 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
300 return isRegOrInline(
301 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
302 }
303
304 bool isRegOrInlineImmWithFP32InputMods() const {
305 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
306 }
307
308 bool isRegOrInlineImmWithFP64InputMods() const {
309 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
310 }
311
312 bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
313
314 bool isVRegWithFP32InputMods() const {
315 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
316 }
317
318 bool isVRegWithFP64InputMods() const {
319 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
320 }
321
322 bool isPackedFP16InputMods() const {
323 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
324 }
325
326 bool isPackedVGPRFP32InputMods() const {
327 return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
328 }
329
330 bool isVReg() const {
331 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
332 isRegClass(AMDGPU::VReg_64RegClassID) ||
333 isRegClass(AMDGPU::VReg_96RegClassID) ||
334 isRegClass(AMDGPU::VReg_128RegClassID) ||
335 isRegClass(AMDGPU::VReg_160RegClassID) ||
336 isRegClass(AMDGPU::VReg_192RegClassID) ||
337 isRegClass(AMDGPU::VReg_256RegClassID) ||
338 isRegClass(AMDGPU::VReg_512RegClassID) ||
339 isRegClass(AMDGPU::VReg_1024RegClassID);
340 }
341
342 bool isVReg32() const {
343 return isRegClass(AMDGPU::VGPR_32RegClassID);
344 }
345
346 bool isVReg32OrOff() const {
347 return isOff() || isVReg32();
348 }
349
350 bool isNull() const {
351 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
352 }
353
354 bool isAV_LdSt_32_Align2_RegOp() const {
355 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
356 isRegClass(AMDGPU::AGPR_32RegClassID);
357 }
358
359 bool isVRegWithInputMods() const;
360 template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
361 template <bool IsFake16> bool isT16VRegWithInputMods() const;
362
363 bool isSDWAOperand(MVT type) const;
364 bool isSDWAFP16Operand() const;
365 bool isSDWAFP32Operand() const;
366 bool isSDWAInt16Operand() const;
367 bool isSDWAInt32Operand() const;
368
369 bool isImmTy(ImmTy ImmT) const {
370 return isImm() && Imm.Type == ImmT;
371 }
372
373 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
374
375 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
376
377 bool isImmModifier() const {
378 return isImm() && Imm.Type != ImmTyNone;
379 }
380
381 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
382 bool isDim() const { return isImmTy(ImmTyDim); }
383 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
384 bool isOff() const { return isImmTy(ImmTyOff); }
385 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
386 bool isOffen() const { return isImmTy(ImmTyOffen); }
387 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
388 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
389 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
390 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
391 bool isGDS() const { return isImmTy(ImmTyGDS); }
392 bool isLDS() const { return isImmTy(ImmTyLDS); }
393 bool isCPol() const { return isImmTy(ImmTyCPol); }
394 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
395 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
396 bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
397 bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
398 bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
399 bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
400 bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
401 bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
402 bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
403 bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
404 bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
405 bool isTFE() const { return isImmTy(ImmTyTFE); }
406 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
407 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
408 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
409 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
410 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
411 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
412 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
413 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
414 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
415 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
416 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
417 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
418 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
419 bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
420 bool isDone() const { return isImmTy(ImmTyDone); }
421 bool isRowEn() const { return isImmTy(ImmTyRowEn); }
422
423 bool isRegOrImm() const {
424 return isReg() || isImm();
425 }
426
427 bool isRegClass(unsigned RCID) const;
428
429 bool isInlineValue() const;
430
431 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
432 return isRegOrInline(RCID, type) && !hasModifiers();
433 }
434
435 bool isSCSrcB16() const {
436 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
437 }
438
439 bool isSCSrcV2B16() const {
440 return isSCSrcB16();
441 }
442
443 bool isSCSrc_b32() const {
444 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
445 }
446
447 bool isSCSrc_b64() const {
448 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
449 }
450
451 bool isBoolReg() const;
452
453 bool isSCSrcF16() const {
454 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
455 }
456
457 bool isSCSrcV2F16() const {
458 return isSCSrcF16();
459 }
460
461 bool isSCSrcF32() const {
462 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
463 }
464
465 bool isSCSrcF64() const {
466 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
467 }
468
469 bool isSSrc_b32() const {
470 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
471 }
472
473 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
474
475 bool isSSrcV2B16() const {
476 llvm_unreachable("cannot happen");
477 return isSSrc_b16();
478 }
479
480 bool isSSrc_b64() const {
481 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
482 // See isVSrc64().
483 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
484 (((const MCTargetAsmParser *)AsmParser)
485 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
486 isExpr());
487 }
488
489 bool isSSrc_f32() const {
490 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
491 }
492
493 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
494
495 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
496
497 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
498
499 bool isSSrcV2F16() const {
500 llvm_unreachable("cannot happen");
501 return isSSrc_f16();
502 }
503
504 bool isSSrcV2FP32() const {
505 llvm_unreachable("cannot happen");
506 return isSSrc_f32();
507 }
508
509 bool isSCSrcV2FP32() const {
510 llvm_unreachable("cannot happen");
511 return isSCSrcF32();
512 }
513
514 bool isSSrcV2INT32() const {
515 llvm_unreachable("cannot happen");
516 return isSSrc_b32();
517 }
518
519 bool isSCSrcV2INT32() const {
520 llvm_unreachable("cannot happen");
521 return isSCSrc_b32();
522 }
523
524 bool isSSrcOrLds_b32() const {
525 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
526 isLiteralImm(MVT::i32) || isExpr();
527 }
528
529 bool isVCSrc_b32() const {
530 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
531 }
532
533 bool isVCSrc_b32_Lo256() const {
534 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
535 }
536
537 bool isVCSrc_b64_Lo256() const {
538 return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
539 }
540
541 bool isVCSrc_b64() const {
542 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
543 }
544
545 bool isVCSrcT_b16() const {
546 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
547 }
548
549 bool isVCSrcTB16_Lo128() const {
550 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
551 }
552
553 bool isVCSrcFake16B16_Lo128() const {
554 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
555 }
556
557 bool isVCSrc_b16() const {
558 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
559 }
560
561 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
562
563 bool isVCSrc_f32() const {
564 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
565 }
566
567 bool isVCSrc_f64() const {
568 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
569 }
570
571 bool isVCSrcTBF16() const {
572 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
573 }
574
575 bool isVCSrcT_f16() const {
576 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
577 }
578
579 bool isVCSrcT_bf16() const {
580 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
581 }
582
583 bool isVCSrcTBF16_Lo128() const {
584 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
585 }
586
587 bool isVCSrcTF16_Lo128() const {
588 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
589 }
590
591 bool isVCSrcFake16BF16_Lo128() const {
592 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
593 }
594
595 bool isVCSrcFake16F16_Lo128() const {
596 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
597 }
598
599 bool isVCSrc_bf16() const {
600 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
601 }
602
603 bool isVCSrc_f16() const {
604 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
605 }
606
607 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
608
609 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
610
611 bool isVSrc_b32() const {
612 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
613 }
614
615 bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
616
617 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
618
619 bool isVSrcT_b16_Lo128() const {
620 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
621 }
622
623 bool isVSrcFake16_b16_Lo128() const {
624 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
625 }
626
627 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
628
629 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
630
631 bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
632
633 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
634
635 bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
636
637 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
638
639 bool isVSrc_f32() const {
640 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
641 }
642
643 bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
644
645 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
646
647 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
648
649 bool isVSrcT_bf16_Lo128() const {
650 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
651 }
652
653 bool isVSrcT_f16_Lo128() const {
654 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
655 }
656
657 bool isVSrcFake16_bf16_Lo128() const {
658 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
659 }
660
661 bool isVSrcFake16_f16_Lo128() const {
662 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
663 }
664
665 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
666
667 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
668
669 bool isVSrc_v2bf16() const {
670 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
671 }
672
673 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
674
675 bool isVSrc_v2f16_splat() const { return isVSrc_v2f16(); }
676
677 bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
678
679 bool isVISrcB32() const {
680 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
681 }
682
683 bool isVISrcB16() const {
684 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
685 }
686
687 bool isVISrcV2B16() const {
688 return isVISrcB16();
689 }
690
691 bool isVISrcF32() const {
692 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
693 }
694
695 bool isVISrcF16() const {
696 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
697 }
698
699 bool isVISrcV2F16() const {
700 return isVISrcF16() || isVISrcB32();
701 }
702
703 bool isVISrc_64_bf16() const {
704 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
705 }
706
707 bool isVISrc_64_f16() const {
708 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
709 }
710
711 bool isVISrc_64_b32() const {
712 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
713 }
714
715 bool isVISrc_64B64() const {
716 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
717 }
718
719 bool isVISrc_64_f64() const {
720 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
721 }
722
723 bool isVISrc_64V2FP32() const {
724 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
725 }
726
727 bool isVISrc_64V2INT32() const {
728 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
729 }
730
731 bool isVISrc_256_b32() const {
732 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
733 }
734
735 bool isVISrc_256_f32() const {
736 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
737 }
738
739 bool isVISrc_256B64() const {
740 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
741 }
742
743 bool isVISrc_256_f64() const {
744 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
745 }
746
747 bool isVISrc_512_f64() const {
748 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
749 }
750
751 bool isVISrc_128B16() const {
752 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
753 }
754
755 bool isVISrc_128V2B16() const {
756 return isVISrc_128B16();
757 }
758
759 bool isVISrc_128_b32() const {
760 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
761 }
762
763 bool isVISrc_128_f32() const {
764 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
765 }
766
767 bool isVISrc_256V2FP32() const {
768 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
769 }
770
771 bool isVISrc_256V2INT32() const {
772 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
773 }
774
775 bool isVISrc_512_b32() const {
776 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
777 }
778
779 bool isVISrc_512B16() const {
780 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
781 }
782
783 bool isVISrc_512V2B16() const {
784 return isVISrc_512B16();
785 }
786
787 bool isVISrc_512_f32() const {
788 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
789 }
790
791 bool isVISrc_512F16() const {
792 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
793 }
794
795 bool isVISrc_512V2F16() const {
796 return isVISrc_512F16() || isVISrc_512_b32();
797 }
798
799 bool isVISrc_1024_b32() const {
800 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
801 }
802
803 bool isVISrc_1024B16() const {
804 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
805 }
806
807 bool isVISrc_1024V2B16() const {
808 return isVISrc_1024B16();
809 }
810
811 bool isVISrc_1024_f32() const {
812 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
813 }
814
815 bool isVISrc_1024F16() const {
816 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
817 }
818
819 bool isVISrc_1024V2F16() const {
820 return isVISrc_1024F16() || isVISrc_1024_b32();
821 }
822
823 bool isAISrcB32() const {
824 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
825 }
826
827 bool isAISrcB16() const {
828 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
829 }
830
831 bool isAISrcV2B16() const {
832 return isAISrcB16();
833 }
834
835 bool isAISrcF32() const {
836 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
837 }
838
839 bool isAISrcF16() const {
840 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
841 }
842
843 bool isAISrcV2F16() const {
844 return isAISrcF16() || isAISrcB32();
845 }
846
847 bool isAISrc_64B64() const {
848 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
849 }
850
851 bool isAISrc_64_f64() const {
852 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
853 }
854
855 bool isAISrc_128_b32() const {
856 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
857 }
858
859 bool isAISrc_128B16() const {
860 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
861 }
862
863 bool isAISrc_128V2B16() const {
864 return isAISrc_128B16();
865 }
866
867 bool isAISrc_128_f32() const {
868 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
869 }
870
871 bool isAISrc_128F16() const {
872 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
873 }
874
875 bool isAISrc_128V2F16() const {
876 return isAISrc_128F16() || isAISrc_128_b32();
877 }
878
879 bool isVISrc_128_bf16() const {
880 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
881 }
882
883 bool isVISrc_128_f16() const {
884 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
885 }
886
887 bool isVISrc_128V2F16() const {
888 return isVISrc_128_f16() || isVISrc_128_b32();
889 }
890
891 bool isAISrc_256B64() const {
892 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
893 }
894
895 bool isAISrc_256_f64() const {
896 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
897 }
898
899 bool isAISrc_512_b32() const {
900 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
901 }
902
903 bool isAISrc_512B16() const {
904 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
905 }
906
907 bool isAISrc_512V2B16() const {
908 return isAISrc_512B16();
909 }
910
911 bool isAISrc_512_f32() const {
912 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
913 }
914
915 bool isAISrc_512F16() const {
916 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
917 }
918
919 bool isAISrc_512V2F16() const {
920 return isAISrc_512F16() || isAISrc_512_b32();
921 }
922
923 bool isAISrc_1024_b32() const {
924 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
925 }
926
927 bool isAISrc_1024B16() const {
928 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
929 }
930
931 bool isAISrc_1024V2B16() const {
932 return isAISrc_1024B16();
933 }
934
935 bool isAISrc_1024_f32() const {
936 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
937 }
938
939 bool isAISrc_1024F16() const {
940 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
941 }
942
943 bool isAISrc_1024V2F16() const {
944 return isAISrc_1024F16() || isAISrc_1024_b32();
945 }
946
947 bool isKImmFP32() const {
948 return isLiteralImm(MVT::f32);
949 }
950
951 bool isKImmFP16() const {
952 return isLiteralImm(MVT::f16);
953 }
954
955 bool isKImmFP64() const { return isLiteralImm(MVT::f64); }
956
957 bool isMem() const override {
958 return false;
959 }
960
961 bool isExpr() const {
962 return Kind == Expression;
963 }
964
965 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
966
967 bool isSWaitCnt() const;
968 bool isDepCtr() const;
969 bool isSDelayALU() const;
970 bool isHwreg() const;
971 bool isSendMsg() const;
972 bool isWaitEvent() const;
973 bool isSplitBarrier() const;
974 bool isSwizzle() const;
975 bool isSMRDOffset8() const;
976 bool isSMEMOffset() const;
977 bool isSMRDLiteralOffset() const;
978 bool isDPP8() const;
979 bool isDPPCtrl() const;
980 bool isBLGP() const;
981 bool isGPRIdxMode() const;
982 bool isS16Imm() const;
983 bool isU16Imm() const;
984 bool isEndpgm() const;
985
986 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
987 return [this, P]() { return P(*this); };
988 }
989
990 StringRef getToken() const {
991 assert(isToken());
992 return StringRef(Tok.Data, Tok.Length);
993 }
994
995 int64_t getImm() const {
996 assert(isImm());
997 return Imm.Val;
998 }
999
1000 void setImm(int64_t Val) {
1001 assert(isImm());
1002 Imm.Val = Val;
1003 }
1004
1005 ImmTy getImmTy() const {
1006 assert(isImm());
1007 return Imm.Type;
1008 }
1009
1010 MCRegister getReg() const override {
1011 assert(isRegKind());
1012 return Reg.RegNo;
1013 }
1014
1015 SMLoc getStartLoc() const override {
1016 return StartLoc;
1017 }
1018
1019 SMLoc getEndLoc() const override {
1020 return EndLoc;
1021 }
1022
1023 SMRange getLocRange() const {
1024 return SMRange(StartLoc, EndLoc);
1025 }
1026
1027 int getMCOpIdx() const { return MCOpIdx; }
1028
1029 Modifiers getModifiers() const {
1030 assert(isRegKind() || isImmTy(ImmTyNone));
1031 return isRegKind() ? Reg.Mods : Imm.Mods;
1032 }
1033
1034 void setModifiers(Modifiers Mods) {
1035 assert(isRegKind() || isImmTy(ImmTyNone));
1036 if (isRegKind())
1037 Reg.Mods = Mods;
1038 else
1039 Imm.Mods = Mods;
1040 }
1041
1042 bool hasModifiers() const {
1043 return getModifiers().hasModifiers();
1044 }
1045
1046 bool hasFPModifiers() const {
1047 return getModifiers().hasFPModifiers();
1048 }
1049
1050 bool hasIntModifiers() const {
1051 return getModifiers().hasIntModifiers();
1052 }
1053
1054 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1055
1056 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1057
1058 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1059
1060 void addRegOperands(MCInst &Inst, unsigned N) const;
1061
1062 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1063 if (isRegKind())
1064 addRegOperands(Inst, N);
1065 else
1066 addImmOperands(Inst, N);
1067 }
1068
1069 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1070 Modifiers Mods = getModifiers();
1071 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1072 if (isRegKind()) {
1073 addRegOperands(Inst, N);
1074 } else {
1075 addImmOperands(Inst, N, false);
1076 }
1077 }
1078
1079 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1080 assert(!hasIntModifiers());
1081 addRegOrImmWithInputModsOperands(Inst, N);
1082 }
1083
1084 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1085 assert(!hasFPModifiers());
1086 addRegOrImmWithInputModsOperands(Inst, N);
1087 }
1088
1089 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1090 Modifiers Mods = getModifiers();
1091 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1092 assert(isRegKind());
1093 addRegOperands(Inst, N);
1094 }
1095
1096 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1097 assert(!hasIntModifiers());
1098 addRegWithInputModsOperands(Inst, N);
1099 }
1100
1101 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1102 assert(!hasFPModifiers());
1103 addRegWithInputModsOperands(Inst, N);
1104 }
1105
1106 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1107 // clang-format off
1108 switch (Type) {
1109 case ImmTyNone: OS << "None"; break;
1110 case ImmTyGDS: OS << "GDS"; break;
1111 case ImmTyLDS: OS << "LDS"; break;
1112 case ImmTyOffen: OS << "Offen"; break;
1113 case ImmTyIdxen: OS << "Idxen"; break;
1114 case ImmTyAddr64: OS << "Addr64"; break;
1115 case ImmTyOffset: OS << "Offset"; break;
1116 case ImmTyInstOffset: OS << "InstOffset"; break;
1117 case ImmTyOffset0: OS << "Offset0"; break;
1118 case ImmTyOffset1: OS << "Offset1"; break;
1119 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1120 case ImmTyCPol: OS << "CPol"; break;
1121 case ImmTyIndexKey8bit: OS << "index_key"; break;
1122 case ImmTyIndexKey16bit: OS << "index_key"; break;
1123 case ImmTyIndexKey32bit: OS << "index_key"; break;
1124 case ImmTyTFE: OS << "TFE"; break;
1125 case ImmTyIsAsync: OS << "IsAsync"; break;
1126 case ImmTyD16: OS << "D16"; break;
1127 case ImmTyFORMAT: OS << "FORMAT"; break;
1128 case ImmTyClamp: OS << "Clamp"; break;
1129 case ImmTyOModSI: OS << "OModSI"; break;
1130 case ImmTyDPP8: OS << "DPP8"; break;
1131 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1132 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1133 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1134 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1135 case ImmTyDppFI: OS << "DppFI"; break;
1136 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1137 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1138 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1139 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1140 case ImmTyDMask: OS << "DMask"; break;
1141 case ImmTyDim: OS << "Dim"; break;
1142 case ImmTyUNorm: OS << "UNorm"; break;
1143 case ImmTyDA: OS << "DA"; break;
1144 case ImmTyR128A16: OS << "R128A16"; break;
1145 case ImmTyA16: OS << "A16"; break;
1146 case ImmTyLWE: OS << "LWE"; break;
1147 case ImmTyOff: OS << "Off"; break;
1148 case ImmTyExpTgt: OS << "ExpTgt"; break;
1149 case ImmTyExpCompr: OS << "ExpCompr"; break;
1150 case ImmTyExpVM: OS << "ExpVM"; break;
1151 case ImmTyDone: OS << "Done"; break;
1152 case ImmTyRowEn: OS << "RowEn"; break;
1153 case ImmTyHwreg: OS << "Hwreg"; break;
1154 case ImmTySendMsg: OS << "SendMsg"; break;
1155 case ImmTyWaitEvent: OS << "WaitEvent"; break;
1156 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1157 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1158 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1159 case ImmTyOpSel: OS << "OpSel"; break;
1160 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1161 case ImmTyNegLo: OS << "NegLo"; break;
1162 case ImmTyNegHi: OS << "NegHi"; break;
1163 case ImmTySwizzle: OS << "Swizzle"; break;
1164 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1165 case ImmTyHigh: OS << "High"; break;
1166 case ImmTyBLGP: OS << "BLGP"; break;
1167 case ImmTyCBSZ: OS << "CBSZ"; break;
1168 case ImmTyABID: OS << "ABID"; break;
1169 case ImmTyEndpgm: OS << "Endpgm"; break;
1170 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1171 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1172 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1173 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1174 case ImmTyBitOp3: OS << "BitOp3"; break;
1175 case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
1176 case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
1177 case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
1178 case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
1179 case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
1180 case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
1181 case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
1182 case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
1183 case ImmTyScaleSel: OS << "ScaleSel" ; break;
1184 case ImmTyByteSel: OS << "ByteSel" ; break;
1185 }
1186 // clang-format on
1187 }
1188
1189 void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
1190 switch (Kind) {
1191 case Register:
1192 OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
1193 << " mods: " << Reg.Mods << '>';
1194 break;
1195 case Immediate:
1196 OS << '<' << getImm();
1197 if (getImmTy() != ImmTyNone) {
1198 OS << " type: "; printImmTy(OS, getImmTy());
1199 }
1200 OS << " mods: " << Imm.Mods << '>';
1201 break;
1202 case Token:
1203 OS << '\'' << getToken() << '\'';
1204 break;
1205 case Expression:
1206 OS << "<expr ";
1207 MAI.printExpr(OS, *Expr);
1208 OS << '>';
1209 break;
1210 }
1211 }
1212
1213 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1214 int64_t Val, SMLoc Loc,
1215 ImmTy Type = ImmTyNone,
1216 bool IsFPImm = false) {
1217 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1218 Op->Imm.Val = Val;
1219 Op->Imm.IsFPImm = IsFPImm;
1220 Op->Imm.Type = Type;
1221 Op->Imm.Mods = Modifiers();
1222 Op->StartLoc = Loc;
1223 Op->EndLoc = Loc;
1224 return Op;
1225 }
1226
1227 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1228 StringRef Str, SMLoc Loc,
1229 bool HasExplicitEncodingSize = true) {
1230 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1231 Res->Tok.Data = Str.data();
1232 Res->Tok.Length = Str.size();
1233 Res->StartLoc = Loc;
1234 Res->EndLoc = Loc;
1235 return Res;
1236 }
1237
1238 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1239 MCRegister Reg, SMLoc S, SMLoc E) {
1240 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1241 Op->Reg.RegNo = Reg;
1242 Op->Reg.Mods = Modifiers();
1243 Op->StartLoc = S;
1244 Op->EndLoc = E;
1245 return Op;
1246 }
1247
1248 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1249 const class MCExpr *Expr, SMLoc S) {
1250 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1251 Op->Expr = Expr;
1252 Op->StartLoc = S;
1253 Op->EndLoc = S;
1254 return Op;
1255 }
1256};
1257
1258raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1259 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1260 return OS;
1261}
1262
1263//===----------------------------------------------------------------------===//
1264// AsmParser
1265//===----------------------------------------------------------------------===//
1266
1267// TODO: define GET_SUBTARGET_FEATURE_NAME
1268#define GET_REGISTER_MATCHER
1269#include "AMDGPUGenAsmMatcher.inc"
1270#undef GET_REGISTER_MATCHER
1271#undef GET_SUBTARGET_FEATURE_NAME
1272
1273// Holds info related to the current kernel, e.g. count of SGPRs used.
1274// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1275// .amdgpu_hsa_kernel or at EOF.
1276class KernelScopeInfo {
1277 int SgprIndexUnusedMin = -1;
1278 int VgprIndexUnusedMin = -1;
1279 int AgprIndexUnusedMin = -1;
1280 MCContext *Ctx = nullptr;
1281 MCSubtargetInfo const *MSTI = nullptr;
1282
1283 void usesSgprAt(int i) {
1284 if (i >= SgprIndexUnusedMin) {
1285 SgprIndexUnusedMin = ++i;
1286 if (Ctx) {
1287 MCSymbol* const Sym =
1288 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1289 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1290 }
1291 }
1292 }
1293
1294 void usesVgprAt(int i) {
1295 if (i >= VgprIndexUnusedMin) {
1296 VgprIndexUnusedMin = ++i;
1297 if (Ctx) {
1298 MCSymbol* const Sym =
1299 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1300 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1301 VgprIndexUnusedMin);
1302 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1303 }
1304 }
1305 }
1306
1307 void usesAgprAt(int i) {
1308 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1309 if (!hasMAIInsts(*MSTI))
1310 return;
1311
1312 if (i >= AgprIndexUnusedMin) {
1313 AgprIndexUnusedMin = ++i;
1314 if (Ctx) {
1315 MCSymbol* const Sym =
1316 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1317 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1318
1319 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1320 MCSymbol* const vSym =
1321 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1322 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1323 VgprIndexUnusedMin);
1324 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1325 }
1326 }
1327 }
1328
1329public:
1330 KernelScopeInfo() = default;
1331
1332 void initialize(MCContext &Context) {
1333 Ctx = &Context;
1334 MSTI = Ctx->getSubtargetInfo();
1335
1336 usesSgprAt(SgprIndexUnusedMin = -1);
1337 usesVgprAt(VgprIndexUnusedMin = -1);
1338 if (hasMAIInsts(*MSTI)) {
1339 usesAgprAt(AgprIndexUnusedMin = -1);
1340 }
1341 }
1342
1343 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1344 unsigned RegWidth) {
1345 switch (RegKind) {
1346 case IS_SGPR:
1347 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1348 break;
1349 case IS_AGPR:
1350 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1351 break;
1352 case IS_VGPR:
1353 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1354 break;
1355 default:
1356 break;
1357 }
1358 }
1359};
1360
1361class AMDGPUAsmParser : public MCTargetAsmParser {
1362 MCAsmParser &Parser;
1363
1364 unsigned ForcedEncodingSize = 0;
1365 bool ForcedDPP = false;
1366 bool ForcedSDWA = false;
1367 KernelScopeInfo KernelScope;
1368 const unsigned HwMode;
1369
1370 /// @name Auto-generated Match Functions
1371 /// {
1372
1373#define GET_ASSEMBLER_HEADER
1374#include "AMDGPUGenAsmMatcher.inc"
1375
1376 /// }
1377
1378 /// Get size of register operand
1379 unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
1380 assert(OpNo < Desc.NumOperands);
1381 int16_t RCID = MII.getOpRegClassID(Desc.operands()[OpNo], HwMode);
1382 return getRegBitWidth(RCID) / 8;
1383 }
1384
1385private:
1386 void createConstantSymbol(StringRef Id, int64_t Val);
1387
1388 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1389 bool OutOfRangeError(SMRange Range);
1390 /// Calculate VGPR/SGPR blocks required for given target, reserved
1391 /// registers, and user-specified NextFreeXGPR values.
1392 ///
1393 /// \param Features [in] Target features, used for bug corrections.
1394 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1395 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1396 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1397 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1398 /// descriptor field, if valid.
1399 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1400 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1401 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1402 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1403 /// \param VGPRBlocks [out] Result VGPR block count.
1404 /// \param SGPRBlocks [out] Result SGPR block count.
1405 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1406 const MCExpr *FlatScrUsed, bool XNACKUsed,
1407 std::optional<bool> EnableWavefrontSize32,
1408 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1409 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1410 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
1411 bool ParseDirectiveAMDGCNTarget();
1412 bool ParseDirectiveAMDHSACodeObjectVersion();
1413 bool ParseDirectiveAMDHSAKernel();
1414 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1415 bool ParseDirectiveAMDKernelCodeT();
1416 // TODO: Possibly make subtargetHasRegister const.
1417 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1418 bool ParseDirectiveAMDGPUHsaKernel();
1419
1420 bool ParseDirectiveISAVersion();
1421 bool ParseDirectiveHSAMetadata();
1422 bool ParseDirectivePALMetadataBegin();
1423 bool ParseDirectivePALMetadata();
1424 bool ParseDirectiveAMDGPULDS();
1425
1426 /// Common code to parse out a block of text (typically YAML) between start and
1427 /// end directives.
1428 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1429 const char *AssemblerDirectiveEnd,
1430 std::string &CollectString);
1431
1432 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1433 RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
1434 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1435 unsigned &RegNum, unsigned &RegWidth,
1436 bool RestoreOnFailure = false);
1437 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1438 unsigned &RegNum, unsigned &RegWidth,
1439 SmallVectorImpl<AsmToken> &Tokens);
1440 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1441 unsigned &RegWidth,
1442 SmallVectorImpl<AsmToken> &Tokens);
1443 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1444 unsigned &RegWidth,
1445 SmallVectorImpl<AsmToken> &Tokens);
1446 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1447 unsigned &RegWidth,
1448 SmallVectorImpl<AsmToken> &Tokens);
1449 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1450 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1451 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1452
1453 bool isRegister();
1454 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1455 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1456 void initializeGprCountSymbol(RegisterKind RegKind);
1457 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1458 unsigned RegWidth);
1459 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1460 bool IsAtomic);
1461
1462public:
1463 enum OperandMode {
1464 OperandMode_Default,
1465 OperandMode_NSA,
1466 };
1467
1468 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1469
1470 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1471 const MCInstrInfo &MII, const MCTargetOptions &Options)
1472 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser),
1473 HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
1475
1476 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1477
1478 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1479 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1480 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1481 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1482 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1483 } else {
1484 createConstantSymbol(".option.machine_version_major", ISA.Major);
1485 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1486 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1487 }
1488 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1489 initializeGprCountSymbol(IS_VGPR);
1490 initializeGprCountSymbol(IS_SGPR);
1491 } else
1492 KernelScope.initialize(getContext());
1493
1494 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1495 createConstantSymbol(Symbol, Code);
1496
1497 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1498 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1499 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1500 }
1501
1502 bool hasMIMG_R128() const {
1503 return AMDGPU::hasMIMG_R128(getSTI());
1504 }
1505
1506 bool hasPackedD16() const {
1507 return AMDGPU::hasPackedD16(getSTI());
1508 }
1509
1510 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1511
1512 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1513
1514 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1515
1516 bool isSI() const {
1517 return AMDGPU::isSI(getSTI());
1518 }
1519
1520 bool isCI() const {
1521 return AMDGPU::isCI(getSTI());
1522 }
1523
1524 bool isVI() const {
1525 return AMDGPU::isVI(getSTI());
1526 }
1527
1528 bool isGFX9() const {
1529 return AMDGPU::isGFX9(getSTI());
1530 }
1531
1532 // TODO: isGFX90A is also true for GFX940. We need to clean it.
1533 bool isGFX90A() const {
1534 return AMDGPU::isGFX90A(getSTI());
1535 }
1536
1537 bool isGFX940() const {
1538 return AMDGPU::isGFX940(getSTI());
1539 }
1540
1541 bool isGFX9Plus() const {
1542 return AMDGPU::isGFX9Plus(getSTI());
1543 }
1544
1545 bool isGFX10() const {
1546 return AMDGPU::isGFX10(getSTI());
1547 }
1548
1549 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1550
1551 bool isGFX11() const {
1552 return AMDGPU::isGFX11(getSTI());
1553 }
1554
1555 bool isGFX11Plus() const {
1556 return AMDGPU::isGFX11Plus(getSTI());
1557 }
1558
1559 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1560
1561 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1562
1563 bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
1564
1565 bool isGFX1250Plus() const { return AMDGPU::isGFX1250Plus(getSTI()); }
1566
1567 bool isGFX13() const { return AMDGPU::isGFX13(getSTI()); }
1568
1569 bool isGFX13Plus() const { return AMDGPU::isGFX13Plus(getSTI()); }
1570
1571 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1572
1573 bool isGFX10_BEncoding() const {
1574 return AMDGPU::isGFX10_BEncoding(getSTI());
1575 }
1576
1577 bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }
1578
1579 bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }
1580
1581 bool hasInv2PiInlineImm() const {
1582 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1583 }
1584
1585 bool has64BitLiterals() const {
1586 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1587 }
1588
1589 bool hasFlatOffsets() const {
1590 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1591 }
1592
1593 bool hasTrue16Insts() const {
1594 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1595 }
1596
1597 bool hasArchitectedFlatScratch() const {
1598 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1599 }
1600
1601 bool hasSGPR102_SGPR103() const {
1602 return !isVI() && !isGFX9();
1603 }
1604
1605 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1606
1607 bool hasIntClamp() const {
1608 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1609 }
1610
1611 bool hasPartialNSAEncoding() const {
1612 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1613 }
1614
1615 bool hasGloballyAddressableScratch() const {
1616 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
1617 }
1618
1619 unsigned getNSAMaxSize(bool HasSampler = false) const {
1620 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1621 }
1622
1623 unsigned getMaxNumUserSGPRs() const {
1624 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1625 }
1626
1627 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1628
1629 AMDGPUTargetStreamer &getTargetStreamer() {
1630 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1631 return static_cast<AMDGPUTargetStreamer &>(TS);
1632 }
1633
1634 MCContext &getContext() const {
1635 // We need this const_cast because for some reason getContext() is not const
1636 // in MCAsmParser.
1637 return const_cast<AMDGPUAsmParser *>(this)->MCTargetAsmParser::getContext();
1638 }
1639
1640 const MCRegisterInfo *getMRI() const {
1641 return getContext().getRegisterInfo();
1642 }
1643
1644 const MCInstrInfo *getMII() const {
1645 return &MII;
1646 }
1647
1648 // FIXME: This should not be used. Instead, should use queries derived from
1649 // getAvailableFeatures().
1650 const FeatureBitset &getFeatureBits() const {
1651 return getSTI().getFeatureBits();
1652 }
1653
1654 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1655 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1656 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1657
1658 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1659 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1660 bool isForcedDPP() const { return ForcedDPP; }
1661 bool isForcedSDWA() const { return ForcedSDWA; }
1662 ArrayRef<unsigned> getMatchedVariants() const;
1663 StringRef getMatchedVariantName() const;
1664
1665 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1666 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1667 bool RestoreOnFailure);
1668 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1669 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1670 SMLoc &EndLoc) override;
1671 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1672 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1673 unsigned Kind) override;
1674 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1675 OperandVector &Operands, MCStreamer &Out,
1676 uint64_t &ErrorInfo,
1677 bool MatchingInlineAsm) override;
1678 bool ParseDirective(AsmToken DirectiveID) override;
1679 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1680 OperandMode Mode = OperandMode_Default);
1681 StringRef parseMnemonicSuffix(StringRef Name);
1682 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1683 SMLoc NameLoc, OperandVector &Operands) override;
1684 //bool ProcessInstruction(MCInst &Inst);
1685
1686 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1687
1688 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1689
1690 ParseStatus
1691 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1692 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1693 std::function<bool(int64_t &)> ConvertResult = nullptr);
1694
1695 ParseStatus parseOperandArrayWithPrefix(
1696 const char *Prefix, OperandVector &Operands,
1697 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1698 bool (*ConvertResult)(int64_t &) = nullptr);
1699
1700 ParseStatus
1701 parseNamedBit(StringRef Name, OperandVector &Operands,
1702 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1703 bool IgnoreNegative = false);
1704 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1705 ParseStatus parseCPol(OperandVector &Operands);
1706 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1707 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1708 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1709 SMLoc &StringLoc);
1710 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1711 StringRef Name,
1712 ArrayRef<const char *> Ids,
1713 int64_t &IntVal);
1714 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1715 StringRef Name,
1716 ArrayRef<const char *> Ids,
1717 AMDGPUOperand::ImmTy Type);
1718
1719 bool isModifier();
1720 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1721 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1722 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1723 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1724 bool parseSP3NegModifier();
1725 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1726 LitModifier Lit = LitModifier::None);
1727 ParseStatus parseReg(OperandVector &Operands);
1728 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1729 LitModifier Lit = LitModifier::None);
1730 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1731 bool AllowImm = true);
1732 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1733 bool AllowImm = true);
1734 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1735 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1736 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1737 ParseStatus tryParseIndexKey(OperandVector &Operands,
1738 AMDGPUOperand::ImmTy ImmTy);
1739 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1740 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1741 ParseStatus parseIndexKey32bit(OperandVector &Operands);
1742 ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
1743 AMDGPUOperand::ImmTy Type);
1744 ParseStatus parseMatrixAFMT(OperandVector &Operands);
1745 ParseStatus parseMatrixBFMT(OperandVector &Operands);
1746 ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
1747 AMDGPUOperand::ImmTy Type);
1748 ParseStatus parseMatrixAScale(OperandVector &Operands);
1749 ParseStatus parseMatrixBScale(OperandVector &Operands);
1750 ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
1751 AMDGPUOperand::ImmTy Type);
1752 ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
1753 ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
1754
1755 ParseStatus parseDfmtNfmt(int64_t &Format);
1756 ParseStatus parseUfmt(int64_t &Format);
1757 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1758 int64_t &Format);
1759 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1760 int64_t &Format);
1761 ParseStatus parseFORMAT(OperandVector &Operands);
1762 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1763 ParseStatus parseNumericFormat(int64_t &Format);
1764 ParseStatus parseFlatOffset(OperandVector &Operands);
1765 ParseStatus parseR128A16(OperandVector &Operands);
1766 ParseStatus parseBLGP(OperandVector &Operands);
1767 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1768 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1769
1770 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1771
1772 bool parseCnt(int64_t &IntVal);
1773 ParseStatus parseSWaitCnt(OperandVector &Operands);
1774
1775 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1776 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1777 ParseStatus parseDepCtr(OperandVector &Operands);
1778
1779 bool parseDelay(int64_t &Delay);
1780 ParseStatus parseSDelayALU(OperandVector &Operands);
1781
1782 ParseStatus parseHwreg(OperandVector &Operands);
1783
1784private:
1785 struct OperandInfoTy {
1786 SMLoc Loc;
1787 int64_t Val;
1788 bool IsSymbolic = false;
1789 bool IsDefined = false;
1790
1791 constexpr OperandInfoTy(int64_t Val) : Val(Val) {}
1792 };
1793
1794 struct StructuredOpField : OperandInfoTy {
1795 StringLiteral Id;
1796 StringLiteral Desc;
1797 unsigned Width;
1798 bool IsDefined = false;
1799
1800 constexpr StructuredOpField(StringLiteral Id, StringLiteral Desc,
1801 unsigned Width, int64_t Default)
1802 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1803 virtual ~StructuredOpField() = default;
1804
1805 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1806 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1807 return false;
1808 }
1809
1810 virtual bool validate(AMDGPUAsmParser &Parser) const {
1811 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1812 return Error(Parser, "not supported on this GPU");
1813 if (!isUIntN(Width, Val))
1814 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1815 return true;
1816 }
1817 };
1818
1819 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1820 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1821
1822 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1823 bool validateSendMsg(const OperandInfoTy &Msg,
1824 const OperandInfoTy &Op,
1825 const OperandInfoTy &Stream);
1826
1827 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1828 OperandInfoTy &Width);
1829
1830 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1831
1832 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1833 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1834 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1835
1836 SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
1837 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1838 const OperandVector &Operands) const;
1839 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
1840 const OperandVector &Operands) const;
1841 SMLoc getInstLoc(const OperandVector &Operands) const;
1842
1843 bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
1844 const OperandVector &Operands);
1845 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1846 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1847 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1848 bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1849 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1850 std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1851 bool AsVOPD3);
1852 bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1853 bool tryVOPD(const MCInst &Inst);
1854 bool tryVOPD3(const MCInst &Inst);
1855 bool tryAnotherVOPDEncoding(const MCInst &Inst);
1856
1857 bool validateIntClampSupported(const MCInst &Inst);
1858 bool validateMIMGAtomicDMask(const MCInst &Inst);
1859 bool validateMIMGGatherDMask(const MCInst &Inst);
1860 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1861 bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
1862 bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
1863 bool validateMIMGD16(const MCInst &Inst);
1864 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1865 bool validateTensorR128(const MCInst &Inst);
1866 bool validateMIMGMSAA(const MCInst &Inst);
1867 bool validateOpSel(const MCInst &Inst);
1868 bool validateTrue16OpSel(const MCInst &Inst);
1869 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1870 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1871 bool validateVccOperand(MCRegister Reg) const;
1872 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1873 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1874 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1875 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1876 bool validateAGPRLdSt(const MCInst &Inst) const;
1877 bool validateVGPRAlign(const MCInst &Inst) const;
1878 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1879 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1880 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1881 bool validateDivScale(const MCInst &Inst);
1882 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1883 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1884 SMLoc IDLoc);
1885 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1886 const unsigned CPol);
1887 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1888 bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
1889 bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1890 unsigned getConstantBusLimit(unsigned Opcode) const;
1891 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1892 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1893 MCRegister findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1894
1895 bool isSupportedMnemo(StringRef Mnemo,
1896 const FeatureBitset &FBS);
1897 bool isSupportedMnemo(StringRef Mnemo,
1898 const FeatureBitset &FBS,
1899 ArrayRef<unsigned> Variants);
1900 bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
1901
1902 bool isId(const StringRef Id) const;
1903 bool isId(const AsmToken &Token, const StringRef Id) const;
1904 bool isToken(const AsmToken::TokenKind Kind) const;
1905 StringRef getId() const;
1906 bool trySkipId(const StringRef Id);
1907 bool trySkipId(const StringRef Pref, const StringRef Id);
1908 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1909 bool trySkipToken(const AsmToken::TokenKind Kind);
1910 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1911 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1912 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1913
1914 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1915 AsmToken::TokenKind getTokenKind() const;
1916 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1917 bool parseExpr(OperandVector &Operands);
1918 StringRef getTokenStr() const;
1919 AsmToken peekToken(bool ShouldSkipSpace = true);
1920 AsmToken getToken() const;
1921 SMLoc getLoc() const;
1922 void lex();
1923
1924public:
1925 void onBeginOfFile() override;
1926 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1927
1928 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1929
1930 ParseStatus parseExpTgt(OperandVector &Operands);
1931 ParseStatus parseSendMsg(OperandVector &Operands);
1932 ParseStatus parseWaitEvent(OperandVector &Operands);
1933 ParseStatus parseInterpSlot(OperandVector &Operands);
1934 ParseStatus parseInterpAttr(OperandVector &Operands);
1935 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1936 ParseStatus parseBoolReg(OperandVector &Operands);
1937
1938 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1939 const unsigned MaxVal, const Twine &ErrMsg,
1940 SMLoc &Loc);
1941 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1942 const unsigned MinVal,
1943 const unsigned MaxVal,
1944 const StringRef ErrMsg);
1945 ParseStatus parseSwizzle(OperandVector &Operands);
1946 bool parseSwizzleOffset(int64_t &Imm);
1947 bool parseSwizzleMacro(int64_t &Imm);
1948 bool parseSwizzleQuadPerm(int64_t &Imm);
1949 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1950 bool parseSwizzleBroadcast(int64_t &Imm);
1951 bool parseSwizzleSwap(int64_t &Imm);
1952 bool parseSwizzleReverse(int64_t &Imm);
1953 bool parseSwizzleFFT(int64_t &Imm);
1954 bool parseSwizzleRotate(int64_t &Imm);
1955
1956 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1957 int64_t parseGPRIdxMacro();
1958
1959 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1960 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1961
1962 ParseStatus parseOModSI(OperandVector &Operands);
1963
1964 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1965 OptionalImmIndexMap &OptionalIdx);
1966 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1967 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1968 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1969 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1970 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1971
1972 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1973 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1974 OptionalImmIndexMap &OptionalIdx);
1975 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1976 OptionalImmIndexMap &OptionalIdx);
1977
1978 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1979 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1980 void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);
1981
1982 bool parseDimId(unsigned &Encoding);
1983 ParseStatus parseDim(OperandVector &Operands);
1984 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1985 ParseStatus parseDPP8(OperandVector &Operands);
1986 ParseStatus parseDPPCtrl(OperandVector &Operands);
1987 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1988 int64_t parseDPPCtrlSel(StringRef Ctrl);
1989 int64_t parseDPPCtrlPerm();
1990 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1991 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1992 cvtDPP(Inst, Operands, true);
1993 }
1994 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1995 bool IsDPP8 = false);
1996 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1997 cvtVOP3DPP(Inst, Operands, true);
1998 }
1999
2000 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
2001 AMDGPUOperand::ImmTy Type);
2002 ParseStatus parseSDWADstUnused(OperandVector &Operands);
2003 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
2004 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
2005 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
2006 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
2007 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
2008 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
2009 uint64_t BasicInstType,
2010 bool SkipDstVcc = false,
2011 bool SkipSrcVcc = false);
2012
2013 ParseStatus parseEndpgm(OperandVector &Operands);
2014
2015 ParseStatus parseVOPD(OperandVector &Operands);
2016};
2017
2018} // end anonymous namespace
2019
2020// May be called with integer type with equivalent bitwidth.
2021static const fltSemantics *getFltSemantics(unsigned Size) {
2022 switch (Size) {
2023 case 4:
2024 return &APFloat::IEEEsingle();
2025 case 8:
2026 return &APFloat::IEEEdouble();
2027 case 2:
2028 return &APFloat::IEEEhalf();
2029 default:
2030 llvm_unreachable("unsupported fp type");
2031 }
2032}
2033
2035 return getFltSemantics(VT.getSizeInBits() / 8);
2036}
2037
2039 switch (OperandType) {
2040 // When floating-point immediate is used as operand of type i16, the 32-bit
2041 // representation of the constant truncated to the 16 LSBs should be used.
2056 return &APFloat::IEEEsingle();
2063 return &APFloat::IEEEdouble();
2071 return &APFloat::IEEEhalf();
2076 return &APFloat::BFloat();
2077 default:
2078 llvm_unreachable("unsupported fp type");
2079 }
2080}
2081
2082//===----------------------------------------------------------------------===//
2083// Operand
2084//===----------------------------------------------------------------------===//
2085
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  // Returns true if FPLiteral can be converted to the FP type matching VT
  // without overflow or underflow. Precision loss (rounding) is tolerated.
  // NOTE: FPLiteral is converted in place as a side effect.
  bool Lost;

  // Convert literal to single precision
                                              &Lost);
  // We allow precision lost but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}
2103
static bool isSafeTruncation(int64_t Val, unsigned Size) {
  // A value can be truncated to Size bits without losing information if it
  // is representable either as an unsigned or as a signed Size-bit integer.
  if (Size >= 64)
    return true;
  const uint64_t MaxUnsigned = (uint64_t(1) << Size) - 1;
  const int64_t MaxSigned = static_cast<int64_t>(MaxUnsigned >> 1);
  const int64_t MinSigned = -MaxSigned - 1;
  return static_cast<uint64_t>(Val) <= MaxUnsigned ||
         (Val >= MinSigned && Val <= MaxSigned);
}
2107
2108static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2109 if (VT.getScalarType() == MVT::i16)
2110 return isInlinableLiteral32(Val, HasInv2Pi);
2111
2112 if (VT.getScalarType() == MVT::f16)
2113 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2114
2115 assert(VT.getScalarType() == MVT::bf16);
2116
2117 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2118}
2119
// Returns true if this immediate operand can be encoded as an inline
// constant for an operand of the given machine value type.
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }

  // An explicit lit()/lit64() modifier forces the literal encoding.
  if (getModifiers().Lit != LitModifier::None)
    return false;

  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      bool Lost = false;
      switch (type.getScalarType().SimpleTy) {
      default:
        llvm_unreachable("unknown 16-bit type");
      case MVT::bf16:
        FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
                          &Lost);
        break;
      case MVT::f16:
        FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
                          &Lost);
        break;
      case MVT::i16:
        FPLiteral.convert(APFloatBase::IEEEsingle(),
                          APFloat::rmNearestTiesToEven, &Lost);
        break;
      }
      // We need to use 32-bit representation here because when a floating-point
      // inline constant is used as an i16 operand, its 32-bit representation
      // will be used. We will need the 32-bit value to check if
      // it is FP inline constant.
      uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      return isInlineableLiteralOp16(ImmVal, type,
                                     AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
        static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
        static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
  }

      static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
}
2207
2208bool AMDGPUOperand::isLiteralImm(MVT type) const {
2209 // Check that this immediate can be added as literal
2210 if (!isImmTy(ImmTyNone)) {
2211 return false;
2212 }
2213
2214 bool Allow64Bit =
2215 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2216
2217 if (!Imm.IsFPImm) {
2218 // We got int literal token.
2219
2220 if (type == MVT::f64 && hasFPModifiers()) {
2221 // Cannot apply fp modifiers to int literals preserving the same semantics
2222 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2223 // disable these cases.
2224 return false;
2225 }
2226
2227 unsigned Size = type.getSizeInBits();
2228 if (Size == 64) {
2229 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2230 return true;
2231 Size = 32;
2232 }
2233
2234 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2235 // types.
2236 return isSafeTruncation(Imm.Val, Size);
2237 }
2238
2239 // We got fp literal token
2240 if (type == MVT::f64) { // Expected 64-bit fp operand
2241 // We would set low 64-bits of literal to zeroes but we accept this literals
2242 return true;
2243 }
2244
2245 if (type == MVT::i64) { // Expected 64-bit int operand
2246 // We don't allow fp literals in 64-bit integer instructions. It is
2247 // unclear how we should encode them.
2248 return false;
2249 }
2250
2251 // We allow fp literals with f16x2 operands assuming that the specified
2252 // literal goes into the lower half and the upper half is zero. We also
2253 // require that the literal may be losslessly converted to f16.
2254 //
2255 // For i16x2 operands, we assume that the specified literal is encoded as a
2256 // single-precision float. This is pretty odd, but it matches SP3 and what
2257 // happens in hardware.
2258 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2259 : (type == MVT::v2i16) ? MVT::f32
2260 : (type == MVT::v2f32) ? MVT::f32
2261 : type;
2262
2263 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2264 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2265}
2266
2267bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2268 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2269}
2270
2271bool AMDGPUOperand::isVRegWithInputMods() const {
2272 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2273 // GFX90A allows DPP on 64-bit operands.
2274 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2275 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2276}
2277
2278template <bool IsFake16>
2279bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2280 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2281 : AMDGPU::VGPR_16_Lo128RegClassID);
2282}
2283
2284template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2285 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2286 : AMDGPU::VGPR_16RegClassID);
2287}
2288
2289bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2290 if (AsmParser->isVI())
2291 return isVReg32();
2292 if (AsmParser->isGFX9Plus())
2293 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2294 return false;
2295}
2296
bool AMDGPUOperand::isSDWAFP16Operand() const {
  // SDWA operand checked against the f16 inline-immediate rules.
  return isSDWAOperand(MVT::f16);
}
2300
bool AMDGPUOperand::isSDWAFP32Operand() const {
  // SDWA operand checked against the f32 inline-immediate rules.
  return isSDWAOperand(MVT::f32);
}
2304
bool AMDGPUOperand::isSDWAInt16Operand() const {
  // SDWA operand checked against the i16 inline-immediate rules.
  return isSDWAOperand(MVT::i16);
}
2308
bool AMDGPUOperand::isSDWAInt32Operand() const {
  // SDWA operand checked against the i32 inline-immediate rules.
  return isSDWAOperand(MVT::i32);
}
2312
2313bool AMDGPUOperand::isBoolReg() const {
2314 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2315 (AsmParser->isWave32() && isSCSrc_b32()));
2316}
2317
2318uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2319{
2320 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2321 assert(Size == 2 || Size == 4 || Size == 8);
2322
2323 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2324
2325 if (Imm.Mods.Abs) {
2326 Val &= ~FpSignMask;
2327 }
2328 if (Imm.Mods.Neg) {
2329 Val ^= FpSignMask;
2330 }
2331
2332 return Val;
2333}
2334
// Append this immediate operand to Inst, routing SI source operands through
// addLiteralImmOperand() for literal/inline-constant handling.
void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  // Remember where this operand lands in the MCInst.
  MCOpIdx = Inst.getNumOperands();

  if (isExpr()) {
    return;
  }

  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    // NOTE(review): 'ApplyModifiers &' is a bitwise AND (not '&&'); the
    // result is the same for bool operands, but it reads like a typo.
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    // Non-source operands must not carry modifiers.
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
  }
}
2353
// Append Val to Inst as either an inline constant or a literal (possibly
// wrapped in lit()/lit64()), depending on the operand type, the target's
// literal support, and any explicit lit modifier on the operand.
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;

  // 64-bit literal encodings are not used for VOP3/VOP3P instructions here.
  bool CanUse64BitLiterals =
      AsmParser->has64BitLiterals() &&
      !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
  LitModifier Lit = getModifiers().Lit;
  MCContext &Ctx = AsmParser->getContext();

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
      // 64-bit operand types: prefer the inline-constant encoding when
      // no explicit lit modifier was given.
      if (Lit == LitModifier::None &&
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc,
                                   OpNum)) { // Expected 64-bit fp operand
        bool HasMandatoryLiteral =
            AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0 &&
            (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
            !HasMandatoryLiteral) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
              Inst.getLoc(),
              "Can't encode literal as exact 64-bit floating-point operand. "
              "Low 32-bits will be set to zero");
          Val &= 0xffffffff00000000u;
        }

        if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 ||
          if (CanUse64BitLiterals && Lit == LitModifier::None &&
              (isInt<32>(Val) || isUInt<32>(Val))) {
            // The floating-point operand will be verbalized as an
            // integer one. If that integer happens to fit 32 bits, on
            // re-assembling it will be intepreted as the high half of
            // the actual value, so we have to wrap it into lit64().
            Lit = LitModifier::Lit64;
          } else if (Lit == LitModifier::Lit) {
            // For FP64 operands lit() specifies the high half of the value.
            Val = Hi_32(Val);
          }
        }
        break;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

      if (CanUse64BitLiterals && Lit == LitModifier::None &&
          (isInt<32>(Val) || isUInt<32>(Val)))
        Lit = LitModifier::Lit64;
      break;

      if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
          Literal == 0x3fc45f306725feed) {
        // This is the 1/(2*pi) which is going to be truncated to bf16 with the
        // loss of precision. The constant represents ideomatic fp32 value of
        // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
        // bits. Prevent rounding below.
        Inst.addOperand(MCOperand::createImm(0x3e22));
        return;
      }
      [[fallthrough]];

      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      Val = FPLiteral.bitcastToAPInt().getZExtValue();
      break;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    if (Lit != LitModifier::None) {
      Inst.addOperand(
    } else {
    }
    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
    break;

    if (Lit == LitModifier::None &&
        AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      return;
    }

    // When the 32 MSBs are not zero (effectively means it can't be safely
    // truncated to uint32_t), if the target doesn't support 64-bit literals, or
    // the lit modifier is explicitly used, we need to truncate it to the 32
    // LSBs.
    if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit)
      Val = Lo_32(Val);
    break;

    if (Lit == LitModifier::None &&
        AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      return;
    }

    // If the target doesn't support 64-bit literals, we need to use the
    // constant as the high 32 MSBs of a double-precision floating point value.
    if (!AsmParser->has64BitLiterals()) {
      Val = static_cast<uint64_t>(Val) << 32;
    } else {
      // Now the target does support 64-bit literals, there are two cases
      // where we still want to use src_literal encoding:
      // 1) explicitly forced by using lit modifier;
      // 2) the value is a valid 32-bit representation (signed or unsigned),
      // meanwhile not forced by lit64 modifier.
      if (Lit == LitModifier::Lit ||
          (Lit != LitModifier::Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
        Val = static_cast<uint64_t>(Val) << 32;
    }

    // For FP64 operands lit() specifies the high half of the value.
    if (Lit == LitModifier::Lit)
      Val = Hi_32(Val);
    break;

    break;

    if ((isInt<32>(Val) || isUInt<32>(Val)) && Lit != LitModifier::Lit64)
      Val <<= 32;
    break;

  default:
    llvm_unreachable("invalid operand type");
  }

  if (Lit != LitModifier::None) {
    Inst.addOperand(
  } else {
  }
}
2587
// Append this register operand to Inst, translating the parsed register to
// the subtarget-specific MC register.
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  // Remember where this operand lands in the MCInst.
  MCOpIdx = Inst.getNumOperands();
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}
2592
bool AMDGPUOperand::isInlineValue() const {
  // True for register operands denoting named inline values; delegates to
  // the global ::isInlineValue helper for the actual register check.
  return isRegKind() && ::isInlineValue(getReg());
}
2596
2597//===----------------------------------------------------------------------===//
2598// AsmParser
2599//===----------------------------------------------------------------------===//
2600
// Define an assembler symbol named Id bound to the constant value Val.
void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
  // TODO: make those pre-defined variables read-only.
  // Currently there is none suitable machinery in the core llvm-mc for this.
  // MCSymbol::isRedefinable is intended for another purpose, and
  // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
  MCContext &Ctx = getContext();
  MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
}
2610
2611static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2612 if (Is == IS_VGPR) {
2613 switch (RegWidth) {
2614 default: return -1;
2615 case 32:
2616 return AMDGPU::VGPR_32RegClassID;
2617 case 64:
2618 return AMDGPU::VReg_64RegClassID;
2619 case 96:
2620 return AMDGPU::VReg_96RegClassID;
2621 case 128:
2622 return AMDGPU::VReg_128RegClassID;
2623 case 160:
2624 return AMDGPU::VReg_160RegClassID;
2625 case 192:
2626 return AMDGPU::VReg_192RegClassID;
2627 case 224:
2628 return AMDGPU::VReg_224RegClassID;
2629 case 256:
2630 return AMDGPU::VReg_256RegClassID;
2631 case 288:
2632 return AMDGPU::VReg_288RegClassID;
2633 case 320:
2634 return AMDGPU::VReg_320RegClassID;
2635 case 352:
2636 return AMDGPU::VReg_352RegClassID;
2637 case 384:
2638 return AMDGPU::VReg_384RegClassID;
2639 case 512:
2640 return AMDGPU::VReg_512RegClassID;
2641 case 1024:
2642 return AMDGPU::VReg_1024RegClassID;
2643 }
2644 } else if (Is == IS_TTMP) {
2645 switch (RegWidth) {
2646 default: return -1;
2647 case 32:
2648 return AMDGPU::TTMP_32RegClassID;
2649 case 64:
2650 return AMDGPU::TTMP_64RegClassID;
2651 case 128:
2652 return AMDGPU::TTMP_128RegClassID;
2653 case 256:
2654 return AMDGPU::TTMP_256RegClassID;
2655 case 512:
2656 return AMDGPU::TTMP_512RegClassID;
2657 }
2658 } else if (Is == IS_SGPR) {
2659 switch (RegWidth) {
2660 default: return -1;
2661 case 32:
2662 return AMDGPU::SGPR_32RegClassID;
2663 case 64:
2664 return AMDGPU::SGPR_64RegClassID;
2665 case 96:
2666 return AMDGPU::SGPR_96RegClassID;
2667 case 128:
2668 return AMDGPU::SGPR_128RegClassID;
2669 case 160:
2670 return AMDGPU::SGPR_160RegClassID;
2671 case 192:
2672 return AMDGPU::SGPR_192RegClassID;
2673 case 224:
2674 return AMDGPU::SGPR_224RegClassID;
2675 case 256:
2676 return AMDGPU::SGPR_256RegClassID;
2677 case 288:
2678 return AMDGPU::SGPR_288RegClassID;
2679 case 320:
2680 return AMDGPU::SGPR_320RegClassID;
2681 case 352:
2682 return AMDGPU::SGPR_352RegClassID;
2683 case 384:
2684 return AMDGPU::SGPR_384RegClassID;
2685 case 512:
2686 return AMDGPU::SGPR_512RegClassID;
2687 }
2688 } else if (Is == IS_AGPR) {
2689 switch (RegWidth) {
2690 default: return -1;
2691 case 32:
2692 return AMDGPU::AGPR_32RegClassID;
2693 case 64:
2694 return AMDGPU::AReg_64RegClassID;
2695 case 96:
2696 return AMDGPU::AReg_96RegClassID;
2697 case 128:
2698 return AMDGPU::AReg_128RegClassID;
2699 case 160:
2700 return AMDGPU::AReg_160RegClassID;
2701 case 192:
2702 return AMDGPU::AReg_192RegClassID;
2703 case 224:
2704 return AMDGPU::AReg_224RegClassID;
2705 case 256:
2706 return AMDGPU::AReg_256RegClassID;
2707 case 288:
2708 return AMDGPU::AReg_288RegClassID;
2709 case 320:
2710 return AMDGPU::AReg_320RegClassID;
2711 case 352:
2712 return AMDGPU::AReg_352RegClassID;
2713 case 384:
2714 return AMDGPU::AReg_384RegClassID;
2715 case 512:
2716 return AMDGPU::AReg_512RegClassID;
2717 case 1024:
2718 return AMDGPU::AReg_1024RegClassID;
2719 }
2720 }
2721 return -1;
2722}
2723
      // Special (non-regular) register names; many also have a "src_"-
      // prefixed alias that maps to the same register.
      .Case("exec", AMDGPU::EXEC)
      .Case("vcc", AMDGPU::VCC)
      .Case("flat_scratch", AMDGPU::FLAT_SCR)
      .Case("xnack_mask", AMDGPU::XNACK_MASK)
      .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
      .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
      .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("lds_direct", AMDGPU::LDS_DIRECT)
      .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
      .Case("m0", AMDGPU::M0)
      .Case("vccz", AMDGPU::SRC_VCCZ)
      .Case("src_vccz", AMDGPU::SRC_VCCZ)
      .Case("execz", AMDGPU::SRC_EXECZ)
      .Case("src_execz", AMDGPU::SRC_EXECZ)
      .Case("scc", AMDGPU::SRC_SCC)
      .Case("src_scc", AMDGPU::SRC_SCC)
      .Case("tba", AMDGPU::TBA)
      .Case("tma", AMDGPU::TMA)
      // 32-bit halves of 64-bit special registers.
      .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
      .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
      .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
      .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
      .Case("vcc_lo", AMDGPU::VCC_LO)
      .Case("vcc_hi", AMDGPU::VCC_HI)
      .Case("exec_lo", AMDGPU::EXEC_LO)
      .Case("exec_hi", AMDGPU::EXEC_HI)
      .Case("tma_lo", AMDGPU::TMA_LO)
      .Case("tma_hi", AMDGPU::TMA_HI)
      .Case("tba_lo", AMDGPU::TBA_LO)
      .Case("tba_hi", AMDGPU::TBA_HI)
      .Case("pc", AMDGPU::PC_REG)
      .Case("null", AMDGPU::SGPR_NULL)
      .Default(AMDGPU::NoRegister);
}
2769
2770bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2771 SMLoc &EndLoc, bool RestoreOnFailure) {
2772 auto R = parseRegister();
2773 if (!R) return true;
2774 assert(R->isReg());
2775 RegNo = R->getReg();
2776 StartLoc = R->getStartLoc();
2777 EndLoc = R->getEndLoc();
2778 return false;
2779}
2780
bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  // Delegates to ParseRegister without restoring the lexer on failure.
  return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}
2785
2786ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2787 SMLoc &EndLoc) {
2788 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2789 bool PendingErrors = getParser().hasPendingError();
2790 getParser().clearPendingErrors();
2791 if (PendingErrors)
2792 return ParseStatus::Failure;
2793 if (Result)
2794 return ParseStatus::NoMatch;
2795 return ParseStatus::Success;
2796}
2797
2798bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2799 RegisterKind RegKind,
2800 MCRegister Reg1, SMLoc Loc) {
2801 switch (RegKind) {
2802 case IS_SPECIAL:
2803 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2804 Reg = AMDGPU::EXEC;
2805 RegWidth = 64;
2806 return true;
2807 }
2808 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2809 Reg = AMDGPU::FLAT_SCR;
2810 RegWidth = 64;
2811 return true;
2812 }
2813 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2814 Reg = AMDGPU::XNACK_MASK;
2815 RegWidth = 64;
2816 return true;
2817 }
2818 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2819 Reg = AMDGPU::VCC;
2820 RegWidth = 64;
2821 return true;
2822 }
2823 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2824 Reg = AMDGPU::TBA;
2825 RegWidth = 64;
2826 return true;
2827 }
2828 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2829 Reg = AMDGPU::TMA;
2830 RegWidth = 64;
2831 return true;
2832 }
2833 Error(Loc, "register does not fit in the list");
2834 return false;
2835 case IS_VGPR:
2836 case IS_SGPR:
2837 case IS_AGPR:
2838 case IS_TTMP:
2839 if (Reg1 != Reg + RegWidth / 32) {
2840 Error(Loc, "registers in a list must have consecutive indices");
2841 return false;
2842 }
2843 RegWidth += 32;
2844 return true;
2845 default:
2846 llvm_unreachable("unexpected register kind");
2847 }
2848}
2849
2850struct RegInfo {
2852 RegisterKind Kind;
2853};
2854
// Prefix table for "regular" (indexed) registers. Lookup uses prefix
// matching, so "acc" must appear before "a".
static constexpr RegInfo RegularRegisters[] = {
    {{"v"}, IS_VGPR},
    {{"s"}, IS_SGPR},
    {{"ttmp"}, IS_TTMP},
    {{"acc"}, IS_AGPR},
    {{"a"}, IS_AGPR},
};
2862
2863static bool isRegularReg(RegisterKind Kind) {
2864 return Kind == IS_VGPR ||
2865 Kind == IS_SGPR ||
2866 Kind == IS_TTMP ||
2867 Kind == IS_AGPR;
2868}
2869
  // Linear scan is fine here: the prefix table has only a handful of entries.
  for (const RegInfo &Reg : RegularRegisters)
    if (Str.starts_with(Reg.Name))
      return &Reg;
  return nullptr;
}
2876
// Parse Str as a base-10 register index into Num; returns true on success
// (StringRef::getAsInteger returns true on error, hence the negation).
static bool getRegNum(StringRef Str, unsigned& Num) {
  return !Str.getAsInteger(10, Num);
}
2880
2881bool
2882AMDGPUAsmParser::isRegister(const AsmToken &Token,
2883 const AsmToken &NextToken) const {
2884
2885 // A list of consecutive registers: [s0,s1,s2,s3]
2886 if (Token.is(AsmToken::LBrac))
2887 return true;
2888
2889 if (!Token.is(AsmToken::Identifier))
2890 return false;
2891
2892 // A single register like s0 or a range of registers like s[0:1]
2893
2894 StringRef Str = Token.getString();
2895 const RegInfo *Reg = getRegularRegInfo(Str);
2896 if (Reg) {
2897 StringRef RegName = Reg->Name;
2898 StringRef RegSuffix = Str.substr(RegName.size());
2899 if (!RegSuffix.empty()) {
2900 RegSuffix.consume_back(".l");
2901 RegSuffix.consume_back(".h");
2902 unsigned Num;
2903 // A single register with an index: rXX
2904 if (getRegNum(RegSuffix, Num))
2905 return true;
2906 } else {
2907 // A range of registers: r[XX:YY].
2908 if (NextToken.is(AsmToken::LBrac))
2909 return true;
2910 }
2911 }
2912
2913 return getSpecialRegForName(Str).isValid();
2914}
2915
bool
AMDGPUAsmParser::isRegister()
{
  // Convenience overload: test the current token, peeking one token ahead.
  return isRegister(getToken(), peekToken());
}
2921
2922MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2923 unsigned SubReg, unsigned RegWidth,
2924 SMLoc Loc) {
2925 assert(isRegularReg(RegKind));
2926
2927 unsigned AlignSize = 1;
2928 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2929 // SGPR and TTMP registers must be aligned.
2930 // Max required alignment is 4 dwords.
2931 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2932 }
2933
2934 if (RegNum % AlignSize != 0) {
2935 Error(Loc, "invalid register alignment");
2936 return MCRegister();
2937 }
2938
2939 unsigned RegIdx = RegNum / AlignSize;
2940 int RCID = getRegClass(RegKind, RegWidth);
2941 if (RCID == -1) {
2942 Error(Loc, "invalid or unsupported register size");
2943 return MCRegister();
2944 }
2945
2946 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2947 const MCRegisterClass RC = TRI->getRegClass(RCID);
2948 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
2949 Error(Loc, "register index is out of range");
2950 return AMDGPU::NoRegister;
2951 }
2952
2953 if (RegKind == IS_VGPR && !isGFX1250Plus() && RegIdx + RegWidth / 32 > 256) {
2954 Error(Loc, "register index is out of range");
2955 return MCRegister();
2956 }
2957
2958 MCRegister Reg = RC.getRegister(RegIdx);
2959
2960 if (SubReg) {
2961 Reg = TRI->getSubReg(Reg, SubReg);
2962
2963 // Currently all regular registers have their .l and .h subregisters, so
2964 // we should never need to generate an error here.
2965 assert(Reg && "Invalid subregister!");
2966 }
2967
2968 return Reg;
2969}
2970
2971bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
2972 unsigned &SubReg) {
2973 int64_t RegLo, RegHi;
2974 if (!skipToken(AsmToken::LBrac, "missing register index"))
2975 return false;
2976
2977 SMLoc FirstIdxLoc = getLoc();
2978 SMLoc SecondIdxLoc;
2979
2980 if (!parseExpr(RegLo))
2981 return false;
2982
2983 if (trySkipToken(AsmToken::Colon)) {
2984 SecondIdxLoc = getLoc();
2985 if (!parseExpr(RegHi))
2986 return false;
2987 } else {
2988 RegHi = RegLo;
2989 }
2990
2991 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2992 return false;
2993
2994 if (!isUInt<32>(RegLo)) {
2995 Error(FirstIdxLoc, "invalid register index");
2996 return false;
2997 }
2998
2999 if (!isUInt<32>(RegHi)) {
3000 Error(SecondIdxLoc, "invalid register index");
3001 return false;
3002 }
3003
3004 if (RegLo > RegHi) {
3005 Error(FirstIdxLoc, "first register index should not exceed second index");
3006 return false;
3007 }
3008
3009 if (RegHi == RegLo) {
3010 StringRef RegSuffix = getTokenStr();
3011 if (RegSuffix == ".l") {
3012 SubReg = AMDGPU::lo16;
3013 lex();
3014 } else if (RegSuffix == ".h") {
3015 SubReg = AMDGPU::hi16;
3016 lex();
3017 }
3018 }
3019
3020 Num = static_cast<unsigned>(RegLo);
3021 RegWidth = 32 * ((RegHi - RegLo) + 1);
3022
3023 return true;
3024}
3025
3026MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3027 unsigned &RegNum,
3028 unsigned &RegWidth,
3029 SmallVectorImpl<AsmToken> &Tokens) {
3030 assert(isToken(AsmToken::Identifier));
3031 MCRegister Reg = getSpecialRegForName(getTokenStr());
3032 if (Reg) {
3033 RegNum = 0;
3034 RegWidth = 32;
3035 RegKind = IS_SPECIAL;
3036 Tokens.push_back(getToken());
3037 lex(); // skip register name
3038 }
3039 return Reg;
3040}
3041
3042MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3043 unsigned &RegNum,
3044 unsigned &RegWidth,
3045 SmallVectorImpl<AsmToken> &Tokens) {
3046 assert(isToken(AsmToken::Identifier));
3047 StringRef RegName = getTokenStr();
3048 auto Loc = getLoc();
3049
3050 const RegInfo *RI = getRegularRegInfo(RegName);
3051 if (!RI) {
3052 Error(Loc, "invalid register name");
3053 return MCRegister();
3054 }
3055
3056 Tokens.push_back(getToken());
3057 lex(); // skip register name
3058
3059 RegKind = RI->Kind;
3060 StringRef RegSuffix = RegName.substr(RI->Name.size());
3061 unsigned SubReg = NoSubRegister;
3062 if (!RegSuffix.empty()) {
3063 if (RegSuffix.consume_back(".l"))
3064 SubReg = AMDGPU::lo16;
3065 else if (RegSuffix.consume_back(".h"))
3066 SubReg = AMDGPU::hi16;
3067
3068 // Single 32-bit register: vXX.
3069 if (!getRegNum(RegSuffix, RegNum)) {
3070 Error(Loc, "invalid register index");
3071 return MCRegister();
3072 }
3073 RegWidth = 32;
3074 } else {
3075 // Range of registers: v[XX:YY]. ":YY" is optional.
3076 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3077 return MCRegister();
3078 }
3079
3080 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3081}
3082
3083MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3084 unsigned &RegNum, unsigned &RegWidth,
3085 SmallVectorImpl<AsmToken> &Tokens) {
3086 MCRegister Reg;
3087 auto ListLoc = getLoc();
3088
3089 if (!skipToken(AsmToken::LBrac,
3090 "expected a register or a list of registers")) {
3091 return MCRegister();
3092 }
3093
3094 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3095
3096 auto Loc = getLoc();
3097 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3098 return MCRegister();
3099 if (RegWidth != 32) {
3100 Error(Loc, "expected a single 32-bit register");
3101 return MCRegister();
3102 }
3103
3104 for (; trySkipToken(AsmToken::Comma); ) {
3105 RegisterKind NextRegKind;
3106 MCRegister NextReg;
3107 unsigned NextRegNum, NextRegWidth;
3108 Loc = getLoc();
3109
3110 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3111 NextRegNum, NextRegWidth,
3112 Tokens)) {
3113 return MCRegister();
3114 }
3115 if (NextRegWidth != 32) {
3116 Error(Loc, "expected a single 32-bit register");
3117 return MCRegister();
3118 }
3119 if (NextRegKind != RegKind) {
3120 Error(Loc, "registers in a list must be of the same kind");
3121 return MCRegister();
3122 }
3123 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3124 return MCRegister();
3125 }
3126
3127 if (!skipToken(AsmToken::RBrac,
3128 "expected a comma or a closing square bracket")) {
3129 return MCRegister();
3130 }
3131
3132 if (isRegularReg(RegKind))
3133 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3134
3135 return Reg;
3136}
3137
3138bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3139 MCRegister &Reg, unsigned &RegNum,
3140 unsigned &RegWidth,
3141 SmallVectorImpl<AsmToken> &Tokens) {
3142 auto Loc = getLoc();
3143 Reg = MCRegister();
3144
3145 if (isToken(AsmToken::Identifier)) {
3146 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3147 if (!Reg)
3148 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3149 } else {
3150 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3151 }
3152
3153 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3154 if (!Reg) {
3155 assert(Parser.hasPendingError());
3156 return false;
3157 }
3158
3159 if (!subtargetHasRegister(*TRI, Reg)) {
3160 if (Reg == AMDGPU::SGPR_NULL) {
3161 Error(Loc, "'null' operand is not supported on this GPU");
3162 } else {
3164 " register not available on this GPU");
3165 }
3166 return false;
3167 }
3168
3169 return true;
3170}
3171
3172bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3173 MCRegister &Reg, unsigned &RegNum,
3174 unsigned &RegWidth,
3175 bool RestoreOnFailure /*=false*/) {
3176 Reg = MCRegister();
3177
3179 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3180 if (RestoreOnFailure) {
3181 while (!Tokens.empty()) {
3182 getLexer().UnLex(Tokens.pop_back_val());
3183 }
3184 }
3185 return true;
3186 }
3187 return false;
3188}
3189
3190std::optional<StringRef>
3191AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3192 switch (RegKind) {
3193 case IS_VGPR:
3194 return StringRef(".amdgcn.next_free_vgpr");
3195 case IS_SGPR:
3196 return StringRef(".amdgcn.next_free_sgpr");
3197 default:
3198 return std::nullopt;
3199 }
3200}
3201
3202void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3203 auto SymbolName = getGprCountSymbolName(RegKind);
3204 assert(SymbolName && "initializing invalid register kind");
3205 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3207 Sym->setRedefinable(true);
3208}
3209
3210bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3211 unsigned DwordRegIndex,
3212 unsigned RegWidth) {
3213 // Symbols are only defined for GCN targets
3214 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3215 return true;
3216
3217 auto SymbolName = getGprCountSymbolName(RegKind);
3218 if (!SymbolName)
3219 return true;
3220 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3221
3222 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3223 int64_t OldCount;
3224
3225 if (!Sym->isVariable())
3226 return !Error(getLoc(),
3227 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3228 if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
3229 return !Error(
3230 getLoc(),
3231 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3232
3233 if (OldCount <= NewMax)
3235
3236 return true;
3237}
3238
3239std::unique_ptr<AMDGPUOperand>
3240AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3241 const auto &Tok = getToken();
3242 SMLoc StartLoc = Tok.getLoc();
3243 SMLoc EndLoc = Tok.getEndLoc();
3244 RegisterKind RegKind;
3245 MCRegister Reg;
3246 unsigned RegNum, RegWidth;
3247
3248 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3249 return nullptr;
3250 }
3251 if (isHsaAbi(getSTI())) {
3252 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3253 return nullptr;
3254 } else
3255 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3256 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3257}
3258
3259ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3260 bool HasSP3AbsModifier, LitModifier Lit) {
3261 // TODO: add syntactic sugar for 1/(2*PI)
3262
3263 if (isRegister() || isModifier())
3264 return ParseStatus::NoMatch;
3265
3266 if (Lit == LitModifier::None) {
3267 if (trySkipId("lit"))
3268 Lit = LitModifier::Lit;
3269 else if (trySkipId("lit64"))
3270 Lit = LitModifier::Lit64;
3271
3272 if (Lit != LitModifier::None) {
3273 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3274 return ParseStatus::Failure;
3275 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
3276 if (S.isSuccess() &&
3277 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3278 return ParseStatus::Failure;
3279 return S;
3280 }
3281 }
3282
3283 const auto& Tok = getToken();
3284 const auto& NextTok = peekToken();
3285 bool IsReal = Tok.is(AsmToken::Real);
3286 SMLoc S = getLoc();
3287 bool Negate = false;
3288
3289 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3290 lex();
3291 IsReal = true;
3292 Negate = true;
3293 }
3294
3295 AMDGPUOperand::Modifiers Mods;
3296 Mods.Lit = Lit;
3297
3298 if (IsReal) {
3299 // Floating-point expressions are not supported.
3300 // Can only allow floating-point literals with an
3301 // optional sign.
3302
3303 StringRef Num = getTokenStr();
3304 lex();
3305
3306 APFloat RealVal(APFloat::IEEEdouble());
3307 auto roundMode = APFloat::rmNearestTiesToEven;
3308 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3309 return ParseStatus::Failure;
3310 if (Negate)
3311 RealVal.changeSign();
3312
3313 Operands.push_back(
3314 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3315 AMDGPUOperand::ImmTyNone, true));
3316 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3317 Op.setModifiers(Mods);
3318
3319 return ParseStatus::Success;
3320
3321 } else {
3322 int64_t IntVal;
3323 const MCExpr *Expr;
3324 SMLoc S = getLoc();
3325
3326 if (HasSP3AbsModifier) {
3327 // This is a workaround for handling expressions
3328 // as arguments of SP3 'abs' modifier, for example:
3329 // |1.0|
3330 // |-1|
3331 // |1+x|
3332 // This syntax is not compatible with syntax of standard
3333 // MC expressions (due to the trailing '|').
3334 SMLoc EndLoc;
3335 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3336 return ParseStatus::Failure;
3337 } else {
3338 if (Parser.parseExpression(Expr))
3339 return ParseStatus::Failure;
3340 }
3341
3342 if (Expr->evaluateAsAbsolute(IntVal)) {
3343 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3344 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3345 Op.setModifiers(Mods);
3346 } else {
3347 if (Lit != LitModifier::None)
3348 return ParseStatus::NoMatch;
3349 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3350 }
3351
3352 return ParseStatus::Success;
3353 }
3354
3355 return ParseStatus::NoMatch;
3356}
3357
3358ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3359 if (!isRegister())
3360 return ParseStatus::NoMatch;
3361
3362 if (auto R = parseRegister()) {
3363 assert(R->isReg());
3364 Operands.push_back(std::move(R));
3365 return ParseStatus::Success;
3366 }
3367 return ParseStatus::Failure;
3368}
3369
// Parse either a register or an immediate. Registers take precedence;
// an immediate is attempted only when the next tokens are neither a
// register nor a modifier (a modifier means this operand form is wrong).
ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
                                           bool HasSP3AbsMod, LitModifier Lit) {
  ParseStatus Res = parseReg(Operands);
  if (!Res.isNoMatch())
    return Res;
  if (isModifier())
    return ParseStatus::NoMatch;
  return parseImm(Operands, HasSP3AbsMod, Lit);
}
3379
3380bool
3381AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3382 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3383 const auto &str = Token.getString();
3384 return str == "abs" || str == "neg" || str == "sext";
3385 }
3386 return false;
3387}
3388
// Returns true if the token pair looks like an opcode modifier carrying
// a value, i.e. "name:<value>".
bool
AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
}
3393
// Returns true if the tokens start an operand modifier: a named one
// (abs/neg/sext followed by '(') or the SP3 '|...|' abs form.
bool
AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
}
3398
// Returns true if the tokens start either a register reference or an
// operand modifier. Used to decide whether a leading '-' is a NEG
// modifier rather than part of an expression.
bool
AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
}
3403
3404// Check if this is an operand modifier or an opcode modifier
3405// which may look like an expression but it is not. We should
3406// avoid parsing these modifiers as expressions. Currently
3407// recognized sequences are:
3408// |...|
3409// abs(...)
3410// neg(...)
3411// sext(...)
3412// -reg
3413// -|...|
3414// -abs(...)
3415// name:...
3416//
3417bool
3418AMDGPUAsmParser::isModifier() {
3419
3420 AsmToken Tok = getToken();
3421 AsmToken NextToken[2];
3422 peekTokens(NextToken);
3423
3424 return isOperandModifier(Tok, NextToken[0]) ||
3425 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3426 isOpcodeModifierWithVal(Tok, NextToken[0]);
3427}
3428
3429// Check if the current token is an SP3 'neg' modifier.
3430// Currently this modifier is allowed in the following context:
3431//
3432// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3433// 2. Before an 'abs' modifier: -abs(...)
3434// 3. Before an SP3 'abs' modifier: -|...|
3435//
3436// In all other cases "-" is handled as a part
3437// of an expression that follows the sign.
3438//
3439// Note: When "-" is followed by an integer literal,
3440// this is interpreted as integer negation rather
3441// than a floating-point NEG modifier applied to N.
3442// Beside being contr-intuitive, such use of floating-point
3443// NEG modifier would have resulted in different meaning
3444// of integer literals used with VOP1/2/C and VOP3,
3445// for example:
3446// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3447// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3448// Negative fp literals with preceding "-" are
3449// handled likewise for uniformity
3450//
3451bool
3452AMDGPUAsmParser::parseSP3NegModifier() {
3453
3454 AsmToken NextToken[2];
3455 peekTokens(NextToken);
3456
3457 if (isToken(AsmToken::Minus) &&
3458 (isRegister(NextToken[0], NextToken[1]) ||
3459 NextToken[0].is(AsmToken::Pipe) ||
3460 isId(NextToken[0], "abs"))) {
3461 lex();
3462 return true;
3463 }
3464
3465 return false;
3466}
3467
3468ParseStatus
3469AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3470 bool AllowImm) {
3471 bool Neg, SP3Neg;
3472 bool Abs, SP3Abs;
3473 SMLoc Loc;
3474
3475 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3476 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3477 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3478
3479 SP3Neg = parseSP3NegModifier();
3480
3481 Loc = getLoc();
3482 Neg = trySkipId("neg");
3483 if (Neg && SP3Neg)
3484 return Error(Loc, "expected register or immediate");
3485 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3486 return ParseStatus::Failure;
3487
3488 Abs = trySkipId("abs");
3489 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3490 return ParseStatus::Failure;
3491
3492 LitModifier Lit = LitModifier::None;
3493 if (trySkipId("lit")) {
3494 Lit = LitModifier::Lit;
3495 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3496 return ParseStatus::Failure;
3497 } else if (trySkipId("lit64")) {
3498 Lit = LitModifier::Lit64;
3499 if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
3500 return ParseStatus::Failure;
3501 if (!has64BitLiterals())
3502 return Error(Loc, "lit64 is not supported on this GPU");
3503 }
3504
3505 Loc = getLoc();
3506 SP3Abs = trySkipToken(AsmToken::Pipe);
3507 if (Abs && SP3Abs)
3508 return Error(Loc, "expected register or immediate");
3509
3510 ParseStatus Res;
3511 if (AllowImm) {
3512 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3513 } else {
3514 Res = parseReg(Operands);
3515 }
3516 if (!Res.isSuccess())
3517 return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
3519 : Res;
3520
3521 if (Lit != LitModifier::None && !Operands.back()->isImm())
3522 Error(Loc, "expected immediate with lit modifier");
3523
3524 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3525 return ParseStatus::Failure;
3526 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3527 return ParseStatus::Failure;
3528 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3529 return ParseStatus::Failure;
3530 if (Lit != LitModifier::None &&
3531 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3532 return ParseStatus::Failure;
3533
3534 AMDGPUOperand::Modifiers Mods;
3535 Mods.Abs = Abs || SP3Abs;
3536 Mods.Neg = Neg || SP3Neg;
3537 Mods.Lit = Lit;
3538
3539 if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
3540 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3541 if (Op.isExpr())
3542 return Error(Op.getStartLoc(), "expected an absolute expression");
3543 Op.setModifiers(Mods);
3544 }
3545 return ParseStatus::Success;
3546}
3547
3548ParseStatus
3549AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3550 bool AllowImm) {
3551 bool Sext = trySkipId("sext");
3552 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3553 return ParseStatus::Failure;
3554
3555 ParseStatus Res;
3556 if (AllowImm) {
3557 Res = parseRegOrImm(Operands);
3558 } else {
3559 Res = parseReg(Operands);
3560 }
3561 if (!Res.isSuccess())
3562 return Sext ? ParseStatus::Failure : Res;
3563
3564 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3565 return ParseStatus::Failure;
3566
3567 AMDGPUOperand::Modifiers Mods;
3568 Mods.Sext = Sext;
3569
3570 if (Mods.hasIntModifiers()) {
3571 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3572 if (Op.isExpr())
3573 return Error(Op.getStartLoc(), "expected an absolute expression");
3574 Op.setModifiers(Mods);
3575 }
3576
3577 return ParseStatus::Success;
3578}
3579
// Parse a register with optional FP input modifiers; immediates are not
// allowed in this context.
ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}
3583
// Parse a register with an optional integer input modifier (sext);
// immediates are not allowed in this context.
ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}
3587
3588ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3589 auto Loc = getLoc();
3590 if (trySkipId("off")) {
3591 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3592 AMDGPUOperand::ImmTyOff, false));
3593 return ParseStatus::Success;
3594 }
3595
3596 if (!isRegister())
3597 return ParseStatus::NoMatch;
3598
3599 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3600 if (Reg) {
3601 Operands.push_back(std::move(Reg));
3602 return ParseStatus::Success;
3603 }
3604
3605 return ParseStatus::Failure;
3606}
3607
3608unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3609 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3610
3611 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3612 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3613 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3614 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3615 return Match_InvalidOperand;
3616
3617 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3618 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3619 // v_mac_f32/16 allow only dst_sel == DWORD;
3620 auto OpNum =
3621 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3622 const auto &Op = Inst.getOperand(OpNum);
3623 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3624 return Match_InvalidOperand;
3625 }
3626 }
3627
3628 // Asm can first try to match VOPD or VOPD3. By failing early here with
3629 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3630 // Checking later during validateInstruction does not give a chance to retry
3631 // parsing as a different encoding.
3632 if (tryAnotherVOPDEncoding(Inst))
3633 return Match_InvalidOperand;
3634
3635 return Match_Success;
3636}
3637
3647
3648// What asm variants we should check
3649ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3650 if (isForcedDPP() && isForcedVOP3()) {
3651 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3652 return ArrayRef(Variants);
3653 }
3654 if (getForcedEncodingSize() == 32) {
3655 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3656 return ArrayRef(Variants);
3657 }
3658
3659 if (isForcedVOP3()) {
3660 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3661 return ArrayRef(Variants);
3662 }
3663
3664 if (isForcedSDWA()) {
3665 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3667 return ArrayRef(Variants);
3668 }
3669
3670 if (isForcedDPP()) {
3671 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3672 return ArrayRef(Variants);
3673 }
3674
3675 return getAllVariants();
3676}
3677
// Returns the name of the asm variant implied by the current forced
// encoding, or an empty string when no encoding is forced. The order of
// checks mirrors getMatchedVariants().
StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (isForcedDPP() && isForcedVOP3())
    return "e64_dpp";

  if (getForcedEncodingSize() == 32)
    return "e32";

  if (isForcedVOP3())
    return "e64";

  if (isForcedSDWA())
    return "sdwa";

  if (isForcedDPP())
    return "dpp";

  return "";
}
3696
3697MCRegister
3698AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3699 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3700 for (MCPhysReg Reg : Desc.implicit_uses()) {
3701 switch (Reg) {
3702 case AMDGPU::FLAT_SCR:
3703 case AMDGPU::VCC:
3704 case AMDGPU::VCC_LO:
3705 case AMDGPU::VCC_HI:
3706 case AMDGPU::M0:
3707 return Reg;
3708 default:
3709 break;
3710 }
3711 }
3712 return MCRegister();
3713}
3714
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 support no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
//
// Decides whether operand OpIdx of Inst can be encoded as an inline
// constant (and therefore does not consume a literal slot), dispatching
// on the expected operand size in bytes.
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                      unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  // NOTE(review): the guard condition preceding this early-exit appears
  // to be elided in this listing -- confirm against the upstream file.
    return false;
  }

  const MCOperand &MO = Inst.getOperand(OpIdx);

  // Expression operands are folded to their literal value for the check.
  int64_t Val = MO.isImm() ? MO.getImm() : getLitValue(MO.getExpr());
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);

  switch (OpSize) { // expected operand size
  case 8:
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  case 2: {
    const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
    // NOTE(review): the OPERAND_REG_* case labels selecting among the
    // 16-bit checks below appear to be elided in this listing -- confirm
    // against the upstream file.
      return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());



      return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());

      return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());

      return false;

    llvm_unreachable("invalid operand type");
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
3776
// Returns how many distinct scalar values an instruction may read over
// the constant bus: one before GFX10, and on GFX10+ two for most opcodes
// except the 64-bit shifts.
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  if (!isGFX10Plus())
    return 1;

  switch (Opcode) {
  // 64-bit shift instructions can use only one scalar value input
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHLREV_B64_e32_gfx12:
  case AMDGPU::V_LSHLREV_B64_e64_gfx12:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64_gfx12:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64_gfx12:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:
    return 1;
  default:
    return 2;
  }
}
3804
3805constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3807
3808// Get regular operand indices in the same order as specified
3809// in the instruction (but append mandatory literals to the end).
3811 bool AddMandatoryLiterals = false) {
3812
3813 int16_t ImmIdx =
3814 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3815
3816 if (isVOPD(Opcode)) {
3817 int16_t ImmXIdx =
3818 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3819
3820 return {getNamedOperandIdx(Opcode, OpName::src0X),
3821 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3822 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3823 getNamedOperandIdx(Opcode, OpName::src0Y),
3824 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3825 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3826 ImmXIdx,
3827 ImmIdx};
3828 }
3829
3830 return {getNamedOperandIdx(Opcode, OpName::src0),
3831 getNamedOperandIdx(Opcode, OpName::src1),
3832 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3833}
3834
3835bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3836 const MCOperand &MO = Inst.getOperand(OpIdx);
3837 if (MO.isImm())
3838 return !isInlineConstant(Inst, OpIdx);
3839 if (MO.isReg()) {
3840 auto Reg = MO.getReg();
3841 if (!Reg)
3842 return false;
3843 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3844 auto PReg = mc2PseudoReg(Reg);
3845 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3846 }
3847 return true;
3848}
3849
3850// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3851// Writelane is special in that it can use SGPR and M0 (which would normally
3852// count as using the constant bus twice - but in this case it is allowed since
3853// the lane selector doesn't count as a use of the constant bus). However, it is
3854// still required to abide by the 1 SGPR rule.
3855static bool checkWriteLane(const MCInst &Inst) {
3856 const unsigned Opcode = Inst.getOpcode();
3857 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3858 return false;
3859 const MCOperand &LaneSelOp = Inst.getOperand(2);
3860 if (!LaneSelOp.isReg())
3861 return false;
3862 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3863 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3864}
3865
// Verifies that Inst does not read more scalar values over the constant
// bus than the hardware allows (see getConstantBusLimit). Counts
// implicit SGPR reads, distinct explicit SGPRs, and literal operands;
// emits a diagnostic at the offending operand and returns false on
// violation.
bool AMDGPUAsmParser::validateConstantBusLimitations(
    const MCInst &Inst, const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  MCRegister LastSGPR;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  // NOTE(review): the VOP-encoding flag mask of this condition appears to
  // be elided in this listing -- confirm against the upstream file.
  if (!(Desc.TSFlags &
      !isVOPD(Opcode))
    return true;

  // writelane's lane selector is exempt from constant-bus accounting.
  if (checkWriteLane(Inst))
    return true;

  // Check special imm operands (used by madmk, etc)
  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
    ++NumLiterals;
    LiteralSize = 4;
  }

  SmallDenseSet<MCRegister> SGPRsUsed;
  MCRegister SGPRUsed = findImplicitSGPRReadInVOP(Inst);
  if (SGPRUsed) {
    SGPRsUsed.insert(SGPRUsed);
    ++ConstantBusUseCount;
  }

  OperandIndices OpIndices = getSrcOperandIndices(Opcode);

  unsigned ConstantBusLimit = getConstantBusLimit(Opcode);

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      continue; // Operand not present on this instruction.

    const MCOperand &MO = Inst.getOperand(OpIdx);
    if (usesConstantBus(Inst, OpIdx)) {
      if (MO.isReg()) {
        LastSGPR = mc2PseudoReg(MO.getReg());
        // Pairs of registers with a partial intersections like these
        //   s0, s[0:1]
        //   flat_scratch_lo, flat_scratch
        //   flat_scratch_lo, flat_scratch_hi
        // are theoretically valid but they are disabled anyway.
        // Note that this code mimics SIInstrInfo::verifyInstruction
        if (SGPRsUsed.insert(LastSGPR).second) {
          ++ConstantBusUseCount;
        }
      } else { // Expression or a literal

        if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
          continue; // special operand like VINTERP attr_chan

        // An instruction may use only one literal.
        // This has been validated on the previous step.
        // See validateVOPLiteral.
        // This literal may be used as more than one operand.
        // If all these operands are of the same size,
        // this literal counts as one scalar value.
        // Otherwise it counts as 2 scalar values.
        // See "GFX10 Shader Programming", section 3.6.2.3.

        // NOTE(review): the computation of Size (operand size in bytes)
        // appears to be elided in this listing -- confirm upstream.
        if (Size < 4)
          Size = 4;

        if (NumLiterals == 0) {
          NumLiterals = 1;
          LiteralSize = Size;
        } else if (LiteralSize != Size) {
          NumLiterals = 2;
        }
      }
    }

    if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
      Error(getOperandLoc(Operands, OpIdx),
            "invalid operand (violates constant bus restrictions)");
      return false;
    }
  }
  return true;
}
3953
// Checks the register-bank (and, for VOPD3, literal) constraints of a
// VOPD/VOPD3 instruction. Returns the MCInst operand index of the first
// offending operand, or an empty optional when all constraints hold.
std::optional<unsigned>
AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {

  const unsigned Opcode = Inst.getOpcode();
  if (!isVOPD(Opcode))
    return {};

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();

  // Maps an operand index to its VGPR, or to no register for SGPRs and
  // non-register operands (which do not participate in bank checks).
  auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
    const MCOperand &Opr = Inst.getOperand(OperandIdx);
    return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
               ? Opr.getReg()
               : MCRegister();
  };

  // On GFX12+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
  // source-cache.
  bool SkipSrc =
      Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
      Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
      Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx13 ||
      Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250 ||
      Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx13;
  bool AllowSameVGPR = isGFX1250Plus();

  if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
    // src0 of either component may only hold an inlinable constant.
    for (auto OpName : {OpName::src0X, OpName::src0Y}) {
      int I = getNamedOperandIdx(Opcode, OpName);
      const MCOperand &Op = Inst.getOperand(I);
      if (!Op.isImm())
        continue;
      int64_t Imm = Op.getImm();
      if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
          !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
        return (unsigned)I;
    }

    // The remaining sources may not be immediates at all.
    for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
                        OpName::vsrc2Y, OpName::imm}) {
      int I = getNamedOperandIdx(Opcode, OpName);
      if (I == -1)
        continue;
      const MCOperand &Op = Inst.getOperand(I);
      if (Op.isImm())
        return (unsigned)I;
    }
  }

  const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
  auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
      getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);

  return InvalidCompOprIdx;
}
4009
4010bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
4011 const OperandVector &Operands) {
4012
4013 unsigned Opcode = Inst.getOpcode();
4014 bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
4015
4016 if (AsVOPD3) {
4017 for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
4018 AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
4019 if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
4020 (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
4021 Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
4022 }
4023 }
4024
4025 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
4026 if (!InvalidCompOprIdx.has_value())
4027 return true;
4028
4029 auto CompOprIdx = *InvalidCompOprIdx;
4030 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4031 auto ParsedIdx =
4032 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
4033 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
4034 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
4035
4036 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4037 if (CompOprIdx == VOPD::Component::DST) {
4038 if (AsVOPD3)
4039 Error(Loc, "dst registers must be distinct");
4040 else
4041 Error(Loc, "one dst register must be even and the other odd");
4042 } else {
4043 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4044 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
4045 " operands must use different VGPR banks");
4046 }
4047
4048 return false;
4049}
4050
4051// \returns true if \p Inst does not satisfy VOPD constraints, but can be
4052// potentially used as VOPD3 with the same operands.
4053bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4054 // First check if it fits VOPD
4055 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
4056 if (!InvalidCompOprIdx.has_value())
4057 return false;
4058
4059 // Then if it fits VOPD3
4060 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
4061 if (InvalidCompOprIdx.has_value()) {
4062 // If failed operand is dst it is better to show error about VOPD3
4063 // instruction as it has more capabilities and error message will be
4064 // more informative. If the dst is not legal for VOPD3, then it is not
4065 // legal for VOPD either.
4066 if (*InvalidCompOprIdx == VOPD::Component::DST)
4067 return true;
4068
4069 // Otherwise prefer VOPD as we may find ourselves in an awkward situation
4070 // with a conflict in tied implicit src2 of fmac and no asm operand to
4071 // to point to.
4072 return false;
4073 }
4074 return true;
4075}
4076
4077// \returns true is a VOPD3 instruction can be also represented as a shorter
4078// VOPD encoding.
4079bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
4080 const unsigned Opcode = Inst.getOpcode();
4081 const auto &II = getVOPDInstInfo(Opcode, &MII);
4082 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
4083 if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
4084 !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
4085 return false;
4086
4087 // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
4088 // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
4089 // be parsed as VOPD which does not accept src2.
4090 if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4091 II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4092 return false;
4093
4094 // If any modifiers are set this cannot be VOPD.
4095 for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4096 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4097 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4098 int I = getNamedOperandIdx(Opcode, OpName);
4099 if (I == -1)
4100 continue;
4101 if (Inst.getOperand(I).getImm())
4102 return false;
4103 }
4104
4105 return !tryVOPD3(Inst);
4106}
4107
4108// VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD
4109// form but switch to VOPD3 otherwise.
4110bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4111 const unsigned Opcode = Inst.getOpcode();
4112 if (!isGFX1250Plus() || !isVOPD(Opcode))
4113 return false;
4114
4115 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4116 return tryVOPD(Inst);
4117 return tryVOPD3(Inst);
4118}
4119
4120bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4121
4122 const unsigned Opc = Inst.getOpcode();
4123 const MCInstrDesc &Desc = MII.get(Opc);
4124
4125 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4126 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4127 assert(ClampIdx != -1);
4128 return Inst.getOperand(ClampIdx).getImm() == 0;
4129 }
4130
4131 return true;
4132}
4133
4136
4137bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) {
4138
4139 const unsigned Opc = Inst.getOpcode();
4140 const MCInstrDesc &Desc = MII.get(Opc);
4141
4142 if ((Desc.TSFlags & MIMGFlags) == 0)
4143 return true;
4144
4145 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
4146 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4147 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
4148
4149 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
4150 return true;
4151
4152 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
4153 return true;
4154
4155 unsigned VDataSize = getRegOperandSize(Desc, VDataIdx);
4156 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
4157 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4158 if (DMask == 0)
4159 DMask = 1;
4160
4161 bool IsPackedD16 = false;
4162 unsigned DataSize =
4163 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
4164 if (hasPackedD16()) {
4165 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4166 IsPackedD16 = D16Idx >= 0;
4167 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
4168 DataSize = (DataSize + 1) / 2;
4169 }
4170
4171 if ((VDataSize / 4) == DataSize + TFESize)
4172 return true;
4173
4174 StringRef Modifiers;
4175 if (isGFX90A())
4176 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
4177 else
4178 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
4179
4180 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
4181 return false;
4182}
4183
4184bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
4185 const unsigned Opc = Inst.getOpcode();
4186 const MCInstrDesc &Desc = MII.get(Opc);
4187
4188 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
4189 return true;
4190
4191 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4192
4193 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4195 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
4196 AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
4197 ? AMDGPU::OpName::srsrc
4198 : AMDGPU::OpName::rsrc;
4199 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
4200 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4201 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
4202
4203 assert(VAddr0Idx != -1);
4204 assert(SrsrcIdx != -1);
4205 assert(SrsrcIdx > VAddr0Idx);
4206
4207 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
4208 if (BaseOpcode->BVH) {
4209 if (IsA16 == BaseOpcode->A16)
4210 return true;
4211 Error(IDLoc, "image address size does not match a16");
4212 return false;
4213 }
4214
4215 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4216 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4217 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4218 unsigned ActualAddrSize =
4219 IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(Desc, VAddr0Idx) / 4;
4220
4221 unsigned ExpectedAddrSize =
4222 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
4223
4224 if (IsNSA) {
4225 if (hasPartialNSAEncoding() &&
4226 ExpectedAddrSize >
4228 int VAddrLastIdx = SrsrcIdx - 1;
4229 unsigned VAddrLastSize = getRegOperandSize(Desc, VAddrLastIdx) / 4;
4230
4231 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4232 }
4233 } else {
4234 if (ExpectedAddrSize > 12)
4235 ExpectedAddrSize = 16;
4236
4237 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
4238 // This provides backward compatibility for assembly created
4239 // before 160b/192b/224b types were directly supported.
4240 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4241 return true;
4242 }
4243
4244 if (ActualAddrSize == ExpectedAddrSize)
4245 return true;
4246
4247 Error(IDLoc, "image address size does not match dim and a16");
4248 return false;
4249}
4250
4251bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4252
4253 const unsigned Opc = Inst.getOpcode();
4254 const MCInstrDesc &Desc = MII.get(Opc);
4255
4256 if ((Desc.TSFlags & MIMGFlags) == 0)
4257 return true;
4258 if (!Desc.mayLoad() || !Desc.mayStore())
4259 return true; // Not atomic
4260
4261 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4262 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4263
4264 // This is an incomplete check because image_atomic_cmpswap
4265 // may only use 0x3 and 0xf while other atomic operations
4266 // may use 0x1 and 0x3. However these limitations are
4267 // verified when we check that dmask matches dst size.
4268 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4269}
4270
4271bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4272
4273 const unsigned Opc = Inst.getOpcode();
4274 const MCInstrDesc &Desc = MII.get(Opc);
4275
4276 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4277 return true;
4278
4279 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4280 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4281
4282 // GATHER4 instructions use dmask in a different fashion compared to
4283 // other MIMG instructions. The only useful DMASK values are
4284 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4285 // (red,red,red,red) etc.) The ISA document doesn't mention
4286 // this.
4287 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4288}
4289
4290bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4291 const OperandVector &Operands) {
4292 if (!isGFX10Plus())
4293 return true;
4294
4295 const unsigned Opc = Inst.getOpcode();
4296 const MCInstrDesc &Desc = MII.get(Opc);
4297
4298 if ((Desc.TSFlags & MIMGFlags) == 0)
4299 return true;
4300
4301 // image_bvh_intersect_ray instructions do not have dim
4303 return true;
4304
4305 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4306 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4307 if (Op.isDim())
4308 return true;
4309 }
4310 return false;
4311}
4312
4313bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4314 const unsigned Opc = Inst.getOpcode();
4315 const MCInstrDesc &Desc = MII.get(Opc);
4316
4317 if ((Desc.TSFlags & MIMGFlags) == 0)
4318 return true;
4319
4320 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4321 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4323
4324 if (!BaseOpcode->MSAA)
4325 return true;
4326
4327 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4328 assert(DimIdx != -1);
4329
4330 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4331 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4332
4333 return DimInfo->MSAA;
4334}
4335
4336static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4337{
4338 switch (Opcode) {
4339 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4340 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4341 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4342 return true;
4343 default:
4344 return false;
4345 }
4346}
4347
4348// movrels* opcodes should only allow VGPRS as src0.
4349// This is specified in .td description for vop1/vop3,
4350// but sdwa is handled differently. See isSDWAOperand.
4351bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4352 const OperandVector &Operands) {
4353
4354 const unsigned Opc = Inst.getOpcode();
4355 const MCInstrDesc &Desc = MII.get(Opc);
4356
4357 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4358 return true;
4359
4360 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4361 assert(Src0Idx != -1);
4362
4363 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4364 if (Src0.isReg()) {
4365 auto Reg = mc2PseudoReg(Src0.getReg());
4366 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4367 if (!isSGPR(Reg, TRI))
4368 return true;
4369 }
4370
4371 Error(getOperandLoc(Operands, Src0Idx), "source operand must be a VGPR");
4372 return false;
4373}
4374
4375bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4376 const OperandVector &Operands) {
4377
4378 const unsigned Opc = Inst.getOpcode();
4379
4380 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4381 return true;
4382
4383 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4384 assert(Src0Idx != -1);
4385
4386 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4387 if (!Src0.isReg())
4388 return true;
4389
4390 auto Reg = mc2PseudoReg(Src0.getReg());
4391 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4392 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4393 Error(getOperandLoc(Operands, Src0Idx),
4394 "source operand must be either a VGPR or an inline constant");
4395 return false;
4396 }
4397
4398 return true;
4399}
4400
4401bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4402 const OperandVector &Operands) {
4403 unsigned Opcode = Inst.getOpcode();
4404 const MCInstrDesc &Desc = MII.get(Opcode);
4405
4406 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4407 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4408 return true;
4409
4410 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4411 if (Src2Idx == -1)
4412 return true;
4413
4414 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4415 Error(getOperandLoc(Operands, Src2Idx),
4416 "inline constants are not allowed for this operand");
4417 return false;
4418 }
4419
4420 return true;
4421}
4422
4423bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4424 const OperandVector &Operands) {
4425 const unsigned Opc = Inst.getOpcode();
4426 const MCInstrDesc &Desc = MII.get(Opc);
4427
4428 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4429 return true;
4430
4431 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4432 if (BlgpIdx != -1) {
4433 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
4434 int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
4435
4436 unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
4437 unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();
4438
4439 // Validate the correct register size was used for the floating point
4440 // format operands
4441
4442 bool Success = true;
4443 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
4444 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4445 Error(getOperandLoc(Operands, Src0Idx),
4446 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4447 Success = false;
4448 }
4449
4450 if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
4451 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4452 Error(getOperandLoc(Operands, Src1Idx),
4453 "wrong register tuple size for blgp value " + Twine(BLGP));
4454 Success = false;
4455 }
4456
4457 return Success;
4458 }
4459 }
4460
4461 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4462 if (Src2Idx == -1)
4463 return true;
4464
4465 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4466 if (!Src2.isReg())
4467 return true;
4468
4469 MCRegister Src2Reg = Src2.getReg();
4470 MCRegister DstReg = Inst.getOperand(0).getReg();
4471 if (Src2Reg == DstReg)
4472 return true;
4473
4474 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4475 if (TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[0], HwMode))
4476 .getSizeInBits() <= 128)
4477 return true;
4478
4479 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4480 Error(getOperandLoc(Operands, Src2Idx),
4481 "source 2 operand must not partially overlap with dst");
4482 return false;
4483 }
4484
4485 return true;
4486}
4487
4488bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4489 switch (Inst.getOpcode()) {
4490 default:
4491 return true;
4492 case V_DIV_SCALE_F32_gfx6_gfx7:
4493 case V_DIV_SCALE_F32_vi:
4494 case V_DIV_SCALE_F32_gfx10:
4495 case V_DIV_SCALE_F64_gfx6_gfx7:
4496 case V_DIV_SCALE_F64_vi:
4497 case V_DIV_SCALE_F64_gfx10:
4498 break;
4499 }
4500
4501 // TODO: Check that src0 = src1 or src2.
4502
4503 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4504 AMDGPU::OpName::src2_modifiers,
4505 AMDGPU::OpName::src2_modifiers}) {
4506 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4507 .getImm() &
4509 return false;
4510 }
4511 }
4512
4513 return true;
4514}
4515
4516bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4517
4518 const unsigned Opc = Inst.getOpcode();
4519 const MCInstrDesc &Desc = MII.get(Opc);
4520
4521 if ((Desc.TSFlags & MIMGFlags) == 0)
4522 return true;
4523
4524 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4525 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4526 if (isCI() || isSI())
4527 return false;
4528 }
4529
4530 return true;
4531}
4532
4533bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4534 const unsigned Opc = Inst.getOpcode();
4535 const MCInstrDesc &Desc = MII.get(Opc);
4536
4537 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4538 return true;
4539
4540 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4541
4542 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4543}
4544
// \returns true for "reversed" VALU opcodes (v_*rev_*), whose src0/src1
// operand order is swapped relative to the non-REV variant. Used by
// validateLdsDirect to reject lds_direct with these instructions.
// The list enumerates every encoding variant (e32/e64, per-subtarget
// suffixes) of the SUBREV/SUBBREV/LSHLREV/LSHRREV/ASHRREV families.
static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}
4673
4674bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
4675 const OperandVector &Operands) {
4676 using namespace SIInstrFlags;
4677 const unsigned Opcode = Inst.getOpcode();
4678 const MCInstrDesc &Desc = MII.get(Opcode);
4679
4680 // lds_direct register is defined so that it can be used
4681 // with 9-bit operands only. Ignore encodings which do not accept these.
4682 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4683 if ((Desc.TSFlags & Enc) == 0)
4684 return true;
4685
4686 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4687 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4688 if (SrcIdx == -1)
4689 break;
4690 const auto &Src = Inst.getOperand(SrcIdx);
4691 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4692
4693 if (isGFX90A() || isGFX11Plus()) {
4694 Error(getOperandLoc(Operands, SrcIdx),
4695 "lds_direct is not supported on this GPU");
4696 return false;
4697 }
4698
4699 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) {
4700 Error(getOperandLoc(Operands, SrcIdx),
4701 "lds_direct cannot be used with this instruction");
4702 return false;
4703 }
4704
4705 if (SrcName != OpName::src0) {
4706 Error(getOperandLoc(Operands, SrcIdx),
4707 "lds_direct may be used as src0 only");
4708 return false;
4709 }
4710 }
4711 }
4712
4713 return true;
4714}
4715
4716SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4717 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4718 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4719 if (Op.isFlatOffset())
4720 return Op.getStartLoc();
4721 }
4722 return getLoc();
4723}
4724
4725bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4726 const OperandVector &Operands) {
4727 auto Opcode = Inst.getOpcode();
4728 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4729 if (OpNum == -1)
4730 return true;
4731
4732 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4733 if ((TSFlags & SIInstrFlags::FLAT))
4734 return validateFlatOffset(Inst, Operands);
4735
4736 if ((TSFlags & SIInstrFlags::SMRD))
4737 return validateSMEMOffset(Inst, Operands);
4738
4739 const auto &Op = Inst.getOperand(OpNum);
4740 // GFX12+ buffer ops: InstOffset is signed 24, but must not be a negative.
4741 if (isGFX12Plus() &&
4742 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4743 const unsigned OffsetSize = 24;
4744 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
4745 Error(getFlatOffsetLoc(Operands),
4746 Twine("expected a ") + Twine(OffsetSize - 1) +
4747 "-bit unsigned offset for buffer ops");
4748 return false;
4749 }
4750 } else {
4751 const unsigned OffsetSize = 16;
4752 if (!isUIntN(OffsetSize, Op.getImm())) {
4753 Error(getFlatOffsetLoc(Operands),
4754 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4755 return false;
4756 }
4757 }
4758 return true;
4759}
4760
4761bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4762 const OperandVector &Operands) {
4763 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4764 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4765 return true;
4766
4767 auto Opcode = Inst.getOpcode();
4768 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4769 assert(OpNum != -1);
4770
4771 const auto &Op = Inst.getOperand(OpNum);
4772 if (!hasFlatOffsets() && Op.getImm() != 0) {
4773 Error(getFlatOffsetLoc(Operands),
4774 "flat offset modifier is not supported on this GPU");
4775 return false;
4776 }
4777
4778 // For pre-GFX12 FLAT instructions the offset must be positive;
4779 // MSB is ignored and forced to zero.
4780 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4781 bool AllowNegative =
4783 isGFX12Plus();
4784 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4785 Error(getFlatOffsetLoc(Operands),
4786 Twine("expected a ") +
4787 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4788 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4789 return false;
4790 }
4791
4792 return true;
4793}
4794
4795SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4796 // Start with second operand because SMEM Offset cannot be dst or src0.
4797 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4798 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4799 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4800 return Op.getStartLoc();
4801 }
4802 return getLoc();
4803}
4804
4805bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4806 const OperandVector &Operands) {
4807 if (isCI() || isSI())
4808 return true;
4809
4810 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4811 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4812 return true;
4813
4814 auto Opcode = Inst.getOpcode();
4815 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4816 if (OpNum == -1)
4817 return true;
4818
4819 const auto &Op = Inst.getOperand(OpNum);
4820 if (!Op.isImm())
4821 return true;
4822
4823 uint64_t Offset = Op.getImm();
4824 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4827 return true;
4828
4829 Error(getSMEMOffsetLoc(Operands),
4830 isGFX12Plus() && IsBuffer
4831 ? "expected a 23-bit unsigned offset for buffer ops"
4832 : isGFX12Plus() ? "expected a 24-bit signed offset"
4833 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4834 : "expected a 21-bit signed offset");
4835
4836 return false;
4837}
4838
4839bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
4840 const OperandVector &Operands) {
4841 unsigned Opcode = Inst.getOpcode();
4842 const MCInstrDesc &Desc = MII.get(Opcode);
4843 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4844 return true;
4845
4846 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4847 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4848
4849 const int OpIndices[] = { Src0Idx, Src1Idx };
4850
4851 unsigned NumExprs = 0;
4852 unsigned NumLiterals = 0;
4853 int64_t LiteralValue;
4854
4855 for (int OpIdx : OpIndices) {
4856 if (OpIdx == -1) break;
4857
4858 const MCOperand &MO = Inst.getOperand(OpIdx);
4859 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4861 bool IsLit = false;
4862 std::optional<int64_t> Imm;
4863 if (MO.isImm()) {
4864 Imm = MO.getImm();
4865 } else if (MO.isExpr()) {
4866 if (isLitExpr(MO.getExpr())) {
4867 IsLit = true;
4868 Imm = getLitValue(MO.getExpr());
4869 }
4870 } else {
4871 continue;
4872 }
4873
4874 if (!Imm.has_value()) {
4875 ++NumExprs;
4876 } else if (!isInlineConstant(Inst, OpIdx)) {
4877 auto OpType = static_cast<AMDGPU::OperandType>(
4878 Desc.operands()[OpIdx].OperandType);
4879 int64_t Value = encode32BitLiteral(*Imm, OpType, IsLit);
4880 if (NumLiterals == 0 || LiteralValue != Value) {
4882 ++NumLiterals;
4883 }
4884 }
4885 }
4886 }
4887
4888 if (NumLiterals + NumExprs <= 1)
4889 return true;
4890
4891 Error(getOperandLoc(Operands, Src1Idx),
4892 "only one unique literal operand is allowed");
4893 return false;
4894}
4895
// Validates the op_sel operand for several instruction families:
//  * permlane16: only the low two op_sel bits may be set.
//  * GFX940 DOT instructions: op_sel must be 0 and op_sel_hi all-ones.
//  * GFX11+ VOP3 (non-VOP3P) DOT: op_sel[0:1] must be 0.
//  * GFX12+ packed FP32: an SGPR source must select the low half in both
//    op_sel and op_sel_hi, since HW reads only the first SGPR.
// \returns false if any constraint is violated.
bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  if (isPermlane16(Opc)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

    // Only op_sel[0] and op_sel[1] are meaningful for permlane16.
    if (OpSel & ~3)
      return false;
  }

  uint64_t TSFlags = MII.get(Opc).TSFlags;

  if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    if (OpSelIdx != -1) {
      if (Inst.getOperand(OpSelIdx).getImm() != 0)
        return false;
    }
    int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
    if (OpSelHiIdx != -1) {
      // -1 is the default all-ones op_sel_hi value.
      if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
        return false;
    }
  }

  // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
  if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
      (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
    if (OpSel & 3)
      return false;
  }

  // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
  // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
  // the first SGPR and use it for both the low and high operations.
  if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
    int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
    int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);

    const MCOperand &Src0 = Inst.getOperand(Src0Idx);
    const MCOperand &Src1 = Inst.getOperand(Src1Idx);
    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
    unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();

    const MCRegisterInfo *TRI = getContext().getRegisterInfo();

    // An SGPR source must not select the high half in either op_sel or
    // op_sel_hi at its operand index.
    auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
      unsigned Mask = 1U << Index;
      return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
    };

    if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
        !VerifyOneSGPR(/*Index=*/0))
      return false;
    if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
        !VerifyOneSGPR(/*Index=*/1))
      return false;

    int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    if (Src2Idx != -1) {
      const MCOperand &Src2 = Inst.getOperand(Src2Idx);
      if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
          !VerifyOneSGPR(/*Index=*/2))
        return false;
    }
  }

  return true;
}
4969
4970bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
4971 if (!hasTrue16Insts())
4972 return true;
4973 const MCRegisterInfo *MRI = getMRI();
4974 const unsigned Opc = Inst.getOpcode();
4975 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4976 if (OpSelIdx == -1)
4977 return true;
4978 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
4979 // If the value is 0 we could have a default OpSel Operand, so conservatively
4980 // allow it.
4981 if (OpSelOpValue == 0)
4982 return true;
4983 unsigned OpCount = 0;
4984 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
4985 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
4986 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
4987 if (OpIdx == -1)
4988 continue;
4989 const MCOperand &Op = Inst.getOperand(OpIdx);
4990 if (Op.isReg() &&
4991 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
4992 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
4993 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
4994 if (OpSelOpIsHi != VGPRSuffixIsHi)
4995 return false;
4996 }
4997 ++OpCount;
4998 }
4999
5000 return true;
5001}
5002
5003bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
5004 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
5005
5006 const unsigned Opc = Inst.getOpcode();
5007 uint64_t TSFlags = MII.get(Opc).TSFlags;
5008
5009 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
5010 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
5011 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
5012 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
5013 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
5014 !(TSFlags & SIInstrFlags::IsSWMMAC))
5015 return true;
5016
5017 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
5018 if (NegIdx == -1)
5019 return true;
5020
5021 unsigned Neg = Inst.getOperand(NegIdx).getImm();
5022
5023 // Instructions that have neg_lo or neg_hi operand but neg modifier is allowed
5024 // on some src operands but not allowed on other.
5025 // It is convenient that such instructions don't have src_modifiers operand
5026 // for src operands that don't allow neg because they also don't allow opsel.
5027
5028 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
5029 AMDGPU::OpName::src1_modifiers,
5030 AMDGPU::OpName::src2_modifiers};
5031
5032 for (unsigned i = 0; i < 3; ++i) {
5033 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
5034 if (Neg & (1 << i))
5035 return false;
5036 }
5037 }
5038
5039 return true;
5040}
5041
5042bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
5043 const OperandVector &Operands) {
5044 const unsigned Opc = Inst.getOpcode();
5045 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
5046 if (DppCtrlIdx >= 0) {
5047 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
5048
5049 if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
5050 AMDGPU::isDPALU_DPP(MII.get(Opc), MII, getSTI())) {
5051 // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
5052 // only on GFX12.
5053 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5054 Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
5055 : "DP ALU dpp only supports row_newbcast");
5056 return false;
5057 }
5058 }
5059
5060 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
5061 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5062
5063 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
5064 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5065 if (Src1Idx >= 0) {
5066 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5067 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5068 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
5069 Error(getOperandLoc(Operands, Src1Idx),
5070 "invalid operand for instruction");
5071 return false;
5072 }
5073 if (Src1.isImm()) {
5074 Error(getInstLoc(Operands),
5075 "src1 immediate operand invalid for instruction");
5076 return false;
5077 }
5078 }
5079 }
5080
5081 return true;
5082}
5083
5084// Check if VCC register matches wavefront size
5085bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5086 return (Reg == AMDGPU::VCC && isWave64()) ||
5087 (Reg == AMDGPU::VCC_LO && isWave32());
5088}
5089
5090// One unique literal can be used. VOP3 literal is only allowed in GFX10+
bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
                                         const OperandVector &Operands) {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  // A mandatory literal (OpName::imm) is part of the encoding and relaxes the
  // "no literal" restriction below.
  bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
  // Only VOP3/VOP3P/VOPD and mandatory-literal opcodes need this check.
  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
      !HasMandatoryLiteral && !isVOPD(Opcode))
    return true;

  OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);

  // Index and value of the first literal seen so far, if any.
  std::optional<unsigned> LiteralOpIdx;
  std::optional<uint64_t> LiteralValue;

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      continue;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    // Only immediates and expressions can become literals.
    if (!MO.isImm() && !MO.isExpr())
      continue;
    if (!isSISrcOperand(Desc, OpIdx))
      continue;

    // Try to obtain a concrete value for the operand.
    std::optional<int64_t> Imm;
    if (MO.isImm())
      Imm = MO.getImm();
    else if (MO.isExpr() && isLitExpr(MO.getExpr()))
      Imm = getLitValue(MO.getExpr());

    bool IsAnotherLiteral = false;
    if (!Imm.has_value()) {
      // Literal value not known, so we conservately assume it's different.
      IsAnotherLiteral = true;
    } else if (!isInlineConstant(Inst, OpIdx)) {
      uint64_t Value = *Imm;
      bool IsForcedFP64 =
          Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
                           HasMandatoryLiteral);
      // A 64-bit FP source whose operand size is 8 bytes.
      bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
                    AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
      bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);

      // Reject values that cannot be encoded as a 32-bit literal unless the
      // target/encoding permits 64-bit literals.
      if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
          !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
        Error(getOperandLoc(Operands, OpIdx),
              "invalid operand for instruction");
        return false;
      }

      // FP64 literals encodable in 32 bits are compared by their high half.
      if (IsFP64 && IsValid32Op && !IsForcedFP64)
        Value = Hi_32(Value);

      IsAnotherLiteral = !LiteralValue || *LiteralValue != Value;
    }

    // VOP3 literals require the FeatureVOP3Literal (GFX10+).
    if (IsAnotherLiteral && !HasMandatoryLiteral &&
        !getFeatureBits()[FeatureVOP3Literal]) {
      Error(getOperandLoc(Operands, OpIdx),
            "literal operands are not supported");
      return false;
    }

    // At most one distinct literal value is allowed per instruction.
    if (LiteralOpIdx && IsAnotherLiteral) {
      Error(getLaterLoc(getOperandLoc(Operands, OpIdx),
                        getOperandLoc(Operands, *LiteralOpIdx)),
            "only one unique literal operand is allowed");
      return false;
    }

    if (IsAnotherLiteral)
      LiteralOpIdx = OpIdx;
  }

  return true;
}
5169
5170// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5171static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5172 const MCRegisterInfo *MRI) {
5173 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5174 if (OpIdx < 0)
5175 return -1;
5176
5177 const MCOperand &Op = Inst.getOperand(OpIdx);
5178 if (!Op.isReg())
5179 return -1;
5180
5181 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5182 auto Reg = Sub ? Sub : Op.getReg();
5183 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5184 return AGPR32.contains(Reg) ? 1 : 0;
5185}
5186
// Validate AGPR usage in memory instructions: on gfx90a the destination and
// data operands must be uniformly VGPR or uniformly AGPR; on earlier targets
// AGPR loads/stores are not supported at all.
bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  // Only memory instructions carry AGPR-capable data operands.
  if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
                  SIInstrFlags::DS)) == 0)
    return true;

  // DS instructions name their data operand data0; others use vdata.
  AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
                                ? AMDGPU::OpName::data0
                                : AMDGPU::OpName::vdata;

  const MCRegisterInfo *MRI = getMRI();
  // -1 = operand absent or not a register; 0 = VGPR; 1 = AGPR.
  int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
  int DataAreg = IsAGPROperand(Inst, DataName, MRI);

  // Two-data DS instructions: both data operands must agree.
  if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
    int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
    if (Data2Areg >= 0 && Data2Areg != DataAreg)
      return false;
  }

  auto FB = getFeatureBits();
  if (FB[AMDGPU::FeatureGFX90AInsts]) {
    // gfx90a allows AGPR, but dst and data must match register kind.
    if (DataAreg < 0 || DstAreg < 0)
      return true;
    return DstAreg == DataAreg;
  }

  // Pre-gfx90a: no AGPR allowed in either position.
  return DstAreg < 1 && DataAreg < 1;
}
5217
// Validate that VGPR/AGPR tuples are even-aligned on targets that require it
// (FeatureRequiresAlignedVGPRs), modulo a handful of opcodes that are exempt.
bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
  auto FB = getFeatureBits();
  if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
    return true;

  unsigned Opc = Inst.getOpcode();
  const MCRegisterInfo *MRI = getMRI();
  // DS_READ_B96_TR_B6 is the only DS instruction in GFX950, that allows
  // unaligned VGPR. All others only allow even aligned VGPRs.
  if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
    return true;

  if (FB[AMDGPU::FeatureGFX1250Insts]) {
    switch (Opc) {
    default:
      break;
    case AMDGPU::DS_LOAD_TR6_B96:
    case AMDGPU::DS_LOAD_TR6_B96_gfx12:
      // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250, that
      // allows unaligned VGPR. All others only allow even aligned VGPRs.
      return true;
    case AMDGPU::GLOBAL_LOAD_TR6_B96:
    case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
      // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250, that
      // allows unaligned VGPR for vdst, but other operands still only allow
      // even aligned VGPRs.
      int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
      if (VAddrIdx != -1) {
        const MCOperand &Op = Inst.getOperand(VAddrIdx);
        MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
        // Odd first register in the vaddr tuple -> misaligned.
        if ((Sub - AMDGPU::VGPR0) & 1)
          return false;
      }
      return true;
    }
    case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
    case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
      return true;
    }
  }

  // General case: every register-tuple operand must start on an even
  // VGPR/AGPR index.
  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
  for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
    const MCOperand &Op = Inst.getOperand(I);
    if (!Op.isReg())
      continue;

    // A missing sub0 means the operand is a single register, not a tuple;
    // single registers have no alignment requirement.
    MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
    if (!Sub)
      continue;

    if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
      return false;
    if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
      return false;
  }

  return true;
}
5278
5279SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5280 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5281 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5282 if (Op.isBLGP())
5283 return Op.getStartLoc();
5284 }
5285 return SMLoc();
5286}
5287
5288bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5289 const OperandVector &Operands) {
5290 unsigned Opc = Inst.getOpcode();
5291 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5292 if (BlgpIdx == -1)
5293 return true;
5294 SMLoc BLGPLoc = getBLGPLoc(Operands);
5295 if (!BLGPLoc.isValid())
5296 return true;
5297 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5298 auto FB = getFeatureBits();
5299 bool UsesNeg = false;
5300 if (FB[AMDGPU::FeatureGFX940Insts]) {
5301 switch (Opc) {
5302 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5303 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5304 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5305 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5306 UsesNeg = true;
5307 }
5308 }
5309
5310 if (IsNeg == UsesNeg)
5311 return true;
5312
5313 Error(BLGPLoc,
5314 UsesNeg ? "invalid modifier: blgp is not supported"
5315 : "invalid modifier: neg is not supported");
5316
5317 return false;
5318}
5319
5320bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5321 const OperandVector &Operands) {
5322 if (!isGFX11Plus())
5323 return true;
5324
5325 unsigned Opc = Inst.getOpcode();
5326 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5327 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5328 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5329 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5330 return true;
5331
5332 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5333 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5334 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5335 if (Reg == AMDGPU::SGPR_NULL)
5336 return true;
5337
5338 Error(getOperandLoc(Operands, Src0Idx), "src0 must be null");
5339 return false;
5340}
5341
5342bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5343 const OperandVector &Operands) {
5344 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5345 if ((TSFlags & SIInstrFlags::DS) == 0)
5346 return true;
5347 if (TSFlags & SIInstrFlags::GWS)
5348 return validateGWS(Inst, Operands);
5349 // Only validate GDS for non-GWS instructions.
5350 if (hasGDS())
5351 return true;
5352 int GDSIdx =
5353 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5354 if (GDSIdx < 0)
5355 return true;
5356 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5357 if (GDS) {
5358 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5359 Error(S, "gds modifier is not supported on this GPU");
5360 return false;
5361 }
5362 return true;
5363}
5364
5365// gfx90a has an undocumented limitation:
5366// DS_GWS opcodes must use even aligned registers.
5367bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5368 const OperandVector &Operands) {
5369 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5370 return true;
5371
5372 int Opc = Inst.getOpcode();
5373 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5374 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5375 return true;
5376
5377 const MCRegisterInfo *MRI = getMRI();
5378 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5379 int Data0Pos =
5380 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5381 assert(Data0Pos != -1);
5382 auto Reg = Inst.getOperand(Data0Pos).getReg();
5383 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5384 if (RegIdx & 1) {
5385 Error(getOperandLoc(Operands, Data0Pos), "vgpr must be even aligned");
5386 return false;
5387 }
5388
5389 return true;
5390}
5391
// Validate the cpol (cache-policy) operand against the target's rules:
// SCAL/NV availability, SMRD restrictions, gfx90a scc rules, and (on GFX12+)
// delegates to validateTHAndScopeBits.
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            const OperandVector &Operands,
                                            SMLoc IDLoc) {
  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                           AMDGPU::OpName::cpol);
  if (CPolPos == -1)
    return true;

  unsigned CPol = Inst.getOperand(CPolPos).getImm();

  if (!isGFX1250Plus()) {
    // SCAL (scale_offset) and NV are GFX1250+ only. Note these report an
    // error but do not return false here.
    if (CPol & CPol::SCAL) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      // Point the diagnostic at the "scale_offset" token itself.
      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
      Error(S, "scale_offset is not supported on this GPU");
    }
    if (CPol & CPol::NV) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
      Error(S, "nv is not supported on this GPU");
    }
  }

  // scale_offset is only valid on instructions that support it.
  if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    StringRef CStr(S.getPointer());
    S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
    Error(S, "scale_offset is not supported for this instruction");
  }

  // GFX12+ uses TH/scope encoding instead of glc/slc/dlc bits.
  if (isGFX12Plus())
    return validateTHAndScopeBits(Inst, Operands, CPol);

  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if (TSFlags & SIInstrFlags::SMRD) {
    // SI/CI SMRD has no cache-policy bits at all.
    if (CPol && (isSI() || isCI())) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      Error(S, "cache policy is not supported for SMRD instructions");
      return false;
    }
    // Other targets: SMEM only accepts GLC/DLC.
    if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
      Error(IDLoc, "invalid cache policy for SMEM instruction");
      return false;
    }
  }

  // gfx90a (but not gfx940): scc is only legal on a subset of memory
  // instructions.
  if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
    const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
    if (!(TSFlags & AllowSCCModifier)) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
      Error(S,
            "scc modifier is not supported for this instruction on this GPU");
      return false;
    }
  }

    return true;

  // Atomic-with-return instructions must set glc (sc0 on gfx940); all other
  // instructions must not.
  if (TSFlags & SIInstrFlags::IsAtomicRet) {
    if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
      Error(IDLoc, isGFX940() ? "instruction must use sc0"
                              : "instruction must use glc");
      return false;
    }
  } else {
    if (CPol & CPol::GLC) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
          &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
      Error(S, isGFX940() ? "instruction must not use sc0"
                          : "instruction must not use glc");
      return false;
    }
  }

  return true;
}
5477
// Validate the GFX12+ temporal-hint (TH) and scope fields of the cpol
// operand against the instruction's class (atomic/store/load) and flags.
bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
                                             const OperandVector &Operands,
                                             const unsigned CPol) {
  const unsigned TH = CPol & AMDGPU::CPol::TH;
  const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;

  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &TID = MII.get(Opcode);

  // All diagnostics point at the cpol operand.
  auto PrintError = [&](StringRef Msg) {
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    Error(S, Msg);
    return false;
  };

  if ((TH & AMDGPU::CPol::TH_ATOMIC_RETURN) &&
    return PrintError("th:TH_ATOMIC_RETURN requires a destination operand");

  if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
    return PrintError("instruction must use th:TH_ATOMIC_RETURN");

  // TH of zero (default) needs no further checks.
  if (TH == 0)
    return true;

  // SMEM does not support the NT/HT mixed hints.
  if ((TID.TSFlags & SIInstrFlags::SMRD) &&
      ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
       (TH == AMDGPU::CPol::TH_NT_HT)))
    return PrintError("invalid th value for SMEM instruction");

  if (TH == AMDGPU::CPol::TH_BYPASS) {
    if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
        (Scope == AMDGPU::CPol::SCOPE_SYS &&
      return PrintError("scope and th combination is not valid");
  }

  // The TH value's type class must match the instruction's class.
  unsigned THType = AMDGPU::getTemporalHintType(TID);
  if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
    if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
      return PrintError("invalid th value for atomic instructions");
  } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
    if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
      return PrintError("invalid th value for store instructions");
  } else {
    if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
      return PrintError("invalid th value for load instructions");
  }

  return true;
}
5532
// Reject an explicit tfe modifier on store instructions, where it has no
// effect.
bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
                                  const OperandVector &Operands) {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  if (Desc.mayStore() &&
    SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
    // getImmLoc returns the instruction start when the modifier was not
    // written explicitly; only an explicit tfe is an error.
    if (Loc != getInstLoc(Operands)) {
      Error(Loc, "TFE modifier has no meaning for store instructions");
      return false;
    }
  }

  return true;
}
5547
// Validate WMMA matrix-format modifiers: the source register tuple size must
// match the size implied by the matrix_a_fmt/matrix_b_fmt value.
bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
                                   const OperandVector &Operands) {
  unsigned Opc = Inst.getOpcode();
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  const MCInstrDesc &Desc = MII.get(Opc);

  // Check one (fmt, src) pair; returns true when valid or not applicable.
  auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
    int FmtIdx = AMDGPU::getNamedOperandIdx(Opc, FmtOp);
    if (FmtIdx == -1)
      return true;
    unsigned Fmt = Inst.getOperand(FmtIdx).getImm();
    int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
    // Size in bits of the register class actually used for the source.
    unsigned RegSize =
        TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[SrcIdx], HwMode))
            .getSizeInBits();

      return true;

    Error(getOperandLoc(Operands, SrcIdx),
          "wrong register tuple size for " +
              Twine(WMMAMods::ModMatrixFmt[Fmt]));
    return false;
  };

  return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
         validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
}
5576
// Run every per-instruction semantic check after a successful match.
// Returns false (after reporting an error) on the first failing check; the
// ordering of the checks determines which diagnostic the user sees, so do
// not reorder them. Some validators report their own errors; the ones that
// do not get an Error() call here.
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc,
                                          const OperandVector &Operands) {
  if (!validateLdsDirect(Inst, Operands))
    return false;
  if (!validateTrue16OpSel(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
          "op_sel operand conflicts with 16-bit operand suffix");
    return false;
  }
  if (!validateSOPLiteral(Inst, Operands))
    return false;
  if (!validateVOPLiteral(Inst, Operands)) {
    return false;
  }
  if (!validateConstantBusLimitations(Inst, Operands)) {
    return false;
  }
  if (!validateVOPD(Inst, Operands)) {
    return false;
  }
  if (!validateIntClampSupported(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
          "integer clamping is not supported on this GPU");
    return false;
  }
  if (!validateOpSel(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
          "invalid op_sel operand");
    return false;
  }
  if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
          "invalid neg_lo operand");
    return false;
  }
  if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
          "invalid neg_hi operand");
    return false;
  }
  if (!validateDPP(Inst, Operands)) {
    return false;
  }
  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
  if (!validateMIMGD16(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
          "d16 modifier is not supported on this GPU");
    return false;
  }
  if (!validateMIMGDim(Inst, Operands)) {
    Error(IDLoc, "missing dim operand");
    return false;
  }
  if (!validateTensorR128(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
          "instruction must set modifier r128=0");
    return false;
  }
  if (!validateMIMGMSAA(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
          "invalid dim; must be MSAA type");
    return false;
  }
  if (!validateMIMGDataSize(Inst, IDLoc)) {
    return false;
  }
  if (!validateMIMGAddrSize(Inst, IDLoc))
    return false;
  if (!validateMIMGAtomicDMask(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
          "invalid atomic image dmask");
    return false;
  }
  if (!validateMIMGGatherDMask(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
          "invalid image_gather dmask: only one bit must be set");
    return false;
  }
  if (!validateMovrels(Inst, Operands)) {
    return false;
  }
  if (!validateOffset(Inst, Operands)) {
    return false;
  }
  if (!validateMAIAccWrite(Inst, Operands)) {
    return false;
  }
  if (!validateMAISrc2(Inst, Operands)) {
    return false;
  }
  if (!validateMFMA(Inst, Operands)) {
    return false;
  }
  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
    return false;
  }

  if (!validateAGPRLdSt(Inst)) {
    Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
      ? "invalid register class: data and dst should be all VGPR or AGPR"
      : "invalid register class: agpr loads and stores not supported on this GPU"
    );
    return false;
  }
  if (!validateVGPRAlign(Inst)) {
    Error(IDLoc,
      "invalid register class: vgpr tuples must be 64 bit aligned");
    return false;
  }
  if (!validateDS(Inst, Operands)) {
    return false;
  }

  if (!validateBLGP(Inst, Operands)) {
    return false;
  }

  if (!validateDivScale(Inst)) {
    Error(IDLoc, "ABS not allowed in VOP3B instructions");
    return false;
  }
  if (!validateWaitCnt(Inst, Operands)) {
    return false;
  }
  if (!validateTFE(Inst, Operands)) {
    return false;
  }
  if (!validateWMMA(Inst, Operands)) {
    return false;
  }

  return true;
}
5710
5712 const FeatureBitset &FBS,
5713 unsigned VariantID = 0);
5714
5715static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5716 const FeatureBitset &AvailableFeatures,
5717 unsigned VariantID);
5718
// Convenience overload: check the mnemonic against every assembler variant.
bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS) {
  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
}
5723
5724bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5725 const FeatureBitset &FBS,
5726 ArrayRef<unsigned> Variants) {
5727 for (auto Variant : Variants) {
5728 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5729 return true;
5730 }
5731
5732 return false;
5733}
5734
// Produce the most helpful diagnostic for a mnemonic that failed to match:
// wrong variant, wrong wavesize, wrong GPU, or an outright typo. Returns
// true (an error was emitted) unless the mnemonic is actually supported.
bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
                                                  SMLoc IDLoc) {
  FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());

  // Check if requested instruction variant is supported.
  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
    return false;

  // This instruction is not supported.
  // Clear any other pending errors because they are no longer relevant.
  getParser().clearPendingErrors();

  // Requested instruction variant is not supported.
  // Check if any other variants are supported.
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
    return Error(IDLoc,
                 Twine(VariantName,
                       " variant of this instruction is not supported"));
  }

  // Check if this instruction may be used with a different wavesize.
  if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
      !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
    // FIXME: Use getAvailableFeatures, and do not manually recompute
    FeatureBitset FeaturesWS32 = getFeatureBits();
    FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
        .flip(AMDGPU::FeatureWavefrontSize32);
    FeatureBitset AvailableFeaturesWS32 =
        ComputeAvailableFeatures(FeaturesWS32);

    if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
      return Error(IDLoc, "instruction requires wavesize=32");
  }

  // Finally check if this instruction is supported on any other GPU.
  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
    return Error(IDLoc, "instruction not supported on this GPU");
  }

  // Instruction not supported on any GPU. Probably a typo.
  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
  return Error(IDLoc, "invalid instruction" + Suggestion);
}
5779
5780static bool isInvalidVOPDY(const OperandVector &Operands,
5781 uint64_t InvalidOprIdx) {
5782 assert(InvalidOprIdx < Operands.size());
5783 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5784 if (Op.isToken() && InvalidOprIdx > 1) {
5785 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5786 return PrevOp.isToken() && PrevOp.getToken() == "::";
5787 }
5788 return false;
5789}
5790
// Try every matcher variant, keep the most specific failure status, and on
// success run semantic validation before emitting. Returns true on error
// (MCTargetAsmParser convention).
bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  Inst.setLoc(IDLoc);
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific. We use most specific
    // status as resulting
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
    if (R == Match_Success || R == Match_MissingFeature ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
        (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
         Result != Match_MissingFeature)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  if (Result == Match_Success) {
    // validateInstruction reports its own diagnostics.
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      return true;
    }
    Out.emitInstruction(Inst, getSTI());
    return false;
  }

  // Prefer an "unsupported instruction" diagnostic over the generic ones.
  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    return true;
  }

  switch (Result) {
  default: break;
  case Match_MissingFeature:
    // It has been verified that the specified instruction
    // mnemonic is valid. A match was found but it requires
    // features which are not supported on this GPU.
    return Error(IDLoc, "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    // Point the diagnostic at the offending operand when the matcher told
    // us which one it was.
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;

      if (isInvalidVOPDY(Operands, ErrorInfo))
        return Error(ErrorLoc, "invalid VOPDY instruction");
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_MnemonicFail:
    llvm_unreachable("Invalid instructions should have been handled already");
  }
  llvm_unreachable("Implement any new match types added!");
}
5859
5860bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5861 int64_t Tmp = -1;
5862 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5863 return true;
5864 }
5865 if (getParser().parseAbsoluteExpression(Tmp)) {
5866 return true;
5867 }
5868 Ret = static_cast<uint32_t>(Tmp);
5869 return false;
5870}
5871
5872bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5873 if (!getSTI().getTargetTriple().isAMDGCN())
5874 return TokError("directive only supported for amdgcn architecture");
5875
5876 std::string TargetIDDirective;
5877 SMLoc TargetStart = getTok().getLoc();
5878 if (getParser().parseEscapedString(TargetIDDirective))
5879 return true;
5880
5881 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5882 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5883 return getParser().Error(TargetRange.Start,
5884 (Twine(".amdgcn_target directive's target id ") +
5885 Twine(TargetIDDirective) +
5886 Twine(" does not match the specified target id ") +
5887 Twine(getTargetStreamer().getTargetID()->toString())).str());
5888
5889 return false;
5890}
5891
// Emit a generic out-of-range diagnostic covering the given source range.
// Always returns true (parser error convention).
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return Error(Range.Start, "value out of range", Range);
}
5895
// Compute the VGPR/SGPR "blocks" fields of the kernel descriptor as MCExprs.
// SGPRs get extra registers for VCC/flat-scratch/XNACK folded in (pre-GFX10)
// and are range-checked when the expression is already evaluatable.
// Returns true on an out-of-range error.
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, const MCExpr *VCCUsed,
    const MCExpr *FlatScrUsed, bool XNACKUsed,
    std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
    SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
    const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
  // TODO(scott.linder): These calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaVersion Version = getIsaVersion(getSTI().getCPU());
  MCContext &Ctx = getContext();

  const MCExpr *NumSGPRs = NextFreeSGPR;
  int64_t EvaluatedSGPRs;

  if (Version.Major >= 10)
  else {
    unsigned MaxAddressableNumSGPRs =

    // Range-check the user's SGPR count when it is a constant (GFX8+,
    // without the SGPR-init bug: the extra SGPRs are not yet included).
    if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
        !Features.test(FeatureSGPRInitBug) &&
        static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    // Account for VCC / flat-scratch / XNACK reserved registers.
    const MCExpr *ExtraSGPRs =
        AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
    NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);

    // Older targets (or ones with the SGPR-init bug) check the total
    // including the extra SGPRs.
    if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
        (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
        static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    if (Features.test(FeatureSGPRInitBug))
      NumSGPRs =
  }

  // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
  // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
  auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
                                unsigned Granule) -> const MCExpr * {
    const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
    const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
    const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
    const MCExpr *AlignToGPR =
        AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
    const MCExpr *DivGPR =
        MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
    const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
    return SubGPR;
  };

  VGPRBlocks = GetNumGPRBlocks(
      NextFreeVGPR,
      IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
  SGPRBlocks =
      GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));

  return false;
}
5958
5959bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5960 if (!getSTI().getTargetTriple().isAMDGCN())
5961 return TokError("directive only supported for amdgcn architecture");
5962
5963 if (!isHsaAbi(getSTI()))
5964 return TokError("directive only supported for amdhsa OS");
5965
5966 StringRef KernelName;
5967 if (getParser().parseIdentifier(KernelName))
5968 return true;
5969
5970 AMDGPU::MCKernelDescriptor KD =
5972 &getSTI(), getContext());
5973
5974 StringSet<> Seen;
5975
5976 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5977
5978 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
5979 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
5980
5981 SMRange VGPRRange;
5982 const MCExpr *NextFreeVGPR = ZeroExpr;
5983 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
5984 const MCExpr *NamedBarCnt = ZeroExpr;
5985 uint64_t SharedVGPRCount = 0;
5986 uint64_t PreloadLength = 0;
5987 uint64_t PreloadOffset = 0;
5988 SMRange SGPRRange;
5989 const MCExpr *NextFreeSGPR = ZeroExpr;
5990
5991 // Count the number of user SGPRs implied from the enabled feature bits.
5992 unsigned ImpliedUserSGPRCount = 0;
5993
5994 // Track if the asm explicitly contains the directive for the user SGPR
5995 // count.
5996 std::optional<unsigned> ExplicitUserSGPRCount;
5997 const MCExpr *ReserveVCC = OneExpr;
5998 const MCExpr *ReserveFlatScr = OneExpr;
5999 std::optional<bool> EnableWavefrontSize32;
6000
6001 while (true) {
6002 while (trySkipToken(AsmToken::EndOfStatement));
6003
6004 StringRef ID;
6005 SMRange IDRange = getTok().getLocRange();
6006 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
6007 return true;
6008
6009 if (ID == ".end_amdhsa_kernel")
6010 break;
6011
6012 if (!Seen.insert(ID).second)
6013 return TokError(".amdhsa_ directives cannot be repeated");
6014
6015 SMLoc ValStart = getLoc();
6016 const MCExpr *ExprVal;
6017 if (getParser().parseExpression(ExprVal))
6018 return true;
6019 SMLoc ValEnd = getLoc();
6020 SMRange ValRange = SMRange(ValStart, ValEnd);
6021
6022 int64_t IVal = 0;
6023 uint64_t Val = IVal;
6024 bool EvaluatableExpr;
6025 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6026 if (IVal < 0)
6027 return OutOfRangeError(ValRange);
6028 Val = IVal;
6029 }
6030
6031#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6032 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6033 return OutOfRangeError(RANGE); \
6034 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6035 getContext());
6036
6037// Some fields use the parsed value immediately which requires the expression to
6038// be solvable.
6039#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6040 if (!(RESOLVED)) \
6041 return Error(IDRange.Start, "directive should have resolvable expression", \
6042 IDRange);
6043
6044 if (ID == ".amdhsa_group_segment_fixed_size") {
6046 CHAR_BIT>(Val))
6047 return OutOfRangeError(ValRange);
6048 KD.group_segment_fixed_size = ExprVal;
6049 } else if (ID == ".amdhsa_private_segment_fixed_size") {
6051 CHAR_BIT>(Val))
6052 return OutOfRangeError(ValRange);
6053 KD.private_segment_fixed_size = ExprVal;
6054 } else if (ID == ".amdhsa_kernarg_size") {
6055 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
6056 return OutOfRangeError(ValRange);
6057 KD.kernarg_size = ExprVal;
6058 } else if (ID == ".amdhsa_user_sgpr_count") {
6059 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6060 ExplicitUserSGPRCount = Val;
6061 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
6062 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6064 return Error(IDRange.Start,
6065 "directive is not supported with architected flat scratch",
6066 IDRange);
6068 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6069 ExprVal, ValRange);
6070 if (Val)
6071 ImpliedUserSGPRCount += 4;
6072 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
6073 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6074 if (!hasKernargPreload())
6075 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6076
6077 if (Val > getMaxNumUserSGPRs())
6078 return OutOfRangeError(ValRange);
6079 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
6080 ValRange);
6081 if (Val) {
6082 ImpliedUserSGPRCount += Val;
6083 PreloadLength = Val;
6084 }
6085 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
6086 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6087 if (!hasKernargPreload())
6088 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6089
6090 if (Val >= 1024)
6091 return OutOfRangeError(ValRange);
6092 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6093 ValRange);
6094 if (Val)
6095 PreloadOffset = Val;
6096 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6097 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6099 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6100 ValRange);
6101 if (Val)
6102 ImpliedUserSGPRCount += 2;
6103 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6104 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6106 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6107 ValRange);
6108 if (Val)
6109 ImpliedUserSGPRCount += 2;
6110 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6111 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6113 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6114 ExprVal, ValRange);
6115 if (Val)
6116 ImpliedUserSGPRCount += 2;
6117 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6118 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6120 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6121 ValRange);
6122 if (Val)
6123 ImpliedUserSGPRCount += 2;
6124 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6126 return Error(IDRange.Start,
6127 "directive is not supported with architected flat scratch",
6128 IDRange);
6129 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6131 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6132 ExprVal, ValRange);
6133 if (Val)
6134 ImpliedUserSGPRCount += 2;
6135 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6136 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6138 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6139 ExprVal, ValRange);
6140 if (Val)
6141 ImpliedUserSGPRCount += 1;
6142 } else if (ID == ".amdhsa_wavefront_size32") {
6143 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6144 if (IVersion.Major < 10)
6145 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6146 EnableWavefrontSize32 = Val;
6148 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6149 ValRange);
6150 } else if (ID == ".amdhsa_uses_dynamic_stack") {
6152 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6153 ValRange);
6154 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6156 return Error(IDRange.Start,
6157 "directive is not supported with architected flat scratch",
6158 IDRange);
6160 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6161 ValRange);
6162 } else if (ID == ".amdhsa_enable_private_segment") {
6164 return Error(
6165 IDRange.Start,
6166 "directive is not supported without architected flat scratch",
6167 IDRange);
6169 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6170 ValRange);
6171 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6173 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6174 ValRange);
6175 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6177 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6178 ValRange);
6179 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6181 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6182 ValRange);
6183 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6185 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6186 ValRange);
6187 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6189 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6190 ValRange);
6191 } else if (ID == ".amdhsa_next_free_vgpr") {
6192 VGPRRange = ValRange;
6193 NextFreeVGPR = ExprVal;
6194 } else if (ID == ".amdhsa_next_free_sgpr") {
6195 SGPRRange = ValRange;
6196 NextFreeSGPR = ExprVal;
6197 } else if (ID == ".amdhsa_accum_offset") {
6198 if (!isGFX90A())
6199 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6200 AccumOffset = ExprVal;
6201 } else if (ID == ".amdhsa_named_barrier_count") {
6202 if (!isGFX1250Plus())
6203 return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
6204 NamedBarCnt = ExprVal;
6205 } else if (ID == ".amdhsa_reserve_vcc") {
6206 if (EvaluatableExpr && !isUInt<1>(Val))
6207 return OutOfRangeError(ValRange);
6208 ReserveVCC = ExprVal;
6209 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6210 if (IVersion.Major < 7)
6211 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
6213 return Error(IDRange.Start,
6214 "directive is not supported with architected flat scratch",
6215 IDRange);
6216 if (EvaluatableExpr && !isUInt<1>(Val))
6217 return OutOfRangeError(ValRange);
6218 ReserveFlatScr = ExprVal;
6219 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6220 if (IVersion.Major < 8)
6221 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
6222 if (!isUInt<1>(Val))
6223 return OutOfRangeError(ValRange);
6224 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6225 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
6226 IDRange);
6227 } else if (ID == ".amdhsa_float_round_mode_32") {
6229 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6230 ValRange);
6231 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6233 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6234 ValRange);
6235 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6237 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6238 ValRange);
6239 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6241 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6242 ValRange);
6243 } else if (ID == ".amdhsa_dx10_clamp") {
6244 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6245 return Error(IDRange.Start, "directive unsupported on gfx1170+",
6246 IDRange);
6248 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6249 ValRange);
6250 } else if (ID == ".amdhsa_ieee_mode") {
6251 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6252 return Error(IDRange.Start, "directive unsupported on gfx1170+",
6253 IDRange);
6255 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6256 ValRange);
6257 } else if (ID == ".amdhsa_fp16_overflow") {
6258 if (IVersion.Major < 9)
6259 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
6261 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6262 ValRange);
6263 } else if (ID == ".amdhsa_tg_split") {
6264 if (!isGFX90A())
6265 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6266 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6267 ExprVal, ValRange);
6268 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6269 if (!supportsWGP(getSTI()))
6270 return Error(IDRange.Start,
6271 "directive unsupported on " + getSTI().getCPU(), IDRange);
6273 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6274 ValRange);
6275 } else if (ID == ".amdhsa_memory_ordered") {
6276 if (IVersion.Major < 10)
6277 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6279 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6280 ValRange);
6281 } else if (ID == ".amdhsa_forward_progress") {
6282 if (IVersion.Major < 10)
6283 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6285 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6286 ValRange);
6287 } else if (ID == ".amdhsa_shared_vgpr_count") {
6288 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6289 if (IVersion.Major < 10 || IVersion.Major >= 12)
6290 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
6291 IDRange);
6292 SharedVGPRCount = Val;
6294 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6295 ValRange);
6296 } else if (ID == ".amdhsa_inst_pref_size") {
6297 if (IVersion.Major < 11)
6298 return Error(IDRange.Start, "directive requires gfx11+", IDRange);
6299 if (IVersion.Major == 11) {
6301 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6302 ValRange);
6303 } else {
6305 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6306 ValRange);
6307 }
6308 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6311 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6312 ExprVal, ValRange);
6313 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6315 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6316 ExprVal, ValRange);
6317 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6320 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6321 ExprVal, ValRange);
6322 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6324 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6325 ExprVal, ValRange);
6326 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6328 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6329 ExprVal, ValRange);
6330 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6332 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6333 ExprVal, ValRange);
6334 } else if (ID == ".amdhsa_exception_int_div_zero") {
6336 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6337 ExprVal, ValRange);
6338 } else if (ID == ".amdhsa_round_robin_scheduling") {
6339 if (IVersion.Major < 12)
6340 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
6342 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6343 ValRange);
6344 } else {
6345 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
6346 }
6347
6348#undef PARSE_BITS_ENTRY
6349 }
6350
6351 if (!Seen.contains(".amdhsa_next_free_vgpr"))
6352 return TokError(".amdhsa_next_free_vgpr directive is required");
6353
6354 if (!Seen.contains(".amdhsa_next_free_sgpr"))
6355 return TokError(".amdhsa_next_free_sgpr directive is required");
6356
6357 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6358
6359 // Consider the case where the total number of UserSGPRs with trailing
6360 // allocated preload SGPRs, is greater than the number of explicitly
6361 // referenced SGPRs.
6362 if (PreloadLength) {
6363 MCContext &Ctx = getContext();
6364 NextFreeSGPR = AMDGPUMCExpr::createMax(
6365 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6366 }
6367
6368 const MCExpr *VGPRBlocks;
6369 const MCExpr *SGPRBlocks;
6370 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6371 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6372 EnableWavefrontSize32, NextFreeVGPR,
6373 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6374 SGPRBlocks))
6375 return true;
6376
6377 int64_t EvaluatedVGPRBlocks;
6378 bool VGPRBlocksEvaluatable =
6379 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6380 if (VGPRBlocksEvaluatable &&
6382 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6383 return OutOfRangeError(VGPRRange);
6384 }
6386 KD.compute_pgm_rsrc1, VGPRBlocks,
6387 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6388 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6389
6390 int64_t EvaluatedSGPRBlocks;
6391 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6393 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6394 return OutOfRangeError(SGPRRange);
6396 KD.compute_pgm_rsrc1, SGPRBlocks,
6397 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6398 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6399
6400 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6401 return TokError("amdgpu_user_sgpr_count smaller than than implied by "
6402 "enabled user SGPRs");
6403
6404 if (isGFX1250Plus()) {
6406 return TokError("too many user SGPRs enabled");
6409 MCConstantExpr::create(UserSGPRCount, getContext()),
6410 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6411 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
6412 } else {
6414 UserSGPRCount))
6415 return TokError("too many user SGPRs enabled");
6418 MCConstantExpr::create(UserSGPRCount, getContext()),
6419 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6420 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
6421 }
6422
6423 int64_t IVal = 0;
6424 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6425 return TokError("Kernarg size should be resolvable");
6426 uint64_t kernarg_size = IVal;
6427 if (PreloadLength && kernarg_size &&
6428 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6429 return TokError("Kernarg preload length + offset is larger than the "
6430 "kernarg segment size");
6431
6432 if (isGFX90A()) {
6433 if (!Seen.contains(".amdhsa_accum_offset"))
6434 return TokError(".amdhsa_accum_offset directive is required");
6435 int64_t EvaluatedAccum;
6436 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6437 uint64_t UEvaluatedAccum = EvaluatedAccum;
6438 if (AccumEvaluatable &&
6439 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6440 return TokError("accum_offset should be in range [4..256] in "
6441 "increments of 4");
6442
6443 int64_t EvaluatedNumVGPR;
6444 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6445 AccumEvaluatable &&
6446 UEvaluatedAccum >
6447 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6448 return TokError("accum_offset exceeds total VGPR allocation");
6449 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6451 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
6454 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6455 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6456 getContext());
6457 }
6458
6459 if (isGFX1250Plus())
6461 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6462 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6463 getContext());
6464
6465 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6466 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
6467 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6468 return TokError("shared_vgpr_count directive not valid on "
6469 "wavefront size 32");
6470 }
6471
6472 if (VGPRBlocksEvaluatable &&
6473 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6474 63)) {
6475 return TokError("shared_vgpr_count*2 + "
6476 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6477 "exceed 63\n");
6478 }
6479 }
6480
6481 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6482 NextFreeVGPR, NextFreeSGPR,
6483 ReserveVCC, ReserveFlatScr);
6484 return false;
6485}
6486
6487bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6488 uint32_t Version;
6489 if (ParseAsAbsoluteExpression(Version))
6490 return true;
6491
6492 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6493 return false;
6494}
6495
6496bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6497 AMDGPUMCKernelCodeT &C) {
6498 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6499 // assembly for backwards compatibility.
6500 if (ID == "max_scratch_backing_memory_byte_size") {
6501 Parser.eatToEndOfStatement();
6502 return false;
6503 }
6504
6505 SmallString<40> ErrStr;
6506 raw_svector_ostream Err(ErrStr);
6507 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6508 return TokError(Err.str());
6509 }
6510 Lex();
6511
6512 if (ID == "enable_wavefront_size32") {
6513 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6514 if (!isGFX10Plus())
6515 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6516 if (!isWave32())
6517 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6518 } else {
6519 if (!isWave64())
6520 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6521 }
6522 }
6523
6524 if (ID == "wavefront_size") {
6525 if (C.wavefront_size == 5) {
6526 if (!isGFX10Plus())
6527 return TokError("wavefront_size=5 is only allowed on GFX10+");
6528 if (!isWave32())
6529 return TokError("wavefront_size=5 requires +WavefrontSize32");
6530 } else if (C.wavefront_size == 6) {
6531 if (!isWave64())
6532 return TokError("wavefront_size=6 requires +WavefrontSize64");
6533 }
6534 }
6535
6536 return false;
6537}
6538
6539bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6540 AMDGPUMCKernelCodeT KernelCode;
6541 KernelCode.initDefault(&getSTI(), getContext());
6542
6543 while (true) {
6544 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6545 // will set the current token to EndOfStatement.
6546 while(trySkipToken(AsmToken::EndOfStatement));
6547
6548 StringRef ID;
6549 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6550 return true;
6551
6552 if (ID == ".end_amd_kernel_code_t")
6553 break;
6554
6555 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6556 return true;
6557 }
6558
6559 KernelCode.validate(&getSTI(), getContext());
6560 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6561
6562 return false;
6563}
6564
6565bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6566 StringRef KernelName;
6567 if (!parseId(KernelName, "expected symbol name"))
6568 return true;
6569
6570 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6572
6573 KernelScope.initialize(getContext());
6574 return false;
6575}
6576
6577bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6578 if (!getSTI().getTargetTriple().isAMDGCN()) {
6579 return Error(getLoc(),
6580 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6581 "architectures");
6582 }
6583
6584 auto TargetIDDirective = getLexer().getTok().getStringContents();
6585 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6586 return Error(getParser().getTok().getLoc(), "target id must match options");
6587
6588 getTargetStreamer().EmitISAVersion();
6589 Lex();
6590
6591 return false;
6592}
6593
6594bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6595 assert(isHsaAbi(getSTI()));
6596
6597 std::string HSAMetadataString;
6598 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6599 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6600 return true;
6601
6602 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6603 return Error(getLoc(), "invalid HSA metadata");
6604
6605 return false;
6606}
6607
6608/// Common code to parse out a block of text (typically YAML) between start and
6609/// end directives.
6610bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6611 const char *AssemblerDirectiveEnd,
6612 std::string &CollectString) {
6613
6614 raw_string_ostream CollectStream(CollectString);
6615
6616 getLexer().setSkipSpace(false);
6617
6618 bool FoundEnd = false;
6619 while (!isToken(AsmToken::Eof)) {
6620 while (isToken(AsmToken::Space)) {
6621 CollectStream << getTokenStr();
6622 Lex();
6623 }
6624
6625 if (trySkipId(AssemblerDirectiveEnd)) {
6626 FoundEnd = true;
6627 break;
6628 }
6629
6630 CollectStream << Parser.parseStringToEndOfStatement()
6631 << getContext().getAsmInfo()->getSeparatorString();
6632
6633 Parser.eatToEndOfStatement();
6634 }
6635
6636 getLexer().setSkipSpace(true);
6637
6638 if (isToken(AsmToken::Eof) && !FoundEnd) {
6639 return TokError(Twine("expected directive ") +
6640 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6641 }
6642
6643 return false;
6644}
6645
6646/// Parse the assembler directive for new MsgPack-format PAL metadata.
6647bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6648 std::string String;
6649 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6651 return true;
6652
6653 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6654 if (!PALMetadata->setFromString(String))
6655 return Error(getLoc(), "invalid PAL metadata");
6656 return false;
6657}
6658
6659/// Parse the assembler directive for old linear-format PAL metadata.
6660bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6661 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6662 return Error(getLoc(),
6663 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6664 "not available on non-amdpal OSes")).str());
6665 }
6666
6667 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6668 PALMetadata->setLegacy();
6669 for (;;) {
6670 uint32_t Key, Value;
6671 if (ParseAsAbsoluteExpression(Key)) {
6672 return TokError(Twine("invalid value in ") +
6674 }
6675 if (!trySkipToken(AsmToken::Comma)) {
6676 return TokError(Twine("expected an even number of values in ") +
6678 }
6679 if (ParseAsAbsoluteExpression(Value)) {
6680 return TokError(Twine("invalid value in ") +
6682 }
6683 PALMetadata->setRegister(Key, Value);
6684 if (!trySkipToken(AsmToken::Comma))
6685 break;
6686 }
6687 return false;
6688}
6689
6690/// ParseDirectiveAMDGPULDS
6691/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6692bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6693 if (getParser().checkForValidSection())
6694 return true;
6695
6696 StringRef Name;
6697 SMLoc NameLoc = getLoc();
6698 if (getParser().parseIdentifier(Name))
6699 return TokError("expected identifier in directive");
6700
6701 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6702 if (getParser().parseComma())
6703 return true;
6704
6705 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6706
6707 int64_t Size;
6708 SMLoc SizeLoc = getLoc();
6709 if (getParser().parseAbsoluteExpression(Size))
6710 return true;
6711 if (Size < 0)
6712 return Error(SizeLoc, "size must be non-negative");
6713 if (Size > LocalMemorySize)
6714 return Error(SizeLoc, "size is too large");
6715
6716 int64_t Alignment = 4;
6717 if (trySkipToken(AsmToken::Comma)) {
6718 SMLoc AlignLoc = getLoc();
6719 if (getParser().parseAbsoluteExpression(Alignment))
6720 return true;
6721 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6722 return Error(AlignLoc, "alignment must be a power of two");
6723
6724 // Alignment larger than the size of LDS is possible in theory, as long
6725 // as the linker manages to place to symbol at address 0, but we do want
6726 // to make sure the alignment fits nicely into a 32-bit integer.
6727 if (Alignment >= 1u << 31)
6728 return Error(AlignLoc, "alignment is too large");
6729 }
6730
6731 if (parseEOL())
6732 return true;
6733
6734 Symbol->redefineIfPossible();
6735 if (!Symbol->isUndefined())
6736 return Error(NameLoc, "invalid symbol redefinition");
6737
6738 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6739 return false;
6740}
6741
6742bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6743 StringRef IDVal = DirectiveID.getString();
6744
6745 if (isHsaAbi(getSTI())) {
6746 if (IDVal == ".amdhsa_kernel")
6747 return ParseDirectiveAMDHSAKernel();
6748
6749 if (IDVal == ".amdhsa_code_object_version")
6750 return ParseDirectiveAMDHSACodeObjectVersion();
6751
6752 // TODO: Restructure/combine with PAL metadata directive.
6754 return ParseDirectiveHSAMetadata();
6755 } else {
6756 if (IDVal == ".amd_kernel_code_t")
6757 return ParseDirectiveAMDKernelCodeT();
6758
6759 if (IDVal == ".amdgpu_hsa_kernel")
6760 return ParseDirectiveAMDGPUHsaKernel();
6761
6762 if (IDVal == ".amd_amdgpu_isa")
6763 return ParseDirectiveISAVersion();
6764
6766 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6767 Twine(" directive is "
6768 "not available on non-amdhsa OSes"))
6769 .str());
6770 }
6771 }
6772
6773 if (IDVal == ".amdgcn_target")
6774 return ParseDirectiveAMDGCNTarget();
6775
6776 if (IDVal == ".amdgpu_lds")
6777 return ParseDirectiveAMDGPULDS();
6778
6779 if (IDVal == PALMD::AssemblerDirectiveBegin)
6780 return ParseDirectivePALMetadataBegin();
6781
6782 if (IDVal == PALMD::AssemblerDirective)
6783 return ParseDirectivePALMetadata();
6784
6785 return true;
6786}
6787
/// Return true if \p Reg exists on the current subtarget.
/// The checks are ordered: register-class overlaps (TTMP high quad, SGPR
/// 104/105) are tested before the per-register switch, and the SI/GFX10+
/// flat_scr exclusion runs before the SGPR 102/103 overlap check — do not
/// reorder them.
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           MCRegister Reg) {
  // TTMP12-TTMP15 only exist from gfx9 onward.
  if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
    return isGFX9Plus();

  // GFX10+ has 2 more SGPRs 104 and 105.
  if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
    return hasSGPR104_SGPR105();

  // Special registers gated by specific generations/features.
  switch (Reg.id()) {
  case SRC_SHARED_BASE_LO:
  case SRC_SHARED_BASE:
  case SRC_SHARED_LIMIT_LO:
  case SRC_SHARED_LIMIT:
  case SRC_PRIVATE_BASE_LO:
  case SRC_PRIVATE_BASE:
  case SRC_PRIVATE_LIMIT_LO:
  case SRC_PRIVATE_LIMIT:
    return isGFX9Plus();
  case SRC_FLAT_SCRATCH_BASE_LO:
  case SRC_FLAT_SCRATCH_BASE_HI:
    return hasGloballyAddressableScratch();
  case SRC_POPS_EXITING_WAVE_ID:
    // Present on gfx9/gfx10 only; removed in gfx11.
    return isGFX9Plus() && !isGFX11Plus();
  case TBA:
  case TBA_LO:
  case TBA_HI:
  case TMA:
  case TMA_LO:
  case TMA_HI:
    // Trap base/memory address registers were removed in gfx9.
    return !isGFX9Plus();
  case XNACK_MASK:
  case XNACK_MASK_LO:
  case XNACK_MASK_HI:
    return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
  case SGPR_NULL:
    return isGFX10Plus();
  case SRC_EXECZ:
  case SRC_VCCZ:
    // execz/vccz pseudo-sources were removed in gfx11.
    return !isGFX11Plus();
  default:
    break;
  }

  if (isCI())
    return true;

  if (isSI() || isGFX10Plus()) {
    // No flat_scr on SI.
    // On GFX10Plus flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
    switch (Reg.id()) {
    case FLAT_SCR:
    case FLAT_SCR_LO:
    case FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
    return hasSGPR102_SGPR103();

  return true;
}
6856
6857ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6858 StringRef Mnemonic,
6859 OperandMode Mode) {
6860 ParseStatus Res = parseVOPD(Operands);
6861 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6862 return Res;
6863
6864 // Try to parse with a custom parser
6865 Res = MatchOperandParserImpl(Operands, Mnemonic);
6866
6867 // If we successfully parsed the operand or if there as an error parsing,
6868 // we are done.
6869 //
6870 // If we are parsing after we reach EndOfStatement then this means we
6871 // are appending default values to the Operands list. This is only done
6872 // by custom parser, so we shouldn't continue on to the generic parsing.
6873 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6874 return Res;
6875
6876 SMLoc RBraceLoc;
6877 SMLoc LBraceLoc = getLoc();
6878 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6879 unsigned Prefix = Operands.size();
6880
6881 for (;;) {
6882 auto Loc = getLoc();
6883 Res = parseReg(Operands);
6884 if (Res.isNoMatch())
6885 Error(Loc, "expected a register");
6886 if (!Res.isSuccess())
6887 return ParseStatus::Failure;
6888
6889 RBraceLoc = getLoc();
6890 if (trySkipToken(AsmToken::RBrac))
6891 break;
6892
6893 if (!skipToken(AsmToken::Comma,
6894 "expected a comma or a closing square bracket"))
6895 return ParseStatus::Failure;
6896 }
6897
6898 if (Operands.size() - Prefix > 1) {
6899 Operands.insert(Operands.begin() + Prefix,
6900 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6901 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6902 }
6903
6904 return ParseStatus::Success;
6905 }
6906
6907 return parseRegOrImm(Operands);
6908}
6909
6910StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6911 // Clear any forced encodings from the previous instruction.
6912 setForcedEncodingSize(0);
6913 setForcedDPP(false);
6914 setForcedSDWA(false);
6915
6916 if (Name.consume_back("_e64_dpp")) {
6917 setForcedDPP(true);
6918 setForcedEncodingSize(64);
6919 return Name;
6920 }
6921 if (Name.consume_back("_e64")) {
6922 setForcedEncodingSize(64);
6923 return Name;
6924 }
6925 if (Name.consume_back("_e32")) {
6926 setForcedEncodingSize(32);
6927 return Name;
6928 }
6929 if (Name.consume_back("_dpp")) {
6930 setForcedDPP(true);
6931 return Name;
6932 }
6933 if (Name.consume_back("_sdwa")) {
6934 setForcedSDWA(true);
6935 return Name;
6936 }
6937 return Name;
6938}
6939
6940static void applyMnemonicAliases(StringRef &Mnemonic,
6941 const FeatureBitset &Features,
6942 unsigned VariantID);
6943
// Top-level per-instruction parse entry point. Splits any encoding-forcing
// suffix off the mnemonic, applies mnemonic aliases, then parses operands
// until end of statement. Returns true on error (MCAsmParser convention).
bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name, SMLoc NameLoc,
                                       OperandVector &Operands) {
  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);

  // If the target architecture uses MnemonicAlias, call it here to parse
  // operands correctly.
  applyMnemonicAliases(Name, getAvailableFeatures(), 0);

  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  bool IsMIMG = Name.starts_with("image_");

  while (!trySkipToken(AsmToken::EndOfStatement)) {
    OperandMode Mode = OperandMode_Default;
    // On GFX10+, the first operand of an image instruction may be an
    // NSA-style register list; select the dedicated parsing mode for it.
    if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
      Mode = OperandMode_NSA;
    ParseStatus Res = parseOperand(Operands, Name, Mode);

    if (!Res.isSuccess()) {
      checkUnsupportedInstruction(Name, NameLoc);
      if (!Parser.hasPendingError()) {
        // FIXME: use real operand location rather than the current location.
        StringRef Msg = Res.isFailure() ? "failed parsing operand."
                                        : "not a valid operand.";
        Error(getLoc(), Msg);
      }
      // Consume the rest of the statement so the parser can resynchronize.
      while (!trySkipToken(AsmToken::EndOfStatement)) {
        lex();
      }
      return true;
    }

    // Eat the comma or space if there is one.
    trySkipToken(AsmToken::Comma);
  }

  return false;
}
6984
6985//===----------------------------------------------------------------------===//
6986// Utility functions
6987//===----------------------------------------------------------------------===//
6988
6989ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6990 OperandVector &Operands) {
6991 SMLoc S = getLoc();
6992 if (!trySkipId(Name))
6993 return ParseStatus::NoMatch;
6994
6995 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6996 return ParseStatus::Success;
6997}
6998
6999ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
7000 int64_t &IntVal) {
7001
7002 if (!trySkipId(Prefix, AsmToken::Colon))
7003 return ParseStatus::NoMatch;
7004
7006}
7007
7008ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
7009 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7010 std::function<bool(int64_t &)> ConvertResult) {
7011 SMLoc S = getLoc();
7012 int64_t Value = 0;
7013
7014 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
7015 if (!Res.isSuccess())
7016 return Res;
7017
7018 if (ConvertResult && !ConvertResult(Value)) {
7019 Error(S, "invalid " + StringRef(Prefix) + " value.");
7020 }
7021
7022 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
7023 return ParseStatus::Success;
7024}
7025
// Parse "Prefix:[b,b,...]" where each element is 0 or 1, packing the bits
// (element I into bit I) into a single immediate operand of kind \p ImmTy.
// At most 4 elements are accepted.
ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
    const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
    bool (*ConvertResult)(int64_t &)) {
  SMLoc S = getLoc();
  if (!trySkipId(Prefix, AsmToken::Colon))
    return ParseStatus::NoMatch;

  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
    return ParseStatus::Failure;

  unsigned Val = 0;
  const unsigned MaxSize = 4;

  // FIXME: How to verify the number of elements matches the number of src
  // operands?
  for (int I = 0; ; ++I) {
    int64_t Op;
    SMLoc Loc = getLoc();
    if (!parseExpr(Op))
      return ParseStatus::Failure;

    // Each element is a single bit.
    if (Op != 0 && Op != 1)
      return Error(Loc, "invalid " + StringRef(Prefix) + " value.");

    Val |= (Op << I);

    if (trySkipToken(AsmToken::RBrac))
      break;

    // Reject a fifth element before even looking for a comma.
    if (I + 1 == MaxSize)
      return Error(getLoc(), "expected a closing square bracket");

    if (!skipToken(AsmToken::Comma, "expected a comma"))
      return ParseStatus::Failure;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
  return ParseStatus::Success;
}
7065
7066ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7067 OperandVector &Operands,
7068 AMDGPUOperand::ImmTy ImmTy,
7069 bool IgnoreNegative) {
7070 int64_t Bit;
7071 SMLoc S = getLoc();
7072
7073 if (trySkipId(Name)) {
7074 Bit = 1;
7075 } else if (trySkipId("no", Name)) {
7076 if (IgnoreNegative)
7077 return ParseStatus::Success;
7078 Bit = 0;
7079 } else {
7080 return ParseStatus::NoMatch;
7081 }
7082
7083 if (Name == "r128" && !hasMIMG_R128())
7084 return Error(S, "r128 modifier is not supported on this GPU");
7085 if (Name == "a16" && !hasA16())
7086 return Error(S, "a16 modifier is not supported on this GPU");
7087
7088 if (Bit == 0 && Name == "gds") {
7089 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7090 if (Mnemo.starts_with("ds_gws"))
7091 return Error(S, "nogds is not allowed");
7092 }
7093
7094 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7095 ImmTy = AMDGPUOperand::ImmTyR128A16;
7096
7097 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
7098 return ParseStatus::Success;
7099}
7100
7101unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7102 bool &Disabling) const {
7103 Disabling = Id.consume_front("no");
7104
7105 if (isGFX940() && !Mnemo.starts_with("s_")) {
7106 return StringSwitch<unsigned>(Id)
7107 .Case("nt", AMDGPU::CPol::NT)
7108 .Case("sc0", AMDGPU::CPol::SC0)
7109 .Case("sc1", AMDGPU::CPol::SC1)
7110 .Default(0);
7111 }
7112
7113 return StringSwitch<unsigned>(Id)
7114 .Case("dlc", AMDGPU::CPol::DLC)
7115 .Case("glc", AMDGPU::CPol::GLC)
7116 .Case("scc", AMDGPU::CPol::SCC)
7117 .Case("slc", AMDGPU::CPol::SLC)
7118 .Default(0);
7119}
7120
// Parse the cache-policy operand. GFX12+ accepts an unordered combination of
// th:..., scope:..., nv and scale_offset (each at most once); earlier targets
// accept a sequence of named bits (glc/slc/dlc/scc or GFX940 nt/sc0/sc1),
// each optionally negated with a "no" prefix.
ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
  if (isGFX12Plus()) {
    SMLoc StringLoc = getLoc();

    int64_t CPolVal = 0;
    // Each Res* tracks whether its component has been seen yet; a component
    // is only attempted while still in the NoMatch state, which both permits
    // any ordering and rejects duplicates.
    ParseStatus ResTH = ParseStatus::NoMatch;
    ParseStatus ResScope = ParseStatus::NoMatch;
    ParseStatus ResNV = ParseStatus::NoMatch;
    ParseStatus ResScal = ParseStatus::NoMatch;

    for (;;) {
      if (ResTH.isNoMatch()) {
        int64_t TH;
        ResTH = parseTH(Operands, TH);
        if (ResTH.isFailure())
          return ResTH;
        if (ResTH.isSuccess()) {
          CPolVal |= TH;
          continue;
        }
      }

      if (ResScope.isNoMatch()) {
        int64_t Scope;
        ResScope = parseScope(Operands, Scope);
        if (ResScope.isFailure())
          return ResScope;
        if (ResScope.isSuccess()) {
          CPolVal |= Scope;
          continue;
        }
      }

      // NV bit exists on GFX12+, but does something starting from GFX1250.
      // Allow parsing on all GFX12 and fail on validation for better
      // diagnostics.
      if (ResNV.isNoMatch()) {
        if (trySkipId("nv")) {
          ResNV = ParseStatus::Success;
          CPolVal |= CPol::NV;
          continue;
        } else if (trySkipId("no", "nv")) {
          ResNV = ParseStatus::Success;
          continue;
        }
      }

      if (ResScal.isNoMatch()) {
        if (trySkipId("scale_offset")) {
          ResScal = ParseStatus::Success;
          CPolVal |= CPol::SCAL;
          continue;
        } else if (trySkipId("no", "scale_offset")) {
          ResScal = ParseStatus::Success;
          continue;
        }
      }

      // Nothing matched on this iteration: the operand is complete.
      break;
    }

    // No component at all means there was no cpol operand here.
    if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
        ResScal.isNoMatch())
      return ParseStatus::NoMatch;

    Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
                                                AMDGPUOperand::ImmTyCPol));
    return ParseStatus::Success;
  }

  // Pre-GFX12 path: accumulate named bits until an unknown token is found.
  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  SMLoc OpLoc = getLoc();
  unsigned Enabled = 0, Seen = 0;
  for (;;) {
    SMLoc S = getLoc();
    bool Disabling;
    unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
    if (!CPol)
      break;

    lex();

    if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
      return Error(S, "dlc modifier is not supported on this GPU");

    if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
      return Error(S, "scc modifier is not supported on this GPU");

    // Seen tracks both the enabled and "no"-disabled forms, so e.g.
    // "glc noglc" is rejected as a duplicate.
    if (Seen & CPol)
      return Error(S, "duplicate cache policy modifier");

    if (!Disabling)
      Enabled |= CPol;

    Seen |= CPol;
  }

  if (!Seen)
    return ParseStatus::NoMatch;

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
  return ParseStatus::Success;
}
7225
7226ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7227 int64_t &Scope) {
7228 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7230
7231 ParseStatus Res = parseStringOrIntWithPrefix(
7232 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7233 Scope);
7234
7235 if (Res.isSuccess())
7236 Scope = Scopes[Scope];
7237
7238 return Res;
7239}
7240
7241ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
7242 TH = AMDGPU::CPol::TH_RT; // default
7243
7244 StringRef Value;
7245 SMLoc StringLoc;
7246 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
7247 if (!Res.isSuccess())
7248 return Res;
7249
7250 if (Value == "TH_DEFAULT")
7252 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
7253 Value == "TH_LOAD_NT_WB") {
7254 return Error(StringLoc, "invalid th value");
7255 } else if (Value.consume_front("TH_ATOMIC_")) {
7257 } else if (Value.consume_front("TH_LOAD_")) {
7259 } else if (Value.consume_front("TH_STORE_")) {
7261 } else {
7262 return Error(StringLoc, "invalid th value");
7263 }
7264
7265 if (Value == "BYPASS")
7267
7268 if (TH != 0) {
7270 TH |= StringSwitch<int64_t>(Value)
7271 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7272 .Case("RT", AMDGPU::CPol::TH_RT)
7273 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7274 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
7275 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
7277 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
7278 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
7280 .Default(0xffffffff);
7281 else
7282 TH |= StringSwitch<int64_t>(Value)
7283 .Case("RT", AMDGPU::CPol::TH_RT)
7284 .Case("NT", AMDGPU::CPol::TH_NT)
7285 .Case("HT", AMDGPU::CPol::TH_HT)
7286 .Case("LU", AMDGPU::CPol::TH_LU)
7287 .Case("WB", AMDGPU::CPol::TH_WB)
7288 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
7289 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
7290 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
7291 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
7292 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
7293 .Default(0xffffffff);
7294 }
7295
7296 if (TH == 0xffffffff)
7297 return Error(StringLoc, "invalid th value");
7298
7299 return ParseStatus::Success;
7300}
7301
7302static void
7304 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7305 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7306 std::optional<unsigned> InsertAt = std::nullopt) {
7307 auto i = OptionalIdx.find(ImmT);
7308 if (i != OptionalIdx.end()) {
7309 unsigned Idx = i->second;
7310 const AMDGPUOperand &Op =
7311 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7312 if (InsertAt)
7313 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
7314 else
7315 Op.addImmOperands(Inst, 1);
7316 } else {
7317 if (InsertAt.has_value())
7318 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
7319 else
7321 }
7322}
7323
7324ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7325 StringRef &Value,
7326 SMLoc &StringLoc) {
7327 if (!trySkipId(Prefix, AsmToken::Colon))
7328 return ParseStatus::NoMatch;
7329
7330 StringLoc = getLoc();
7331 return parseId(Value, "expected an identifier") ? ParseStatus::Success
7333}
7334
// Parse "Name:<value>" where the value is either one of the symbolic names in
// \p Ids (mapped to its index) or an integer expression; the result must lie
// in [0, Ids.size()).
ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
    OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
    int64_t &IntVal) {
  if (!trySkipId(Name, AsmToken::Colon))
    return ParseStatus::NoMatch;

  SMLoc StringLoc = getLoc();

  StringRef Value;
  if (isToken(AsmToken::Identifier)) {
    Value = getTokenStr();
    lex();

    // Linear search; an unmatched name leaves IntVal == Ids.size(), which the
    // range check below turns into a diagnostic.
    for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
      if (Value == Ids[IntVal])
        break;
  } else if (!parseExpr(IntVal))
    return ParseStatus::Failure;

  if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
    return Error(StringLoc, "invalid " + Twine(Name) + " value");

  return ParseStatus::Success;
}
7359
7360ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7361 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7362 AMDGPUOperand::ImmTy Type) {
7363 SMLoc S = getLoc();
7364 int64_t IntVal;
7365
7366 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7367 if (Res.isSuccess())
7368 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7369
7370 return Res;
7371}
7372
7373//===----------------------------------------------------------------------===//
7374// MTBUF format
7375//===----------------------------------------------------------------------===//
7376
7377bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7378 int64_t MaxVal,
7379 int64_t &Fmt) {
7380 int64_t Val;
7381 SMLoc Loc = getLoc();
7382
7383 auto Res = parseIntWithPrefix(Pref, Val);
7384 if (Res.isFailure())
7385 return false;
7386 if (Res.isNoMatch())
7387 return true;
7388
7389 if (Val < 0 || Val > MaxVal) {
7390 Error(Loc, Twine("out of range ", StringRef(Pref)));
7391 return false;
7392 }
7393
7394 Fmt = Val;
7395 return true;
7396}
7397
7398ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7399 AMDGPUOperand::ImmTy ImmTy) {
7400 const char *Pref = "index_key";
7401 int64_t ImmVal = 0;
7402 SMLoc Loc = getLoc();
7403 auto Res = parseIntWithPrefix(Pref, ImmVal);
7404 if (!Res.isSuccess())
7405 return Res;
7406
7407 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7408 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7409 (ImmVal < 0 || ImmVal > 1))
7410 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7411
7412 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7413 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7414
7415 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7416 return ParseStatus::Success;
7417}
7418
// Parse an 8-bit index_key modifier (valid values 0..3).
ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
}
7422
// Parse a 16-bit index_key modifier (valid values 0..1).
ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
}
7426
// Parse a 32-bit index_key modifier (valid values 0..1).
ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
}
7430
// Parse a WMMA matrix-format modifier ("Name:<symbol-or-int>") using the
// shared ModMatrixFmt name table.
ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
                                               StringRef Name,
                                               AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixFmt,
                                    Type);
}
7437
// Parse the "matrix_a_fmt" modifier.
ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
  return tryParseMatrixFMT(Operands, "matrix_a_fmt",
                           AMDGPUOperand::ImmTyMatrixAFMT);
}
7442
// Parse the "matrix_b_fmt" modifier.
ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
  return tryParseMatrixFMT(Operands, "matrix_b_fmt",
                           AMDGPUOperand::ImmTyMatrixBFMT);
}
7447
// Parse a WMMA matrix-scale modifier using the shared ModMatrixScale table.
ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
                                                 StringRef Name,
                                                 AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixScale,
                                    Type);
}
7454
// Parse the "matrix_a_scale" modifier.
ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
  return tryParseMatrixScale(Operands, "matrix_a_scale",
                             AMDGPUOperand::ImmTyMatrixAScale);
}
7459
// Parse the "matrix_b_scale" modifier.
ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
  return tryParseMatrixScale(Operands, "matrix_b_scale",
                             AMDGPUOperand::ImmTyMatrixBScale);
}
7464
// Parse a WMMA matrix-scale-format modifier using the shared
// ModMatrixScaleFmt table.
ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
                                                    StringRef Name,
                                                    AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixScaleFmt,
                                    Type);
}
7471
// Parse the "matrix_a_scale_fmt" modifier.
ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
  return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
                                AMDGPUOperand::ImmTyMatrixAScaleFmt);
}
7476
// Parse the "matrix_b_scale_fmt" modifier.
ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
  return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
                                AMDGPUOperand::ImmTyMatrixBScaleFmt);
}
7481
7482// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7483// values to live in a joint format operand in the MCInst encoding.
// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;

  // dfmt and nfmt can appear in either order, and each is optional.
  for (int I = 0; I < 2; ++I) {
    if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
      return ParseStatus::Failure;

    if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
      return ParseStatus::Failure;

    // Skip optional comma between dfmt/nfmt
    // but guard against 2 commas following each other.
    // (trySkipToken looks at the current token; peekToken looks one further
    // ahead, so the separator comma before the next operand is preserved.)
    if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
        !peekToken().is(AsmToken::Comma)) {
      trySkipToken(AsmToken::Comma);
    }
  }

  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
    return ParseStatus::NoMatch;

  // A single specified half is combined with the other half's default.
  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  Format = encodeDfmtNfmt(Dfmt, Nfmt);
  return ParseStatus::Success;
}
7515
7516ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7517 using namespace llvm::AMDGPU::MTBUFFormat;
7518
7519 int64_t Fmt = UFMT_UNDEF;
7520
7521 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7522 return ParseStatus::Failure;
7523
7524 if (Fmt == UFMT_UNDEF)
7525 return ParseStatus::NoMatch;
7526
7527 Format = Fmt;
7528 return ParseStatus::Success;
7529}
7530
7531bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7532 int64_t &Nfmt,
7533 StringRef FormatStr,
7534 SMLoc Loc) {
7535 using namespace llvm::AMDGPU::MTBUFFormat;
7536 int64_t Format;
7537
7538 Format = getDfmt(FormatStr);
7539 if (Format != DFMT_UNDEF) {
7540 Dfmt = Format;
7541 return true;
7542 }
7543
7544 Format = getNfmt(FormatStr, getSTI());
7545 if (Format != NFMT_UNDEF) {
7546 Nfmt = Format;
7547 return true;
7548 }
7549
7550 Error(Loc, "unsupported format");
7551 return false;
7552}
7553
// Parse a split symbolic format: one or two comma-separated names, one dfmt
// and one nfmt in either order. On GFX10+ the pair is re-encoded as a
// unified format value.
ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
                                                      SMLoc FormatLoc,
                                                      int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;
  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
    return ParseStatus::Failure;

  if (trySkipToken(AsmToken::Comma)) {
    StringRef Str;
    SMLoc Loc = getLoc();
    if (!parseId(Str, "expected a format string") ||
        !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
      return ParseStatus::Failure;
    // After two matches exactly one of each kind must be present; an UNDEF
    // half here means the same kind was given twice.
    if (Dfmt == DFMT_UNDEF)
      return Error(Loc, "duplicate numeric format");
    if (Nfmt == NFMT_UNDEF)
      return Error(Loc, "duplicate data format");
  }

  // Fill in defaults for whichever half was not specified.
  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  if (isGFX10Plus()) {
    // GFX10+ encodes formats in the unified scheme; not every dfmt/nfmt
    // combination has a unified equivalent.
    auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
    if (Ufmt == UFMT_UNDEF)
      return Error(FormatLoc, "unsupported format");
    Format = Ufmt;
  } else {
    Format = encodeDfmtNfmt(Dfmt, Nfmt);
  }

  return ParseStatus::Success;
}
7590
7591ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7592 SMLoc Loc,
7593 int64_t &Format) {
7594 using namespace llvm::AMDGPU::MTBUFFormat;
7595
7596 auto Id = getUnifiedFormat(FormatStr, getSTI());
7597 if (Id == UFMT_UNDEF)
7598 return ParseStatus::NoMatch;
7599
7600 if (!isGFX10Plus())
7601 return Error(Loc, "unified format is not supported on this GPU");
7602
7603 Format = Id;
7604 return ParseStatus::Success;
7605}
7606
7607ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7608 using namespace llvm::AMDGPU::MTBUFFormat;
7609 SMLoc Loc = getLoc();
7610
7611 if (!parseExpr(Format))
7612 return ParseStatus::Failure;
7613 if (!isValidFormatEncoding(Format, getSTI()))
7614 return Error(Loc, "out of range format");
7615
7616 return ParseStatus::Success;
7617}
7618
// Parse "format:" followed by either a bracketed symbolic form
// ("[NAME]" or "[NAME,NAME]") or a bare numeric expression.
ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  if (!trySkipId("format", AsmToken::Colon))
    return ParseStatus::NoMatch;

  if (trySkipToken(AsmToken::LBrac)) {
    StringRef FormatStr;
    SMLoc Loc = getLoc();
    if (!parseId(FormatStr, "expected a format string"))
      return ParseStatus::Failure;

    // Unified names take precedence; anything else is tried as a
    // split dfmt/nfmt specification.
    auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
    if (Res.isNoMatch())
      Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
    if (!Res.isSuccess())
      return Res;

    if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
      return ParseStatus::Failure;

    return ParseStatus::Success;
  }

  return parseNumericFormat(Format);
}
7645
// Parse the MTBUF format operand. The format may appear before soffset
// (legacy "dfmt/nfmt" or "format:<n>" syntax) or after it ("format:[...]");
// a placeholder immediate is always pushed first and back-patched if the
// format turns out to follow soffset.
ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Format = getDefaultFormatEncoding(getSTI());
  ParseStatus Res;
  SMLoc Loc = getLoc();

  // Parse legacy format syntax.
  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
  if (Res.isFailure())
    return Res;

  bool FormatFound = Res.isSuccess();

  // Push the format operand now (default value if not yet seen) so it always
  // occupies the same position relative to soffset.
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));

  if (FormatFound)
    trySkipToken(AsmToken::Comma);

  if (isToken(AsmToken::EndOfStatement)) {
    // We are expecting an soffset operand,
    // but let matcher handle the error.
    return ParseStatus::Success;
  }

  // Parse soffset.
  Res = parseRegOrImm(Operands);
  if (!Res.isSuccess())
    return Res;

  trySkipToken(AsmToken::Comma);

  if (!FormatFound) {
    Res = parseSymbolicOrNumericFormat(Format);
    if (Res.isFailure())
      return Res;
    if (Res.isSuccess()) {
      // Back-patch the placeholder pushed above (it sits just before the
      // soffset operand that was pushed after it).
      auto Size = Operands.size();
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
      assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
      Op.setImm(Format);
    }
    return ParseStatus::Success;
  }

  // A format was already parsed before soffset; a second one is an error.
  if (isId("format") && peekToken().is(AsmToken::Colon))
    return Error(getLoc(), "duplicate format");
  return ParseStatus::Success;
}
7696
7697ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7698 ParseStatus Res =
7699 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7700 if (Res.isNoMatch()) {
7701 Res = parseIntWithPrefix("inst_offset", Operands,
7702 AMDGPUOperand::ImmTyInstOffset);
7703 }
7704 return Res;
7705}
7706
7707ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7708 ParseStatus Res =
7709 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7710 if (Res.isNoMatch())
7711 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7712 return Res;
7713}
7714
7715ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7716 ParseStatus Res =
7717 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7718 if (Res.isNoMatch()) {
7719 Res =
7720 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7721 }
7722 return Res;
7723}
7724
7725//===----------------------------------------------------------------------===//
7726// Exp
7727//===----------------------------------------------------------------------===//
7728
// Convert parsed EXP operands into an MCInst: records the four source
// register slots, derives the "en" mask from which slots carry a live
// register, and rewrites the slots for compressed exports.
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  // MCInst operand index of each of the four source slots.
  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    // "off" placeholders become a null register in the slot.
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(MCRegister()));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // "done"/"row_en" tokens carry no MCInst operand of their own.
    if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    // Compressed export: sources 0-1 hold the packed data; move the second
    // packed register into slot 1 and null out the upper two slots.
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
    Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
  }

  // Enable two mask bits per live slot when compressed, one otherwise.
  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg()) {
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}
7789
7790//===----------------------------------------------------------------------===//
7791// s_waitcnt
7792//===----------------------------------------------------------------------===//
7793
7794static bool
7796 const AMDGPU::IsaVersion ISA,
7797 int64_t &IntVal,
7798 int64_t CntVal,
7799 bool Saturate,
7800 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7801 unsigned (*decode)(const IsaVersion &Version, unsigned))
7802{
7803 bool Failed = false;
7804
7805 IntVal = encode(ISA, IntVal, CntVal);
7806 if (CntVal != decode(ISA, IntVal)) {
7807 if (Saturate) {
7808 IntVal = encode(ISA, IntVal, -1);
7809 } else {
7810 Failed = true;
7811 }
7812 }
7813 return Failed;
7814}
7815
// Parse one "name(value)" element of an s_waitcnt operand and merge it into
// the packed \p IntVal. Accepts vmcnt/expcnt/lgkmcnt and their "_sat"
// variants (which clamp instead of erroring on overflow). Returns false and
// emits a diagnostic on any parse or range error.
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {

  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t CntVal;
  SMLoc ValLoc = getLoc();
  if (!parseExpr(CntVal))
    return false;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  bool Sat = CntName.ends_with("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  } else {
    Error(CntLoc, "invalid counter name " + CntName);
    return false;
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  // Counters may be separated by '&' or ','; a separator must be followed by
  // another counter, not end of statement.
  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  return true;
}
7863
7864ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7865 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7866 int64_t Waitcnt = getWaitcntBitMask(ISA);
7867 SMLoc S = getLoc();
7868
7869 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7870 while (!isToken(AsmToken::EndOfStatement)) {
7871 if (!parseCnt(Waitcnt))
7872 return ParseStatus::Failure;
7873 }
7874 } else {
7875 if (!parseExpr(Waitcnt))
7876 return ParseStatus::Failure;
7877 }
7878
7879 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7880 return ParseStatus::Success;
7881}
7882
// Parse one "field(VALUE)" element of an s_delay_alu operand and OR it into
// \p Delay at the field's bit position. Returns false and emits a diagnostic
// on any error.
bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
  SMLoc FieldLoc = getLoc();
  StringRef FieldName = getTokenStr();
  if (!skipToken(AsmToken::Identifier, "expected a field name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  SMLoc ValueLoc = getLoc();
  StringRef ValueName = getTokenStr();
  if (!skipToken(AsmToken::Identifier, "expected a value name") ||
      !skipToken(AsmToken::RParen, "expected a right parenthesis"))
    return false;

  // Bit offset of the field within the s_delay_alu immediate.
  unsigned Shift;
  if (FieldName == "instid0") {
    Shift = 0;
  } else if (FieldName == "instskip") {
    Shift = 4;
  } else if (FieldName == "instid1") {
    Shift = 7;
  } else {
    Error(FieldLoc, "invalid field name " + FieldName);
    return false;
  }

  int Value;
  if (Shift == 4) {
    // Parse values for instskip.
    Value = StringSwitch<int>(ValueName)
                .Case("SAME", 0)
                .Case("NEXT", 1)
                .Case("SKIP_1", 2)
                .Case("SKIP_2", 3)
                .Case("SKIP_3", 4)
                .Case("SKIP_4", 5)
                .Default(-1);
  } else {
    // Parse values for instid0 and instid1.
    Value = StringSwitch<int>(ValueName)
                .Case("NO_DEP", 0)
                .Case("VALU_DEP_1", 1)
                .Case("VALU_DEP_2", 2)
                .Case("VALU_DEP_3", 3)
                .Case("VALU_DEP_4", 4)
                .Case("TRANS32_DEP_1", 5)
                .Case("TRANS32_DEP_2", 6)
                .Case("TRANS32_DEP_3", 7)
                .Case("FMA_ACCUM_CYCLE_1", 8)
                .Case("SALU_CYCLE_1", 9)
                .Case("SALU_CYCLE_2", 10)
                .Case("SALU_CYCLE_3", 11)
                .Default(-1);
  }
  if (Value < 0) {
    Error(ValueLoc, "invalid value name " + ValueName);
    return false;
  }

  Delay |= Value << Shift;
  return true;
}
7944
7945ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7946 int64_t Delay = 0;
7947 SMLoc S = getLoc();
7948
7949 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7950 do {
7951 if (!parseDelay(Delay))
7952 return ParseStatus::Failure;
7953 } while (trySkipToken(AsmToken::Pipe));
7954 } else {
7955 if (!parseExpr(Delay))
7956 return ParseStatus::Failure;
7957 }
7958
7959 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7960 return ParseStatus::Success;
7961}
7962
// Any immediate may serve as an s_waitcnt operand.
bool
AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}
7967
7968bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7969
7970//===----------------------------------------------------------------------===//
7971// DepCtr
7972//===----------------------------------------------------------------------===//
7973
7974void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7975 StringRef DepCtrName) {
7976 switch (ErrorId) {
7977 case OPR_ID_UNKNOWN:
7978 Error(Loc, Twine("invalid counter name ", DepCtrName));
7979 return;
7980 case OPR_ID_UNSUPPORTED:
7981 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7982 return;
7983 case OPR_ID_DUPLICATE:
7984 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7985 return;
7986 case OPR_VAL_INVALID:
7987 Error(Loc, Twine("invalid value for ", DepCtrName));
7988 return;
7989 default:
7990 assert(false);
7991 }
7992}
7993
// Parse one "name(value)" element of an s_delay/depctr operand and merge the
// encoded field into \p DepCtr. \p UsedOprMask accumulates the bits of the
// fields seen so far so duplicates can be detected by the encoder.
bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {

  using namespace llvm::AMDGPU::DepCtr;

  SMLoc DepCtrLoc = getLoc();
  StringRef DepCtrName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t ExprVal;
  if (!parseExpr(ExprVal))
    return false;

  unsigned PrevOprMask = UsedOprMask;
  // A negative result is an error code understood by depCtrError.
  int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());

  if (CntVal < 0) {
    depCtrError(DepCtrLoc, CntVal, DepCtrName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  // Counters may be separated by '&' or ','; a separator must be followed by
  // another counter, not end of statement.
  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  // Replace only the bits of the field just parsed.
  unsigned CntValMask = PrevOprMask ^ UsedOprMask;
  DepCtr = (DepCtr & ~CntValMask) | CntVal;
  return true;
}
8031
/// Parse an s_delay_alu style depctr operand: either a list of
/// "name(value)" counters or a raw immediate expression. Counters that are
/// not mentioned keep their default encoding.
ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
  using namespace llvm::AMDGPU::DepCtr;

  int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
  SMLoc Loc = getLoc();

  // "ident(" introduces the symbolic counter list; anything else is treated
  // as a plain expression.
  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
    unsigned UsedOprMask = 0;
    while (!isToken(AsmToken::EndOfStatement)) {
      if (!parseDepCtr(DepCtr, UsedOprMask))
        return ParseStatus::Failure;
    }
  } else {
    if (!parseExpr(DepCtr))
      return ParseStatus::Failure;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
  return ParseStatus::Success;
}
8052
8053bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8054
8055//===----------------------------------------------------------------------===//
8056// hwreg
8057//===----------------------------------------------------------------------===//
8058
/// Parse the hwreg(reg[, offset, size]) functional syntax into the three
/// out-parameters. Returns NoMatch if the input does not start with
/// "hwreg(" so callers can try other operand forms.
ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
                                            OperandInfoTy &Offset,
                                            OperandInfoTy &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  if (!trySkipId("hwreg", AsmToken::LParen))
    return ParseStatus::NoMatch;

  // The register may be specified by name or using a numeric code
  HwReg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) &&
      (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
    HwReg.IsSymbolic = true;
    lex(); // skip register name
  } else if (!parseExpr(HwReg.Val, "a register name")) {
    return ParseStatus::Failure;
  }

  // A bare hwreg(reg) keeps the caller-provided default offset and width.
  if (trySkipToken(AsmToken::RParen))
    return ParseStatus::Success;

  // parse optional params
  if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
    return ParseStatus::Failure;

  Offset.Loc = getLoc();
  if (!parseExpr(Offset.Val))
    return ParseStatus::Failure;

  if (!skipToken(AsmToken::Comma, "expected a comma"))
    return ParseStatus::Failure;

  Width.Loc = getLoc();
  if (!parseExpr(Width.Val) ||
      !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return ParseStatus::Failure;

  return ParseStatus::Success;
}
8098
8099ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8100 using namespace llvm::AMDGPU::Hwreg;
8101
8102 int64_t ImmVal = 0;
8103 SMLoc Loc = getLoc();
8104
8105 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8106 HwregId::Default);
8107 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8108 HwregOffset::Default);
8109 struct : StructuredOpField {
8110 using StructuredOpField::StructuredOpField;
8111 bool validate(AMDGPUAsmParser &Parser) const override {
8112 if (!isUIntN(Width, Val - 1))
8113 return Error(Parser, "only values from 1 to 32 are legal");
8114 return true;
8115 }
8116 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8117 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
8118
8119 if (Res.isNoMatch())
8120 Res = parseHwregFunc(HwReg, Offset, Width);
8121
8122 if (Res.isSuccess()) {
8123 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8124 return ParseStatus::Failure;
8125 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8126 }
8127
8128 if (Res.isNoMatch() &&
8129 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8131
8132 if (!Res.isSuccess())
8133 return ParseStatus::Failure;
8134
8135 if (!isUInt<16>(ImmVal))
8136 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8137 Operands.push_back(
8138 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8139 return ParseStatus::Success;
8140}
8141
8142bool AMDGPUOperand::isHwreg() const {
8143 return isImmTy(ImmTyHwreg);
8144}
8145
8146//===----------------------------------------------------------------------===//
8147// sendmsg
8148//===----------------------------------------------------------------------===//
8149
8150bool
8151AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8152 OperandInfoTy &Op,
8153 OperandInfoTy &Stream) {
8154 using namespace llvm::AMDGPU::SendMsg;
8155
8156 Msg.Loc = getLoc();
8157 if (isToken(AsmToken::Identifier) &&
8158 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8159 Msg.IsSymbolic = true;
8160 lex(); // skip message name
8161 } else if (!parseExpr(Msg.Val, "a message name")) {
8162 return false;
8163 }
8164
8165 if (trySkipToken(AsmToken::Comma)) {
8166 Op.IsDefined = true;
8167 Op.Loc = getLoc();
8168 if (isToken(AsmToken::Identifier) &&
8169 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8171 lex(); // skip operation name
8172 } else if (!parseExpr(Op.Val, "an operation name")) {
8173 return false;
8174 }
8175
8176 if (trySkipToken(AsmToken::Comma)) {
8177 Stream.IsDefined = true;
8178 Stream.Loc = getLoc();
8179 if (!parseExpr(Stream.Val))
8180 return false;
8181 }
8182 }
8183
8184 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
8185}
8186
/// Semantic validation of a parsed sendmsg operand. Emits a diagnostic and
/// returns false when the message/operation/stream combination cannot be
/// encoded for the current subtarget.
bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

  if (Strict) {
    if (Msg.Val == OPR_ID_UNSUPPORTED) {
      Error(Msg.Loc, "specified message id is not supported on this GPU");
      return false;
    }
  } else {
    if (!isValidMsgId(Msg.Val, getSTI())) {
      Error(Msg.Loc, "invalid message id");
      return false;
    }
  }
  // In strict mode an operation must be present exactly when the message
  // requires one.
  if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
    if (Op.IsDefined) {
      Error(Op.Loc, "message does not support operations");
    } else {
      Error(Msg.Loc, "missing message operation");
    }
    return false;
  }
  if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
    if (Op.Val == OPR_ID_UNSUPPORTED)
      Error(Op.Loc, "specified operation id is not supported on this GPU");
    else
      Error(Op.Loc, "invalid operation id");
    return false;
  }
  // A stream id is only allowed where the message/operation supports it.
  if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
      Stream.IsDefined) {
    Error(Stream.Loc, "message operation does not support streams");
    return false;
  }
  if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
    Error(Stream.Loc, "invalid message stream id");
    return false;
  }
  return true;
}
8235
/// Parse a sendmsg operand: either the sendmsg(msg[, op[, stream]]) macro
/// or a raw 16-bit immediate expression.
ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("sendmsg", AsmToken::LParen)) {
    OperandInfoTy Msg(OPR_ID_UNKNOWN);
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream)) {
      ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
    } else {
      return ParseStatus::Failure;
    }
  } else if (parseExpr(ImmVal, "a sendmsg macro")) {
    // Raw immediates are range-checked here; macro results are validated
    // by validateSendMsg above.
    if (ImmVal < 0 || !isUInt<16>(ImmVal))
      return Error(Loc, "invalid immediate: only 16-bit values are legal");
  } else {
    return ParseStatus::Failure;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
  return ParseStatus::Success;
}
8262
8263bool AMDGPUOperand::isSendMsg() const {
8264 return isImmTy(ImmTySendMsg);
8265}
8266
8267ParseStatus AMDGPUAsmParser::parseWaitEvent(OperandVector &Operands) {
8268 using namespace llvm::AMDGPU::WaitEvent;
8269
8270 SMLoc Loc = getLoc();
8271 int64_t ImmVal = 0;
8272
8273 StructuredOpField DontWaitExportReady("dont_wait_export_ready", "bit value",
8274 1, 0);
8275 StructuredOpField ExportReady("export_ready", "bit value", 1, 0);
8276
8277 StructuredOpField *TargetBitfield =
8278 isGFX11() ? &DontWaitExportReady : &ExportReady;
8279
8280 ParseStatus Res = parseStructuredOpFields({TargetBitfield});
8281 if (Res.isNoMatch() && parseExpr(ImmVal, "structured immediate"))
8283 else if (Res.isSuccess()) {
8284 if (!validateStructuredOpFields({TargetBitfield}))
8285 return ParseStatus::Failure;
8286 ImmVal = TargetBitfield->Val;
8287 }
8288
8289 if (!Res.isSuccess())
8290 return ParseStatus::Failure;
8291
8292 if (!isUInt<16>(ImmVal))
8293 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8294
8295 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc,
8296 AMDGPUOperand::ImmTyWaitEvent));
8297 return ParseStatus::Success;
8298}
8299
8300bool AMDGPUOperand::isWaitEvent() const { return isImmTy(ImmTyWaitEvent); }
8301
8302//===----------------------------------------------------------------------===//
8303// v_interp
8304//===----------------------------------------------------------------------===//
8305
8306ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8307 StringRef Str;
8308 SMLoc S = getLoc();
8309
8310 if (!parseId(Str))
8311 return ParseStatus::NoMatch;
8312
8313 int Slot = StringSwitch<int>(Str)
8314 .Case("p10", 0)
8315 .Case("p20", 1)
8316 .Case("p0", 2)
8317 .Default(-1);
8318
8319 if (Slot == -1)
8320 return Error(S, "invalid interpolation slot");
8321
8322 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8323 AMDGPUOperand::ImmTyInterpSlot));
8324 return ParseStatus::Success;
8325}
8326
/// Parse an interpolation attribute of the form "attr<N>.<chan>", pushing
/// two immediate operands: the attribute number and the channel.
ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return ParseStatus::NoMatch;

  if (!Str.starts_with("attr"))
    return Error(S, "invalid interpolation attribute");

  // The last two characters are the channel suffix, e.g. ".x".
  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
                     .Case(".x", 0)
                     .Case(".y", 1)
                     .Case(".z", 2)
                     .Case(".w", 3)
                     .Default(-1);
  if (AttrChan == -1)
    return Error(S, "invalid or missing interpolation attribute channel");

  // Strip the channel suffix and the "attr" prefix, leaving the number.
  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr))
    return Error(S, "invalid or missing interpolation attribute number");

  if (Attr > 32)
    return Error(S, "out of bounds interpolation attribute number");

  // Point the channel operand's location at the suffix within the token.
  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(
      this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
  return ParseStatus::Success;
}
8364
8365//===----------------------------------------------------------------------===//
8366// exp
8367//===----------------------------------------------------------------------===//
8368
/// Parse an export target name into its immediate id.
ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Exp;

  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return ParseStatus::NoMatch;

  unsigned Id = getTgtId(Str);
  // Distinguish a malformed target from one that exists but is not
  // available on this subtarget.
  if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
    return Error(S, (Id == ET_INVALID)
                        ? "invalid exp target"
                        : "exp target is not supported on this GPU");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  return ParseStatus::Success;
}
8388
8389//===----------------------------------------------------------------------===//
8390// parser helpers
8391//===----------------------------------------------------------------------===//
8392
8393bool
8394AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8395 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8396}
8397
8398bool
8399AMDGPUAsmParser::isId(const StringRef Id) const {
8400 return isId(getToken(), Id);
8401}
8402
8403bool
8404AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8405 return getTokenKind() == Kind;
8406}
8407
8408StringRef AMDGPUAsmParser::getId() const {
8409 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8410}
8411
8412bool
8413AMDGPUAsmParser::trySkipId(const StringRef Id) {
8414 if (isId(Id)) {
8415 lex();
8416 return true;
8417 }
8418 return false;
8419}
8420
8421bool
8422AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8423 if (isToken(AsmToken::Identifier)) {
8424 StringRef Tok = getTokenStr();
8425 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8426 lex();
8427 return true;
8428 }
8429 }
8430 return false;
8431}
8432
8433bool
8434AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8435 if (isId(Id) && peekToken().is(Kind)) {
8436 lex();
8437 lex();
8438 return true;
8439 }
8440 return false;
8441}
8442
8443bool
8444AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8445 if (isToken(Kind)) {
8446 lex();
8447 return true;
8448 }
8449 return false;
8450}
8451
8452bool
8453AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8454 const StringRef ErrMsg) {
8455 if (!trySkipToken(Kind)) {
8456 Error(getLoc(), ErrMsg);
8457 return false;
8458 }
8459 return true;
8460}
8461
8462bool
8463AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8464 SMLoc S = getLoc();
8465
8466 const MCExpr *Expr;
8467 if (Parser.parseExpression(Expr))
8468 return false;
8469
8470 if (Expr->evaluateAsAbsolute(Imm))
8471 return true;
8472
8473 if (Expected.empty()) {
8474 Error(S, "expected absolute expression");
8475 } else {
8476 Error(S, Twine("expected ", Expected) +
8477 Twine(" or an absolute expression"));
8478 }
8479 return false;
8480}
8481
8482bool
8483AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8484 SMLoc S = getLoc();
8485
8486 const MCExpr *Expr;
8487 if (Parser.parseExpression(Expr))
8488 return false;
8489
8490 int64_t IntVal;
8491 if (Expr->evaluateAsAbsolute(IntVal)) {
8492 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8493 } else {
8494 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8495 }
8496 return true;
8497}
8498
8499bool
8500AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8501 if (isToken(AsmToken::String)) {
8502 Val = getToken().getStringContents();
8503 lex();
8504 return true;
8505 }
8506 Error(getLoc(), ErrMsg);
8507 return false;
8508}
8509
8510bool
8511AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8512 if (isToken(AsmToken::Identifier)) {
8513 Val = getTokenStr();
8514 lex();
8515 return true;
8516 }
8517 if (!ErrMsg.empty())
8518 Error(getLoc(), ErrMsg);
8519 return false;
8520}
8521
// Return (a copy of) the current token.
AsmToken
AMDGPUAsmParser::getToken() const {
  return Parser.getTok();
}
8526
8527AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8528 return isToken(AsmToken::EndOfStatement)
8529 ? getToken()
8530 : getLexer().peekTok(ShouldSkipSpace);
8531}
8532
8533void
8534AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8535 auto TokCount = getLexer().peekTokens(Tokens);
8536
8537 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8538 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8539}
8540
8542AMDGPUAsmParser::getTokenKind() const {
8543 return getLexer().getKind();
8544}
8545
// Return the source location of the current token.
SMLoc
AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();
}
8550
// Return the spelling of the current token.
StringRef
AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();
}
8555
// Advance to the next token.
void
AMDGPUAsmParser::lex() {
  Parser.Lex();
}
8560
// Location of the instruction mnemonic (operand 0).
SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
  return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
}
8564
8565// Returns one of the given locations that comes later in the source.
8566SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8567 return a.getPointer() < b.getPointer() ? b : a;
8568}
8569
8570SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
8571 int MCOpIdx) const {
8572 for (const auto &Op : Operands) {
8573 const auto TargetOp = static_cast<AMDGPUOperand &>(*Op);
8574 if (TargetOp.getMCOpIdx() == MCOpIdx)
8575 return TargetOp.getStartLoc();
8576 }
8577 llvm_unreachable("No such MC operand!");
8578}
8579
/// Return the location of the last parsed operand satisfying \p Test, or
/// the instruction location if none matches. Scans right-to-left, skipping
/// the mnemonic at index 0.
SMLoc
AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                               const OperandVector &Operands) const {
  for (unsigned i = Operands.size() - 1; i > 0; --i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Test(Op))
      return Op.getStartLoc();
  }
  return getInstLoc(Operands);
}
8590
8591SMLoc
8592AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8593 const OperandVector &Operands) const {
8594 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8595 return getOperandLoc(Test, Operands);
8596}
8597
/// Parse a brace-enclosed, comma-separated list of "name: value" fields.
/// Each name must match one of \p Fields and may appear at most once.
/// Returns NoMatch when the operand does not start with '{'.
ParseStatus
AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
  if (!trySkipToken(AsmToken::LCurly))
    return ParseStatus::NoMatch;

  bool First = true;
  while (!trySkipToken(AsmToken::RCurly)) {
    if (!First &&
        !skipToken(AsmToken::Comma, "comma or closing brace expected"))
      return ParseStatus::Failure;

    StringRef Id = getTokenStr();
    SMLoc IdLoc = getLoc();
    if (!skipToken(AsmToken::Identifier, "field name expected") ||
        !skipToken(AsmToken::Colon, "colon expected"))
      return ParseStatus::Failure;

    const auto *I =
        find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
    if (I == Fields.end())
      return Error(IdLoc, "unknown field");
    if ((*I)->IsDefined)
      return Error(IdLoc, "duplicate field");

    // TODO: Support symbolic values.
    (*I)->Loc = getLoc();
    if (!parseExpr((*I)->Val))
      return ParseStatus::Failure;
    (*I)->IsDefined = true;

    First = false;
  }
  return ParseStatus::Success;
}
8632
8633bool AMDGPUAsmParser::validateStructuredOpFields(
8635 return all_of(Fields, [this](const StructuredOpField *F) {
8636 return F->validate(*this);
8637 });
8638}
8639
8640//===----------------------------------------------------------------------===//
8641// swizzle
8642//===----------------------------------------------------------------------===//
8643
// Pack AND/OR/XOR masks into a BITMASK_PERM swizzle encoding.
static unsigned
encodeBitmaskPerm(const unsigned AndMask,
                  const unsigned OrMask,
                  const unsigned XorMask) {
  using namespace llvm::AMDGPU::Swizzle;

  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask << BITMASK_OR_SHIFT) |
         (XorMask << BITMASK_XOR_SHIFT);
}
8656
8657bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8658 const unsigned MaxVal,
8659 const Twine &ErrMsg, SMLoc &Loc) {
8660 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8661 return false;
8662 }
8663 Loc = getLoc();
8664 if (!parseExpr(Op)) {
8665 return false;
8666 }
8667 if (Op < MinVal || Op > MaxVal) {
8668 Error(Loc, ErrMsg);
8669 return false;
8670 }
8671
8672 return true;
8673}
8674
8675bool
8676AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8677 const unsigned MinVal,
8678 const unsigned MaxVal,
8679 const StringRef ErrMsg) {
8680 SMLoc Loc;
8681 for (unsigned i = 0; i < OpNum; ++i) {
8682 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8683 return false;
8684 }
8685
8686 return true;
8687}
8688
8689bool
8690AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8691 using namespace llvm::AMDGPU::Swizzle;
8692
8693 int64_t Lane[LANE_NUM];
8694 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8695 "expected a 2-bit lane id")) {
8697 for (unsigned I = 0; I < LANE_NUM; ++I) {
8698 Imm |= Lane[I] << (LANE_SHIFT * I);
8699 }
8700 return true;
8701 }
8702 return false;
8703}
8704
/// Parse swizzle BROADCAST(group_size, lane_id) and encode it as an
/// equivalent bitmask permutation.
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperand(LaneIdx,
                          0, GroupSize - 1,
                          "lane id must be in the interval [0,group size - 1]",
                          Loc)) {
    // AND mask keeps the group-selecting high bits; OR injects the lane.
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}
8732
8733bool
8734AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8735 using namespace llvm::AMDGPU::Swizzle;
8736
8737 SMLoc Loc;
8738 int64_t GroupSize;
8739
8740 if (!parseSwizzleOperand(GroupSize,
8741 2, 32,
8742 "group size must be in the interval [2,32]",
8743 Loc)) {
8744 return false;
8745 }
8746 if (!isPowerOf2_64(GroupSize)) {
8747 Error(Loc, "group size must be a power of two");
8748 return false;
8749 }
8750
8751 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8752 return true;
8753}
8754
8755bool
8756AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8757 using namespace llvm::AMDGPU::Swizzle;
8758
8759 SMLoc Loc;
8760 int64_t GroupSize;
8761
8762 if (!parseSwizzleOperand(GroupSize,
8763 1, 16,
8764 "group size must be in the interval [1,16]",
8765 Loc)) {
8766 return false;
8767 }
8768 if (!isPowerOf2_64(GroupSize)) {
8769 Error(Loc, "group size must be a power of two");
8770 return false;
8771 }
8772
8773 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8774 return true;
8775}
8776
/// Parse swizzle BITMASK_PERM(", <5-char mask>"). Mask characters, from
/// most to least significant bit: '0' force bit to 0, '1' force bit to 1,
/// 'p' preserve bit, 'i' invert bit.
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  // Translate each mask character into its AND/OR/XOR contribution.
  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}
8823
/// Parse swizzle FFT(mode). Only available on GFX9+.
bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!AMDGPU::isGFX9Plus(getSTI())) {
    Error(getLoc(), "FFT mode swizzle not supported on this GPU");
    return false;
  }

  int64_t Swizzle;
  SMLoc Loc;
  if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
                           "FFT swizzle must be in the interval [0," +
                               Twine(FFT_SWIZZLE_MAX) + Twine(']'),
                           Loc))
    return false;

  Imm = FFT_MODE_ENC | Swizzle;
  return true;
}
8843
8844bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
8845 using namespace llvm::AMDGPU::Swizzle;
8846
8847 if (!AMDGPU::isGFX9Plus(getSTI())) {
8848 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
8849 return false;
8850 }
8851
8852 SMLoc Loc;
8853 int64_t Direction;
8854
8855 if (!parseSwizzleOperand(Direction, 0, 1,
8856 "direction must be 0 (left) or 1 (right)", Loc))
8857 return false;
8858
8859 int64_t RotateSize;
8860 if (!parseSwizzleOperand(
8861 RotateSize, 0, ROTATE_MAX_SIZE,
8862 "number of threads to rotate must be in the interval [0," +
8863 Twine(ROTATE_MAX_SIZE) + Twine(']'),
8864 Loc))
8865 return false;
8866
8868 (RotateSize << ROTATE_SIZE_SHIFT);
8869 return true;
8870}
8871
8872bool
8873AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8874
8875 SMLoc OffsetLoc = getLoc();
8876
8877 if (!parseExpr(Imm, "a swizzle macro")) {
8878 return false;
8879 }
8880 if (!isUInt<16>(Imm)) {
8881 Error(OffsetLoc, "expected a 16-bit offset");
8882 return false;
8883 }
8884 return true;
8885}
8886
/// Parse the parenthesized body of a swizzle(MODE, ...) macro, dispatching
/// on the mode name to the appropriate sub-parser.
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {

    SMLoc ModeLoc = getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else if (trySkipId(IdSymbolic[ID_FFT])) {
      Ok = parseSwizzleFFT(Imm);
    } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
      Ok = parseSwizzleRotate(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}
8919
8920ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8921 SMLoc S = getLoc();
8922 int64_t Imm = 0;
8923
8924 if (trySkipId("offset")) {
8925
8926 bool Ok = false;
8927 if (skipToken(AsmToken::Colon, "expected a colon")) {
8928 if (trySkipId("swizzle")) {
8929 Ok = parseSwizzleMacro(Imm);
8930 } else {
8931 Ok = parseSwizzleOffset(Imm);
8932 }
8933 }
8934
8935 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8936
8938 }
8939 return ParseStatus::NoMatch;
8940}
8941
8942bool
8943AMDGPUOperand::isSwizzle() const {
8944 return isImmTy(ImmTySwizzle);
8945}
8946
8947//===----------------------------------------------------------------------===//
8948// VGPR Index Mode
8949//===----------------------------------------------------------------------===//
8950
/// Parse the body of a gpr_idx(...) macro: either an immediately closing
/// parenthesis (meaning OFF) or a comma-separated list of distinct VGPR
/// index mode names. Returns the mode bitmask, or UNDEF after reporting an
/// error.
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = getLoc();

    // Try each symbolic mode name; the mode id is the bit position.
    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      return UNDEF;
    }

    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      return UNDEF;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      return UNDEF;
  }

  return Imm;
}
8994
/// Parse a VGPR index mode operand: the gpr_idx(...) macro or a raw 4-bit
/// immediate.
ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  int64_t Imm = 0;
  SMLoc S = getLoc();

  if (trySkipId("gpr_idx", AsmToken::LParen)) {
    Imm = parseGPRIdxMacro();
    // parseGPRIdxMacro has already reported the diagnostic.
    if (Imm == UNDEF)
      return ParseStatus::Failure;
  } else {
    if (getParser().parseAbsoluteExpression(Imm))
      return ParseStatus::Failure;
    if (Imm < 0 || !isUInt<4>(Imm))
      return Error(S, "invalid immediate: only 4-bit values are legal");
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
  return ParseStatus::Success;
}
9017
9018bool AMDGPUOperand::isGPRIdxMode() const {
9019 return isImmTy(ImmTyGprIdxMode);
9020}
9021
9022//===----------------------------------------------------------------------===//
9023// sopp branch targets
9024//===----------------------------------------------------------------------===//
9025
/// Parse a SOPP branch target: a label or an absolute 16-bit signed offset.
ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {

  // Make sure we are not parsing something
  // that looks like a label or an expression but is not.
  // This will improve error messages.
  if (isRegister() || isModifier())
    return ParseStatus::NoMatch;

  if (!parseExpr(Operands))
    return ParseStatus::Failure;

  AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
  assert(Opr.isImm() || Opr.isExpr());
  SMLoc Loc = Opr.getStartLoc();

  // Currently we do not support arbitrary expressions as branch targets.
  // Only labels and absolute expressions are accepted.
  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
    Error(Loc, "expected an absolute expression or a label");
  } else if (Opr.isImm() && !Opr.isS16Imm()) {
    Error(Loc, "expected a 16-bit signed jump offset");
  }

  return ParseStatus::Success;
}
9051
9052//===----------------------------------------------------------------------===//
9053// Boolean holding registers
9054//===----------------------------------------------------------------------===//
9055
// A boolean-holding register operand parses like any other register;
// suitability is checked later by the operand predicates.
ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
  return parseReg(Operands);
}
9059
9060//===----------------------------------------------------------------------===//
9061// mubuf
9062//===----------------------------------------------------------------------===//
9063
9064void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9065 const OperandVector &Operands,
9066 bool IsAtomic) {
9067 OptionalImmIndexMap OptionalIdx;
9068 unsigned FirstOperandIdx = 1;
9069 bool IsAtomicReturn = false;
9070
9071 if (IsAtomic) {
9072 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
9074 }
9075
9076 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
9077 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9078
9079 // Add the register arguments
9080 if (Op.isReg()) {
9081 Op.addRegOperands(Inst, 1);
9082 // Insert a tied src for atomic return dst.
9083 // This cannot be postponed as subsequent calls to
9084 // addImmOperands rely on correct number of MC operands.
9085 if (IsAtomicReturn && i == FirstOperandIdx)
9086 Op.addRegOperands(Inst, 1);
9087 continue;
9088 }
9089
9090 // Handle the case where soffset is an immediate
9091 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9092 Op.addImmOperands(Inst, 1);
9093 continue;
9094 }
9095
9096 // Handle tokens like 'offen' which are sometimes hard-coded into the
9097 // asm string. There are no MCInst operands for these.
9098 if (Op.isToken()) {
9099 continue;
9100 }
9101 assert(Op.isImm());
9102
9103 // Handle optional arguments
9104 OptionalIdx[Op.getImmTy()] = i;
9105 }
9106
9107 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
9108 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
9109 // Parse a dummy operand as a placeholder for the SWZ operand. This enforces
9110 // agreement between MCInstrDesc.getNumOperands and MCInst.getNumOperands.
9112}
9113
9114//===----------------------------------------------------------------------===//
9115// smrd
9116//===----------------------------------------------------------------------===//
9117
9118bool AMDGPUOperand::isSMRDOffset8() const {
9119 return isImmLiteral() && isUInt<8>(getImm());
9120}
9121
bool AMDGPUOperand::isSMEMOffset() const {
  // Offset range is checked later by validator.
  return isImmLiteral();
}
9126
9127bool AMDGPUOperand::isSMRDLiteralOffset() const {
9128 // 32-bit literals are only supported on CI and we only want to use them
9129 // when the offset is > 8-bits.
9130 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
9131}
9132
9133//===----------------------------------------------------------------------===//
9134// vop3
9135//===----------------------------------------------------------------------===//
9136
// Convert an omod multiplier written in assembly (1, 2 or 4) into its
// encoded value (0, 1 or 2). Returns false for any other multiplier,
// leaving \p Mul untouched.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    Mul >>= 1; // 1 -> 0, 2 -> 1, 4 -> 2
    return true;
  default:
    return false;
  }
}
9144
// Convert an omod divisor written in assembly (1 or 2) into its encoded
// value (0 or 3). Returns false for any other divisor, leaving \p Div
// untouched.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
9158
9159// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9160// This is intentional and ensures compatibility with sp3.
9161// See bug 35397 for details.
9162bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9163 if (BoundCtrl == 0 || BoundCtrl == 1) {
9164 if (!isGFX11Plus())
9165 BoundCtrl = 1;
9166 return true;
9167 }
9168 return false;
9169}
9170
9171void AMDGPUAsmParser::onBeginOfFile() {
9172 if (!getParser().getStreamer().getTargetStreamer() ||
9173 getSTI().getTargetTriple().getArch() == Triple::r600)
9174 return;
9175
9176 if (!getTargetStreamer().getTargetID())
9177 getTargetStreamer().initializeTargetID(getSTI(),
9178 getSTI().getFeatureString());
9179
9180 if (isHsaAbi(getSTI()))
9181 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9182}
9183
/// Parse AMDGPU specific expressions.
///
///  expr ::= or(expr, ...) |
///           max(expr, ...)
///
bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
  using AGVK = AMDGPUMCExpr::VariantKind;

  if (isToken(AsmToken::Identifier)) {
    StringRef TokenId = getTokenStr();
    // Recognize the AMDGPU functional-expression names; anything not listed
    // here falls through to the generic MC primary-expression parser below.
    AGVK VK = StringSwitch<AGVK>(TokenId)
                  .Case("max", AGVK::AGVK_Max)
                  .Case("or", AGVK::AGVK_Or)
                  .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
                  .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
                  .Case("alignto", AGVK::AGVK_AlignTo)
                  .Case("occupancy", AGVK::AGVK_Occupancy)
                  .Default(AGVK::AGVK_None);

    if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
      uint64_t CommaCount = 0;
      lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
      lex(); // Eat '('
      while (true) {
        if (trySkipToken(AsmToken::RParen)) {
          // Reject "max()" / "or()" with no arguments.
          if (Exprs.empty()) {
            Error(getToken().getLoc(),
                  "empty " + Twine(TokenId) + " expression");
            return true;
          }
          // With N arguments there must be exactly N-1 commas.
          if (CommaCount + 1 != Exprs.size()) {
            Error(getToken().getLoc(),
                  "mismatch of commas in " + Twine(TokenId) + " expression");
            return true;
          }
          Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
          return false;
        }
        const MCExpr *Expr;
        if (getParser().parseExpression(Expr, EndLoc))
          return true;
        Exprs.push_back(Expr);
        bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
        if (LastTokenWasComma)
          CommaCount++;
        if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
          Error(getToken().getLoc(),
                "unexpected token in " + Twine(TokenId) + " expression");
          return true;
        }
      }
    }
  }
  return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
}
9240
9241ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9242 StringRef Name = getTokenStr();
9243 if (Name == "mul") {
9244 return parseIntWithPrefix("mul", Operands,
9245 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
9246 }
9247
9248 if (Name == "div") {
9249 return parseIntWithPrefix("div", Operands,
9250 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
9251 }
9252
9253 return ParseStatus::NoMatch;
9254}
9255
// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
// the number of src operands present, then copies that bit into src0_modifiers.
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
  int Opc = Inst.getOpcode();
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx == -1)
    return;

  // Count how many of src0..src2 this opcode actually has; the dst op_sel
  // bit sits immediately after the last source bit.
  int SrcNum;
  const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2};
  for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
       ++SrcNum)
    ;
  assert(SrcNum > 0);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
  if (DstIdx == -1)
    return;

  const MCOperand &DstOp = Inst.getOperand(DstIdx);
  int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
  uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
  if (DstOp.isReg() &&
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
    // True 16-bit VGPR destination: the .h/.l register suffix determines the
    // dst op_sel bit rather than the parsed op_sel immediate.
    if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
      ModVal |= SISrcMods::DST_OP_SEL;
  } else {
    if ((OpSel & (1 << SrcNum)) != 0)
      ModVal |= SISrcMods::DST_OP_SEL;
  }
  Inst.getOperand(ModIdx).setImm(ModVal);
}
9291
// Convert as VOP3P, then fold the destination op_sel bit into
// src0_modifiers (see cvtVOP3DstOpSelOnly).
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
                                   const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);
  cvtVOP3DstOpSelOnly(Inst, *getMRI());
}
9297
// Overload taking an externally supplied optional-operand index map;
// otherwise identical to the two-argument form above.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
                                   OptionalImmIndexMap &OptionalIdx) {
  cvtVOP3P(Inst, Operands, OptionalIdx);
  cvtVOP3DstOpSelOnly(Inst, *getMRI());
}
9303
// True when operand OpNum of \p Desc is an input-modifiers operand that is
// immediately followed by an untied register-class operand.
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
  return
      // 1. This operand is input modifiers
      Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. Next operand is register class
      && Desc.operands()[OpNum + 1].RegClass != -1
      // 4. Next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1,
}
9316
9317void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
9318 unsigned Opc = Inst.getOpcode();
9319 constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9320 AMDGPU::OpName::src2};
9321 constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9322 AMDGPU::OpName::src1_modifiers,
9323 AMDGPU::OpName::src2_modifiers};
9324 for (int J = 0; J < 3; ++J) {
9325 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9326 if (OpIdx == -1)
9327 // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but
9328 // no src1. So continue instead of break.
9329 continue;
9330
9331 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9332 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9333
9334 if ((OpSel & (1 << J)) != 0)
9335 ModVal |= SISrcMods::OP_SEL_0;
9336 // op_sel[3] is encoded in src0_modifiers.
9337 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
9338 ModVal |= SISrcMods::DST_OP_SEL;
9339
9340 Inst.getOperand(ModIdx).setImm(ModVal);
9341 }
9342}
9343
// Build the MCInst for a VOP3 interpolation instruction from parsed operands,
// then append its optional modifiers (high/clamp/omod/op_sel) in MC order.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic token; defs come first in MC operand order.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
               Op.isInterpAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      // Optional modifiers are recorded here and appended below in the
      // order the MC operand layout requires.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyHigh);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyClamp);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyOModSI);

  // Some v_interp instructions use op_sel[3] for dst.
  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyOpSel);
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

    cvtOpSelHelper(Inst, OpSel);
  }
}
9391
// Build the MCInst for a VINTERP instruction, then append the optional
// clamp/op_sel/wait_exp modifiers and distribute op_sel bits into the
// source modifier operands.
void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic token; defs come first in MC operand order.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);

  if (OpSelIdx == -1)
    return;

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  cvtOpSelHelper(Inst, OpSel);
}
9428
// Build the MCInst for a scaled MFMA instruction. cbsz/blgp are placed at
// fixed MCInst indices (with dummies inserted while parsing), then patched
// from the parsed optional operands; op_sel/op_sel_hi are folded into the
// scale source modifier operands.
void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();
  unsigned I = 1;
  int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);

  const MCInstrDesc &Desc = MII.get(Opc);

  // Operands[0] is the mnemonic token; defs come first in MC operand order.
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
    static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
    int NumOperands = Inst.getNumOperands();
    // The order of operands in MCInst and parsed operands are different.
    // Adding dummy cbsz and blgp operands at corresponding MCInst operand
    // indices for parsing scale values correctly.
    if (NumOperands == CbszOpIdx) {
    }
    if (isRegOrImmWithInputMods(Desc, NumOperands)) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      Op.addRegOrImmOperands(Inst, 1);
    }
  }

  // Insert CBSZ and BLGP operands for F8F6F4 variants
  auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
  if (CbszIdx != OptionalIdx.end()) {
    int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
    Inst.getOperand(CbszOpIdx).setImm(CbszVal);
  }

  int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
  auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
  if (BlgpIdx != OptionalIdx.end()) {
    int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
    Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
  }

  // Add dummy src_modifiers

  // Handle op_sel fields

  unsigned OpSel = 0;
  auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
  if (OpselIdx != OptionalIdx.end()) {
    OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
                .getImm();
  }

  unsigned OpSelHi = 0;
  auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
  if (OpselHiIdx != OptionalIdx.end()) {
    OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
                  .getImm();
  }
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers};

  // Fold op_sel/op_sel_hi bit J into srcJ_modifiers.
  for (unsigned J = 0; J < 2; ++J) {
    unsigned ModVal = 0;
    if (OpSel & (1 << J))
      ModVal |= SISrcMods::OP_SEL_0;
    if (OpSelHi & (1 << J))
      ModVal |= SISrcMods::OP_SEL_1;

    const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    Inst.getOperand(ModIdx).setImm(ModVal);
  }
}
9507
// Build the MCInst for a VOP3 instruction from parsed operands, then append
// optional modifiers (scale_sel/clamp/byte_sel/omod) and fix up the tied
// src2 of MAC/FMAC forms.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic token; defs come first in MC operand order.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      Op.addRegOrImmOperands(Inst, 1);
    }
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyScaleSel);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyClamp);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
    // vdst_in (when present) is tied to the destination register.
    if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
      Inst.addOperand(Inst.getOperand(0));
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyByteSel);
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyOModSI);

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (isMAC(Opc)) {
    auto *it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    // Copy the operand to ensure it's not invalidated when Inst grows.
    Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
  }
}
9561
9562void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9563 OptionalImmIndexMap OptionalIdx;
9564 cvtVOP3(Inst, Operands, OptionalIdx);
9565}
9566
// Convert a VOP3P instruction: add tied/placeholder operands for the special
// conversion opcodes, append the packed-math optional operands (op_sel,
// op_sel_hi, matrix/scale modifiers, neg_lo/neg_hi), then distribute their
// per-source bits into srcN_modifiers.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                               OptionalImmIndexMap &OptIdx) {
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  // These opcodes need an explicit src2_modifiers placeholder plus a copy of
  // the destination as the tied input.
  if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
      Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx11 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx11 ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
    Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
    Inst.addOperand(Inst.getOperand(0));
  }

  // Adding vdst_in operand is already covered for these DPP instructions in
  // cvtVOP3DPP.
  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
      !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {
    Inst.addOperand(Inst.getOperand(0));
  }

  int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
  if (BitOp3Idx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
  }

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // Packed math defaults op_sel_hi to all-ones (-1); unpacked uses 0.
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int MatrixAFMTIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
  if (MatrixAFMTIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixAFMT, 0);
  }

  int MatrixBFMTIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
  if (MatrixBFMTIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixBFMT, 0);
  }

  int MatrixAScaleIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
  if (MatrixAScaleIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixAScale, 0);
  }

  int MatrixBScaleIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
  if (MatrixBScaleIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixBScale, 0);
  }

  int MatrixAScaleFmtIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
  if (MatrixAScaleFmtIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
  }

  int MatrixBScaleFmtIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
  if (MatrixBScaleFmtIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixAReuse, 0);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixBReuse, 0);

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);

  int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
  if (NegHiIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);

  const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2};
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};

  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelIdx != -1)
    OpSel = Inst.getOperand(OpSelIdx).getImm();

  if (OpSelHiIdx != -1)
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();

  if (NegLoIdx != -1)
    NegLo = Inst.getOperand(NegLoIdx).getImm();

  if (NegHiIdx != -1)
    NegHi = Inst.getOperand(NegHiIdx).getImm();

  // Distribute bit J of each packed-modifier immediate into srcJ_modifiers.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    if (ModIdx == -1)
      continue;

    uint32_t ModVal = 0;

    const MCOperand &SrcOp = Inst.getOperand(OpIdx);
    if (SrcOp.isReg() && getMRI()
                             ->getRegClass(AMDGPU::VGPR_16RegClassID)
                             .contains(SrcOp.getReg())) {
      // True 16-bit VGPR source: the .h/.l register suffix supplies the
      // op_sel bit instead of the parsed op_sel immediate.
      bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
      if (VGPRSuffixIsHi)
        ModVal |= SISrcMods::OP_SEL_0;
    } else {
      if ((OpSel & (1 << J)) != 0)
        ModVal |= SISrcMods::OP_SEL_0;
    }

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
9767
9768void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9769 OptionalImmIndexMap OptIdx;
9770 cvtVOP3(Inst, Operands, OptIdx);
9771 cvtVOP3P(Inst, Operands, OptIdx);
9772}
9773
                                  unsigned i, unsigned Opc,
                                  AMDGPU::OpName OpName) {
  // When the opcode carries the named modifiers operand, add the source
  // together with its modifiers; otherwise add the bare register.
  if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
    ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
  else
    ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
}
9782
9783void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
9784 unsigned Opc = Inst.getOpcode();
9785
9786 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9787 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
9788 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
9789 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
9790 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
9791
9792 OptionalImmIndexMap OptIdx;
9793 for (unsigned i = 5; i < Operands.size(); ++i) {
9794 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9795 OptIdx[Op.getImmTy()] = i;
9796 }
9797
9798 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
9799 addOptionalImmOperand(Inst, Operands, OptIdx,
9800 AMDGPUOperand::ImmTyIndexKey8bit);
9801
9802 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
9803 addOptionalImmOperand(Inst, Operands, OptIdx,
9804 AMDGPUOperand::ImmTyIndexKey16bit);
9805
9806 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
9807 addOptionalImmOperand(Inst, Operands, OptIdx,
9808 AMDGPUOperand::ImmTyIndexKey32bit);
9809
9810 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9811 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
9812
9813 cvtVOP3P(Inst, Operands, OptIdx);
9814}
9815
9816//===----------------------------------------------------------------------===//
9817// VOPD
9818//===----------------------------------------------------------------------===//
9819
9820ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9821 if (!hasVOPD(getSTI()))
9822 return ParseStatus::NoMatch;
9823
9824 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
9825 SMLoc S = getLoc();
9826 lex();
9827 lex();
9828 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
9829 SMLoc OpYLoc = getLoc();
9830 StringRef OpYName;
9831 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
9832 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
9833 return ParseStatus::Success;
9834 }
9835 return Error(OpYLoc, "expected a VOPDY instruction after ::");
9836 }
9837 return ParseStatus::NoMatch;
9838}
9839
9840// Create VOPD MCInst operands using parsed assembler operands.
// Create VOPD MCInst operands using parsed assembler operands.
void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  // Append the parsed operand at ParsedOprIdx to the MCInst in whatever form
  // it takes (reg+modifiers, bare register, or immediate).
  auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      return;
    }
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      return;
    }
    if (Op.isImm()) {
      Op.addImmOperands(Inst, 1);
      return;
    }
    llvm_unreachable("Unhandled operand type in cvtVOPD");
  };

  const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);

  // MCInst operands are ordered as follows:
  //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]

  for (auto CompIdx : VOPD::COMPONENTS) {
    addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
  }

  for (auto CompIdx : VOPD::COMPONENTS) {
    const auto &CInfo = InstInfo[CompIdx];
    auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
    for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
      addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
    // A component with an accumulator src2 ties it to its destination.
    if (CInfo.hasSrc2Acc())
      addOp(CInfo.getIndexOfDstInParsedOperands());
  }

  int BitOp3Idx =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
  if (BitOp3Idx != -1) {
    OptionalImmIndexMap OptIdx;
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
    if (Op.isImm())
      OptIdx[Op.getImmTy()] = Operands.size() - 1;

    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
  }
}
9890
9891//===----------------------------------------------------------------------===//
9892// dpp
9893//===----------------------------------------------------------------------===//
9894
bool AMDGPUOperand::isDPP8() const {
  // True iff this operand was parsed as a dpp8:[...] selector immediate.
  return isImmTy(ImmTyDPP8);
}
9898
9899bool AMDGPUOperand::isDPPCtrl() const {
9900 using namespace AMDGPU::DPP;
9901
9902 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
9903 if (result) {
9904 int64_t Imm = getImm();
9905 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9906 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9907 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9908 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9909 (Imm == DppCtrl::WAVE_SHL1) ||
9910 (Imm == DppCtrl::WAVE_ROL1) ||
9911 (Imm == DppCtrl::WAVE_SHR1) ||
9912 (Imm == DppCtrl::WAVE_ROR1) ||
9913 (Imm == DppCtrl::ROW_MIRROR) ||
9914 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9915 (Imm == DppCtrl::BCAST15) ||
9916 (Imm == DppCtrl::BCAST31) ||
9917 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9918 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9919 }
9920 return false;
9921}
9922
9923//===----------------------------------------------------------------------===//
9924// mAI
9925//===----------------------------------------------------------------------===//
9926
bool AMDGPUOperand::isBLGP() const {
  // BLGP is a 3-bit MAI modifier immediate.
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}
9930
bool AMDGPUOperand::isS16Imm() const {
  // Accept any literal representable in 16 bits, signed or unsigned.
  return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}
9934
bool AMDGPUOperand::isU16Imm() const {
  // Accept literals representable as an unsigned 16-bit value.
  return isImmLiteral() && isUInt<16>(getImm());
}
9938
9939//===----------------------------------------------------------------------===//
9940// dim
9941//===----------------------------------------------------------------------===//
9942
9943bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
9944 // We want to allow "dim:1D" etc.,
9945 // but the initial 1 is tokenized as an integer.
9946 std::string Token;
9947 if (isToken(AsmToken::Integer)) {
9948 SMLoc Loc = getToken().getEndLoc();
9949 Token = std::string(getTokenStr());
9950 lex();
9951 if (getLoc() != Loc)
9952 return false;
9953 }
9954
9955 StringRef Suffix;
9956 if (!parseId(Suffix))
9957 return false;
9958 Token += Suffix;
9959
9960 StringRef DimId = Token;
9961 DimId.consume_front("SQ_RSRC_IMG_");
9962
9963 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
9964 if (!DimInfo)
9965 return false;
9966
9967 Encoding = DimInfo->Encoding;
9968 return true;
9969}
9970
9971ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
9972 if (!isGFX10Plus())
9973 return ParseStatus::NoMatch;
9974
9975 SMLoc S = getLoc();
9976
9977 if (!trySkipId("dim", AsmToken::Colon))
9978 return ParseStatus::NoMatch;
9979
9980 unsigned Encoding;
9981 SMLoc Loc = getLoc();
9982 if (!parseDimId(Encoding))
9983 return Error(Loc, "invalid dim value");
9984
9985 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9986 AMDGPUOperand::ImmTyDim));
9987 return ParseStatus::Success;
9988}
9989
9990//===----------------------------------------------------------------------===//
9991// dpp
9992//===----------------------------------------------------------------------===//
9993
9994ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9995 SMLoc S = getLoc();
9996
9997 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9998 return ParseStatus::NoMatch;
9999
10000 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
10001
10002 int64_t Sels[8];
10003
10004 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
10005 return ParseStatus::Failure;
10006
10007 for (size_t i = 0; i < 8; ++i) {
10008 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
10009 return ParseStatus::Failure;
10010
10011 SMLoc Loc = getLoc();
10012 if (getParser().parseAbsoluteExpression(Sels[i]))
10013 return ParseStatus::Failure;
10014 if (0 > Sels[i] || 7 < Sels[i])
10015 return Error(Loc, "expected a 3-bit value");
10016 }
10017
10018 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10019 return ParseStatus::Failure;
10020
10021 unsigned DPP8 = 0;
10022 for (size_t i = 0; i < 8; ++i)
10023 DPP8 |= (Sels[i] << (i * 3));
10024
10025 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
10026 return ParseStatus::Success;
10027}
10028
10029bool
10030AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
10031 const OperandVector &Operands) {
10032 if (Ctrl == "row_newbcast")
10033 return isGFX90A();
10034
10035 if (Ctrl == "row_share" ||
10036 Ctrl == "row_xmask")
10037 return isGFX10Plus();
10038
10039 if (Ctrl == "wave_shl" ||
10040 Ctrl == "wave_shr" ||
10041 Ctrl == "wave_rol" ||
10042 Ctrl == "wave_ror" ||
10043 Ctrl == "row_bcast")
10044 return isVI() || isGFX9();
10045
10046 return Ctrl == "row_mirror" ||
10047 Ctrl == "row_half_mirror" ||
10048 Ctrl == "quad_perm" ||
10049 Ctrl == "row_shl" ||
10050 Ctrl == "row_shr" ||
10051 Ctrl == "row_ror";
10052}
10053
10054int64_t
10055AMDGPUAsmParser::parseDPPCtrlPerm() {
10056 // quad_perm:[%d,%d,%d,%d]
10057
10058 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
10059 return -1;
10060
10061 int64_t Val = 0;
10062 for (int i = 0; i < 4; ++i) {
10063 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
10064 return -1;
10065
10066 int64_t Temp;
10067 SMLoc Loc = getLoc();
10068 if (getParser().parseAbsoluteExpression(Temp))
10069 return -1;
10070 if (Temp < 0 || Temp > 3) {
10071 Error(Loc, "expected a 2-bit value");
10072 return -1;
10073 }
10074
10075 Val += (Temp << i * 2);
10076 }
10077
10078 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10079 return -1;
10080
10081 return Val;
10082}
10083
10084int64_t
10085AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10086 using namespace AMDGPU::DPP;
10087
10088 // sel:%d
10089
10090 int64_t Val;
10091 SMLoc Loc = getLoc();
10092
10093 if (getParser().parseAbsoluteExpression(Val))
10094 return -1;
10095
10096 struct DppCtrlCheck {
10097 int64_t Ctrl;
10098 int Lo;
10099 int Hi;
10100 };
10101
10102 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
10103 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10104 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10105 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10106 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10107 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10108 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10109 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10110 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10111 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10112 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10113 .Default({-1, 0, 0});
10114
10115 bool Valid;
10116 if (Check.Ctrl == -1) {
10117 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
10118 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
10119 } else {
10120 Valid = Check.Lo <= Val && Val <= Check.Hi;
10121 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
10122 }
10123
10124 if (!Valid) {
10125 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
10126 return -1;
10127 }
10128
10129 return Val;
10130}
10131
10132ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10133 using namespace AMDGPU::DPP;
10134
10135 if (!isToken(AsmToken::Identifier) ||
10136 !isSupportedDPPCtrl(getTokenStr(), Operands))
10137 return ParseStatus::NoMatch;
10138
10139 SMLoc S = getLoc();
10140 int64_t Val = -1;
10141 StringRef Ctrl;
10142
10143 parseId(Ctrl);
10144
10145 if (Ctrl == "row_mirror") {
10146 Val = DppCtrl::ROW_MIRROR;
10147 } else if (Ctrl == "row_half_mirror") {
10148 Val = DppCtrl::ROW_HALF_MIRROR;
10149 } else {
10150 if (skipToken(AsmToken::Colon, "expected a colon")) {
10151 if (Ctrl == "quad_perm") {
10152 Val = parseDPPCtrlPerm();
10153 } else {
10154 Val = parseDPPCtrlSel(Ctrl);
10155 }
10156 }
10157 }
10158
10159 if (Val == -1)
10160 return ParseStatus::Failure;
10161
10162 Operands.push_back(
10163 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10164 return ParseStatus::Success;
10165}
10166
10167void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
10168 bool IsDPP8) {
10169 OptionalImmIndexMap OptionalIdx;
10170 unsigned Opc = Inst.getOpcode();
10171 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10172
10173 // MAC instructions are special because they have 'old'
10174 // operand which is not tied to dst (but assumed to be).
10175 // They also have dummy unused src2_modifiers.
10176 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
10177 int Src2ModIdx =
10178 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
10179 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10180 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
10181
10182 unsigned I = 1;
10183 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10184 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10185 }
10186
10187 int Fi = 0;
10188 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
10189 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10190 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10191 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10192 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
10193
10194 for (unsigned E = Operands.size(); I != E; ++I) {
10195
10196 if (IsMAC) {
10197 int NumOperands = Inst.getNumOperands();
10198 if (OldIdx == NumOperands) {
10199 // Handle old operand
10200 constexpr int DST_IDX = 0;
10201 Inst.addOperand(Inst.getOperand(DST_IDX));
10202 } else if (Src2ModIdx == NumOperands) {
10203 // Add unused dummy src2_modifiers
10205 }
10206 }
10207
10208 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
10209 Inst.addOperand(Inst.getOperand(0));
10210 }
10211
10212 if (IsVOP3CvtSrDpp) {
10213 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
10215 Inst.addOperand(MCOperand::createReg(MCRegister()));
10216 }
10217 }
10218
10219 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10221 if (TiedTo != -1) {
10222 assert((unsigned)TiedTo < Inst.getNumOperands());
10223 // handle tied old or src2 for MAC instructions
10224 Inst.addOperand(Inst.getOperand(TiedTo));
10225 }
10226 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10227 // Add the register arguments
10228 if (IsDPP8 && Op.isDppFI()) {
10229 Fi = Op.getImm();
10230 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10231 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10232 } else if (Op.isReg()) {
10233 Op.addRegOperands(Inst, 1);
10234 } else if (Op.isImm() &&
10235 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
10236 Op.addImmOperands(Inst, 1);
10237 } else if (Op.isImm()) {
10238 OptionalIdx[Op.getImmTy()] = I;
10239 } else {
10240 llvm_unreachable("unhandled operand type");
10241 }
10242 }
10243
10244 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
10245 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10246 AMDGPUOperand::ImmTyClamp);
10247
10248 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
10249 if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
10250 Inst.addOperand(Inst.getOperand(0));
10251 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10252 AMDGPUOperand::ImmTyByteSel);
10253 }
10254
10255 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10256 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
10257
10258 if (Desc.TSFlags & SIInstrFlags::VOP3P)
10259 cvtVOP3P(Inst, Operands, OptionalIdx);
10260 else if (Desc.TSFlags & SIInstrFlags::VOP3)
10261 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10262 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
10263 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
10264 }
10265
10266 if (IsDPP8) {
10267 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
10268 using namespace llvm::AMDGPU::DPP;
10269 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10270 } else {
10271 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
10272 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10273 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10274 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10275
10276 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
10277 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10278 AMDGPUOperand::ImmTyDppFI);
10279 }
10280}
10281
10282void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
10283 OptionalImmIndexMap OptionalIdx;
10284
10285 unsigned I = 1;
10286 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10287 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10288 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10289 }
10290
10291 int Fi = 0;
10292 for (unsigned E = Operands.size(); I != E; ++I) {
10293 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10295 if (TiedTo != -1) {
10296 assert((unsigned)TiedTo < Inst.getNumOperands());
10297 // handle tied old or src2 for MAC instructions
10298 Inst.addOperand(Inst.getOperand(TiedTo));
10299 }
10300 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10301 // Add the register arguments
10302 if (Op.isReg() && validateVccOperand(Op.getReg())) {
10303 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
10304 // Skip it.
10305 continue;
10306 }
10307
10308 if (IsDPP8) {
10309 if (Op.isDPP8()) {
10310 Op.addImmOperands(Inst, 1);
10311 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10312 Op.addRegWithFPInputModsOperands(Inst, 2);
10313 } else if (Op.isDppFI()) {
10314 Fi = Op.getImm();
10315 } else if (Op.isReg()) {
10316 Op.addRegOperands(Inst, 1);
10317 } else {
10318 llvm_unreachable("Invalid operand type");
10319 }
10320 } else {
10322 Op.addRegWithFPInputModsOperands(Inst, 2);
10323 } else if (Op.isReg()) {
10324 Op.addRegOperands(Inst, 1);
10325 } else if (Op.isDPPCtrl()) {
10326 Op.addImmOperands(Inst, 1);
10327 } else if (Op.isImm()) {
10328 // Handle optional arguments
10329 OptionalIdx[Op.getImmTy()] = I;
10330 } else {
10331 llvm_unreachable("Invalid operand type");
10332 }
10333 }
10334 }
10335
10336 if (IsDPP8) {
10337 using namespace llvm::AMDGPU::DPP;
10338 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10339 } else {
10340 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10341 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10342 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10343 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
10344 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10345 AMDGPUOperand::ImmTyDppFI);
10346 }
10347 }
10348}
10349
10350//===----------------------------------------------------------------------===//
10351// sdwa
10352//===----------------------------------------------------------------------===//
10353
// Parse an SDWA select operand of the form "<Prefix>:<value>", where the
// value is one of the listed byte/word/dword names or an integer.
// NOTE(review): presumably the names map to sel encodings by their position
// in this list — confirm against parseStringOrIntWithPrefix.
ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
                                          StringRef Prefix,
                                          AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(
      Operands, Prefix,
      {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
      Type);
}
10362
// Parse the SDWA "dst_unused:<value>" operand, accepting one of the listed
// names or an integer value.
ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  return parseStringOrIntWithPrefix(
      Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
      AMDGPUOperand::ImmTySDWADstUnused);
}
10368
// SDWA conversion for VOP1 instructions; no vcc operands are skipped.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}
10372
// SDWA conversion for plain VOP2 instructions; no vcc operands are skipped.
void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}
10376
// SDWA conversion for VOP2b instructions (e.g. v_addc_u32): the textual vcc
// is skipped in both dst and src positions.
void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, /*SkipDstVcc=*/true, /*SkipSrcVcc=*/true);
}
10380
// SDWA conversion for VOP2e instructions: only the source-position vcc is
// skipped.
void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, /*SkipDstVcc=*/false, /*SkipSrcVcc=*/true);
}
10384
// SDWA conversion for VOPC instructions; the dst-position vcc is skipped on
// VI only.
void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, /*SkipDstVcc=*/isVI());
}
10388
10389void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
10390 uint64_t BasicInstType,
10391 bool SkipDstVcc,
10392 bool SkipSrcVcc) {
10393 using namespace llvm::AMDGPU::SDWA;
10394
10395 OptionalImmIndexMap OptionalIdx;
10396 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10397 bool SkippedVcc = false;
10398
10399 unsigned I = 1;
10400 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10401 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10402 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10403 }
10404
10405 for (unsigned E = Operands.size(); I != E; ++I) {
10406 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10407 if (SkipVcc && !SkippedVcc && Op.isReg() &&
10408 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
10409 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
10410 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
10411 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
10412 // Skip VCC only if we didn't skip it on previous iteration.
10413 // Note that src0 and src1 occupy 2 slots each because of modifiers.
10414 if (BasicInstType == SIInstrFlags::VOP2 &&
10415 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
10416 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
10417 SkippedVcc = true;
10418 continue;
10419 }
10420 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
10421 SkippedVcc = true;
10422 continue;
10423 }
10424 }
10426 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10427 } else if (Op.isImm()) {
10428 // Handle optional arguments
10429 OptionalIdx[Op.getImmTy()] = I;
10430 } else {
10431 llvm_unreachable("Invalid operand type");
10432 }
10433 SkippedVcc = false;
10434 }
10435
10436 const unsigned Opc = Inst.getOpcode();
10437 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10438 Opc != AMDGPU::V_NOP_sdwa_vi) {
10439 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
10440 switch (BasicInstType) {
10441 case SIInstrFlags::VOP1:
10442 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10443 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10444 AMDGPUOperand::ImmTyClamp, 0);
10445
10446 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10447 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10448 AMDGPUOperand::ImmTyOModSI, 0);
10449
10450 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
10451 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10452 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10453
10454 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
10455 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10456 AMDGPUOperand::ImmTySDWADstUnused,
10457 DstUnused::UNUSED_PRESERVE);
10458
10459 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10460 break;
10461
10462 case SIInstrFlags::VOP2:
10463 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10464 AMDGPUOperand::ImmTyClamp, 0);
10465
10466 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
10467 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
10468
10469 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10470 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10471 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10472 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10473 break;
10474
10475 case SIInstrFlags::VOPC:
10476 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
10477 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10478 AMDGPUOperand::ImmTyClamp, 0);
10479 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10480 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10481 break;
10482
10483 default:
10484 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
10485 }
10486 }
10487
10488 // special case v_mac_{f16, f32}:
10489 // it has src2 register operand that is tied to dst operand
10490 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10491 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10492 auto *it = Inst.begin();
10493 std::advance(
10494 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
10495 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
10496 }
10497}
10498
10499/// Force static initialization.
10500extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
10505
10506#define GET_MATCHER_IMPLEMENTATION
10507#define GET_MNEMONIC_SPELL_CHECKER
10508#define GET_MNEMONIC_CHECKER
10509#include "AMDGPUGenAsmMatcher.inc"
10510
10511ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10512 unsigned MCK) {
10513 switch (MCK) {
10514 case MCK_addr64:
10515 return parseTokenOp("addr64", Operands);
10516 case MCK_done:
10517 return parseNamedBit("done", Operands, AMDGPUOperand::ImmTyDone, true);
10518 case MCK_idxen:
10519 return parseTokenOp("idxen", Operands);
10520 case MCK_lds:
10521 return parseTokenOp("lds", Operands);
10522 case MCK_offen:
10523 return parseTokenOp("offen", Operands);
10524 case MCK_off:
10525 return parseTokenOp("off", Operands);
10526 case MCK_row_95_en:
10527 return parseNamedBit("row_en", Operands, AMDGPUOperand::ImmTyRowEn, true);
10528 case MCK_gds:
10529 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10530 case MCK_tfe:
10531 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10532 }
10533 return tryCustomParseOperand(Operands, MCK);
10534}
10535
10536// This function should be defined after auto-generated include so that we have
10537// MatchClassKind enum defined
10538unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10539 unsigned Kind) {
10540 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10541 // But MatchInstructionImpl() expects to meet token and fails to validate
10542 // operand. This method checks if we are given immediate operand but expect to
10543 // get corresponding token.
10544 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10545 switch (Kind) {
10546 case MCK_addr64:
10547 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10548 case MCK_gds:
10549 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10550 case MCK_lds:
10551 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10552 case MCK_idxen:
10553 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10554 case MCK_offen:
10555 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10556 case MCK_tfe:
10557 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10558 case MCK_done:
10559 return Operand.isDone() ? Match_Success : Match_InvalidOperand;
10560 case MCK_row_95_en:
10561 return Operand.isRowEn() ? Match_Success : Match_InvalidOperand;
10562 case MCK_SSrc_b32:
10563 // When operands have expression values, they will return true for isToken,
10564 // because it is not possible to distinguish between a token and an
10565 // expression at parse time. MatchInstructionImpl() will always try to
10566 // match an operand as a token, when isToken returns true, and when the
10567 // name of the expression is not a valid token, the match will fail,
10568 // so we need to handle it here.
10569 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10570 case MCK_SSrc_f32:
10571 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10572 case MCK_SOPPBrTarget:
10573 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10574 case MCK_VReg32OrOff:
10575 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10576 case MCK_InterpSlot:
10577 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10578 case MCK_InterpAttr:
10579 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10580 case MCK_InterpAttrChan:
10581 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10582 case MCK_SReg_64:
10583 case MCK_SReg_64_XEXEC:
10584 // Null is defined as a 32-bit register but
10585 // it should also be enabled with 64-bit operands or larger.
10586 // The following code enables it for SReg_64 and larger operands
10587 // used as source and destination. Remaining source
10588 // operands are handled in isInlinableImm.
10589 case MCK_SReg_96:
10590 case MCK_SReg_128:
10591 case MCK_SReg_256:
10592 case MCK_SReg_512:
10593 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10594 default:
10595 return Match_InvalidOperand;
10596 }
10597}
10598
10599//===----------------------------------------------------------------------===//
10600// endpgm
10601//===----------------------------------------------------------------------===//
10602
10603ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10604 SMLoc S = getLoc();
10605 int64_t Imm = 0;
10606
10607 if (!parseExpr(Imm)) {
10608 // The operand is optional, if not present default to 0
10609 Imm = 0;
10610 }
10611
10612 if (!isUInt<16>(Imm))
10613 return Error(S, "expected a 16-bit value");
10614
10615 Operands.push_back(
10616 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10617 return ParseStatus::Success;
10618}
10619
10620bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10621
10622//===----------------------------------------------------------------------===//
10623// Split Barrier
10624//===----------------------------------------------------------------------===//
10625
10626bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
#define Success
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
SmallVector< int16_t, MAX_SRC_OPERANDS_NUM > OperandIndices
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, AMDGPU::OpName OpName)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name, const MCRegisterInfo *MRI)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static bool IsRevOpcode(const unsigned Opcode)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static MCRegister getSpecialRegForName(StringRef RegName)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0, std::optional< unsigned > InsertAt=std::nullopt)
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
AMDGPU metadata definitions and in-memory representations.
AMDHSA kernel descriptor definitions.
static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value, raw_ostream &Err)
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_READNONE
Definition Compiler.h:315
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
@ Default
#define Check(C,...)
static llvm::Expected< InlineInfo > decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
Loop::LoopBounds::Direction Direction
Definition LoopInfo.cpp:253
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(PassOpts->AAPipeline)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:487
This file implements the SmallBitVector class.
StringSet - A set-like wrapper for the StringMap.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
BinaryOperator * Mul
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & BFloat()
Definition APFloat.h:295
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5976
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition MCAsmMacro.h:103
bool is(TokenKind K) const
Definition MCAsmMacro.h:75
Register getReg() const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:353
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
SMLoc getLoc() const
Definition MCInst.h:208
void setLoc(SMLoc loc)
Definition MCInst.h:207
unsigned getOpcode() const
Definition MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition MCInst.h:232
void addOperand(const MCOperand Op)
Definition MCInst.h:215
iterator begin()
Definition MCInst.h:227
size_t size() const
Definition MCInst.h:226
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Definition MCInstrInfo.h:80
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
void setImm(int64_t Val)
Definition MCInst.h:89
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
bool isImm() const
Definition MCInst.h:66
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
const MCExpr * getExpr() const
Definition MCInst.h:118
bool isExpr() const
Definition MCInst.h:69
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
bool regsOverlap(MCRegister RegA, MCRegister RegB) const
Returns true if the two registers are equal or alias each other.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register Idx for physical register Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr bool isValid() const
Definition MCRegister.h:84
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
void setRedefinable(bool Value)
Mark this symbol as redefinable.
Definition MCSymbol.h:210
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCTargetAsmParser - Generic interface to target specific assembly parsers.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Definition Register.h:100
Represents a location in source code.
Definition SMLoc.h:22
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:35
constexpr const char * getPointer() const
Definition SMLoc.h:33
constexpr bool isValid() const
Definition SMLoc.h:28
SMLoc Start
Definition SMLoc.h:49
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition StringRef.h:685
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:137
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:270
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:655
bool contains(StringRef key) const
Check if the set contains the given key.
Definition StringSet.h:60
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
static constexpr CustomOperand Operands[]
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
constexpr const char *const ModMatrixFmt[]
constexpr const char *const ModMatrixScaleFmt[]
constexpr const char *const ModMatrixScale[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlineValue(MCRegister Reg)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Returns true if Reg is a scalar register.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY bool isLitExpr(const MCExpr *Expr)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
LLVM_READONLY int64_t getLitValue(const MCExpr *Expr)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:234
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:204
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:211
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:227
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:213
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:203
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:209
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:212
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:221
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:219
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:214
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:228
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:239
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:240
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:215
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:206
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:224
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:220
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:226
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:216
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:241
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:223
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:205
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:231
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
bool isGFX1250Plus(const MCSubtargetInfo &STI)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1432
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ OPERAND_IMMEDIATE
Definition MCInstrDesc.h:61
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
@ Valid
The data is already valid.
Context & getContext() const
Definition BasicBlock.h:99
bool isNull(StringRef S)
Definition YAMLTraits.h:570
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
Definition Error.h:1113
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
StringMapEntry< Value * > ValueName
Definition Value.h:56
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition Alignment.h:206
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
void PrintError(const Twine &Msg)
Definition Error.cpp:104
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
Definition InstrProf.h:267
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
Op::Description Desc
Target & getTheR600Target()
The target for R600 GPUs.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:27
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
#define N
RegisterKind Kind
StringLiteral Name
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
Instruction set architecture version.
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...