LLVM 23.0.0git
AMDGPUAsmParser.cpp
Go to the documentation of this file.
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
38#include "llvm/MC/MCSymbol.h"
47#include <optional>
48
49using namespace llvm;
50using namespace llvm::AMDGPU;
51using namespace llvm::amdhsa;
52
53namespace {
54
55class AMDGPUAsmParser;
56
/// Register categories distinguished in this file. IS_UNKNOWN is the first
/// enumerator and therefore has the value 0.
enum RegisterKind {
  IS_UNKNOWN,
  IS_VGPR,
  IS_SGPR,
  IS_AGPR,
  IS_TTMP,
  IS_SPECIAL
};
58
59//===----------------------------------------------------------------------===//
60// Operand
61//===----------------------------------------------------------------------===//
62
63class AMDGPUOperand : public MCParsedAsmOperand {
/// Discriminator telling which member of the payload union is meaningful.
enum KindTy { Token, Immediate, Register, Expression };

/// Kind of this operand; set once by the constructor.
KindTy Kind;
70
71 SMLoc StartLoc, EndLoc;
72 const AMDGPUAsmParser *AsmParser;
73
74public:
75 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
76 : Kind(Kind_), AsmParser(AsmParser_) {}
77
78 using Ptr = std::unique_ptr<AMDGPUOperand>;
79
80 struct Modifiers {
81 bool Abs = false;
82 bool Neg = false;
83 bool Sext = false;
84 LitModifier Lit = LitModifier::None;
85
86 bool hasFPModifiers() const { return Abs || Neg; }
87 bool hasIntModifiers() const { return Sext; }
88 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
89 bool isForcedLit() const { return Lit == LitModifier::Lit; }
90 bool isForcedLit64() const { return Lit == LitModifier::Lit64; }
91
92 int64_t getFPModifiersOperand() const {
93 int64_t Operand = 0;
94 Operand |= Abs ? SISrcMods::ABS : 0u;
95 Operand |= Neg ? SISrcMods::NEG : 0u;
96 return Operand;
97 }
98
99 int64_t getIntModifiersOperand() const {
100 int64_t Operand = 0;
101 Operand |= Sext ? SISrcMods::SEXT : 0u;
102 return Operand;
103 }
104
105 int64_t getModifiersOperand() const {
106 assert(!(hasFPModifiers() && hasIntModifiers())
107 && "fp and int modifiers should not be used simultaneously");
108 if (hasFPModifiers())
109 return getFPModifiersOperand();
110 if (hasIntModifiers())
111 return getIntModifiersOperand();
112 return 0;
113 }
114
115 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
116 };
117
/// Tag stored in ImmOp::Type. ImmTyNone (value 0) marks a plain immediate;
/// isImmModifier() treats every other value as a named modifier.
/// The enumerators are implicitly numbered, so their order must not change.
enum ImmTy {
  ImmTyNone,
  ImmTyGDS, ImmTyLDS,
  ImmTyOffen, ImmTyIdxen, ImmTyAddr64,
  ImmTyOffset, ImmTyInstOffset, ImmTyOffset0, ImmTyOffset1,
  ImmTySMEMOffsetMod,
  ImmTyCPol, ImmTyTFE, ImmTyIsAsync, ImmTyD16,
  ImmTyClamp, ImmTyOModSI,
  // SDWA*
  ImmTySDWADstSel, ImmTySDWASrc0Sel, ImmTySDWASrc1Sel, ImmTySDWADstUnused,
  ImmTyDMask, ImmTyDim, ImmTyUNorm, ImmTyDA,
  ImmTyR128A16, ImmTyA16, ImmTyLWE,
  // Exp*
  ImmTyExpTgt, ImmTyExpCompr, ImmTyExpVM,
  ImmTyDone, ImmTyRowEn,
  ImmTyFORMAT, ImmTyHwreg, ImmTyOff,
  ImmTySendMsg, ImmTyWaitEvent,
  // Interp*
  ImmTyInterpSlot, ImmTyInterpAttr, ImmTyInterpAttrChan,
  ImmTyOpSel, ImmTyOpSelHi, ImmTyNegLo, ImmTyNegHi,
  // IndexKey*
  ImmTyIndexKey8bit, ImmTyIndexKey16bit, ImmTyIndexKey32bit,
  // DPP*
  ImmTyDPP8, ImmTyDppCtrl, ImmTyDppRowMask, ImmTyDppBankMask,
  ImmTyDppBoundCtrl, ImmTyDppFI,
  ImmTySwizzle, ImmTyGprIdxMode, ImmTyHigh,
  ImmTyBLGP, ImmTyCBSZ, ImmTyABID,
  ImmTyEndpgm,
  // Wait*
  ImmTyWaitVDST, ImmTyWaitEXP, ImmTyWaitVAVDst, ImmTyWaitVMVSrc,
  ImmTyBitOp3,
  // Matrix*
  ImmTyMatrixAFMT, ImmTyMatrixBFMT,
  ImmTyMatrixAScale, ImmTyMatrixBScale,
  ImmTyMatrixAScaleFmt, ImmTyMatrixBScaleFmt,
  ImmTyMatrixAReuse, ImmTyMatrixBReuse,
  ImmTyScaleSel,
  ImmTyByteSel,
};
196
197private:
198 struct TokOp {
199 const char *Data;
200 unsigned Length;
201 };
202
203 struct ImmOp {
204 int64_t Val;
205 ImmTy Type;
206 bool IsFPImm;
207 Modifiers Mods;
208 };
209
210 struct RegOp {
211 MCRegister RegNo;
212 Modifiers Mods;
213 };
214
215 union {
216 TokOp Tok;
217 ImmOp Imm;
218 RegOp Reg;
219 const MCExpr *Expr;
220 };
221
222 // The index of the associated MCInst operand.
223 mutable int MCOpIdx = -1;
224
225public:
226 bool isToken() const override { return Kind == Token; }
227
228 bool isSymbolRefExpr() const {
229 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230 }
231
232 bool isImm() const override {
233 return Kind == Immediate;
234 }
235
236 bool isInlinableImm(MVT type) const;
237 bool isLiteralImm(MVT type) const;
238
239 bool isRegKind() const {
240 return Kind == Register;
241 }
242
243 bool isReg() const override {
244 return isRegKind() && !hasModifiers();
245 }
246
247 bool isRegOrInline(unsigned RCID, MVT type) const {
248 return isRegClass(RCID) || isInlinableImm(type);
249 }
250
251 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
252 return isRegOrInline(RCID, type) || isLiteralImm(type);
253 }
254
255 bool isRegOrImmWithInt16InputMods() const {
256 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
257 }
258
259 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
261 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
262 }
263
264 bool isRegOrImmWithInt32InputMods() const {
265 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
266 }
267
268 bool isRegOrInlineImmWithInt16InputMods() const {
269 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
270 }
271
272 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
273 return isRegOrInline(
274 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
275 }
276
277 bool isRegOrInlineImmWithInt32InputMods() const {
278 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
279 }
280
281 bool isRegOrImmWithInt64InputMods() const {
282 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
283 }
284
285 bool isRegOrImmWithFP16InputMods() const {
286 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
287 }
288
289 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
291 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
292 }
293
294 bool isRegOrImmWithFP32InputMods() const {
295 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
296 }
297
298 bool isRegOrImmWithFP64InputMods() const {
299 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
300 }
301
302 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
303 return isRegOrInline(
304 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
305 }
306
307 bool isRegOrInlineImmWithFP32InputMods() const {
308 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
309 }
310
311 bool isRegOrInlineImmWithFP64InputMods() const {
312 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
313 }
314
315 bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
316
317 bool isVRegWithFP32InputMods() const {
318 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
319 }
320
321 bool isVRegWithFP64InputMods() const {
322 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
323 }
324
325 bool isPackedFP16InputMods() const {
326 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
327 }
328
329 bool isPackedVGPRFP32InputMods() const {
330 return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
331 }
332
333 bool isVReg() const {
334 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
335 isRegClass(AMDGPU::VReg_64RegClassID) ||
336 isRegClass(AMDGPU::VReg_96RegClassID) ||
337 isRegClass(AMDGPU::VReg_128RegClassID) ||
338 isRegClass(AMDGPU::VReg_160RegClassID) ||
339 isRegClass(AMDGPU::VReg_192RegClassID) ||
340 isRegClass(AMDGPU::VReg_256RegClassID) ||
341 isRegClass(AMDGPU::VReg_512RegClassID) ||
342 isRegClass(AMDGPU::VReg_1024RegClassID);
343 }
344
345 bool isVReg32() const {
346 return isRegClass(AMDGPU::VGPR_32RegClassID);
347 }
348
349 bool isVReg32OrOff() const {
350 return isOff() || isVReg32();
351 }
352
353 bool isNull() const {
354 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
355 }
356
357 bool isAV_LdSt_32_Align2_RegOp() const {
358 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
359 isRegClass(AMDGPU::AGPR_32RegClassID);
360 }
361
362 bool isVRegWithInputMods() const;
363 template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
364 template <bool IsFake16> bool isT16VRegWithInputMods() const;
365
366 bool isSDWAOperand(MVT type) const;
367 bool isSDWAFP16Operand() const;
368 bool isSDWAFP32Operand() const;
369 bool isSDWAInt16Operand() const;
370 bool isSDWAInt32Operand() const;
371
372 bool isImmTy(ImmTy ImmT) const {
373 return isImm() && Imm.Type == ImmT;
374 }
375
376 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
377
378 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
379
380 bool isImmModifier() const {
381 return isImm() && Imm.Type != ImmTyNone;
382 }
383
384 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
385 bool isDim() const { return isImmTy(ImmTyDim); }
386 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
387 bool isOff() const { return isImmTy(ImmTyOff); }
388 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
389 bool isOffen() const { return isImmTy(ImmTyOffen); }
390 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
391 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
392 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
393 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
394 bool isGDS() const { return isImmTy(ImmTyGDS); }
395 bool isLDS() const { return isImmTy(ImmTyLDS); }
396 bool isCPol() const { return isImmTy(ImmTyCPol); }
397 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
398 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
399 bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
400 bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
401 bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
402 bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
403 bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
404 bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
405 bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
406 bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
407 bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
408 bool isTFE() const { return isImmTy(ImmTyTFE); }
409 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
410 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
411 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
412 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
413 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
414 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
415 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
416 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
417 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
418 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
419 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
420 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
421 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
422 bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
423 bool isDone() const { return isImmTy(ImmTyDone); }
424 bool isRowEn() const { return isImmTy(ImmTyRowEn); }
425
426 bool isRegOrImm() const {
427 return isReg() || isImm();
428 }
429
430 bool isRegClass(unsigned RCID) const;
431
432 bool isInlineValue() const;
433
434 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
435 return isRegOrInline(RCID, type) && !hasModifiers();
436 }
437
438 bool isSCSrcB16() const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
440 }
441
442 bool isSCSrcV2B16() const {
443 return isSCSrcB16();
444 }
445
446 bool isSCSrc_b32() const {
447 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
448 }
449
450 bool isSCSrc_b64() const {
451 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
452 }
453
454 bool isBoolReg() const;
455
456 bool isSCSrcF16() const {
457 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
458 }
459
460 bool isSCSrcV2F16() const {
461 return isSCSrcF16();
462 }
463
464 bool isSCSrcF32() const {
465 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
466 }
467
468 bool isSCSrcF64() const {
469 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
470 }
471
472 bool isSSrc_b32() const {
473 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
474 }
475
476 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
477
478 bool isSSrcV2B16() const {
479 llvm_unreachable("cannot happen");
480 return isSSrc_b16();
481 }
482
483 bool isSSrc_b64() const {
484 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
485 // See isVSrc64().
486 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
487 (((const MCTargetAsmParser *)AsmParser)
488 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
489 isExpr());
490 }
491
492 bool isSSrc_f32() const {
493 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
494 }
495
496 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
497
498 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
499
500 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
501
502 bool isSSrcV2F16() const {
503 llvm_unreachable("cannot happen");
504 return isSSrc_f16();
505 }
506
507 bool isSSrcV2FP32() const {
508 llvm_unreachable("cannot happen");
509 return isSSrc_f32();
510 }
511
512 bool isSCSrcV2FP32() const {
513 llvm_unreachable("cannot happen");
514 return isSCSrcF32();
515 }
516
517 bool isSSrcV2INT32() const {
518 llvm_unreachable("cannot happen");
519 return isSSrc_b32();
520 }
521
522 bool isSCSrcV2INT32() const {
523 llvm_unreachable("cannot happen");
524 return isSCSrc_b32();
525 }
526
527 bool isSSrcOrLds_b32() const {
528 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
529 isLiteralImm(MVT::i32) || isExpr();
530 }
531
532 bool isVCSrc_b32() const {
533 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
534 }
535
536 bool isVCSrc_b32_Lo256() const {
537 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
538 }
539
540 bool isVCSrc_b64_Lo256() const {
541 return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
542 }
543
544 bool isVCSrc_b64() const {
545 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
546 }
547
548 bool isVCSrcT_b16() const {
549 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
550 }
551
552 bool isVCSrcTB16_Lo128() const {
553 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
554 }
555
556 bool isVCSrcFake16B16_Lo128() const {
557 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
558 }
559
560 bool isVCSrc_b16() const {
561 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
562 }
563
564 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
565
566 bool isVCSrc_f32() const {
567 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
568 }
569
570 bool isVCSrc_f64() const {
571 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
572 }
573
574 bool isVCSrcTBF16() const {
575 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
576 }
577
578 bool isVCSrcT_f16() const {
579 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
580 }
581
582 bool isVCSrcT_bf16() const {
583 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
584 }
585
586 bool isVCSrcTBF16_Lo128() const {
587 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
588 }
589
590 bool isVCSrcTF16_Lo128() const {
591 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
592 }
593
594 bool isVCSrcFake16BF16_Lo128() const {
595 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
596 }
597
598 bool isVCSrcFake16F16_Lo128() const {
599 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
600 }
601
602 bool isVCSrc_bf16() const {
603 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
604 }
605
606 bool isVCSrc_f16() const {
607 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
608 }
609
610 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
611
612 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
613
614 bool isVSrc_b32() const {
615 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
616 }
617
618 bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
619
620 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
621
622 bool isVSrcT_b16_Lo128() const {
623 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
624 }
625
626 bool isVSrcFake16_b16_Lo128() const {
627 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
628 }
629
630 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
631
632 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
633
634 bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
635
636 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
637
638 bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
639
640 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
641
642 bool isVSrc_f32() const {
643 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
644 }
645
646 bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
647
648 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
649
650 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
651
652 bool isVSrcT_bf16_Lo128() const {
653 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
654 }
655
656 bool isVSrcT_f16_Lo128() const {
657 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
658 }
659
660 bool isVSrcFake16_bf16_Lo128() const {
661 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
662 }
663
664 bool isVSrcFake16_f16_Lo128() const {
665 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
666 }
667
668 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
669
670 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
671
672 bool isVSrc_v2bf16() const {
673 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
674 }
675
676 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
677
678 bool isVSrc_v2f16_splat() const { return isVSrc_v2f16(); }
679
680 bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
681
682 bool isVISrcB32() const {
683 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
684 }
685
686 bool isVISrcB16() const {
687 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
688 }
689
690 bool isVISrcV2B16() const {
691 return isVISrcB16();
692 }
693
694 bool isVISrcF32() const {
695 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
696 }
697
698 bool isVISrcF16() const {
699 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
700 }
701
702 bool isVISrcV2F16() const {
703 return isVISrcF16() || isVISrcB32();
704 }
705
706 bool isVISrc_64_bf16() const {
707 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
708 }
709
710 bool isVISrc_64_f16() const {
711 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
712 }
713
714 bool isVISrc_64_b32() const {
715 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
716 }
717
718 bool isVISrc_64B64() const {
719 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
720 }
721
722 bool isVISrc_64_f64() const {
723 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
724 }
725
726 bool isVISrc_64V2FP32() const {
727 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
728 }
729
730 bool isVISrc_64V2INT32() const {
731 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
732 }
733
734 bool isVISrc_256_b32() const {
735 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
736 }
737
738 bool isVISrc_256_f32() const {
739 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
740 }
741
742 bool isVISrc_256B64() const {
743 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
744 }
745
746 bool isVISrc_256_f64() const {
747 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
748 }
749
750 bool isVISrc_512_f64() const {
751 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
752 }
753
754 bool isVISrc_128B16() const {
755 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
756 }
757
758 bool isVISrc_128V2B16() const {
759 return isVISrc_128B16();
760 }
761
762 bool isVISrc_128_b32() const {
763 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
764 }
765
766 bool isVISrc_128_f32() const {
767 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
768 }
769
770 bool isVISrc_256V2FP32() const {
771 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
772 }
773
774 bool isVISrc_256V2INT32() const {
775 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
776 }
777
778 bool isVISrc_512_b32() const {
779 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
780 }
781
782 bool isVISrc_512B16() const {
783 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
784 }
785
786 bool isVISrc_512V2B16() const {
787 return isVISrc_512B16();
788 }
789
790 bool isVISrc_512_f32() const {
791 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
792 }
793
794 bool isVISrc_512F16() const {
795 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
796 }
797
798 bool isVISrc_512V2F16() const {
799 return isVISrc_512F16() || isVISrc_512_b32();
800 }
801
802 bool isVISrc_1024_b32() const {
803 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
804 }
805
806 bool isVISrc_1024B16() const {
807 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
808 }
809
810 bool isVISrc_1024V2B16() const {
811 return isVISrc_1024B16();
812 }
813
814 bool isVISrc_1024_f32() const {
815 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
816 }
817
818 bool isVISrc_1024F16() const {
819 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
820 }
821
822 bool isVISrc_1024V2F16() const {
823 return isVISrc_1024F16() || isVISrc_1024_b32();
824 }
825
826 bool isAISrcB32() const {
827 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
828 }
829
830 bool isAISrcB16() const {
831 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
832 }
833
834 bool isAISrcV2B16() const {
835 return isAISrcB16();
836 }
837
838 bool isAISrcF32() const {
839 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
840 }
841
842 bool isAISrcF16() const {
843 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
844 }
845
846 bool isAISrcV2F16() const {
847 return isAISrcF16() || isAISrcB32();
848 }
849
850 bool isAISrc_64B64() const {
851 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
852 }
853
854 bool isAISrc_64_f64() const {
855 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
856 }
857
858 bool isAISrc_128_b32() const {
859 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
860 }
861
862 bool isAISrc_128B16() const {
863 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
864 }
865
866 bool isAISrc_128V2B16() const {
867 return isAISrc_128B16();
868 }
869
870 bool isAISrc_128_f32() const {
871 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
872 }
873
874 bool isAISrc_128F16() const {
875 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
876 }
877
878 bool isAISrc_128V2F16() const {
879 return isAISrc_128F16() || isAISrc_128_b32();
880 }
881
882 bool isVISrc_128_bf16() const {
883 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
884 }
885
886 bool isVISrc_128_f16() const {
887 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
888 }
889
890 bool isVISrc_128V2F16() const {
891 return isVISrc_128_f16() || isVISrc_128_b32();
892 }
893
894 bool isAISrc_256B64() const {
895 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
896 }
897
898 bool isAISrc_256_f64() const {
899 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
900 }
901
902 bool isAISrc_512_b32() const {
903 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
904 }
905
906 bool isAISrc_512B16() const {
907 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
908 }
909
910 bool isAISrc_512V2B16() const {
911 return isAISrc_512B16();
912 }
913
914 bool isAISrc_512_f32() const {
915 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
916 }
917
918 bool isAISrc_512F16() const {
919 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
920 }
921
922 bool isAISrc_512V2F16() const {
923 return isAISrc_512F16() || isAISrc_512_b32();
924 }
925
926 bool isAISrc_1024_b32() const {
927 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
928 }
929
930 bool isAISrc_1024B16() const {
931 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
932 }
933
934 bool isAISrc_1024V2B16() const {
935 return isAISrc_1024B16();
936 }
937
938 bool isAISrc_1024_f32() const {
939 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
940 }
941
942 bool isAISrc_1024F16() const {
943 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
944 }
945
946 bool isAISrc_1024V2F16() const {
947 return isAISrc_1024F16() || isAISrc_1024_b32();
948 }
949
950 bool isKImmFP32() const {
951 return isLiteralImm(MVT::f32);
952 }
953
954 bool isKImmFP16() const {
955 return isLiteralImm(MVT::f16);
956 }
957
958 bool isKImmFP64() const { return isLiteralImm(MVT::f64); }
959
960 bool isMem() const override {
961 return false;
962 }
963
964 bool isExpr() const {
965 return Kind == Expression;
966 }
967
968 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
969
970 bool isSWaitCnt() const;
971 bool isDepCtr() const;
972 bool isSDelayALU() const;
973 bool isHwreg() const;
974 bool isSendMsg() const;
975 bool isWaitEvent() const;
976 bool isSplitBarrier() const;
977 bool isSwizzle() const;
978 bool isSMRDOffset8() const;
979 bool isSMEMOffset() const;
980 bool isSMRDLiteralOffset() const;
981 bool isDPP8() const;
982 bool isDPPCtrl() const;
983 bool isBLGP() const;
984 bool isGPRIdxMode() const;
985 bool isS16Imm() const;
986 bool isU16Imm() const;
987 bool isEndpgm() const;
988
989 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
990 return [this, P]() { return P(*this); };
991 }
992
993 StringRef getToken() const {
994 assert(isToken());
995 return StringRef(Tok.Data, Tok.Length);
996 }
997
998 int64_t getImm() const {
999 assert(isImm());
1000 return Imm.Val;
1001 }
1002
1003 void setImm(int64_t Val) {
1004 assert(isImm());
1005 Imm.Val = Val;
1006 }
1007
1008 ImmTy getImmTy() const {
1009 assert(isImm());
1010 return Imm.Type;
1011 }
1012
1013 MCRegister getReg() const override {
1014 assert(isRegKind());
1015 return Reg.RegNo;
1016 }
1017
1018 SMLoc getStartLoc() const override {
1019 return StartLoc;
1020 }
1021
1022 SMLoc getEndLoc() const override {
1023 return EndLoc;
1024 }
1025
1026 SMRange getLocRange() const {
1027 return SMRange(StartLoc, EndLoc);
1028 }
1029
1030 int getMCOpIdx() const { return MCOpIdx; }
1031
1032 Modifiers getModifiers() const {
1033 assert(isRegKind() || isImmTy(ImmTyNone));
1034 return isRegKind() ? Reg.Mods : Imm.Mods;
1035 }
1036
1037 void setModifiers(Modifiers Mods) {
1038 assert(isRegKind() || isImmTy(ImmTyNone));
1039 if (isRegKind())
1040 Reg.Mods = Mods;
1041 else
1042 Imm.Mods = Mods;
1043 }
1044
1045 bool hasModifiers() const {
1046 return getModifiers().hasModifiers();
1047 }
1048
1049 bool hasFPModifiers() const {
1050 return getModifiers().hasFPModifiers();
1051 }
1052
1053 bool hasIntModifiers() const {
1054 return getModifiers().hasIntModifiers();
1055 }
1056
1057 bool isForcedLit() const {
1058 return isImmLiteral() && getModifiers().isForcedLit();
1059 }
1060
1061 bool isForcedLit64() const {
1062 return isImmLiteral() && getModifiers().isForcedLit64();
1063 }
1064
1065 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1066
1067 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1068
1069 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1070
1071 void addRegOperands(MCInst &Inst, unsigned N) const;
1072
1073 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1074 if (isRegKind())
1075 addRegOperands(Inst, N);
1076 else
1077 addImmOperands(Inst, N);
1078 }
1079
1080 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1081 Modifiers Mods = getModifiers();
1082 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1083 if (isRegKind()) {
1084 addRegOperands(Inst, N);
1085 } else {
1086 addImmOperands(Inst, N, false);
1087 }
1088 }
1089
1090 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1091 assert(!hasIntModifiers());
1092 addRegOrImmWithInputModsOperands(Inst, N);
1093 }
1094
1095 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1096 assert(!hasFPModifiers());
1097 addRegOrImmWithInputModsOperands(Inst, N);
1098 }
1099
1100 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1101 Modifiers Mods = getModifiers();
1102 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1103 assert(isRegKind());
1104 addRegOperands(Inst, N);
1105 }
1106
1107 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1108 assert(!hasIntModifiers());
1109 addRegWithInputModsOperands(Inst, N);
1110 }
1111
1112 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1113 assert(!hasFPModifiers());
1114 addRegWithInputModsOperands(Inst, N);
1115 }
1116
1117 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1118 // clang-format off
1119 switch (Type) {
1120 case ImmTyNone: OS << "None"; break;
1121 case ImmTyGDS: OS << "GDS"; break;
1122 case ImmTyLDS: OS << "LDS"; break;
1123 case ImmTyOffen: OS << "Offen"; break;
1124 case ImmTyIdxen: OS << "Idxen"; break;
1125 case ImmTyAddr64: OS << "Addr64"; break;
1126 case ImmTyOffset: OS << "Offset"; break;
1127 case ImmTyInstOffset: OS << "InstOffset"; break;
1128 case ImmTyOffset0: OS << "Offset0"; break;
1129 case ImmTyOffset1: OS << "Offset1"; break;
1130 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1131 case ImmTyCPol: OS << "CPol"; break;
1132 case ImmTyIndexKey8bit: OS << "index_key"; break;
1133 case ImmTyIndexKey16bit: OS << "index_key"; break;
1134 case ImmTyIndexKey32bit: OS << "index_key"; break;
1135 case ImmTyTFE: OS << "TFE"; break;
1136 case ImmTyIsAsync: OS << "IsAsync"; break;
1137 case ImmTyD16: OS << "D16"; break;
1138 case ImmTyFORMAT: OS << "FORMAT"; break;
1139 case ImmTyClamp: OS << "Clamp"; break;
1140 case ImmTyOModSI: OS << "OModSI"; break;
1141 case ImmTyDPP8: OS << "DPP8"; break;
1142 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1143 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1144 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1145 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1146 case ImmTyDppFI: OS << "DppFI"; break;
1147 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1148 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1149 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1150 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1151 case ImmTyDMask: OS << "DMask"; break;
1152 case ImmTyDim: OS << "Dim"; break;
1153 case ImmTyUNorm: OS << "UNorm"; break;
1154 case ImmTyDA: OS << "DA"; break;
1155 case ImmTyR128A16: OS << "R128A16"; break;
1156 case ImmTyA16: OS << "A16"; break;
1157 case ImmTyLWE: OS << "LWE"; break;
1158 case ImmTyOff: OS << "Off"; break;
1159 case ImmTyExpTgt: OS << "ExpTgt"; break;
1160 case ImmTyExpCompr: OS << "ExpCompr"; break;
1161 case ImmTyExpVM: OS << "ExpVM"; break;
1162 case ImmTyDone: OS << "Done"; break;
1163 case ImmTyRowEn: OS << "RowEn"; break;
1164 case ImmTyHwreg: OS << "Hwreg"; break;
1165 case ImmTySendMsg: OS << "SendMsg"; break;
1166 case ImmTyWaitEvent: OS << "WaitEvent"; break;
1167 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1168 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1169 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1170 case ImmTyOpSel: OS << "OpSel"; break;
1171 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1172 case ImmTyNegLo: OS << "NegLo"; break;
1173 case ImmTyNegHi: OS << "NegHi"; break;
1174 case ImmTySwizzle: OS << "Swizzle"; break;
1175 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1176 case ImmTyHigh: OS << "High"; break;
1177 case ImmTyBLGP: OS << "BLGP"; break;
1178 case ImmTyCBSZ: OS << "CBSZ"; break;
1179 case ImmTyABID: OS << "ABID"; break;
1180 case ImmTyEndpgm: OS << "Endpgm"; break;
1181 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1182 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1183 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1184 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1185 case ImmTyBitOp3: OS << "BitOp3"; break;
1186 case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
1187 case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
1188 case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
1189 case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
1190 case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
1191 case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
1192 case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
1193 case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
1194 case ImmTyScaleSel: OS << "ScaleSel" ; break;
1195 case ImmTyByteSel: OS << "ByteSel" ; break;
1196 }
1197 // clang-format on
1198 }
1199
1200 void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
1201 switch (Kind) {
1202 case Register:
1203 OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
1204 << " mods: " << Reg.Mods << '>';
1205 break;
1206 case Immediate:
1207 OS << '<' << getImm();
1208 if (getImmTy() != ImmTyNone) {
1209 OS << " type: "; printImmTy(OS, getImmTy());
1210 }
1211 OS << " mods: " << Imm.Mods << '>';
1212 break;
1213 case Token:
1214 OS << '\'' << getToken() << '\'';
1215 break;
1216 case Expression:
1217 OS << "<expr ";
1218 MAI.printExpr(OS, *Expr);
1219 OS << '>';
1220 break;
1221 }
1222 }
1223
1224 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1225 int64_t Val, SMLoc Loc,
1226 ImmTy Type = ImmTyNone,
1227 bool IsFPImm = false) {
1228 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1229 Op->Imm.Val = Val;
1230 Op->Imm.IsFPImm = IsFPImm;
1231 Op->Imm.Type = Type;
1232 Op->Imm.Mods = Modifiers();
1233 Op->StartLoc = Loc;
1234 Op->EndLoc = Loc;
1235 return Op;
1236 }
1237
1238 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1239 StringRef Str, SMLoc Loc,
1240 bool HasExplicitEncodingSize = true) {
1241 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1242 Res->Tok.Data = Str.data();
1243 Res->Tok.Length = Str.size();
1244 Res->StartLoc = Loc;
1245 Res->EndLoc = Loc;
1246 return Res;
1247 }
1248
1249 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1250 MCRegister Reg, SMLoc S, SMLoc E) {
1251 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1252 Op->Reg.RegNo = Reg;
1253 Op->Reg.Mods = Modifiers();
1254 Op->StartLoc = S;
1255 Op->EndLoc = E;
1256 return Op;
1257 }
1258
1259 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1260 const class MCExpr *Expr, SMLoc S) {
1261 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1262 Op->Expr = Expr;
1263 Op->StartLoc = S;
1264 Op->EndLoc = S;
1265 return Op;
1266 }
1267};
1268
1269raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1270 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1271 return OS;
1272}
1273
1274//===----------------------------------------------------------------------===//
1275// AsmParser
1276//===----------------------------------------------------------------------===//
1277
1278// TODO: define GET_SUBTARGET_FEATURE_NAME
1279#define GET_REGISTER_MATCHER
1280#include "AMDGPUGenAsmMatcher.inc"
1281#undef GET_REGISTER_MATCHER
1282#undef GET_SUBTARGET_FEATURE_NAME
1283
1284// Holds info related to the current kernel, e.g. count of SGPRs used.
1285// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1286// .amdgpu_hsa_kernel or at EOF.
1287class KernelScopeInfo {
1288 int SgprIndexUnusedMin = -1;
1289 int VgprIndexUnusedMin = -1;
1290 int AgprIndexUnusedMin = -1;
1291 MCContext *Ctx = nullptr;
1292 MCSubtargetInfo const *MSTI = nullptr;
1293
1294 void usesSgprAt(int i) {
1295 if (i >= SgprIndexUnusedMin) {
1296 SgprIndexUnusedMin = ++i;
1297 if (Ctx) {
1298 MCSymbol* const Sym =
1299 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1300 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1301 }
1302 }
1303 }
1304
1305 void usesVgprAt(int i) {
1306 if (i >= VgprIndexUnusedMin) {
1307 VgprIndexUnusedMin = ++i;
1308 if (Ctx) {
1309 MCSymbol* const Sym =
1310 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1311 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1312 VgprIndexUnusedMin);
1313 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1314 }
1315 }
1316 }
1317
1318 void usesAgprAt(int i) {
1319 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1320 if (!hasMAIInsts(*MSTI))
1321 return;
1322
1323 if (i >= AgprIndexUnusedMin) {
1324 AgprIndexUnusedMin = ++i;
1325 if (Ctx) {
1326 MCSymbol* const Sym =
1327 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1328 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1329
1330 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1331 MCSymbol* const vSym =
1332 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1333 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1334 VgprIndexUnusedMin);
1335 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1336 }
1337 }
1338 }
1339
1340public:
1341 KernelScopeInfo() = default;
1342
1343 void initialize(MCContext &Context) {
1344 Ctx = &Context;
1345 MSTI = Ctx->getSubtargetInfo();
1346
1347 usesSgprAt(SgprIndexUnusedMin = -1);
1348 usesVgprAt(VgprIndexUnusedMin = -1);
1349 if (hasMAIInsts(*MSTI)) {
1350 usesAgprAt(AgprIndexUnusedMin = -1);
1351 }
1352 }
1353
1354 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1355 unsigned RegWidth) {
1356 switch (RegKind) {
1357 case IS_SGPR:
1358 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1359 break;
1360 case IS_AGPR:
1361 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1362 break;
1363 case IS_VGPR:
1364 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1365 break;
1366 default:
1367 break;
1368 }
1369 }
1370};
1371
1372class AMDGPUAsmParser : public MCTargetAsmParser {
1373 MCAsmParser &Parser;
1374
1375 unsigned ForcedEncodingSize = 0;
1376 bool ForcedDPP = false;
1377 bool ForcedSDWA = false;
1378 KernelScopeInfo KernelScope;
1379 const unsigned HwMode;
1380
1381 /// @name Auto-generated Match Functions
1382 /// {
1383
1384#define GET_ASSEMBLER_HEADER
1385#include "AMDGPUGenAsmMatcher.inc"
1386
1387 /// }
1388
1389 /// Get size of register operand
1390 unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
1391 assert(OpNo < Desc.NumOperands);
1392 int16_t RCID = MII.getOpRegClassID(Desc.operands()[OpNo], HwMode);
1393 return getRegBitWidth(RCID) / 8;
1394 }
1395
1396 std::optional<AMDGPU::InfoSectionData> InfoData;
1397
1398private:
1399 void createConstantSymbol(StringRef Id, int64_t Val);
1400
1401 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1402 bool OutOfRangeError(SMRange Range);
1403 /// Calculate VGPR/SGPR blocks required for given target, reserved
1404 /// registers, and user-specified NextFreeXGPR values.
1405 ///
1406 /// \param Features [in] Target features, used for bug corrections.
1407 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1408 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1409 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1410 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1411 /// descriptor field, if valid.
1412 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1413 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1414 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1415 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1416 /// \param VGPRBlocks [out] Result VGPR block count.
1417 /// \param SGPRBlocks [out] Result SGPR block count.
1418 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1419 const MCExpr *FlatScrUsed, bool XNACKUsed,
1420 std::optional<bool> EnableWavefrontSize32,
1421 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1422 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1423 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
1424 bool ParseDirectiveAMDGCNTarget();
1425 bool ParseDirectiveAMDHSACodeObjectVersion();
1426 bool ParseDirectiveAMDHSAKernel();
1427 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1428 bool ParseDirectiveAMDKernelCodeT();
1429 // TODO: Possibly make subtargetHasRegister const.
1430 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1431 bool ParseDirectiveAMDGPUHsaKernel();
1432
1433 bool ParseDirectiveISAVersion();
1434 bool ParseDirectiveHSAMetadata();
1435 bool ParseDirectivePALMetadataBegin();
1436 bool ParseDirectivePALMetadata();
1437 bool ParseDirectiveAMDGPULDS();
1438 bool ParseDirectiveAMDGPUInfo();
1439
1440 /// Common code to parse out a block of text (typically YAML) between start and
1441 /// end directives.
1442 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1443 const char *AssemblerDirectiveEnd,
1444 std::string &CollectString);
1445
1446 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1447 RegisterKind RegKind, MCRegister Reg1,
1448 RegisterKind RegKind1, SMLoc Loc);
1449 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1450 unsigned &RegNum, unsigned &RegWidth,
1451 bool RestoreOnFailure = false);
1452 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1453 unsigned &RegNum, unsigned &RegWidth,
1454 SmallVectorImpl<AsmToken> &Tokens);
1455 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1456 unsigned &RegWidth,
1457 SmallVectorImpl<AsmToken> &Tokens);
1458 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1459 unsigned &RegWidth,
1460 SmallVectorImpl<AsmToken> &Tokens);
1461 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1462 unsigned &RegWidth,
1463 SmallVectorImpl<AsmToken> &Tokens);
1464 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1465 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1466 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1467
1468 bool isRegister();
1469 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1470 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1471 void initializeGprCountSymbol(RegisterKind RegKind);
1472 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1473 unsigned RegWidth);
1474 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1475 bool IsAtomic);
1476
1477public:
1478 enum OperandMode {
1479 OperandMode_Default,
1480 OperandMode_NSA,
1481 };
1482
1483 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1484
1485 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1486 const MCInstrInfo &MII)
1487 : MCTargetAsmParser(STI, MII), Parser(_Parser),
1488 HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
1490
1491 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1492
1493 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1494 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1495 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1496 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1497 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1498 } else {
1499 createConstantSymbol(".option.machine_version_major", ISA.Major);
1500 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1501 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1502 }
1503 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1504 initializeGprCountSymbol(IS_VGPR);
1505 initializeGprCountSymbol(IS_SGPR);
1506 } else
1507 KernelScope.initialize(getContext());
1508
1509 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1510 createConstantSymbol(Symbol, Code);
1511
1512 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1513 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1514 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1515 }
1516
1517 bool hasMIMG_R128() const {
1518 return AMDGPU::hasMIMG_R128(getSTI());
1519 }
1520
1521 bool hasPackedD16() const {
1522 return AMDGPU::hasPackedD16(getSTI());
1523 }
1524
1525 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1526
1527 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1528
1529 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1530
1531 bool isSI() const {
1532 return AMDGPU::isSI(getSTI());
1533 }
1534
1535 bool isCI() const {
1536 return AMDGPU::isCI(getSTI());
1537 }
1538
1539 bool isVI() const {
1540 return AMDGPU::isVI(getSTI());
1541 }
1542
1543 bool isGFX9() const {
1544 return AMDGPU::isGFX9(getSTI());
1545 }
1546
1547 // TODO: isGFX90A is also true for GFX940. We need to clean it.
1548 bool isGFX90A() const {
1549 return AMDGPU::isGFX90A(getSTI());
1550 }
1551
1552 bool isGFX940() const {
1553 return AMDGPU::isGFX940(getSTI());
1554 }
1555
1556 bool isGFX9Plus() const {
1557 return AMDGPU::isGFX9Plus(getSTI());
1558 }
1559
1560 bool isGFX10() const {
1561 return AMDGPU::isGFX10(getSTI());
1562 }
1563
1564 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1565
1566 bool isGFX11() const {
1567 return AMDGPU::isGFX11(getSTI());
1568 }
1569
1570 bool isGFX11Plus() const {
1571 return AMDGPU::isGFX11Plus(getSTI());
1572 }
1573
1574 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1575
1576 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1577
1578 bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
1579
1580 bool isGFX1250Plus() const { return AMDGPU::isGFX1250Plus(getSTI()); }
1581
1582 bool isGFX13() const { return AMDGPU::isGFX13(getSTI()); }
1583
1584 bool isGFX13Plus() const { return AMDGPU::isGFX13Plus(getSTI()); }
1585
1586 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1587
1588 bool isGFX10_BEncoding() const {
1589 return AMDGPU::isGFX10_BEncoding(getSTI());
1590 }
1591
1592 bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }
1593
1594 bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }
1595
1596 bool hasInv2PiInlineImm() const {
1597 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1598 }
1599
1600 bool has64BitLiterals() const {
1601 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1602 }
1603
1604 bool hasFlatOffsets() const {
1605 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1606 }
1607
1608 bool hasTrue16Insts() const {
1609 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1610 }
1611
1612 bool hasArchitectedFlatScratch() const {
1613 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1614 }
1615
1616 bool hasSGPR102_SGPR103() const {
1617 return !isVI() && !isGFX9();
1618 }
1619
1620 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1621
1622 bool hasIntClamp() const {
1623 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1624 }
1625
1626 bool hasPartialNSAEncoding() const {
1627 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1628 }
1629
1630 bool hasGloballyAddressableScratch() const {
1631 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
1632 }
1633
1634 unsigned getNSAMaxSize(bool HasSampler = false) const {
1635 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1636 }
1637
1638 unsigned getMaxNumUserSGPRs() const {
1639 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1640 }
1641
1642 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1643
1644 AMDGPUTargetStreamer &getTargetStreamer() {
1645 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1646 return static_cast<AMDGPUTargetStreamer &>(TS);
1647 }
1648
1649 MCContext &getContext() const {
1650 // We need this const_cast because for some reason getContext() is not const
1651 // in MCAsmParser.
1652 return const_cast<AMDGPUAsmParser *>(this)->MCTargetAsmParser::getContext();
1653 }
1654
1655 const MCRegisterInfo *getMRI() const {
1656 return getContext().getRegisterInfo();
1657 }
1658
1659 const MCInstrInfo *getMII() const {
1660 return &MII;
1661 }
1662
1663 // FIXME: This should not be used. Instead, should use queries derived from
1664 // getAvailableFeatures().
1665 const FeatureBitset &getFeatureBits() const {
1666 return getSTI().getFeatureBits();
1667 }
1668
1669 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1670 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1671 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1672
1673 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1674 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1675 bool isForcedDPP() const { return ForcedDPP; }
1676 bool isForcedSDWA() const { return ForcedSDWA; }
1677 ArrayRef<unsigned> getMatchedVariants() const;
1678 StringRef getMatchedVariantName() const;
1679
1680 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1681 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1682 bool RestoreOnFailure);
1683 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1684 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1685 SMLoc &EndLoc) override;
1686 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1687 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1688 unsigned Kind) override;
1689 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1690 OperandVector &Operands, MCStreamer &Out,
1691 uint64_t &ErrorInfo,
1692 bool MatchingInlineAsm) override;
1693 bool ParseDirective(AsmToken DirectiveID) override;
1694 void onEndOfFile() override;
1695 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1696 OperandMode Mode = OperandMode_Default);
1697 StringRef parseMnemonicSuffix(StringRef Name);
1698 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1699 SMLoc NameLoc, OperandVector &Operands) override;
1700 //bool ProcessInstruction(MCInst &Inst);
1701
1702 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1703
1704 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1705
1706 ParseStatus
1707 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1708 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1709 std::function<bool(int64_t &)> ConvertResult = nullptr);
1710
1711 ParseStatus parseOperandArrayWithPrefix(
1712 const char *Prefix, OperandVector &Operands,
1713 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1714 bool (*ConvertResult)(int64_t &) = nullptr);
1715
1716 ParseStatus
1717 parseNamedBit(StringRef Name, OperandVector &Operands,
1718 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1719 bool IgnoreNegative = false);
1720 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1721 ParseStatus parseCPol(OperandVector &Operands);
1722 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1723 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1724 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1725 SMLoc &StringLoc);
1726 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1727 StringRef Name,
1728 ArrayRef<const char *> Ids,
1729 int64_t &IntVal);
1730 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1731 StringRef Name,
1732 ArrayRef<const char *> Ids,
1733 AMDGPUOperand::ImmTy Type);
1734
1735 bool isModifier();
1736 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1737 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1738 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1739 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1740 bool parseSP3NegModifier();
1741 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1742 LitModifier Lit = LitModifier::None);
1743 ParseStatus parseReg(OperandVector &Operands);
1744 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1745 LitModifier Lit = LitModifier::None);
1746 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1747 bool AllowImm = true);
1748 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1749 bool AllowImm = true);
1750 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1751 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1752 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1753 ParseStatus tryParseIndexKey(OperandVector &Operands,
1754 AMDGPUOperand::ImmTy ImmTy);
1755 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1756 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1757 ParseStatus parseIndexKey32bit(OperandVector &Operands);
1758 ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
1759 AMDGPUOperand::ImmTy Type);
1760 ParseStatus parseMatrixAFMT(OperandVector &Operands);
1761 ParseStatus parseMatrixBFMT(OperandVector &Operands);
1762 ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
1763 AMDGPUOperand::ImmTy Type);
1764 ParseStatus parseMatrixAScale(OperandVector &Operands);
1765 ParseStatus parseMatrixBScale(OperandVector &Operands);
1766 ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
1767 AMDGPUOperand::ImmTy Type);
1768 ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
1769 ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
1770
1771 ParseStatus parseDfmtNfmt(int64_t &Format);
1772 ParseStatus parseUfmt(int64_t &Format);
1773 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1774 int64_t &Format);
1775 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1776 int64_t &Format);
1777 ParseStatus parseFORMAT(OperandVector &Operands);
1778 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1779 ParseStatus parseNumericFormat(int64_t &Format);
1780 ParseStatus parseFlatOffset(OperandVector &Operands);
1781 ParseStatus parseR128A16(OperandVector &Operands);
1782 ParseStatus parseBLGP(OperandVector &Operands);
1783 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1784 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1785
1786 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1787
1788 bool parseCnt(int64_t &IntVal);
1789 ParseStatus parseSWaitCnt(OperandVector &Operands);
1790
1791 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1792 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1793 ParseStatus parseDepCtr(OperandVector &Operands);
1794
1795 bool parseDelay(int64_t &Delay);
1796 ParseStatus parseSDelayALU(OperandVector &Operands);
1797
1798 ParseStatus parseHwreg(OperandVector &Operands);
1799
1800private:
1801 struct OperandInfoTy {
1802 SMLoc Loc;
1803 int64_t Val;
1804 bool IsSymbolic = false;
1805 bool IsDefined = false;
1806
1807 constexpr OperandInfoTy(int64_t Val) : Val(Val) {}
1808 };
1809
1810 struct StructuredOpField : OperandInfoTy {
1811 StringLiteral Id;
1812 StringLiteral Desc;
1813 unsigned Width;
1814 bool IsDefined = false;
1815
1816 constexpr StructuredOpField(StringLiteral Id, StringLiteral Desc,
1817 unsigned Width, int64_t Default)
1818 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1819 virtual ~StructuredOpField() = default;
1820
1821 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1822 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1823 return false;
1824 }
1825
1826 virtual bool validate(AMDGPUAsmParser &Parser) const {
1827 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1828 return Error(Parser, "not supported on this GPU");
1829 if (!isUIntN(Width, Val))
1830 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1831 return true;
1832 }
1833 };
1834
1835 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1836 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1837
1838 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1839 bool validateSendMsg(const OperandInfoTy &Msg,
1840 const OperandInfoTy &Op,
1841 const OperandInfoTy &Stream);
1842
1843 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1844 OperandInfoTy &Width);
1845
1846 const AMDGPUOperand &findMCOperand(const OperandVector &Operands,
1847 int MCOpIdx) const;
1848
1849 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1850
1851 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1852 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1853 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1854
1855 SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
1856 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1857 const OperandVector &Operands) const;
1858 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
1859 const OperandVector &Operands) const;
1860 SMLoc getInstLoc(const OperandVector &Operands) const;
1861
1862 bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
1863 const OperandVector &Operands);
1864 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1865 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1866 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1867 bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1868 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1869 std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1870 bool AsVOPD3);
1871 bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1872 bool tryVOPD(const MCInst &Inst);
1873 bool tryVOPD3(const MCInst &Inst);
1874 bool tryAnotherVOPDEncoding(const MCInst &Inst);
1875
1876 bool validateIntClampSupported(const MCInst &Inst);
1877 bool validateMIMGAtomicDMask(const MCInst &Inst);
1878 bool validateMIMGGatherDMask(const MCInst &Inst);
1879 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1880 bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
1881 bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
1882 bool validateMIMGD16(const MCInst &Inst);
1883 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1884 bool validateTensorR128(const MCInst &Inst);
1885 bool validateMIMGMSAA(const MCInst &Inst);
1886 bool validateOpSel(const MCInst &Inst);
1887 bool validateTrue16OpSel(const MCInst &Inst);
1888 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1889 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1890 bool validateVccOperand(MCRegister Reg) const;
1891 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1892 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1893 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1894 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1895 bool validateAGPRLdSt(const MCInst &Inst) const;
1896 bool validateVGPRAlign(const MCInst &Inst) const;
1897 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1898 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1899 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1900 bool validateDivScale(const MCInst &Inst);
1901 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1902 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1903 SMLoc IDLoc);
1904 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1905 const unsigned CPol);
1906 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1907 bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
1908 bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1909 unsigned getConstantBusLimit(unsigned Opcode) const;
1910 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1911 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1912 MCRegister findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1913
1914 bool isSupportedMnemo(StringRef Mnemo,
1915 const FeatureBitset &FBS);
1916 bool isSupportedMnemo(StringRef Mnemo,
1917 const FeatureBitset &FBS,
1918 ArrayRef<unsigned> Variants);
1919 bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
1920
1921 bool isId(const StringRef Id) const;
1922 bool isId(const AsmToken &Token, const StringRef Id) const;
1923 bool isToken(const AsmToken::TokenKind Kind) const;
1924 StringRef getId() const;
1925 bool trySkipId(const StringRef Id);
1926 bool trySkipId(const StringRef Pref, const StringRef Id);
1927 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1928 bool trySkipToken(const AsmToken::TokenKind Kind);
1929 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1930 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1931 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1932
1933 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1934 AsmToken::TokenKind getTokenKind() const;
1935 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1936 bool parseExpr(OperandVector &Operands);
1937 StringRef getTokenStr() const;
1938 AsmToken peekToken(bool ShouldSkipSpace = true);
1939 AsmToken getToken() const;
1940 SMLoc getLoc() const;
1941 void lex();
1942
1943public:
1944 void onBeginOfFile() override;
1945 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1946
1947 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1948
1949 ParseStatus parseExpTgt(OperandVector &Operands);
1950 ParseStatus parseSendMsg(OperandVector &Operands);
1951 ParseStatus parseWaitEvent(OperandVector &Operands);
1952 ParseStatus parseInterpSlot(OperandVector &Operands);
1953 ParseStatus parseInterpAttr(OperandVector &Operands);
1954 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1955 ParseStatus parseBoolReg(OperandVector &Operands);
1956
1957 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1958 const unsigned MaxVal, const Twine &ErrMsg,
1959 SMLoc &Loc);
1960 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1961 const unsigned MinVal,
1962 const unsigned MaxVal,
1963 const StringRef ErrMsg);
1964 ParseStatus parseSwizzle(OperandVector &Operands);
1965 bool parseSwizzleOffset(int64_t &Imm);
1966 bool parseSwizzleMacro(int64_t &Imm);
1967 bool parseSwizzleQuadPerm(int64_t &Imm);
1968 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1969 bool parseSwizzleBroadcast(int64_t &Imm);
1970 bool parseSwizzleSwap(int64_t &Imm);
1971 bool parseSwizzleReverse(int64_t &Imm);
1972 bool parseSwizzleFFT(int64_t &Imm);
1973 bool parseSwizzleRotate(int64_t &Imm);
1974
1975 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1976 int64_t parseGPRIdxMacro();
1977
// Converts the parsed operands of a MUBUF instruction into Inst by forwarding
// to cvtMubufImpl with the trailing flag set to false (presumably "not
// atomic", given the sibling cvtMubufAtomic -- confirm against cvtMubufImpl).
1978 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
// Same conversion as cvtMubuf, but forwards to cvtMubufImpl with the trailing
// flag set to true (presumably selecting the atomic form -- confirm against
// cvtMubufImpl).
1979 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1980
1981 ParseStatus parseOModSI(OperandVector &Operands);
1982
1983 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1984 OptionalImmIndexMap &OptionalIdx);
1985 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1986 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1987 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1988 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1989 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1990
1991 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1992 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1993 OptionalImmIndexMap &OptionalIdx);
1994 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1995 OptionalImmIndexMap &OptionalIdx);
1996
1997 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1998 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1999 void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);
2000
2001 bool parseDimId(unsigned &Encoding);
2002 ParseStatus parseDim(OperandVector &Operands);
2003 bool convertDppBoundCtrl(int64_t &BoundCtrl);
2004 ParseStatus parseDPP8(OperandVector &Operands);
2005 ParseStatus parseDPPCtrl(OperandVector &Operands);
2006 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
2007 int64_t parseDPPCtrlSel(StringRef Ctrl);
2008 int64_t parseDPPCtrlPerm();
2009 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
// DPP8 flavour of cvtDPP: forwards the same arguments with IsDPP8 = true.
2010 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
2011 cvtDPP(Inst, Operands, true);
2012 }
2013 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
2014 bool IsDPP8 = false);
// DPP8 flavour of cvtVOP3DPP: forwards the same arguments with IsDPP8 = true.
2015 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
2016 cvtVOP3DPP(Inst, Operands, true);
2017 }
2018
2019 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
2020 AMDGPUOperand::ImmTy Type);
2021 ParseStatus parseSDWADstUnused(OperandVector &Operands);
2022 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
2023 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
2024 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
2025 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
2026 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
2027
// Basic instruction family of an SDWA instruction; passed to cvtSDWA as its
// BasicInstType argument.
2028 enum class SDWAInstType : unsigned { VOP1 = 0, VOP2 = 1, VOPC = 2 };
2029
2030 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
2031 SDWAInstType BasicInstType, bool SkipDstVcc = false,
2032 bool SkipSrcVcc = false);
2033
2034 ParseStatus parseEndpgm(OperandVector &Operands);
2035
2036 ParseStatus parseVOPD(OperandVector &Operands);
2037};
2038
2039} // end anonymous namespace
2040
2041// May be called with integer type with equivalent bitwidth.
2042static const fltSemantics *getFltSemantics(unsigned Size) {
2043 switch (Size) {
2044 case 4:
2045 return &APFloat::IEEEsingle();
2046 case 8:
2047 return &APFloat::IEEEdouble();
2048 case 2:
2049 return &APFloat::IEEEhalf();
2050 default:
2051 llvm_unreachable("unsupported fp type");
2052 }
2053}
2054
// NOTE(review): the line carrying this function's signature is not present in
// this listing. Judging by the body it is an overload that takes a value type
// named VT -- confirm against the original file.
// Converts the type's total size from bits to bytes and defers to
// getFltSemantics(unsigned).
2056 return getFltSemantics(VT.getSizeInBits() / 8);
2057}
2058
// NOTE(review): the function header and every `case` label of this switch are
// missing from this listing; only the return statements survive. This is
// presumably getOpFltSemantics, which addLiteralImmOperand calls with an
// operand type -- confirm against the original file.
// Selects the APFloat semantics (single, double, half or bfloat) for the given
// operand type; an operand type with no matching case is a programming error.
2060 switch (OperandType) {
2061 // When floating-point immediate is used as operand of type i16, the 32-bit
2062 // representation of the constant truncated to the 16 LSBs should be used.
2077 return &APFloat::IEEEsingle();
2084 return &APFloat::IEEEdouble();
2092 return &APFloat::IEEEhalf();
2097 return &APFloat::BFloat();
2098 default:
2099 llvm_unreachable("unsupported fp type");
2100 }
2101}
2102
2103//===----------------------------------------------------------------------===//
2104// Operand
2105//===----------------------------------------------------------------------===//
2106
// Decides whether the fp literal FPLiteral is acceptable for type VT.
// Returns false only when the conversion status is not opOK, information was
// lost, and the status reports overflow or underflow; plain loss of precision
// is tolerated.
// NOTE(review): the statement that performs the conversion and defines
// `Status` is missing from this listing (only its last argument, `&Lost);`,
// survives). FPLiteral is taken by non-const reference, so the conversion
// presumably happens in place -- confirm against the original file.
2107static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2108 bool Lost;
2109
2110 // Convert literal to single precision
2113 &Lost);
2114 // We allow precision lost but not overflow or underflow
2115 if (Status != APFloat::opOK &&
2116 Lost &&
2117 ((Status & APFloat::opOverflow) != 0 ||
2118 (Status & APFloat::opUnderflow) != 0)) {
2119 return false;
2120 }
2121
2122 return true;
2123}
2124
2125static bool isSafeTruncation(int64_t Val, unsigned Size) {
2126 return isUIntN(Size, Val) || isIntN(Size, Val);
2127}
2128
2129static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2130 if (VT.getScalarType() == MVT::i16)
2131 return isInlinableLiteral32(Val, HasInv2Pi);
2132
2133 if (VT.getScalarType() == MVT::f16)
2134 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2135
2136 assert(VT.getScalarType() == MVT::bf16);
2137
2138 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2139}
2140
// Returns true if this operand can be used as an inline constant for an
// operand of type `type`.
// Named inline values are always accepted; immediates that are not of kind
// ImmTyNone or that carry a lit modifier are always rejected. Otherwise the
// decision depends on whether the token was an fp or an integer literal and on
// the width of `type`.
// NOTE(review): several `return` statements below have lost their first line
// in this listing (only the trailing arguments remain), so the predicates they
// call cannot be read off here -- confirm against the original file.
2141bool AMDGPUOperand::isInlinableImm(MVT type) const {
2142
2143 // This is a hack to enable named inline values like
2144 // shared_base with both 32-bit and 64-bit operands.
2145 // Note that these values are defined as
2146 // 32-bit operands only.
2147 if (isInlineValue()) {
2148 return true;
2149 }
2150
2151 if (!isImmTy(ImmTyNone)) {
2152 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2153 return false;
2154 }
2155
2156 if (getModifiers().Lit != LitModifier::None)
2157 return false;
2158
2159 // TODO: We should avoid using host float here. It would be better to
2160 // check the float bit values which is what a few other places do.
2161 // We've had bot failures before due to weird NaN support on mips hosts.
2162
2163 APInt Literal(64, Imm.Val);
2164
2165 if (Imm.IsFPImm) { // We got fp literal token
2166 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2168 AsmParser->hasInv2PiInlineImm());
2169 }
2170
2171 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2172 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2173 return false;
2174
2175 if (type.getScalarSizeInBits() == 16) {
2176 bool Lost = false;
2177 switch (type.getScalarType().SimpleTy) {
2178 default:
2179 llvm_unreachable("unknown 16-bit type");
2180 case MVT::bf16:
2181 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2182 &Lost);
2183 break;
2184 case MVT::f16:
2185 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2186 &Lost);
2187 break;
2188 case MVT::i16:
2189 FPLiteral.convert(APFloatBase::IEEEsingle(),
2190 APFloat::rmNearestTiesToEven, &Lost);
2191 break;
2192 }
2193 // We need to use 32-bit representation here because when a floating-point
2194 // inline constant is used as an i16 operand, its 32-bit representation
2195 // will be used. We will need the 32-bit value to check if
2196 // it is FP inline constant.
2197 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2198 return isInlineableLiteralOp16(ImmVal, type,
2199 AsmParser->hasInv2PiInlineImm());
2200 }
2201
2202 // Check if single precision literal is inlinable
2204 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2205 AsmParser->hasInv2PiInlineImm());
2206 }
2207
2208 // We got int literal token.
2209 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2211 AsmParser->hasInv2PiInlineImm());
2212 }
2213
2214 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2215 return false;
2216 }
2217
2218 if (type.getScalarSizeInBits() == 16) {
2220 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2221 type, AsmParser->hasInv2PiInlineImm());
2222 }
2223
2225 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2226 AsmParser->hasInv2PiInlineImm());
2227}
2228
2229bool AMDGPUOperand::isLiteralImm(MVT type) const {
2230 // Check that this immediate can be added as literal
2231 if (!isImmTy(ImmTyNone)) {
2232 return false;
2233 }
2234
2235 bool Allow64Bit =
2236 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2237
2238 if (!Imm.IsFPImm) {
2239 // We got int literal token.
2240
2241 if (type == MVT::f64 && hasFPModifiers()) {
2242 // Cannot apply fp modifiers to int literals preserving the same semantics
2243 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2244 // disable these cases.
2245 return false;
2246 }
2247
2248 unsigned Size = type.getSizeInBits();
2249 if (Size == 64) {
2250 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2251 return true;
2252 Size = 32;
2253 }
2254
2255 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2256 // types.
2257 return isSafeTruncation(Imm.Val, Size);
2258 }
2259
2260 // We got fp literal token
2261 if (type == MVT::f64) { // Expected 64-bit fp operand
2262 // We would set low 64-bits of literal to zeroes but we accept this literals
2263 return true;
2264 }
2265
2266 if (type == MVT::i64) { // Expected 64-bit int operand
2267 // We don't allow fp literals in 64-bit integer instructions. It is
2268 // unclear how we should encode them.
2269 return false;
2270 }
2271
2272 // We allow fp literals with f16x2 operands assuming that the specified
2273 // literal goes into the lower half and the upper half is zero. We also
2274 // require that the literal may be losslessly converted to f16.
2275 //
2276 // For i16x2 operands, we assume that the specified literal is encoded as a
2277 // single-precision float. This is pretty odd, but it matches SP3 and what
2278 // happens in hardware.
2279 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2280 : (type == MVT::v2i16) ? MVT::f32
2281 : (type == MVT::v2f32) ? MVT::f32
2282 : type;
2283
2284 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2285 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2286}
2287
2288bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2289 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2290}
2291
2292bool AMDGPUOperand::isVRegWithInputMods() const {
2293 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2294 // GFX90A allows DPP on 64-bit operands.
2295 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2296 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2297}
2298
2299template <bool IsFake16>
2300bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2301 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2302 : AMDGPU::VGPR_16_Lo128RegClassID);
2303}
2304
2305template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2306 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2307 : AMDGPU::VGPR_16RegClassID);
2308}
2309
2310bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2311 if (AsmParser->isVI())
2312 return isVReg32();
2313 if (AsmParser->isGFX9Plus())
2314 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2315 return false;
2316}
2317
// SDWA operand check for an f16 source; see isSDWAOperand.
2318bool AMDGPUOperand::isSDWAFP16Operand() const {
2319 return isSDWAOperand(MVT::f16);
2320}
2321
// SDWA operand check for an f32 source; see isSDWAOperand.
2322bool AMDGPUOperand::isSDWAFP32Operand() const {
2323 return isSDWAOperand(MVT::f32);
2324}
2325
// SDWA operand check for an i16 source; see isSDWAOperand.
2326bool AMDGPUOperand::isSDWAInt16Operand() const {
2327 return isSDWAOperand(MVT::i16);
2328}
2329
// SDWA operand check for an i32 source; see isSDWAOperand.
2330bool AMDGPUOperand::isSDWAInt32Operand() const {
2331 return isSDWAOperand(MVT::i32);
2332}
2333
2334bool AMDGPUOperand::isBoolReg() const {
2335 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2336 (AsmParser->isWave32() && isSCSrc_b32()));
2337}
2338
2339uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2340{
2341 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2342 assert(Size == 2 || Size == 4 || Size == 8);
2343
2344 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2345
2346 if (Imm.Mods.Abs) {
2347 Val &= ~FpSignMask;
2348 }
2349 if (Imm.Mods.Neg) {
2350 Val ^= FpSignMask;
2351 }
2352
2353 return Val;
2354}
2355
// Appends this immediate (or expression) operand to Inst and records in
// MCOpIdx the operand index it is about to occupy.
// If the destination slot is an SI source operand the value is routed through
// addLiteralImmOperand.
// NOTE(review): the statements of the expression branch and of the final
// `else` branch that actually add the operand are missing from this listing --
// confirm against the original file.
2356void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2357 MCOpIdx = Inst.getNumOperands();
2358
2359 if (isExpr()) {
2361 return;
2362 }
2363
2364 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2365 Inst.getNumOperands())) {
// NOTE(review): `ApplyModifiers & isImmTy(...)` is a bitwise AND of two bools.
// It yields the same value as `&&` here but always evaluates isImmTy();
// presumably `&&` was intended.
2366 addLiteralImmOperand(Inst, Imm.Val,
2367 ApplyModifiers &
2368 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2369 } else {
2370 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2372 }
2373}
2374
// Appends Val to Inst as the next operand, which must be an SI source operand.
// When ApplyModifiers is set, the operand's fp modifiers are first folded into
// Val. The value is then emitted either as an inline constant or as a literal,
// depending on the operand type, the lit modifier and whether the token was an
// fp or an integer literal.
// NOTE(review): many lines of this function (all `case` labels of both
// switches and several call heads) are missing from this listing, so the
// operand types handled by each branch cannot be read off here -- confirm
// against the original file before relying on any per-case description.
2375void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2376 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2377 auto OpNum = Inst.getNumOperands();
2378 // Check that this operand accepts literals
2379 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2380
2381 if (ApplyModifiers) {
2382 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
// An fp token is treated as 8 bytes wide here; an integer token uses the
// operand's own size.
2383 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2384 Val = applyInputFPModifiers(Val, Size);
2385 }
2386
2387 APInt Literal(64, Val);
2388 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2389
// 64-bit literals are only considered when the target has them and the
// instruction is neither VOP3 nor VOP3P.
2390 bool CanUse64BitLiterals =
2391 AsmParser->has64BitLiterals() &&
2392 !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
2393 LitModifier Lit = getModifiers().Lit;
2394 MCContext &Ctx = AsmParser->getContext();
2395
2396 if (Imm.IsFPImm) { // We got fp literal token
2397 switch (OpTy) {
2403 if (Lit == LitModifier::None &&
2405 AsmParser->hasInv2PiInlineImm())) {
2406 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2407 return;
2408 }
2409
2410 // Non-inlineable
2411 if (AMDGPU::isSISrcFPOperand(InstDesc,
2412 OpNum)) { // Expected 64-bit fp operand
2413 bool HasMandatoryLiteral =
2414 AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
2415 // For fp operands we check if low 32 bits are zeros
2416 if (Literal.getLoBits(32) != 0 &&
2417 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2418 !HasMandatoryLiteral) {
2419 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
2420 Inst.getLoc(),
2421 "Can't encode literal as exact 64-bit floating-point operand. "
2422 "Low 32-bits will be set to zero");
2423 Val &= 0xffffffff00000000u;
2424 }
2425
2426 if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 ||
2429 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2430 (isInt<32>(Val) || isUInt<32>(Val))) {
2431 // The floating-point operand will be verbalized as an
2432 // integer one. If that integer happens to fit 32 bits, on
2433 // re-assembling it will be interpreted as the high half of
2434 // the actual value, so we have to wrap it into lit64().
2435 Lit = LitModifier::Lit64;
2436 } else if (Lit == LitModifier::Lit) {
2437 // For FP64 operands lit() specifies the high half of the value.
2438 Val = Hi_32(Val);
2439 }
2440 }
2441 break;
2442 }
2443
2444 // We don't allow fp literals in 64-bit integer instructions. It is
2445 // unclear how we should encode them. This case should be checked earlier
2446 // in predicate methods (isLiteralImm())
2447 llvm_unreachable("fp literal in 64-bit integer instruction.");
2448
2450 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2451 (isInt<32>(Val) || isUInt<32>(Val)))
2452 Lit = LitModifier::Lit64;
2453 break;
2454
2459 if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
2460 Literal == 0x3fc45f306725feed) {
2461 // This is the 1/(2*pi) which is going to be truncated to bf16 with the
2462 // loss of precision. The constant represents idiomatic fp32 value of
2463 // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
2464 // bits. Prevent rounding below.
2465 Inst.addOperand(MCOperand::createImm(0x3e22));
2466 return;
2467 }
2468 [[fallthrough]];
2469
2491 bool lost;
2492 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2493 // Convert literal to the fp semantics of the operand type
2494 FPLiteral.convert(*getOpFltSemantics(OpTy),
2495 APFloat::rmNearestTiesToEven, &lost);
2496 // We allow precision lost but not overflow or underflow. This should be
2497 // checked earlier in isLiteralImm()
2498
2499 Val = FPLiteral.bitcastToAPInt().getZExtValue();
2500 break;
2501 }
2502 default:
2503 llvm_unreachable("invalid operand size");
2504 }
2505
2506 if (Lit != LitModifier::None) {
2507 Inst.addOperand(
2509 } else {
2511 }
2512 return;
2513 }
2514
2515 // We got int literal token.
2516 // Only sign extend inline immediates.
2517 switch (OpTy) {
2532 break;
2533
2536 if (Lit == LitModifier::None &&
2537 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2539 return;
2540 }
2541
2542 // When the 32 MSBs are not zero (effectively means it can't be safely
2543 // truncated to uint32_t), if the target doesn't support 64-bit literals, or
2544 // the lit modifier is explicitly used, we need to truncate it to the 32
2545 // LSBs.
2546 if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit)
2547 Val = Lo_32(Val);
2548 break;
2549
2553 if (Lit == LitModifier::None &&
2554 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2556 return;
2557 }
2558
2559 // If the target doesn't support 64-bit literals, we need to use the
2560 // constant as the high 32 MSBs of a double-precision floating point value.
2561 if (!AsmParser->has64BitLiterals()) {
2562 Val = static_cast<uint64_t>(Val) << 32;
2563 } else {
2564 // Now the target does support 64-bit literals, there are two cases
2565 // where we still want to use src_literal encoding:
2566 // 1) explicitly forced by using lit modifier;
2567 // 2) the value is a valid 32-bit representation (signed or unsigned),
2568 // meanwhile not forced by lit64 modifier.
2569 if (Lit == LitModifier::Lit ||
2570 (Lit != LitModifier::Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
2571 Val = static_cast<uint64_t>(Val) << 32;
2572 }
2573
2574 // For FP64 operands lit() specifies the high half of the value.
2575 if (Lit == LitModifier::Lit)
2576 Val = Hi_32(Val);
2577 break;
2578
2590 break;
2591
2593 if ((isInt<32>(Val) || isUInt<32>(Val)) && Lit != LitModifier::Lit64)
2594 Val <<= 32;
2595 break;
2596
2597 default:
2598 llvm_unreachable("invalid operand type");
2599 }
2600
2601 if (Lit != LitModifier::None) {
2602 Inst.addOperand(
2604 } else {
2606 }
2607}
2608
2609void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2610 MCOpIdx = Inst.getNumOperands();
2611 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2612}
2613
2614bool AMDGPUOperand::isInlineValue() const {
2615 return isRegKind() && ::isInlineValue(getReg());
2616}
2617
2618//===----------------------------------------------------------------------===//
2619// AsmParser
2620//===----------------------------------------------------------------------===//
2621
// Looks up or creates the symbol named Id in the MC context.
// NOTE(review): the statement that uses Val (presumably assigning it to the
// symbol as a constant value) is missing from this listing -- confirm against
// the original file.
2622void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2623 // TODO: make those pre-defined variables read-only.
2624 // Currently there is no suitable machinery in the core llvm-mc for this.
2625 // MCSymbol::isRedefinable is intended for another purpose, and
2626 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
2627 MCContext &Ctx = getContext();
2628 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2630}
2631
2632static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2633 if (Is == IS_VGPR) {
2634 switch (RegWidth) {
2635 default: return -1;
2636 case 32:
2637 return AMDGPU::VGPR_32RegClassID;
2638 case 64:
2639 return AMDGPU::VReg_64RegClassID;
2640 case 96:
2641 return AMDGPU::VReg_96RegClassID;
2642 case 128:
2643 return AMDGPU::VReg_128RegClassID;
2644 case 160:
2645 return AMDGPU::VReg_160RegClassID;
2646 case 192:
2647 return AMDGPU::VReg_192RegClassID;
2648 case 224:
2649 return AMDGPU::VReg_224RegClassID;
2650 case 256:
2651 return AMDGPU::VReg_256RegClassID;
2652 case 288:
2653 return AMDGPU::VReg_288RegClassID;
2654 case 320:
2655 return AMDGPU::VReg_320RegClassID;
2656 case 352:
2657 return AMDGPU::VReg_352RegClassID;
2658 case 384:
2659 return AMDGPU::VReg_384RegClassID;
2660 case 512:
2661 return AMDGPU::VReg_512RegClassID;
2662 case 1024:
2663 return AMDGPU::VReg_1024RegClassID;
2664 }
2665 } else if (Is == IS_TTMP) {
2666 switch (RegWidth) {
2667 default: return -1;
2668 case 32:
2669 return AMDGPU::TTMP_32RegClassID;
2670 case 64:
2671 return AMDGPU::TTMP_64RegClassID;
2672 case 128:
2673 return AMDGPU::TTMP_128RegClassID;
2674 case 256:
2675 return AMDGPU::TTMP_256RegClassID;
2676 case 512:
2677 return AMDGPU::TTMP_512RegClassID;
2678 }
2679 } else if (Is == IS_SGPR) {
2680 switch (RegWidth) {
2681 default: return -1;
2682 case 32:
2683 return AMDGPU::SGPR_32RegClassID;
2684 case 64:
2685 return AMDGPU::SGPR_64RegClassID;
2686 case 96:
2687 return AMDGPU::SGPR_96RegClassID;
2688 case 128:
2689 return AMDGPU::SGPR_128RegClassID;
2690 case 160:
2691 return AMDGPU::SGPR_160RegClassID;
2692 case 192:
2693 return AMDGPU::SGPR_192RegClassID;
2694 case 224:
2695 return AMDGPU::SGPR_224RegClassID;
2696 case 256:
2697 return AMDGPU::SGPR_256RegClassID;
2698 case 288:
2699 return AMDGPU::SGPR_288RegClassID;
2700 case 320:
2701 return AMDGPU::SGPR_320RegClassID;
2702 case 352:
2703 return AMDGPU::SGPR_352RegClassID;
2704 case 384:
2705 return AMDGPU::SGPR_384RegClassID;
2706 case 512:
2707 return AMDGPU::SGPR_512RegClassID;
2708 }
2709 } else if (Is == IS_AGPR) {
2710 switch (RegWidth) {
2711 default: return -1;
2712 case 32:
2713 return AMDGPU::AGPR_32RegClassID;
2714 case 64:
2715 return AMDGPU::AReg_64RegClassID;
2716 case 96:
2717 return AMDGPU::AReg_96RegClassID;
2718 case 128:
2719 return AMDGPU::AReg_128RegClassID;
2720 case 160:
2721 return AMDGPU::AReg_160RegClassID;
2722 case 192:
2723 return AMDGPU::AReg_192RegClassID;
2724 case 224:
2725 return AMDGPU::AReg_224RegClassID;
2726 case 256:
2727 return AMDGPU::AReg_256RegClassID;
2728 case 288:
2729 return AMDGPU::AReg_288RegClassID;
2730 case 320:
2731 return AMDGPU::AReg_320RegClassID;
2732 case 352:
2733 return AMDGPU::AReg_352RegClassID;
2734 case 384:
2735 return AMDGPU::AReg_384RegClassID;
2736 case 512:
2737 return AMDGPU::AReg_512RegClassID;
2738 case 1024:
2739 return AMDGPU::AReg_1024RegClassID;
2740 }
2741 }
2742 return -1;
2743}
2744
// NOTE(review): the function header and the start of the return expression
// are missing from this listing. This is presumably getSpecialRegForName,
// which callers in this file use to map a register name to a register --
// confirm against the original file.
// Maps the textual name of a special register to its register enumerator.
// Several registers are reachable under two spellings (with and without a
// "src_" prefix). An unrecognised name yields AMDGPU::NoRegister.
2747 .Case("exec", AMDGPU::EXEC)
2748 .Case("vcc", AMDGPU::VCC)
2749 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2750 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2751 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2752 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2753 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2754 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2755 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2756 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2757 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2758 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2759 .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2760 .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2761 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2762 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2763 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2764 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2765 .Case("m0", AMDGPU::M0)
2766 .Case("vccz", AMDGPU::SRC_VCCZ)
2767 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2768 .Case("execz", AMDGPU::SRC_EXECZ)
2769 .Case("src_execz", AMDGPU::SRC_EXECZ)
2770 .Case("scc", AMDGPU::SRC_SCC)
2771 .Case("src_scc", AMDGPU::SRC_SCC)
2772 .Case("tba", AMDGPU::TBA)
2773 .Case("tma", AMDGPU::TMA)
2774 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2775 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2776 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2777 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2778 .Case("vcc_lo", AMDGPU::VCC_LO)
2779 .Case("vcc_hi", AMDGPU::VCC_HI)
2780 .Case("exec_lo", AMDGPU::EXEC_LO)
2781 .Case("exec_hi", AMDGPU::EXEC_HI)
2782 .Case("tma_lo", AMDGPU::TMA_LO)
2783 .Case("tma_hi", AMDGPU::TMA_HI)
2784 .Case("tba_lo", AMDGPU::TBA_LO)
2785 .Case("tba_hi", AMDGPU::TBA_HI)
2786 .Case("pc", AMDGPU::PC_REG)
2787 .Case("null", AMDGPU::SGPR_NULL)
2788 .Default(AMDGPU::NoRegister);
2789}
2790
2791bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2792 SMLoc &EndLoc, bool RestoreOnFailure) {
2793 auto R = parseRegister();
2794 if (!R) return true;
2795 assert(R->isReg());
2796 RegNo = R->getReg();
2797 StartLoc = R->getStartLoc();
2798 EndLoc = R->getEndLoc();
2799 return false;
2800}
2801
// Parses a register; forwards to ParseRegister with RestoreOnFailure = false
// and returns its result unchanged (false on success, true on failure).
2802bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2803 SMLoc &EndLoc) {
2804 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2805}
2806
2807ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2808 SMLoc &EndLoc) {
2809 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2810 bool PendingErrors = getParser().hasPendingError();
2811 getParser().clearPendingErrors();
2812 if (PendingErrors)
2813 return ParseStatus::Failure;
2814 if (Result)
2815 return ParseStatus::NoMatch;
2816 return ParseStatus::Success;
2817}
2818
2819bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2820 RegisterKind RegKind,
2821 MCRegister Reg1,
2822 RegisterKind RegKind1, SMLoc Loc) {
2823 // Allow VCC_LO/HI at the end of SGPR lists.
2824 if (RegKind == IS_SGPR) {
2825 unsigned RegIdx = (Reg - AMDGPU::SGPR0) + RegWidth / 32;
2826 if ((RegIdx == 106 && Reg1 == AMDGPU::VCC_LO) ||
2827 (RegIdx == 107 && Reg1 == AMDGPU::VCC_HI)) {
2828 RegWidth += 32;
2829 return true;
2830 }
2831 }
2832
2833 if (RegKind != RegKind1) {
2834 Error(Loc, "registers in a list must be of the same kind");
2835 return MCRegister();
2836 }
2837
2838 switch (RegKind) {
2839 case IS_SPECIAL:
2840 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2841 Reg = AMDGPU::EXEC;
2842 RegWidth = 64;
2843 return true;
2844 }
2845 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2846 Reg = AMDGPU::FLAT_SCR;
2847 RegWidth = 64;
2848 return true;
2849 }
2850 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2851 Reg = AMDGPU::XNACK_MASK;
2852 RegWidth = 64;
2853 return true;
2854 }
2855 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2856 Reg = AMDGPU::VCC;
2857 RegWidth = 64;
2858 return true;
2859 }
2860 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2861 Reg = AMDGPU::TBA;
2862 RegWidth = 64;
2863 return true;
2864 }
2865 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2866 Reg = AMDGPU::TMA;
2867 RegWidth = 64;
2868 return true;
2869 }
2870 Error(Loc, "register does not fit in the list");
2871 return false;
2872 case IS_VGPR:
2873 case IS_SGPR:
2874 case IS_AGPR:
2875 case IS_TTMP:
2876 if (Reg1 != Reg + RegWidth / 32) {
2877 Error(Loc, "registers in a list must have consecutive indices");
2878 return false;
2879 }
2880 RegWidth += 32;
2881 return true;
2882 default:
2883 llvm_unreachable("unexpected register kind");
2884 }
2885}
2886
// Describes one family of regular registers: its register kind, paired with a
// name prefix.
// NOTE(review): the member holding the name (read elsewhere in this file as
// `Name`) is missing from this listing -- confirm against the original file.
2887struct RegInfo {
2889 RegisterKind Kind;
2890};
2891
// Name prefixes of the regular register families and the kind each maps to.
// Entry order matters: getRegularRegInfo returns the first entry whose name is
// a prefix of the token, so "acc" must precede "a".
2892static constexpr RegInfo RegularRegisters[] = {
2893 {{"v"}, IS_VGPR},
2894 {{"s"}, IS_SGPR},
2895 {{"ttmp"}, IS_TTMP},
2896 {{"acc"}, IS_AGPR},
2897 {{"a"}, IS_AGPR},
2898};
2899
2900static bool isRegularReg(RegisterKind Kind) {
2901 return Kind == IS_VGPR ||
2902 Kind == IS_SGPR ||
2903 Kind == IS_TTMP ||
2904 Kind == IS_AGPR;
2905}
2906
// NOTE(review): the function header is missing from this listing. This is
// presumably getRegularRegInfo, which callers in this file invoke with a
// register name -- confirm against the original file.
// Returns the first RegularRegisters entry whose name is a prefix of Str, or
// nullptr if none matches.
2908 for (const RegInfo &Reg : RegularRegisters)
2909 if (Str.starts_with(Reg.Name))
2910 return &Reg;
2911 return nullptr;
2912}
2913
2914static bool getRegNum(StringRef Str, unsigned& Num) {
2915 return !Str.getAsInteger(10, Num);
2916}
2917
2918bool
2919AMDGPUAsmParser::isRegister(const AsmToken &Token,
2920 const AsmToken &NextToken) const {
2921
2922 // A list of consecutive registers: [s0,s1,s2,s3]
2923 if (Token.is(AsmToken::LBrac))
2924 return true;
2925
2926 if (!Token.is(AsmToken::Identifier))
2927 return false;
2928
2929 // A single register like s0 or a range of registers like s[0:1]
2930
2931 StringRef Str = Token.getString();
2932 const RegInfo *Reg = getRegularRegInfo(Str);
2933 if (Reg) {
2934 StringRef RegName = Reg->Name;
2935 StringRef RegSuffix = Str.substr(RegName.size());
2936 if (!RegSuffix.empty()) {
2937 RegSuffix.consume_back(".l");
2938 RegSuffix.consume_back(".h");
2939 unsigned Num;
2940 // A single register with an index: rXX
2941 if (getRegNum(RegSuffix, Num))
2942 return true;
2943 } else {
2944 // A range of registers: r[XX:YY].
2945 if (NextToken.is(AsmToken::LBrac))
2946 return true;
2947 }
2948 }
2949
2950 return getSpecialRegForName(Str).isValid();
2951}
2952
// Returns true if the current token, together with the token peeked after it,
// starts a register operand.
2953bool
2954AMDGPUAsmParser::isRegister()
2955{
2956 return isRegister(getToken(), peekToken());
2957}
2958
2959MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2960 unsigned SubReg, unsigned RegWidth,
2961 SMLoc Loc) {
2962 assert(isRegularReg(RegKind));
2963
2964 unsigned AlignSize = 1;
2965 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2966 // SGPR and TTMP registers must be aligned.
2967 // Max required alignment is 4 dwords.
2968 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2969 }
2970
2971 if (RegNum % AlignSize != 0) {
2972 Error(Loc, "invalid register alignment");
2973 return MCRegister();
2974 }
2975
2976 unsigned RegIdx = RegNum / AlignSize;
2977 int RCID = getRegClass(RegKind, RegWidth);
2978 if (RCID == -1) {
2979 Error(Loc, "invalid or unsupported register size");
2980 return MCRegister();
2981 }
2982
2983 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2984 const MCRegisterClass RC = TRI->getRegClass(RCID);
2985 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
2986 Error(Loc, "register index is out of range");
2987 return AMDGPU::NoRegister;
2988 }
2989
2990 if (RegKind == IS_VGPR && !isGFX1250Plus() && RegIdx + RegWidth / 32 > 256) {
2991 Error(Loc, "register index is out of range");
2992 return MCRegister();
2993 }
2994
2995 MCRegister Reg = RC.getRegister(RegIdx);
2996
2997 if (SubReg) {
2998 Reg = TRI->getSubReg(Reg, SubReg);
2999
3000 // Currently all regular registers have their .l and .h subregisters, so
3001 // we should never need to generate an error here.
3002 assert(Reg && "Invalid subregister!");
3003 }
3004
3005 return Reg;
3006}
3007
3008bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
3009 unsigned &SubReg) {
3010 int64_t RegLo, RegHi;
3011 if (!skipToken(AsmToken::LBrac, "missing register index"))
3012 return false;
3013
3014 SMLoc FirstIdxLoc = getLoc();
3015 SMLoc SecondIdxLoc;
3016
3017 if (!parseExpr(RegLo))
3018 return false;
3019
3020 if (trySkipToken(AsmToken::Colon)) {
3021 SecondIdxLoc = getLoc();
3022 if (!parseExpr(RegHi))
3023 return false;
3024 } else {
3025 RegHi = RegLo;
3026 }
3027
3028 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
3029 return false;
3030
3031 if (!isUInt<32>(RegLo)) {
3032 Error(FirstIdxLoc, "invalid register index");
3033 return false;
3034 }
3035
3036 if (!isUInt<32>(RegHi)) {
3037 Error(SecondIdxLoc, "invalid register index");
3038 return false;
3039 }
3040
3041 if (RegLo > RegHi) {
3042 Error(FirstIdxLoc, "first register index should not exceed second index");
3043 return false;
3044 }
3045
3046 if (RegHi == RegLo) {
3047 StringRef RegSuffix = getTokenStr();
3048 if (RegSuffix == ".l") {
3049 SubReg = AMDGPU::lo16;
3050 lex();
3051 } else if (RegSuffix == ".h") {
3052 SubReg = AMDGPU::hi16;
3053 lex();
3054 }
3055 }
3056
3057 Num = static_cast<unsigned>(RegLo);
3058 RegWidth = 32 * ((RegHi - RegLo) + 1);
3059
3060 return true;
3061}
3062
3063MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3064 unsigned &RegNum,
3065 unsigned &RegWidth,
3066 SmallVectorImpl<AsmToken> &Tokens) {
3067 assert(isToken(AsmToken::Identifier));
3068 MCRegister Reg = getSpecialRegForName(getTokenStr());
3069 if (Reg) {
3070 RegNum = 0;
3071 RegWidth = 32;
3072 RegKind = IS_SPECIAL;
3073 Tokens.push_back(getToken());
3074 lex(); // skip register name
3075 }
3076 return Reg;
3077}
3078
3079MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3080 unsigned &RegNum,
3081 unsigned &RegWidth,
3082 SmallVectorImpl<AsmToken> &Tokens) {
3083 assert(isToken(AsmToken::Identifier));
3084 StringRef RegName = getTokenStr();
3085 auto Loc = getLoc();
3086
3087 const RegInfo *RI = getRegularRegInfo(RegName);
3088 if (!RI) {
3089 Error(Loc, "invalid register name");
3090 return MCRegister();
3091 }
3092
3093 Tokens.push_back(getToken());
3094 lex(); // skip register name
3095
3096 RegKind = RI->Kind;
3097 StringRef RegSuffix = RegName.substr(RI->Name.size());
3098 unsigned SubReg = NoSubRegister;
3099 bool IsRange = false;
3100 if (!RegSuffix.empty()) {
3101 if (RegSuffix.consume_back(".l"))
3102 SubReg = AMDGPU::lo16;
3103 else if (RegSuffix.consume_back(".h"))
3104 SubReg = AMDGPU::hi16;
3105
3106 // Single 32-bit register: vXX.
3107 if (!getRegNum(RegSuffix, RegNum)) {
3108 Error(Loc, "invalid register index");
3109 return MCRegister();
3110 }
3111 RegWidth = 32;
3112 } else {
3113 // Range of registers: v[XX:YY]. ":YY" is optional.
3114 IsRange = true;
3115 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3116 return MCRegister();
3117 }
3118
3119 // Do not allow vcc_lo/hi be referred as s106/107.
3120 MCRegister Reg = getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3121 const MCRegisterInfo &TRI = *getContext().getRegisterInfo();
3122 if (RegKind == IS_SGPR && IsRange
3123 ? (TRI.isSubRegister(Reg, VCC_LO) || TRI.isSubRegister(Reg, VCC_HI))
3124 : (Reg == VCC_LO || Reg == VCC_HI)) {
3125 Error(Loc, "register index is out of range");
3126 return MCRegister();
3127 }
3128
3129 return Reg;
3130}
3131
3132MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3133 unsigned &RegNum, unsigned &RegWidth,
3134 SmallVectorImpl<AsmToken> &Tokens) {
3135 MCRegister Reg;
3136 auto ListLoc = getLoc();
3137
3138 if (!skipToken(AsmToken::LBrac,
3139 "expected a register or a list of registers")) {
3140 return MCRegister();
3141 }
3142
3143 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3144
3145 auto Loc = getLoc();
3146 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3147 return MCRegister();
3148 if (RegWidth != 32) {
3149 Error(Loc, "expected a single 32-bit register");
3150 return MCRegister();
3151 }
3152
3153 for (; trySkipToken(AsmToken::Comma); ) {
3154 RegisterKind NextRegKind;
3155 MCRegister NextReg;
3156 unsigned NextRegNum, NextRegWidth;
3157 Loc = getLoc();
3158
3159 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3160 NextRegNum, NextRegWidth,
3161 Tokens)) {
3162 return MCRegister();
3163 }
3164 if (NextRegWidth != 32) {
3165 Error(Loc, "expected a single 32-bit register");
3166 return MCRegister();
3167 }
3168 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, NextRegKind,
3169 Loc))
3170 return MCRegister();
3171 }
3172
3173 if (!skipToken(AsmToken::RBrac,
3174 "expected a comma or a closing square bracket")) {
3175 return MCRegister();
3176 }
3177
3178 if (isRegularReg(RegKind))
3179 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3180
3181 return Reg;
3182}
3183
// Parse one register operand in any accepted spelling and verify that the
// current subtarget provides it.
//
// An identifier is tried first as a special register and then as a regular
// register; any other token is handed to ParseRegList. Tokens consumed by the
// sub-parsers are recorded in Tokens.
//
// Returns true with Reg/RegKind/RegNum/RegWidth filled in, or false after an
// error has been reported.
3184bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3185 MCRegister &Reg, unsigned &RegNum,
3186 unsigned &RegWidth,
3187 SmallVectorImpl<AsmToken> &Tokens) {
3188 auto Loc = getLoc();
3189 Reg = MCRegister();
3190
3191 if (isToken(AsmToken::Identifier)) {
3192 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3193 if (!Reg)
3194 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3195 } else {
3196 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3197 }
3198
3199 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3200 if (!Reg) {
// Every failing sub-parser is expected to have emitted a diagnostic already.
3201 assert(Parser.hasPendingError());
3202 return false;
3203 }
3204
3205 if (!subtargetHasRegister(*TRI, Reg)) {
3206 if (Reg == AMDGPU::SGPR_NULL) {
3207 Error(Loc, "'null' operand is not supported on this GPU");
3208 } else {
// NOTE(review): listing line 3209, which opens the Error(...) call that the
// next line completes, is missing from this copy. Restore it from the
// upstream file before building.
3210 " register not available on this GPU");
3211 }
3212 return false;
3213 }
3214
3215 return true;
3216}
3217
// Convenience overload that collects the consumed tokens internally and can
// push them back to the lexer.
//
// NOTE(review): as written, the tokens are un-lexed when the inner parse
// SUCCEEDS and RestoreOnFailure is set; on failure nothing is restored. That
// does not match the parameter name - confirm against the callers before
// relying on either behavior.
3218bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3219 MCRegister &Reg, unsigned &RegNum,
3220 unsigned &RegWidth,
3221 bool RestoreOnFailure /*=false*/) {
3222 Reg = MCRegister();
3223
// NOTE(review): listing line 3224 is missing from this copy; `Tokens` is used
// below without a visible declaration. Restore it from the upstream file.
3225 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3226 if (RestoreOnFailure) {
// Tokens are popped from the back, i.e. pushed back in reverse order of
// consumption.
3227 while (!Tokens.empty()) {
3228 getLexer().UnLex(Tokens.pop_back_val());
3229 }
3230 }
3231 return true;
3232 }
3233 return false;
3234}
3235
3236std::optional<StringRef>
3237AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3238 switch (RegKind) {
3239 case IS_VGPR:
3240 return StringRef(".amdgcn.next_free_vgpr");
3241 case IS_SGPR:
3242 return StringRef(".amdgcn.next_free_sgpr");
3243 default:
3244 return std::nullopt;
3245 }
3246}
3247
// Create (or look up) the next-free-register symbol for RegKind and mark it
// redefinable. RegKind must be a kind for which getGprCountSymbolName returns
// a name.
3248void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3249 auto SymbolName = getGprCountSymbolName(RegKind);
3250 assert(SymbolName && "initializing invalid register kind");
3251 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
// NOTE(review): listing line 3252 is missing from this copy; presumably it
// gives the symbol its initial value - restore it from the upstream file.
3253 Sym->setRedefinable(true);
3254}
3255
// Account for a use of the register range starting at DwordRegIndex and
// spanning RegWidth bits in the matching .amdgcn.next_free_{v,s}gpr symbol.
//
// Returns true when there is nothing to do (ISA major version below 6, or a
// register kind without a counter symbol). Returns false after reporting an
// error when the symbol is not a variable or its value is not an absolute
// expression.
3256bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3257 unsigned DwordRegIndex,
3258 unsigned RegWidth) {
3259 // Symbols are only defined for GCN targets
3260 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3261 return true;
3262
3263 auto SymbolName = getGprCountSymbolName(RegKind);
3264 if (!SymbolName)
3265 return true;
3266 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3267
// Index of the highest dword register touched by this use.
3268 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3269 int64_t OldCount;
3270
// Error(...) is negated so that a reported error yields `false`.
3271 if (!Sym->isVariable())
3272 return !Error(getLoc(),
3273 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3274 if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
3275 return !Error(
3276 getLoc(),
3277 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3278
3279 if (OldCount <= NewMax)
// NOTE(review): listing line 3280, the body of the `if` above, is missing
// from this copy. Presumably it raises the symbol's value past NewMax -
// restore it from the upstream file. As shown, the `if` would guard the
// `return true` below and the function could fall off its end.
3281
3282 return true;
3283}
3284
3285std::unique_ptr<AMDGPUOperand>
3286AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3287 const auto &Tok = getToken();
3288 SMLoc StartLoc = Tok.getLoc();
3289 SMLoc EndLoc = Tok.getEndLoc();
3290 RegisterKind RegKind;
3291 MCRegister Reg;
3292 unsigned RegNum, RegWidth;
3293
3294 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3295 return nullptr;
3296 }
3297 if (isHsaAbi(getSTI())) {
3298 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3299 return nullptr;
3300 } else
3301 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3302 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3303}
3304
3305ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3306 bool HasSP3AbsModifier, LitModifier Lit) {
3307 // TODO: add syntactic sugar for 1/(2*PI)
3308
3309 if (isRegister() || isModifier())
3310 return ParseStatus::NoMatch;
3311
3312 if (Lit == LitModifier::None) {
3313 if (trySkipId("lit"))
3314 Lit = LitModifier::Lit;
3315 else if (trySkipId("lit64"))
3316 Lit = LitModifier::Lit64;
3317
3318 if (Lit != LitModifier::None) {
3319 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3320 return ParseStatus::Failure;
3321 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
3322 if (S.isSuccess() &&
3323 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3324 return ParseStatus::Failure;
3325 return S;
3326 }
3327 }
3328
3329 const auto& Tok = getToken();
3330 const auto& NextTok = peekToken();
3331 bool IsReal = Tok.is(AsmToken::Real);
3332 SMLoc S = getLoc();
3333 bool Negate = false;
3334
3335 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3336 lex();
3337 IsReal = true;
3338 Negate = true;
3339 }
3340
3341 AMDGPUOperand::Modifiers Mods;
3342 Mods.Lit = Lit;
3343
3344 if (IsReal) {
3345 // Floating-point expressions are not supported.
3346 // Can only allow floating-point literals with an
3347 // optional sign.
3348
3349 StringRef Num = getTokenStr();
3350 lex();
3351
3352 APFloat RealVal(APFloat::IEEEdouble());
3353 auto roundMode = APFloat::rmNearestTiesToEven;
3354 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3355 return ParseStatus::Failure;
3356 if (Negate)
3357 RealVal.changeSign();
3358
3359 Operands.push_back(
3360 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3361 AMDGPUOperand::ImmTyNone, true));
3362 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3363 Op.setModifiers(Mods);
3364
3365 return ParseStatus::Success;
3366
3367 } else {
3368 int64_t IntVal;
3369 const MCExpr *Expr;
3370 SMLoc S = getLoc();
3371
3372 if (HasSP3AbsModifier) {
3373 // This is a workaround for handling expressions
3374 // as arguments of SP3 'abs' modifier, for example:
3375 // |1.0|
3376 // |-1|
3377 // |1+x|
3378 // This syntax is not compatible with syntax of standard
3379 // MC expressions (due to the trailing '|').
3380 SMLoc EndLoc;
3381 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3382 return ParseStatus::Failure;
3383 } else {
3384 if (Parser.parseExpression(Expr))
3385 return ParseStatus::Failure;
3386 }
3387
3388 if (Expr->evaluateAsAbsolute(IntVal)) {
3389 if (Lit == LitModifier::Lit && !isInt<32>(IntVal) && !isUInt<32>(IntVal))
3390 return Error(S, "literal value out of range");
3391 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3392 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3393 Op.setModifiers(Mods);
3394 } else {
3395 if (Lit != LitModifier::None)
3396 return ParseStatus::NoMatch;
3397 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3398 }
3399
3400 return ParseStatus::Success;
3401 }
3402
3403 return ParseStatus::NoMatch;
3404}
3405
3406ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3407 if (!isRegister())
3408 return ParseStatus::NoMatch;
3409
3410 if (auto R = parseRegister()) {
3411 assert(R->isReg());
3412 Operands.push_back(std::move(R));
3413 return ParseStatus::Success;
3414 }
3415 return ParseStatus::Failure;
3416}
3417
3418ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3419 bool HasSP3AbsMod, LitModifier Lit) {
3420 ParseStatus Res = parseReg(Operands);
3421 if (!Res.isNoMatch())
3422 return Res;
3423 if (isModifier())
3424 return ParseStatus::NoMatch;
3425 return parseImm(Operands, HasSP3AbsMod, Lit);
3426}
3427
3428bool
3429AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3430 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3431 const auto &str = Token.getString();
3432 return str == "abs" || str == "neg" || str == "sext";
3433 }
3434 return false;
3435}
3436
3437bool
3438AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3439 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3440}
3441
3442bool
3443AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3444 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3445}
3446
3447bool
3448AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3449 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3450}
3451
3452// Check if this is an operand modifier or an opcode modifier
3453// which may look like an expression but it is not. We should
3454// avoid parsing these modifiers as expressions. Currently
3455// recognized sequences are:
3456// |...|
3457// abs(...)
3458// neg(...)
3459// sext(...)
3460// -reg
3461// -|...|
3462// -abs(...)
3463// name:...
3464//
3465bool
3466AMDGPUAsmParser::isModifier() {
3467
3468 AsmToken Tok = getToken();
3469 AsmToken NextToken[2];
3470 peekTokens(NextToken);
3471
3472 return isOperandModifier(Tok, NextToken[0]) ||
3473 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3474 isOpcodeModifierWithVal(Tok, NextToken[0]);
3475}
3476
3477// Check if the current token is an SP3 'neg' modifier.
3478// Currently this modifier is allowed in the following context:
3479//
3480// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3481// 2. Before an 'abs' modifier: -abs(...)
3482// 3. Before an SP3 'abs' modifier: -|...|
3483//
3484// In all other cases "-" is handled as a part
3485// of an expression that follows the sign.
3486//
3487// Note: When "-" is followed by an integer literal,
3488// this is interpreted as integer negation rather
3489// than a floating-point NEG modifier applied to N.
3490// Beside being contr-intuitive, such use of floating-point
3491// NEG modifier would have resulted in different meaning
3492// of integer literals used with VOP1/2/C and VOP3,
3493// for example:
3494// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3495// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3496// Negative fp literals with preceding "-" are
3497// handled likewise for uniformity
3498//
3499bool
3500AMDGPUAsmParser::parseSP3NegModifier() {
3501
3502 AsmToken NextToken[2];
3503 peekTokens(NextToken);
3504
3505 if (isToken(AsmToken::Minus) &&
3506 (isRegister(NextToken[0], NextToken[1]) ||
3507 NextToken[0].is(AsmToken::Pipe) ||
3508 isId(NextToken[0], "abs"))) {
3509 lex();
3510 return true;
3511 }
3512
3513 return false;
3514}
3515
// Parse a register (or, when AllowImm is set, a register or immediate)
// together with optional floating-point input modifiers:
//   SP3 neg   "-"          named neg   "neg(...)"
//   SP3 abs   "|...|"      named abs   "abs(...)"
//   literal   "lit(...)" / "lit64(...)"
// The modifiers found are attached to the operand that was pushed last.
// Mixing the SP3 and the named spelling of the same modifier is rejected.
3516ParseStatus
3517AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3518 bool AllowImm) {
3519 bool Neg, SP3Neg;
3520 bool Abs, SP3Abs;
3521 SMLoc Loc;
3522
3523 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3524 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3525 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3526
3527 SP3Neg = parseSP3NegModifier();
3528
3529 Loc = getLoc();
3530 Neg = trySkipId("neg");
3531 if (Neg && SP3Neg)
3532 return Error(Loc, "expected register or immediate");
3533 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3534 return ParseStatus::Failure;
3535
3536 Abs = trySkipId("abs");
3537 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3538 return ParseStatus::Failure;
3539
3540 LitModifier Lit = LitModifier::None;
3541 if (trySkipId("lit")) {
3542 Lit = LitModifier::Lit;
3543 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3544 return ParseStatus::Failure;
3545 } else if (trySkipId("lit64")) {
3546 Lit = LitModifier::Lit64;
3547 if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
3548 return ParseStatus::Failure;
// Loc still refers to the position recorded before "neg" was tried.
3549 if (!has64BitLiterals())
3550 return Error(Loc, "lit64 is not supported on this GPU");
3551 }
3552
3553 Loc = getLoc();
3554 SP3Abs = trySkipToken(AsmToken::Pipe);
3555 if (Abs && SP3Abs)
3556 return Error(Loc, "expected register or immediate");
3557
3558 ParseStatus Res;
3559 if (AllowImm) {
3560 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3561 } else {
3562 Res = parseReg(Operands);
3563 }
3564 if (!Res.isSuccess())
3565 return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
// NOTE(review): listing line 3566, the '?' arm of this conditional, is
// missing from this copy. By analogy with parseRegOrImmWithIntInputMods it is
// presumably a hard failure once any modifier has been consumed - restore it
// from the upstream file.
3567 : Res;
3568
// NOTE(review): this diagnostic is emitted without returning, so parsing
// continues and the function can still return Success - confirm that this is
// intended.
3569 if (Lit != LitModifier::None && !Operands.back()->isImm())
3570 Error(Loc, "expected immediate with lit modifier");
3571
// Closing delimiters of the modifiers that were opened above.
3572 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3573 return ParseStatus::Failure;
3574 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3575 return ParseStatus::Failure;
3576 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3577 return ParseStatus::Failure;
3578 if (Lit != LitModifier::None &&
3579 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3580 return ParseStatus::Failure;
3581
3582 AMDGPUOperand::Modifiers Mods;
3583 Mods.Abs = Abs || SP3Abs;
3584 Mods.Neg = Neg || SP3Neg;
3585 Mods.Lit = Lit;
3586
// Modifiers cannot be attached to an unresolved expression operand.
3587 if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
3588 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3589 if (Op.isExpr())
3590 return Error(Op.getStartLoc(), "expected an absolute expression");
3591 Op.setModifiers(Mods);
3592 }
3593 return ParseStatus::Success;
3594}
3595
3596ParseStatus
3597AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3598 bool AllowImm) {
3599 bool Sext = trySkipId("sext");
3600 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3601 return ParseStatus::Failure;
3602
3603 ParseStatus Res;
3604 if (AllowImm) {
3605 Res = parseRegOrImm(Operands);
3606 } else {
3607 Res = parseReg(Operands);
3608 }
3609 if (!Res.isSuccess())
3610 return Sext ? ParseStatus::Failure : Res;
3611
3612 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3613 return ParseStatus::Failure;
3614
3615 AMDGPUOperand::Modifiers Mods;
3616 Mods.Sext = Sext;
3617
3618 if (Mods.hasIntModifiers()) {
3619 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3620 if (Op.isExpr())
3621 return Error(Op.getStartLoc(), "expected an absolute expression");
3622 Op.setModifiers(Mods);
3623 }
3624
3625 return ParseStatus::Success;
3626}
3627
3628ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3629 return parseRegOrImmWithFPInputMods(Operands, false);
3630}
3631
3632ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3633 return parseRegOrImmWithIntInputMods(Operands, false);
3634}
3635
3636ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3637 auto Loc = getLoc();
3638 if (trySkipId("off")) {
3639 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3640 AMDGPUOperand::ImmTyOff, false));
3641 return ParseStatus::Success;
3642 }
3643
3644 if (!isRegister())
3645 return ParseStatus::NoMatch;
3646
3647 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3648 if (Reg) {
3649 Operands.push_back(std::move(Reg));
3650 return ParseStatus::Success;
3651 }
3652
3653 return ParseStatus::Failure;
3654}
3655
3656unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3657 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3658
3659 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3660 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3661 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3662 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3663 return Match_InvalidOperand;
3664
3665 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3666 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3667 // v_mac_f32/16 allow only dst_sel == DWORD;
3668 auto OpNum =
3669 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3670 const auto &Op = Inst.getOperand(OpNum);
3671 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3672 return Match_InvalidOperand;
3673 }
3674 }
3675
3676 // Asm can first try to match VOPD or VOPD3. By failing early here with
3677 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3678 // Checking later during validateInstruction does not give a chance to retry
3679 // parsing as a different encoding.
3680 if (tryAnotherVOPDEncoding(Inst))
3681 return Match_InvalidOperand;
3682
3683 return Match_Success;
3684}
3685
3695
3696// What asm variants we should check
// Returns the list of assembler variants to try, narrowed by any encoding the
// user has forced. The checks run in priority order: DPP together with VOP3,
// then a forced 32-bit encoding, VOP3, SDWA, DPP. With nothing forced, all
// variants are returned.
3697ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3698 if (isForcedDPP() && isForcedVOP3()) {
3699 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3700 return ArrayRef(Variants);
3701 }
3702 if (getForcedEncodingSize() == 32) {
3703 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3704 return ArrayRef(Variants);
3705 }
3706
3707 if (isForcedVOP3()) {
3708 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3709 return ArrayRef(Variants);
3710 }
3711
3712 if (isForcedSDWA()) {
3713 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
// NOTE(review): listing line 3714, the remainder of this initializer list, is
// missing from this copy - restore it from the upstream file.
3715 return ArrayRef(Variants);
3716 }
3717
3718 if (isForcedDPP()) {
3719 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3720 return ArrayRef(Variants);
3721 }
3722
3723 return getAllVariants();
3724}
3725
3726StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3727 if (isForcedDPP() && isForcedVOP3())
3728 return "e64_dpp";
3729
3730 if (getForcedEncodingSize() == 32)
3731 return "e32";
3732
3733 if (isForcedVOP3())
3734 return "e64";
3735
3736 if (isForcedSDWA())
3737 return "sdwa";
3738
3739 if (isForcedDPP())
3740 return "dpp";
3741
3742 return "";
3743}
3744
3745MCRegister
3746AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3747 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3748 for (MCPhysReg Reg : Desc.implicit_uses()) {
3749 switch (Reg) {
3750 case AMDGPU::FLAT_SCR:
3751 case AMDGPU::VCC:
3752 case AMDGPU::VCC_LO:
3753 case AMDGPU::VCC_HI:
3754 case AMDGPU::M0:
3755 return Reg;
3756 default:
3757 break;
3758 }
3759 }
3760 return MCRegister();
3761}
3762
// Reports whether operand OpIdx of Inst can be encoded as an inline constant.
// The decision is made by operand size (8, 4 or 2 bytes); 2-byte operands are
// further split by operand type.
//
// NOTE(review): a large part of this function is missing from this copy -
// listing lines 3771-3772 (the condition guarding the first `return false`),
// and 3788-3789, 3792-3808, 3811-3812 and 3815 (the statements/labels that
// select among the 2-byte `return`s). Restore them from the upstream file;
// the code as shown here is not compilable.
3763// NB: This code is correct only when used to check constant
3764// bus limitations because GFX7 support no f16 inline constants.
3765// Note that there are no cases when a GFX7 opcode violates
3766// constant bus limitations due to the use of an f16 constant.
3767bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3768 unsigned OpIdx) const {
3769 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3770
3773 return false;
3774 }
3775
3776 const MCOperand &MO = Inst.getOperand(OpIdx);
3777
// An expression operand is evaluated through getLitValue.
3778 int64_t Val = MO.isImm() ? MO.getImm() : getLitValue(MO.getExpr());
3779 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3780
3781 switch (OpSize) { // expected operand size
3782 case 8:
3783 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3784 case 4:
3785 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3786 case 2: {
3787 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3790 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3791
3795
3799
3802
3806
3809 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3810
3813 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3814
3816 return false;
3817
3818 llvm_unreachable("invalid operand type");
3819 }
3820 default:
3821 llvm_unreachable("invalid operand size");
3822 }
3823}
3824
3825unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3826 if (!isGFX10Plus())
3827 return 1;
3828
3829 switch (Opcode) {
3830 // 64-bit shift instructions can use only one scalar value input
3831 case AMDGPU::V_LSHLREV_B64_e64:
3832 case AMDGPU::V_LSHLREV_B64_gfx10:
3833 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3834 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3835 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3836 case AMDGPU::V_LSHRREV_B64_e64:
3837 case AMDGPU::V_LSHRREV_B64_gfx10:
3838 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3839 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3840 case AMDGPU::V_ASHRREV_I64_e64:
3841 case AMDGPU::V_ASHRREV_I64_gfx10:
3842 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3843 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3844 case AMDGPU::V_LSHL_B64_e64:
3845 case AMDGPU::V_LSHR_B64_e64:
3846 case AMDGPU::V_ASHR_I64_e64:
3847 return 1;
3848 default:
3849 return 2;
3850 }
3851}
3852
// NOTE(review): listing line 3854 is missing from this copy; presumably it
// declares the `OperandIndices` type used by validateConstantBusLimitations
// in terms of this constant - restore it from the upstream file. Also note
// that the VOPD branch below returns 8 entries while this constant is 6;
// confirm that the container type tolerates that.
3853constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3855
3856// Get regular operand indices in the same order as specified
3857// in the instruction (but append mandatory literals to the end).
// NOTE(review): listing line 3858, the start of the function signature
// (return type, name and the Opcode parameter), is missing from this copy.
//
// For VOPD opcodes the result lists src0X, vsrc1X, vsrc2X, src0Y, vsrc1Y,
// vsrc2Y, then immX and imm; otherwise src0, src1, src2, then imm. The imm
// entries are -1 unless AddMandatoryLiterals is set.
3859 bool AddMandatoryLiterals = false) {
3860
3861 int16_t ImmIdx =
3862 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3863
3864 if (isVOPD(Opcode)) {
3865 int16_t ImmXIdx =
3866 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3867
3868 return {getNamedOperandIdx(Opcode, OpName::src0X),
3869 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3870 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3871 getNamedOperandIdx(Opcode, OpName::src0Y),
3872 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3873 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3874 ImmXIdx,
3875 ImmIdx};
3876 }
3877
3878 return {getNamedOperandIdx(Opcode, OpName::src0),
3879 getNamedOperandIdx(Opcode, OpName::src1),
3880 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3881}
3882
3883bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3884 const MCOperand &MO = Inst.getOperand(OpIdx);
3885 if (MO.isImm())
3886 return !isInlineConstant(Inst, OpIdx);
3887 if (MO.isReg()) {
3888 auto Reg = MO.getReg();
3889 if (!Reg)
3890 return false;
3891 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3892 auto PReg = mc2PseudoReg(Reg);
3893 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3894 }
3895 return true;
3896}
3897
3898// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3899// Writelane is special in that it can use SGPR and M0 (which would normally
3900// count as using the constant bus twice - but in this case it is allowed since
3901// the lane selector doesn't count as a use of the constant bus). However, it is
3902// still required to abide by the 1 SGPR rule.
3903static bool checkWriteLane(const MCInst &Inst) {
3904 const unsigned Opcode = Inst.getOpcode();
3905 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3906 return false;
3907 const MCOperand &LaneSelOp = Inst.getOperand(2);
3908 if (!LaneSelOp.isReg())
3909 return false;
3910 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3911 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3912}
3913
// Check that Inst does not read more scalar values over the constant bus than
// getConstantBusLimit allows for its opcode. Distinct SGPRs (including an
// implicitly read one) and literals are counted; on a violation an error is
// reported at the offending operand and false is returned.
3914bool AMDGPUAsmParser::validateConstantBusLimitations(
3915 const MCInst &Inst, const OperandVector &Operands) {
3916 const unsigned Opcode = Inst.getOpcode();
3917 const MCInstrDesc &Desc = MII.get(Opcode);
3918 MCRegister LastSGPR;
3919 unsigned ConstantBusUseCount = 0;
3920 unsigned NumLiterals = 0;
// LiteralSize is left uninitialized here; in the visible code it is only read
// after NumLiterals has become non-zero, which always comes with an
// assignment.
3921 unsigned LiteralSize;
3922
// NOTE(review): listing lines 3924-3925, the flag mask tested against
// Desc.TSFlags, are missing from this copy - restore them from the upstream
// file.
3923 if (!(Desc.TSFlags &
3926 !isVOPD(Opcode))
3927 return true;
3928
3929 if (checkWriteLane(Inst))
3930 return true;
3931
3932 // Check special imm operands (used by madmk, etc)
3933 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3934 ++NumLiterals;
3935 LiteralSize = 4;
3936 }
3937
3938 SmallDenseSet<MCRegister> SGPRsUsed;
3939 MCRegister SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3940 if (SGPRUsed) {
3941 SGPRsUsed.insert(SGPRUsed);
3942 ++ConstantBusUseCount;
3943 }
3944
3945 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3946
3947 unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
3948
3949 for (int OpIdx : OpIndices) {
// -1 marks a source operand the opcode does not have.
3950 if (OpIdx == -1)
3951 continue;
3952
3953 const MCOperand &MO = Inst.getOperand(OpIdx);
3954 if (usesConstantBus(Inst, OpIdx)) {
3955 if (MO.isReg()) {
3956 LastSGPR = mc2PseudoReg(MO.getReg());
3957 // Pairs of registers with a partial intersections like these
3958 // s0, s[0:1]
3959 // flat_scratch_lo, flat_scratch
3960 // flat_scratch_lo, flat_scratch_hi
3961 // are theoretically valid but they are disabled anyway.
3962 // Note that this code mimics SIInstrInfo::verifyInstruction
// Each distinct SGPR is counted once.
3963 if (SGPRsUsed.insert(LastSGPR).second) {
3964 ++ConstantBusUseCount;
3965 }
3966 } else { // Expression or a literal
3967
3968 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3969 continue; // special operand like VINTERP attr_chan
3970
3971 // An instruction may use only one literal.
3972 // This has been validated on the previous step.
3973 // See validateVOPLiteral.
3974 // This literal may be used as more than one operand.
3975 // If all these operands are of the same size,
3976 // this literal counts as one scalar value.
3977 // Otherwise it counts as 2 scalar values.
3978 // See "GFX10 Shader Programming", section 3.6.2.3.
3979
// NOTE(review): listing line 3980, which declares `Size`, is missing from
// this copy; presumably it is the operand's size in bytes - restore it from
// the upstream file.
3981 if (Size < 4)
3982 Size = 4;
3983
3984 if (NumLiterals == 0) {
3985 NumLiterals = 1;
3986 LiteralSize = Size;
3987 } else if (LiteralSize != Size) {
3988 NumLiterals = 2;
3989 }
3990 }
3991 }
3992
// The limit is checked after every operand so that the error points at the
// first operand that exceeds it.
3993 if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
3994 Error(getOperandLoc(Operands, OpIdx),
3995 "invalid operand (violates constant bus restrictions)");
3996 return false;
3997 }
3998 }
3999 return true;
4000}
4001
4002std::optional<unsigned>
4003AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
4004
4005 const unsigned Opcode = Inst.getOpcode();
4006 if (!isVOPD(Opcode))
4007 return {};
4008
4009 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4010
4011 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
4012 const MCOperand &Opr = Inst.getOperand(OperandIdx);
4013 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
4014 ? Opr.getReg()
4015 : MCRegister();
4016 };
4017
4018 // On GFX1170+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
4019 // source-cache.
4020 bool SkipSrc =
4021 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1170 ||
4022 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
4023 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
4024 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx13 ||
4025 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250 ||
4026 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx13;
4027 bool AllowSameVGPR = isGFX12Plus();
4028
4029 if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
4030 for (auto OpName : {OpName::src0X, OpName::src0Y}) {
4031 int I = getNamedOperandIdx(Opcode, OpName);
4032 const MCOperand &Op = Inst.getOperand(I);
4033 if (!Op.isImm())
4034 continue;
4035 int64_t Imm = Op.getImm();
4036 if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
4037 !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
4038 return (unsigned)I;
4039 }
4040
4041 for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
4042 OpName::vsrc2Y, OpName::imm}) {
4043 int I = getNamedOperandIdx(Opcode, OpName);
4044 if (I == -1)
4045 continue;
4046 const MCOperand &Op = Inst.getOperand(I);
4047 if (Op.isImm())
4048 return (unsigned)I;
4049 }
4050 }
4051
4052 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4053 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
4054 getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
4055
4056 return InvalidCompOprIdx;
4057}
4058
4059bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
4060 const OperandVector &Operands) {
4061
4062 unsigned Opcode = Inst.getOpcode();
4063 bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
4064
4065 if (AsVOPD3) {
4066 for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
4067 AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
4068 if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
4069 (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
4070 Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
4071 }
4072 }
4073
4074 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
4075 if (!InvalidCompOprIdx.has_value())
4076 return true;
4077
4078 auto CompOprIdx = *InvalidCompOprIdx;
4079 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4080 auto ParsedIdx =
4081 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
4082 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
4083 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
4084
4085 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4086 if (CompOprIdx == VOPD::Component::DST) {
4087 if (AsVOPD3)
4088 Error(Loc, "dst registers must be distinct");
4089 else
4090 Error(Loc, "one dst register must be even and the other odd");
4091 } else {
4092 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4093 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
4094 " operands must use different VGPR banks");
4095 }
4096
4097 return false;
4098}
4099
4100// \returns true if \p Inst does not satisfy VOPD constraints, but can be
4101// potentially used as VOPD3 with the same operands.
4102bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4103 // First check if it fits VOPD
4104 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
4105 if (!InvalidCompOprIdx.has_value())
4106 return false;
4107
4108 // Then if it fits VOPD3
4109 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
4110 if (InvalidCompOprIdx.has_value()) {
4111 // If failed operand is dst it is better to show error about VOPD3
4112 // instruction as it has more capabilities and error message will be
4113 // more informative. If the dst is not legal for VOPD3, then it is not
4114 // legal for VOPD either.
4115 if (*InvalidCompOprIdx == VOPD::Component::DST)
4116 return true;
4117
4118 // Otherwise prefer VOPD as we may find ourselves in an awkward situation
4119 // with a conflict in tied implicit src2 of fmac and no asm operand to
4120 // to point to.
4121 return false;
4122 }
4123 return true;
4124}
4125
4126// \returns true is a VOPD3 instruction can be also represented as a shorter
4127// VOPD encoding.
4128bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
4129 const unsigned Opcode = Inst.getOpcode();
4130 const auto &II = getVOPDInstInfo(Opcode, &MII);
4131 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
4132 if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
4133 !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
4134 return false;
4135
4136 // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
4137 // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
4138 // be parsed as VOPD which does not accept src2.
4139 if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4140 II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4141 return false;
4142
4143 // If any modifiers are set this cannot be VOPD.
4144 for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4145 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4146 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4147 int I = getNamedOperandIdx(Opcode, OpName);
4148 if (I == -1)
4149 continue;
4150 if (Inst.getOperand(I).getImm())
4151 return false;
4152 }
4153
4154 return !tryVOPD3(Inst);
4155}
4156
4157// VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD
4158// form but switch to VOPD3 otherwise.
4159bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4160 const unsigned Opcode = Inst.getOpcode();
4161 if (!isGFX1250Plus() || !isVOPD(Opcode))
4162 return false;
4163
4164 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4165 return tryVOPD(Inst);
4166 return tryVOPD3(Inst);
4167}
4168
4169bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4170
4171 const unsigned Opc = Inst.getOpcode();
4172 const MCInstrDesc &Desc = MII.get(Opc);
4173
4174 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4175 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4176 assert(ClampIdx != -1);
4177 return Inst.getOperand(ClampIdx).getImm() == 0;
4178 }
4179
4180 return true;
4181}
4182
4185
4186bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) {
4187
4188 const unsigned Opc = Inst.getOpcode();
4189 const MCInstrDesc &Desc = MII.get(Opc);
4190
4191 if ((Desc.TSFlags & MIMGFlags) == 0)
4192 return true;
4193
4194 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
4195 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4196 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
4197
4198 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
4199 return true;
4200
4201 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
4202 return true;
4203
4204 unsigned VDataSize = getRegOperandSize(Desc, VDataIdx);
4205 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
4206 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4207 if (DMask == 0)
4208 DMask = 1;
4209
4210 bool IsPackedD16 = false;
4211 unsigned DataSize =
4212 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
4213 if (hasPackedD16()) {
4214 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4215 IsPackedD16 = D16Idx >= 0;
4216 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
4217 DataSize = (DataSize + 1) / 2;
4218 }
4219
4220 if ((VDataSize / 4) == DataSize + TFESize)
4221 return true;
4222
4223 StringRef Modifiers;
4224 if (isGFX90A())
4225 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
4226 else
4227 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
4228
4229 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
4230 return false;
4231}
4232
4233bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
4234 const unsigned Opc = Inst.getOpcode();
4235 const MCInstrDesc &Desc = MII.get(Opc);
4236
4237 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
4238 return true;
4239
4240 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4241
4242 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4244 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
4245 AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
4246 ? AMDGPU::OpName::srsrc
4247 : AMDGPU::OpName::rsrc;
4248 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
4249 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4250 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
4251
4252 assert(VAddr0Idx != -1);
4253 assert(SrsrcIdx != -1);
4254 assert(SrsrcIdx > VAddr0Idx);
4255
4256 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
4257 if (BaseOpcode->BVH) {
4258 if (IsA16 == BaseOpcode->A16)
4259 return true;
4260 Error(IDLoc, "image address size does not match a16");
4261 return false;
4262 }
4263
4264 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4265 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4266 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4267 unsigned ActualAddrSize =
4268 IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(Desc, VAddr0Idx) / 4;
4269
4270 unsigned ExpectedAddrSize =
4271 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
4272
4273 if (IsNSA) {
4274 if (hasPartialNSAEncoding() &&
4275 ExpectedAddrSize >
4277 int VAddrLastIdx = SrsrcIdx - 1;
4278 unsigned VAddrLastSize = getRegOperandSize(Desc, VAddrLastIdx) / 4;
4279
4280 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4281 }
4282 } else {
4283 if (ExpectedAddrSize > 12)
4284 ExpectedAddrSize = 16;
4285
4286 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
4287 // This provides backward compatibility for assembly created
4288 // before 160b/192b/224b types were directly supported.
4289 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4290 return true;
4291 }
4292
4293 if (ActualAddrSize == ExpectedAddrSize)
4294 return true;
4295
4296 Error(IDLoc, "image address size does not match dim and a16");
4297 return false;
4298}
4299
4300bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4301
4302 const unsigned Opc = Inst.getOpcode();
4303 const MCInstrDesc &Desc = MII.get(Opc);
4304
4305 if ((Desc.TSFlags & MIMGFlags) == 0)
4306 return true;
4307 if (!Desc.mayLoad() || !Desc.mayStore())
4308 return true; // Not atomic
4309
4310 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4311 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4312
4313 // This is an incomplete check because image_atomic_cmpswap
4314 // may only use 0x3 and 0xf while other atomic operations
4315 // may use 0x1 and 0x3. However these limitations are
4316 // verified when we check that dmask matches dst size.
4317 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4318}
4319
4320bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4321
4322 const unsigned Opc = Inst.getOpcode();
4323 const MCInstrDesc &Desc = MII.get(Opc);
4324
4325 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4326 return true;
4327
4328 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4329 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4330
4331 // GATHER4 instructions use dmask in a different fashion compared to
4332 // other MIMG instructions. The only useful DMASK values are
4333 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4334 // (red,red,red,red) etc.) The ISA document doesn't mention
4335 // this.
4336 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4337}
4338
4339bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4340 const OperandVector &Operands) {
4341 if (!isGFX10Plus())
4342 return true;
4343
4344 const unsigned Opc = Inst.getOpcode();
4345 const MCInstrDesc &Desc = MII.get(Opc);
4346
4347 if ((Desc.TSFlags & MIMGFlags) == 0)
4348 return true;
4349
4350 // image_bvh_intersect_ray instructions do not have dim
4352 return true;
4353
4354 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4355 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4356 if (Op.isDim())
4357 return true;
4358 }
4359 return false;
4360}
4361
4362bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4363 const unsigned Opc = Inst.getOpcode();
4364 const MCInstrDesc &Desc = MII.get(Opc);
4365
4366 if ((Desc.TSFlags & MIMGFlags) == 0)
4367 return true;
4368
4369 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4370 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4372
4373 if (!BaseOpcode->MSAA)
4374 return true;
4375
4376 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4377 assert(DimIdx != -1);
4378
4379 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4380 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4381
4382 return DimInfo->MSAA;
4383}
4384
4385static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4386{
4387 switch (Opcode) {
4388 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4389 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4390 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4391 return true;
4392 default:
4393 return false;
4394 }
4395}
4396
4397// movrels* opcodes should only allow VGPRS as src0.
4398// This is specified in .td description for vop1/vop3,
4399// but sdwa is handled differently. See isSDWAOperand.
4400bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4401 const OperandVector &Operands) {
4402
4403 const unsigned Opc = Inst.getOpcode();
4404 const MCInstrDesc &Desc = MII.get(Opc);
4405
4406 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4407 return true;
4408
4409 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4410 assert(Src0Idx != -1);
4411
4412 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4413 if (Src0.isReg()) {
4414 auto Reg = mc2PseudoReg(Src0.getReg());
4415 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4416 if (!isSGPR(Reg, TRI))
4417 return true;
4418 }
4419
4420 Error(getOperandLoc(Operands, Src0Idx), "source operand must be a VGPR");
4421 return false;
4422}
4423
4424bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4425 const OperandVector &Operands) {
4426
4427 const unsigned Opc = Inst.getOpcode();
4428
4429 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4430 return true;
4431
4432 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4433 assert(Src0Idx != -1);
4434
4435 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4436 if (!Src0.isReg())
4437 return true;
4438
4439 auto Reg = mc2PseudoReg(Src0.getReg());
4440 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4441 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4442 Error(getOperandLoc(Operands, Src0Idx),
4443 "source operand must be either a VGPR or an inline constant");
4444 return false;
4445 }
4446
4447 return true;
4448}
4449
4450bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4451 const OperandVector &Operands) {
4452 unsigned Opcode = Inst.getOpcode();
4453 const MCInstrDesc &Desc = MII.get(Opcode);
4454
4455 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4456 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4457 return true;
4458
4459 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4460 if (Src2Idx == -1)
4461 return true;
4462
4463 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4464 Error(getOperandLoc(Operands, Src2Idx),
4465 "inline constants are not allowed for this operand");
4466 return false;
4467 }
4468
4469 return true;
4470}
4471
4472bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4473 const OperandVector &Operands) {
4474 const unsigned Opc = Inst.getOpcode();
4475 const MCInstrDesc &Desc = MII.get(Opc);
4476
4477 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4478 return true;
4479
4480 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4481 if (BlgpIdx != -1) {
4482 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
4483 int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
4484
4485 unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
4486 unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();
4487
4488 // Validate the correct register size was used for the floating point
4489 // format operands
4490
4491 bool Success = true;
4492 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
4493 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4494 Error(getOperandLoc(Operands, Src0Idx),
4495 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4496 Success = false;
4497 }
4498
4499 if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
4500 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4501 Error(getOperandLoc(Operands, Src1Idx),
4502 "wrong register tuple size for blgp value " + Twine(BLGP));
4503 Success = false;
4504 }
4505
4506 return Success;
4507 }
4508 }
4509
4510 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4511 if (Src2Idx == -1)
4512 return true;
4513
4514 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4515 if (!Src2.isReg())
4516 return true;
4517
4518 MCRegister Src2Reg = Src2.getReg();
4519 MCRegister DstReg = Inst.getOperand(0).getReg();
4520 if (Src2Reg == DstReg)
4521 return true;
4522
4523 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4524 if (TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[0], HwMode))
4525 .getSizeInBits() <= 128)
4526 return true;
4527
4528 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4529 Error(getOperandLoc(Operands, Src2Idx),
4530 "source 2 operand must not partially overlap with dst");
4531 return false;
4532 }
4533
4534 return true;
4535}
4536
4537bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4538 switch (Inst.getOpcode()) {
4539 default:
4540 return true;
4541 case V_DIV_SCALE_F32_gfx6_gfx7:
4542 case V_DIV_SCALE_F32_vi:
4543 case V_DIV_SCALE_F32_gfx10:
4544 case V_DIV_SCALE_F64_gfx6_gfx7:
4545 case V_DIV_SCALE_F64_vi:
4546 case V_DIV_SCALE_F64_gfx10:
4547 break;
4548 }
4549
4550 // TODO: Check that src0 = src1 or src2.
4551
4552 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4553 AMDGPU::OpName::src2_modifiers,
4554 AMDGPU::OpName::src2_modifiers}) {
4555 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4556 .getImm() &
4558 return false;
4559 }
4560 }
4561
4562 return true;
4563}
4564
4565bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4566
4567 const unsigned Opc = Inst.getOpcode();
4568 const MCInstrDesc &Desc = MII.get(Opc);
4569
4570 if ((Desc.TSFlags & MIMGFlags) == 0)
4571 return true;
4572
4573 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4574 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4575 if (isCI() || isSI())
4576 return false;
4577 }
4578
4579 return true;
4580}
4581
4582bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4583 const unsigned Opc = Inst.getOpcode();
4584 const MCInstrDesc &Desc = MII.get(Opc);
4585
4586 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4587 return true;
4588
4589 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4590
4591 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4592}
4593
4594static bool IsRevOpcode(const unsigned Opcode)
4595{
4596 switch (Opcode) {
4597 case AMDGPU::V_SUBREV_F32_e32:
4598 case AMDGPU::V_SUBREV_F32_e64:
4599 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4600 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4601 case AMDGPU::V_SUBREV_F32_e32_vi:
4602 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4603 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4604 case AMDGPU::V_SUBREV_F32_e64_vi:
4605
4606 case AMDGPU::V_SUBREV_CO_U32_e32:
4607 case AMDGPU::V_SUBREV_CO_U32_e64:
4608 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4609 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4610
4611 case AMDGPU::V_SUBBREV_U32_e32:
4612 case AMDGPU::V_SUBBREV_U32_e64:
4613 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4614 case AMDGPU::V_SUBBREV_U32_e32_vi:
4615 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4616 case AMDGPU::V_SUBBREV_U32_e64_vi:
4617
4618 case AMDGPU::V_SUBREV_U32_e32:
4619 case AMDGPU::V_SUBREV_U32_e64:
4620 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4621 case AMDGPU::V_SUBREV_U32_e32_vi:
4622 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4623 case AMDGPU::V_SUBREV_U32_e64_vi:
4624
4625 case AMDGPU::V_SUBREV_F16_e32:
4626 case AMDGPU::V_SUBREV_F16_e64:
4627 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4628 case AMDGPU::V_SUBREV_F16_e32_vi:
4629 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4630 case AMDGPU::V_SUBREV_F16_e64_vi:
4631
4632 case AMDGPU::V_SUBREV_U16_e32:
4633 case AMDGPU::V_SUBREV_U16_e64:
4634 case AMDGPU::V_SUBREV_U16_e32_vi:
4635 case AMDGPU::V_SUBREV_U16_e64_vi:
4636
4637 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4638 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4639 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4640
4641 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4642 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4643
4644 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4645 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4646
4647 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4648 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4649
4650 case AMDGPU::V_LSHRREV_B32_e32:
4651 case AMDGPU::V_LSHRREV_B32_e64:
4652 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4653 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4654 case AMDGPU::V_LSHRREV_B32_e32_vi:
4655 case AMDGPU::V_LSHRREV_B32_e64_vi:
4656 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4657 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4658
4659 case AMDGPU::V_ASHRREV_I32_e32:
4660 case AMDGPU::V_ASHRREV_I32_e64:
4661 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4662 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4663 case AMDGPU::V_ASHRREV_I32_e32_vi:
4664 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4665 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4666 case AMDGPU::V_ASHRREV_I32_e64_vi:
4667
4668 case AMDGPU::V_LSHLREV_B32_e32:
4669 case AMDGPU::V_LSHLREV_B32_e64:
4670 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4671 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4672 case AMDGPU::V_LSHLREV_B32_e32_vi:
4673 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4674 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4675 case AMDGPU::V_LSHLREV_B32_e64_vi:
4676
4677 case AMDGPU::V_LSHLREV_B16_e32:
4678 case AMDGPU::V_LSHLREV_B16_e64:
4679 case AMDGPU::V_LSHLREV_B16_e32_vi:
4680 case AMDGPU::V_LSHLREV_B16_e64_vi:
4681 case AMDGPU::V_LSHLREV_B16_gfx10:
4682
4683 case AMDGPU::V_LSHRREV_B16_e32:
4684 case AMDGPU::V_LSHRREV_B16_e64:
4685 case AMDGPU::V_LSHRREV_B16_e32_vi:
4686 case AMDGPU::V_LSHRREV_B16_e64_vi:
4687 case AMDGPU::V_LSHRREV_B16_gfx10:
4688
4689 case AMDGPU::V_ASHRREV_I16_e32:
4690 case AMDGPU::V_ASHRREV_I16_e64:
4691 case AMDGPU::V_ASHRREV_I16_e32_vi:
4692 case AMDGPU::V_ASHRREV_I16_e64_vi:
4693 case AMDGPU::V_ASHRREV_I16_gfx10:
4694
4695 case AMDGPU::V_LSHLREV_B64_e64:
4696 case AMDGPU::V_LSHLREV_B64_gfx10:
4697 case AMDGPU::V_LSHLREV_B64_vi:
4698
4699 case AMDGPU::V_LSHRREV_B64_e64:
4700 case AMDGPU::V_LSHRREV_B64_gfx10:
4701 case AMDGPU::V_LSHRREV_B64_vi:
4702
4703 case AMDGPU::V_ASHRREV_I64_e64:
4704 case AMDGPU::V_ASHRREV_I64_gfx10:
4705 case AMDGPU::V_ASHRREV_I64_vi:
4706
4707 case AMDGPU::V_PK_LSHLREV_B16:
4708 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4709 case AMDGPU::V_PK_LSHLREV_B16_vi:
4710
4711 case AMDGPU::V_PK_LSHRREV_B16:
4712 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4713 case AMDGPU::V_PK_LSHRREV_B16_vi:
4714 case AMDGPU::V_PK_ASHRREV_I16:
4715 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4716 case AMDGPU::V_PK_ASHRREV_I16_vi:
4717 return true;
4718 default:
4719 return false;
4720 }
4721}
4722
4723bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
4724 const OperandVector &Operands) {
4725 using namespace SIInstrFlags;
4726 const unsigned Opcode = Inst.getOpcode();
4727 const MCInstrDesc &Desc = MII.get(Opcode);
4728
4729 // lds_direct register is defined so that it can be used
4730 // with 9-bit operands only. Ignore encodings which do not accept these.
4731 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4732 if ((Desc.TSFlags & Enc) == 0)
4733 return true;
4734
4735 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4736 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4737 if (SrcIdx == -1)
4738 break;
4739 const auto &Src = Inst.getOperand(SrcIdx);
4740 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4741
4742 if (isGFX90A() || isGFX11Plus()) {
4743 Error(getOperandLoc(Operands, SrcIdx),
4744 "lds_direct is not supported on this GPU");
4745 return false;
4746 }
4747
4748 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) {
4749 Error(getOperandLoc(Operands, SrcIdx),
4750 "lds_direct cannot be used with this instruction");
4751 return false;
4752 }
4753
4754 if (SrcName != OpName::src0) {
4755 Error(getOperandLoc(Operands, SrcIdx),
4756 "lds_direct may be used as src0 only");
4757 return false;
4758 }
4759 }
4760 }
4761
4762 return true;
4763}
4764
4765SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4766 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4767 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4768 if (Op.isFlatOffset())
4769 return Op.getStartLoc();
4770 }
4771 return getLoc();
4772}
4773
4774bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4775 const OperandVector &Operands) {
4776 auto Opcode = Inst.getOpcode();
4777 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4778 if (OpNum == -1)
4779 return true;
4780
4781 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4782 if ((TSFlags & SIInstrFlags::FLAT))
4783 return validateFlatOffset(Inst, Operands);
4784
4785 if ((TSFlags & SIInstrFlags::SMRD))
4786 return validateSMEMOffset(Inst, Operands);
4787
4788 const auto &Op = Inst.getOperand(OpNum);
4789 // GFX12+ buffer ops: InstOffset is signed 24, but must not be a negative.
4790 if (isGFX12Plus() &&
4791 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4792 const unsigned OffsetSize = 24;
4793 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
4794 Error(getFlatOffsetLoc(Operands),
4795 Twine("expected a ") + Twine(OffsetSize - 1) +
4796 "-bit unsigned offset for buffer ops");
4797 return false;
4798 }
4799 } else {
4800 const unsigned OffsetSize = 16;
4801 if (!isUIntN(OffsetSize, Op.getImm())) {
4802 Error(getFlatOffsetLoc(Operands),
4803 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4804 return false;
4805 }
4806 }
4807 return true;
4808}
4809
4810bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4811 const OperandVector &Operands) {
4812 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4813 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4814 return true;
4815
4816 auto Opcode = Inst.getOpcode();
4817 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4818 assert(OpNum != -1);
4819
4820 const auto &Op = Inst.getOperand(OpNum);
4821 if (!hasFlatOffsets() && Op.getImm() != 0) {
4822 Error(getFlatOffsetLoc(Operands),
4823 "flat offset modifier is not supported on this GPU");
4824 return false;
4825 }
4826
4827 // For pre-GFX12 FLAT instructions the offset must be positive;
4828 // MSB is ignored and forced to zero.
4829 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4830 bool AllowNegative =
4832 isGFX12Plus();
4833 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4834 Error(getFlatOffsetLoc(Operands),
4835 Twine("expected a ") +
4836 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4837 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4838 return false;
4839 }
4840
4841 return true;
4842}
4843
4844SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4845 // Start with second operand because SMEM Offset cannot be dst or src0.
4846 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4847 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4848 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4849 return Op.getStartLoc();
4850 }
4851 return getLoc();
4852}
4853
4854bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4855 const OperandVector &Operands) {
4856 if (isCI() || isSI())
4857 return true;
4858
4859 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4860 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4861 return true;
4862
4863 auto Opcode = Inst.getOpcode();
4864 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4865 if (OpNum == -1)
4866 return true;
4867
4868 const auto &Op = Inst.getOperand(OpNum);
4869 if (!Op.isImm())
4870 return true;
4871
4872 uint64_t Offset = Op.getImm();
4873 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4876 return true;
4877
4878 Error(getSMEMOffsetLoc(Operands),
4879 isGFX12Plus() && IsBuffer
4880 ? "expected a 23-bit unsigned offset for buffer ops"
4881 : isGFX12Plus() ? "expected a 24-bit signed offset"
4882 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4883 : "expected a 21-bit signed offset");
4884
4885 return false;
4886}
4887
4888bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
4889 const OperandVector &Operands) {
4890 unsigned Opcode = Inst.getOpcode();
4891 const MCInstrDesc &Desc = MII.get(Opcode);
4892 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4893 return true;
4894
4895 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4896 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4897
4898 const int OpIndices[] = { Src0Idx, Src1Idx };
4899
4900 unsigned NumExprs = 0;
4901 unsigned NumLiterals = 0;
4902 int64_t LiteralValue;
4903
4904 for (int OpIdx : OpIndices) {
4905 if (OpIdx == -1) break;
4906
4907 const MCOperand &MO = Inst.getOperand(OpIdx);
4908 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4910 bool IsLit = false;
4911 std::optional<int64_t> Imm;
4912 if (MO.isImm()) {
4913 Imm = MO.getImm();
4914 } else if (MO.isExpr()) {
4915 if (isLitExpr(MO.getExpr())) {
4916 IsLit = true;
4917 Imm = getLitValue(MO.getExpr());
4918 }
4919 } else {
4920 continue;
4921 }
4922
4923 if (!Imm.has_value()) {
4924 ++NumExprs;
4925 } else if (!isInlineConstant(Inst, OpIdx)) {
4926 auto OpType = static_cast<AMDGPU::OperandType>(
4927 Desc.operands()[OpIdx].OperandType);
4928 int64_t Value = encode32BitLiteral(*Imm, OpType, IsLit);
4929 if (NumLiterals == 0 || LiteralValue != Value) {
4931 ++NumLiterals;
4932 }
4933 }
4934 }
4935 }
4936
4937 if (NumLiterals + NumExprs <= 1)
4938 return true;
4939
4940 Error(getOperandLoc(Operands, Src1Idx),
4941 "only one unique literal operand is allowed");
4942 return false;
4943}
4944
4945bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4946 const unsigned Opc = Inst.getOpcode();
4947 if (isPermlane16(Opc)) {
4948 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4949 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4950
4951 if (OpSel & ~3)
4952 return false;
4953 }
4954
4955 uint64_t TSFlags = MII.get(Opc).TSFlags;
4956
4957 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4958 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4959 if (OpSelIdx != -1) {
4960 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4961 return false;
4962 }
4963 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4964 if (OpSelHiIdx != -1) {
4965 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4966 return false;
4967 }
4968 }
4969
4970 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4971 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4972 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4973 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4974 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4975 if (OpSel & 3)
4976 return false;
4977 }
4978
4979 // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
4980 // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
4981 // the first SGPR and use it for both the low and high operations.
4982 if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
4983 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4984 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4985 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4986 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4987
4988 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4989 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4990 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4991 unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
4992
4993 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4994
4995 auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
4996 unsigned Mask = 1U << Index;
4997 return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
4998 };
4999
5000 if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
5001 !VerifyOneSGPR(/*Index=*/0))
5002 return false;
5003 if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
5004 !VerifyOneSGPR(/*Index=*/1))
5005 return false;
5006
5007 int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
5008 if (Src2Idx != -1) {
5009 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
5010 if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
5011 !VerifyOneSGPR(/*Index=*/2))
5012 return false;
5013 }
5014 }
5015
5016 return true;
5017}
5018
5019bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
5020 if (!hasTrue16Insts())
5021 return true;
5022 const MCRegisterInfo *MRI = getMRI();
5023 const unsigned Opc = Inst.getOpcode();
5024 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5025 if (OpSelIdx == -1)
5026 return true;
5027 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
5028 // If the value is 0 we could have a default OpSel Operand, so conservatively
5029 // allow it.
5030 if (OpSelOpValue == 0)
5031 return true;
5032 unsigned OpCount = 0;
5033 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
5034 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
5035 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
5036 if (OpIdx == -1)
5037 continue;
5038 const MCOperand &Op = Inst.getOperand(OpIdx);
5039 if (Op.isReg() &&
5040 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
5041 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
5042 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
5043 if (OpSelOpIsHi != VGPRSuffixIsHi)
5044 return false;
5045 }
5046 ++OpCount;
5047 }
5048
5049 return true;
5050}
5051
5052bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
5053 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
5054
5055 const unsigned Opc = Inst.getOpcode();
5056 uint64_t TSFlags = MII.get(Opc).TSFlags;
5057
5058 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
5059 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
5060 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
5061 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
5062 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
5063 !(TSFlags & SIInstrFlags::IsSWMMAC))
5064 return true;
5065
5066 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
5067 if (NegIdx == -1)
5068 return true;
5069
5070 unsigned Neg = Inst.getOperand(NegIdx).getImm();
5071
5072 // Instructions that have neg_lo or neg_hi operand but neg modifier is allowed
5073 // on some src operands but not allowed on other.
5074 // It is convenient that such instructions don't have src_modifiers operand
5075 // for src operands that don't allow neg because they also don't allow opsel.
5076
5077 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
5078 AMDGPU::OpName::src1_modifiers,
5079 AMDGPU::OpName::src2_modifiers};
5080
5081 for (unsigned i = 0; i < 3; ++i) {
5082 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
5083 if (Neg & (1 << i))
5084 return false;
5085 }
5086 }
5087
5088 return true;
5089}
5090
5091bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
5092 const OperandVector &Operands) {
5093 const unsigned Opc = Inst.getOpcode();
5094 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
5095 if (DppCtrlIdx >= 0) {
5096 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
5097
5098 if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
5099 AMDGPU::isDPALU_DPP(MII.get(Opc), MII, getSTI())) {
5100 // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
5101 // only on GFX12.
5102 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5103 Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
5104 : "DP ALU dpp only supports row_newbcast");
5105 return false;
5106 }
5107 }
5108
5109 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
5110 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5111
5112 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
5113 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5114 if (Src1Idx >= 0) {
5115 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5116 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5117 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
5118 Error(getOperandLoc(Operands, Src1Idx),
5119 "invalid operand for instruction");
5120 return false;
5121 }
5122 if (Src1.isImm()) {
5123 Error(getInstLoc(Operands),
5124 "src1 immediate operand invalid for instruction");
5125 return false;
5126 }
5127 }
5128 }
5129
5130 return true;
5131}
5132
5133// Check if VCC register matches wavefront size
5134bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5135 return (Reg == AMDGPU::VCC && isWave64()) ||
5136 (Reg == AMDGPU::VCC_LO && isWave32());
5137}
5138
// Validate the literal (non-inline-constant) source operands of Inst.
// Reports a diagnostic through Error() and returns false on the first
// violation; returns true when the instruction is acceptable or when the
// check does not apply.
// NOTE(review): this listing is missing lines 5180 and 5208 (see the gaps in
// the embedded numbering below); the comments here describe only what is
// visible.
5139// One unique literal can be used. VOP3 literal is only allowed in GFX10+
5140bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
5141 const OperandVector &Operands) {
5142 unsigned Opcode = Inst.getOpcode();
5143 const MCInstrDesc &Desc = MII.get(Opcode);
// An instruction with a named 'imm' operand is treated as having a mandatory
// literal.
5144 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
// Only VOP3/VOP3P encodings, VOPD opcodes and mandatory-literal instructions
// are checked here.
5145 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
5146 !HasMandatoryLiteral && !isVOPD(Opcode))
5147 return true;
5148
5149 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
5150
// Index of the most recent operand counted as a new literal, and the literal
// value that later operands are compared against.
5151 std::optional<unsigned> LiteralOpIdx;
5152 std::optional<uint64_t> LiteralValue;
5153
5154 for (int OpIdx : OpIndices) {
// -1 marks a source slot the opcode does not have.
5155 if (OpIdx == -1)
5156 continue;
5157
// Only immediate or expression operands in SI source-operand positions can be
// literals.
5158 const MCOperand &MO = Inst.getOperand(OpIdx);
5159 if (!MO.isImm() && !MO.isExpr())
5160 continue;
5161 if (!isSISrcOperand(Desc, OpIdx))
5162 continue;
5163
// Imm stays empty for expressions that are not lit expressions.
5164 std::optional<int64_t> Imm;
5165 if (MO.isImm())
5166 Imm = MO.getImm();
5167 else if (MO.isExpr() && isLitExpr(MO.getExpr()))
5168 Imm = getLitValue(MO.getExpr());
5169
5170 bool IsAnotherLiteral = false;
5171 bool IsForcedLit = findMCOperand(Operands, OpIdx).isForcedLit();
5172 bool IsForcedLit64 = findMCOperand(Operands, OpIdx).isForcedLit64();
5173 if (!Imm.has_value()) {
5174 // Literal value not known, so we conservatively assume it's different.
5175 IsAnotherLiteral = true;
5176 } else if (IsForcedLit || IsForcedLit64 || !isInlineConstant(Inst, OpIdx)) {
5177 uint64_t Value = *Imm;
// NOTE(review): listing line 5180 is missing between the next two lines, so
// the full IsForcedFP64 condition cannot be read from this view.
5178 bool IsForcedFP64 =
5179 Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
5181 HasMandatoryLiteral);
5182 bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
5183 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
5184 bool IsValid32Op =
5185 IsForcedLit || AMDGPU::isValid32BitLiteral(Value, IsFP64);
5186
// Reject a value that does not fit a 32-bit literal (or an explicit lit64
// without a mandatory literal) unless the subtarget has 64-bit literals and
// Desc.getSize() is 4.
5187 if (((!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
5188 !IsForcedFP64) ||
5189 (IsForcedLit64 && !HasMandatoryLiteral)) &&
5190 (!has64BitLiterals() || Desc.getSize() != 4)) {
5191 Error(getOperandLoc(Operands, OpIdx),
5192 "invalid operand for instruction");
5193 return false;
5194 }
5195
5196 // Only src0 can use lit64 in VOP* encoding.
5197 if (!IsForcedFP64 && (IsForcedLit64 || !IsValid32Op) &&
5198 OpIdx != getNamedOperandIdx(Opcode, OpName::src0)) {
5199 Error(getOperandLoc(Operands, OpIdx),
5200 "invalid operand for instruction");
5201 return false;
5202 }
5203
// For an FP64 operand whose literal is valid as a 32-bit literal, only the
// high 32 bits take part in the uniqueness comparison.
5204 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5205 Value = Hi_32(Value);
5206
5207 IsAnotherLiteral = !LiteralValue || *LiteralValue != Value;
// NOTE(review): listing line 5208 is missing here. No visible line assigns
// LiteralValue; presumably the missing line records Value - confirm against
// the real source.
5209 }
5210
// Any literal is rejected when the subtarget lacks FeatureVOP3Literal, unless
// the instruction has a mandatory literal.
5211 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5212 !getFeatureBits()[FeatureVOP3Literal]) {
5213 Error(getOperandLoc(Operands, OpIdx),
5214 "literal operands are not supported");
5215 return false;
5216 }
5217
// A second, different literal is an error; it is reported at whichever of the
// two operand locations getLaterLoc selects.
5218 if (LiteralOpIdx && IsAnotherLiteral) {
5219 Error(getLaterLoc(getOperandLoc(Operands, OpIdx),
5220 getOperandLoc(Operands, *LiteralOpIdx)),
5221 "only one unique literal operand is allowed");
5222 return false;
5223 }
5224
5225 if (IsAnotherLiteral)
5226 LiteralOpIdx = OpIdx;
5227 }
5228
5229 return true;
5230}
5231
5232// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5233static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5234 const MCRegisterInfo *MRI) {
5235 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5236 if (OpIdx < 0)
5237 return -1;
5238
5239 const MCOperand &Op = Inst.getOperand(OpIdx);
5240 if (!Op.isReg())
5241 return -1;
5242
5243 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5244 auto Reg = Sub ? Sub : Op.getReg();
5245 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5246 return AGPR32.contains(Reg) ? 1 : 0;
5247}
5248
// Check the register kinds (VGPR vs AGPR) used by the data and destination
// operands of a load/store style instruction. Returns true when the
// combination is acceptable or the instruction is not one of the checked
// kinds; emits no diagnostic itself.
// NOTE(review): listing line 5252 is missing from the flag mask below, so the
// exact set of instruction kinds checked cannot be read from this view.
5249bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5250 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5251 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
5253 SIInstrFlags::DS)) == 0)
5254 return true;
5255
// DS instructions name their data operand data0; the others use vdata.
5256 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5257 ? AMDGPU::OpName::data0
5258 : AMDGPU::OpName::vdata;
5259
// IsAGPROperand yields -1 (operand absent or not a register), 0 (not an
// AGPR) or 1 (AGPR).
5260 const MCRegisterInfo *MRI = getMRI();
5261 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5262 int DataAreg = IsAGPROperand(Inst, DataName, MRI);
5263
// For DS instructions with a register data0, a register data1 must be of the
// same kind as data0.
5264 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
5265 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5266 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5267 return false;
5268 }
5269
// With FeatureGFX90AInsts: if both dst and data are registers they must be of
// the same kind; if either is absent there is nothing to compare.
5270 auto FB = getFeatureBits();
5271 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5272 if (DataAreg < 0 || DstAreg < 0)
5273 return true;
5274 return DstAreg == DataAreg;
5275 }
5276
// Without that feature, neither dst nor data may be an AGPR.
5277 return DstAreg < 1 && DataAreg < 1;
5278}
5279
5280bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5281 auto FB = getFeatureBits();
5282 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5283 return true;
5284
5285 unsigned Opc = Inst.getOpcode();
5286 const MCRegisterInfo *MRI = getMRI();
5287 // DS_READ_B96_TR_B6 is the only DS instruction in GFX950, that allows
5288 // unaligned VGPR. All others only allow even aligned VGPRs.
5289 if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5290 return true;
5291
5292 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5293 switch (Opc) {
5294 default:
5295 break;
5296 case AMDGPU::DS_LOAD_TR6_B96:
5297 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5298 // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250, that
5299 // allows unaligned VGPR. All others only allow even aligned VGPRs.
5300 return true;
5301 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5302 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5303 // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250, that
5304 // allows unaligned VGPR for vdst, but other operands still only allow
5305 // even aligned VGPRs.
5306 int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
5307 if (VAddrIdx != -1) {
5308 const MCOperand &Op = Inst.getOperand(VAddrIdx);
5309 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5310 if ((Sub - AMDGPU::VGPR0) & 1)
5311 return false;
5312 }
5313 return true;
5314 }
5315 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5316 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5317 return true;
5318 }
5319 }
5320
5321 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5322 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5323 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
5324 const MCOperand &Op = Inst.getOperand(I);
5325 if (!Op.isReg())
5326 continue;
5327
5328 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5329 if (!Sub)
5330 continue;
5331
5332 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
5333 return false;
5334 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
5335 return false;
5336 }
5337
5338 return true;
5339}
5340
5341SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5342 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5343 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5344 if (Op.isBLGP())
5345 return Op.getStartLoc();
5346 }
5347 return SMLoc();
5348}
5349
5350bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5351 const OperandVector &Operands) {
5352 unsigned Opc = Inst.getOpcode();
5353 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5354 if (BlgpIdx == -1)
5355 return true;
5356 SMLoc BLGPLoc = getBLGPLoc(Operands);
5357 if (!BLGPLoc.isValid())
5358 return true;
5359 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5360 auto FB = getFeatureBits();
5361 bool UsesNeg = false;
5362 if (FB[AMDGPU::FeatureGFX940Insts]) {
5363 switch (Opc) {
5364 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5365 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5366 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5367 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5368 UsesNeg = true;
5369 }
5370 }
5371
5372 if (IsNeg == UsesNeg)
5373 return true;
5374
5375 Error(BLGPLoc,
5376 UsesNeg ? "invalid modifier: blgp is not supported"
5377 : "invalid modifier: neg is not supported");
5378
5379 return false;
5380}
5381
5382bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5383 const OperandVector &Operands) {
5384 if (!isGFX11Plus())
5385 return true;
5386
5387 unsigned Opc = Inst.getOpcode();
5388 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5389 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5390 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5391 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5392 return true;
5393
5394 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5395 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5396 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5397 if (Reg == AMDGPU::SGPR_NULL)
5398 return true;
5399
5400 Error(getOperandLoc(Operands, Src0Idx), "src0 must be null");
5401 return false;
5402}
5403
5404bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5405 const OperandVector &Operands) {
5406 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5407 if ((TSFlags & SIInstrFlags::DS) == 0)
5408 return true;
5409 if (TSFlags & SIInstrFlags::GWS)
5410 return validateGWS(Inst, Operands);
5411 // Only validate GDS for non-GWS instructions.
5412 if (hasGDS())
5413 return true;
5414 int GDSIdx =
5415 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5416 if (GDSIdx < 0)
5417 return true;
5418 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5419 if (GDS) {
5420 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5421 Error(S, "gds modifier is not supported on this GPU");
5422 return false;
5423 }
5424 return true;
5425}
5426
5427// gfx90a has an undocumented limitation:
5428// DS_GWS opcodes must use even aligned registers.
5429bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5430 const OperandVector &Operands) {
5431 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5432 return true;
5433
5434 int Opc = Inst.getOpcode();
5435 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5436 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5437 return true;
5438
5439 const MCRegisterInfo *MRI = getMRI();
5440 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5441 int Data0Pos =
5442 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5443 assert(Data0Pos != -1);
5444 auto Reg = Inst.getOperand(Data0Pos).getReg();
5445 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5446 if (RegIdx & 1) {
5447 Error(getOperandLoc(Operands, Data0Pos), "vgpr must be even aligned");
5448 return false;
5449 }
5450
5451 return true;
5452}
5453
// Validate the cache-policy (cpol) operand of Inst. Returns true when the
// instruction has no cpol operand or the bits are acceptable; otherwise
// reports a diagnostic. On GFX12+ the remaining checks are delegated to
// validateTHAndScopeBits.
// NOTE(review): this listing is missing lines 5504-5505, 5516 and 5529 (see
// the gaps in the embedded numbering); the comments below describe only what
// is visible.
5454bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
5455 const OperandVector &Operands,
5456 SMLoc IDLoc) {
5457 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
5458 AMDGPU::OpName::cpol);
5459 if (CPolPos == -1)
5460 return true;
5461
5462 unsigned CPol = Inst.getOperand(CPolPos).getImm();
5463
// scale_offset and nv are diagnosed before GFX1250. In each case the error
// location is narrowed by searching the source text, starting at the cpol
// operand location, for the modifier's spelling.
// NOTE(review): these two diagnostics, and the scale_offset one that follows
// this block, are emitted without returning false here; validation continues
// afterwards - confirm this is intended.
5464 if (!isGFX1250Plus()) {
5465 if (CPol & CPol::SCAL) {
5466 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5467 StringRef CStr(S.getPointer());
5468 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5469 Error(S, "scale_offset is not supported on this GPU");
5470 }
5471 if (CPol & CPol::NV) {
5472 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5473 StringRef CStr(S.getPointer());
5474 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
5475 Error(S, "nv is not supported on this GPU");
5476 }
5477 }
5478
// scale_offset must also be supported by the specific opcode.
5479 if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
5480 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5481 StringRef CStr(S.getPointer());
5482 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5483 Error(S, "scale_offset is not supported for this instruction");
5484 }
5485
// GFX12+ uses the th/scope checks instead of everything below.
5486 if (isGFX12Plus())
5487 return validateTHAndScopeBits(Inst, Operands, CPol);
5488
// SMRD: no cache policy at all on SI/CI; elsewhere only GLC and DLC bits are
// accepted.
5489 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5490 if (TSFlags & SIInstrFlags::SMRD) {
5491 if (CPol && (isSI() || isCI())) {
5492 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5493 Error(S, "cache policy is not supported for SMRD instructions");
5494 return false;
5495 }
5496 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
5497 Error(IDLoc, "invalid cache policy for SMEM instruction");
5498 return false;
5499 }
5500 }
5501
// On GFX90A (but not GFX940) the scc modifier is only accepted for the
// instruction kinds collected in AllowSCCModifier.
// NOTE(review): listing lines 5504-5505 are missing, so only the first flag
// of AllowSCCModifier is visible.
5502 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
5503 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
5506 if (!(TSFlags & AllowSCCModifier)) {
5507 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5508 StringRef CStr(S.getPointer());
5509 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
5510 Error(S,
5511 "scc modifier is not supported for this instruction on this GPU");
5512 return false;
5513 }
5514 }
5515
// NOTE(review): listing line 5516 (the condition guarding this early return)
// is missing from this view.
5517 return true;
5518
// Returning atomics (other than MIMG) must set GLC; everything else that
// reaches this point must not. The bit is spelled sc0 in diagnostics on
// GFX940 and glc otherwise.
5519 if (TSFlags & SIInstrFlags::IsAtomicRet) {
5520 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5521 Error(IDLoc, isGFX940() ? "instruction must use sc0"
5522 : "instruction must use glc");
5523 return false;
5524 }
5525 } else {
5526 if (CPol & CPol::GLC) {
5527 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5528 StringRef CStr(S.getPointer());
// NOTE(review): listing line 5529 (the start of the statement continued on
// the next line) is missing from this view.
5530 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
5531 Error(S, isGFX940() ? "instruction must not use sc0"
5532 : "instruction must not use glc");
5533 return false;
5534 }
5535 }
5536
5537 return true;
5538}
5539
// Validate the th (temporal hint) and scope fields of a cpol value. Every
// failure is reported at the cpol operand location and yields false; true is
// returned when the combination is accepted.
// NOTE(review): this listing is missing lines 5556, 5560-5561, 5574 and 5576
// (see the gaps in the embedded numbering), so several conditions below are
// only partially visible.
5540bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5541 const OperandVector &Operands,
5542 const unsigned CPol) {
// Extract the two fields by masking CPol.
5543 const unsigned TH = CPol & AMDGPU::CPol::TH;
5544 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5545
5546 const unsigned Opcode = Inst.getOpcode();
5547 const MCInstrDesc &TID = MII.get(Opcode);
5548
// Helper: emit Msg at the cpol operand location and return false, so callers
// can write 'return PrintError(...)'.
5549 auto PrintError = [&](StringRef Msg) {
5550 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5551 Error(S, Msg);
5552 return false;
5553 };
5554
// NOTE(review): the second half of this condition (listing line 5556) is
// missing.
5555 if ((TH & AMDGPU::CPol::TH_ATOMIC_RETURN) &&
5557 return PrintError("th:TH_ATOMIC_RETURN requires a destination operand");
5558
// NOTE(review): the rest of this condition (listing lines 5560-5561) is
// missing.
5559 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5562 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5563
// Nothing further to check when the th field is zero.
5564 if (TH == 0)
5565 return true;
5566
// SMRD instructions reject the TH_NT_RT, TH_RT_NT and TH_NT_HT values.
5567 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5568 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5569 (TH == AMDGPU::CPol::TH_NT_HT)))
5570 return PrintError("invalid th value for SMEM instruction");
5571
// TH_BYPASS is only valid with certain scope combinations.
// NOTE(review): listing lines 5574 and 5576 are missing, so the exact rule
// cannot be read from this view.
5572 if (TH == AMDGPU::CPol::TH_BYPASS) {
5573 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5575 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5577 return PrintError("scope and th combination is not valid");
5578 }
5579
// The th value must carry the type bit that matches the kind of instruction
// (atomic, store, or load by default) reported by getTemporalHintType.
5580 unsigned THType = AMDGPU::getTemporalHintType(TID);
5581 if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
5582 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5583 return PrintError("invalid th value for atomic instructions");
5584 } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
5585 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5586 return PrintError("invalid th value for store instructions");
5587 } else {
5588 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5589 return PrintError("invalid th value for load instructions");
5590 }
5591
5592 return true;
5593}
5594
// Reject an explicitly written TFE modifier on store instructions. Returns
// false after reporting a diagnostic; true otherwise.
// NOTE(review): listing line 5599 (the second half of the condition below) is
// missing from this view.
5595bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5596 const OperandVector &Operands) {
5597 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5598 if (Desc.mayStore() &&
5600 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
// A location different from the instruction location is taken to mean the
// modifier was actually written in the source.
// NOTE(review): presumably getImmLoc falls back to the instruction location
// when the operand is absent - confirm against its definition.
5601 if (Loc != getInstLoc(Operands)) {
5602 Error(Loc, "TFE modifier has no meaning for store instructions");
5603 return false;
5604 }
5605 }
5606
5607 return true;
5608}
5609
// Check that the register tuple sizes of src0 and src1 are consistent with
// the matrix_a_fmt and matrix_b_fmt operands respectively, for instructions
// that have those format operands. Reports a diagnostic at the offending
// source operand and returns false on a mismatch.
// NOTE(review): listing line 5626 (the condition that accepts a size) is
// missing from this view.
5610bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
5611 const OperandVector &Operands) {
5612 unsigned Opc = Inst.getOpcode();
5613 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5614 const MCInstrDesc &Desc = MII.get(Opc);
5615
// Validates one (format operand, source operand) pair; an instruction without
// the format operand trivially passes.
5616 auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
5617 int FmtIdx = AMDGPU::getNamedOperandIdx(Opc, FmtOp);
5618 if (FmtIdx == -1)
5619 return true;
5620 unsigned Fmt = Inst.getOperand(FmtIdx).getImm();
// NOTE(review): SrcIdx is used to index Desc.operands() without a -1 check;
// presumably every opcode with a format operand also has the source operand.
5621 int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
// Size in bits of the register class declared for the source operand.
5622 unsigned RegSize =
5623 TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[SrcIdx], HwMode))
5624 .getSizeInBits();
5625
5627 return true;
5628
5629 Error(getOperandLoc(Operands, SrcIdx),
5630 "wrong register tuple size for " +
5631 Twine(WMMAMods::ModMatrixFmt[Fmt]));
5632 return false;
5633 };
5634
// The src1 pair is only checked when the src0 pair passes.
5635 return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
5636 validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
5637}
5638
5639bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc,
5640 const OperandVector &Operands) {
5641 if (!validateLdsDirect(Inst, Operands))
5642 return false;
5643 if (!validateTrue16OpSel(Inst)) {
5644 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5645 "op_sel operand conflicts with 16-bit operand suffix");
5646 return false;
5647 }
5648 if (!validateSOPLiteral(Inst, Operands))
5649 return false;
5650 if (!validateVOPLiteral(Inst, Operands)) {
5651 return false;
5652 }
5653 if (!validateConstantBusLimitations(Inst, Operands)) {
5654 return false;
5655 }
5656 if (!validateVOPD(Inst, Operands)) {
5657 return false;
5658 }
5659 if (!validateIntClampSupported(Inst)) {
5660 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5661 "integer clamping is not supported on this GPU");
5662 return false;
5663 }
5664 if (!validateOpSel(Inst)) {
5665 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5666 "invalid op_sel operand");
5667 return false;
5668 }
5669 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5670 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5671 "invalid neg_lo operand");
5672 return false;
5673 }
5674 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5675 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5676 "invalid neg_hi operand");
5677 return false;
5678 }
5679 if (!validateDPP(Inst, Operands)) {
5680 return false;
5681 }
5682 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5683 if (!validateMIMGD16(Inst)) {
5684 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5685 "d16 modifier is not supported on this GPU");
5686 return false;
5687 }
5688 if (!validateMIMGDim(Inst, Operands)) {
5689 Error(IDLoc, "missing dim operand");
5690 return false;
5691 }
5692 if (!validateTensorR128(Inst)) {
5693 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5694 "instruction must set modifier r128=0");
5695 return false;
5696 }
5697 if (!validateMIMGMSAA(Inst)) {
5698 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5699 "invalid dim; must be MSAA type");
5700 return false;
5701 }
5702 if (!validateMIMGDataSize(Inst, IDLoc)) {
5703 return false;
5704 }
5705 if (!validateMIMGAddrSize(Inst, IDLoc))
5706 return false;
5707 if (!validateMIMGAtomicDMask(Inst)) {
5708 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5709 "invalid atomic image dmask");
5710 return false;
5711 }
5712 if (!validateMIMGGatherDMask(Inst)) {
5713 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5714 "invalid image_gather dmask: only one bit must be set");
5715 return false;
5716 }
5717 if (!validateMovrels(Inst, Operands)) {
5718 return false;
5719 }
5720 if (!validateOffset(Inst, Operands)) {
5721 return false;
5722 }
5723 if (!validateMAIAccWrite(Inst, Operands)) {
5724 return false;
5725 }
5726 if (!validateMAISrc2(Inst, Operands)) {
5727 return false;
5728 }
5729 if (!validateMFMA(Inst, Operands)) {
5730 return false;
5731 }
5732 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5733 return false;
5734 }
5735
5736 if (!validateAGPRLdSt(Inst)) {
5737 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5738 ? "invalid register class: data and dst should be all VGPR or AGPR"
5739 : "invalid register class: agpr loads and stores not supported on this GPU"
5740 );
5741 return false;
5742 }
5743 if (!validateVGPRAlign(Inst)) {
5744 Error(IDLoc,
5745 "invalid register class: vgpr tuples must be 64 bit aligned");
5746 return false;
5747 }
5748 if (!validateDS(Inst, Operands)) {
5749 return false;
5750 }
5751
5752 if (!validateBLGP(Inst, Operands)) {
5753 return false;
5754 }
5755
5756 if (!validateDivScale(Inst)) {
5757 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5758 return false;
5759 }
5760 if (!validateWaitCnt(Inst, Operands)) {
5761 return false;
5762 }
5763 if (!validateTFE(Inst, Operands)) {
5764 return false;
5765 }
5766 if (!validateWMMA(Inst, Operands)) {
5767 return false;
5768 }
5769
5770 return true;
5771}
5772
// NOTE(review): the opening line of this declaration (listing line 5773) is
// missing from this view; only its trailing parameters are visible. The
// VariantID parameter defaults to 0.
5774 const FeatureBitset &FBS,
5775 unsigned VariantID = 0);
5776
// Forward declaration; the definition is not visible in this part of the
// file. AMDGPUAsmParser::isSupportedMnemo calls it once per assembler variant
// and treats a true result as "mnemonic supported".
5777static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5778 const FeatureBitset &AvailableFeatures,
5779 unsigned VariantID);
5780
5781bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5782 const FeatureBitset &FBS) {
5783 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5784}
5785
5786bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5787 const FeatureBitset &FBS,
5788 ArrayRef<unsigned> Variants) {
5789 for (auto Variant : Variants) {
5790 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5791 return true;
5792 }
5793
5794 return false;
5795}
5796
5797bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5798 SMLoc IDLoc) {
5799 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5800
5801 // Check if requested instruction variant is supported.
5802 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5803 return false;
5804
5805 // This instruction is not supported.
5806 // Clear any other pending errors because they are no longer relevant.
5807 getParser().clearPendingErrors();
5808
5809 // Requested instruction variant is not supported.
5810 // Check if any other variants are supported.
5811 StringRef VariantName = getMatchedVariantName();
5812 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5813 return Error(IDLoc,
5814 Twine(VariantName,
5815 " variant of this instruction is not supported"));
5816 }
5817
5818 // Check if this instruction may be used with a different wavesize.
5819 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5820 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5821 // FIXME: Use getAvailableFeatures, and do not manually recompute
5822 FeatureBitset FeaturesWS32 = getFeatureBits();
5823 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5824 .flip(AMDGPU::FeatureWavefrontSize32);
5825 FeatureBitset AvailableFeaturesWS32 =
5826 ComputeAvailableFeatures(FeaturesWS32);
5827
5828 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5829 return Error(IDLoc, "instruction requires wavesize=32");
5830 }
5831
5832 // Finally check if this instruction is supported on any other GPU.
5833 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5834 return Error(IDLoc, "instruction not supported on this GPU (" +
5835 getSTI().getCPU() + ")" + ": " + Mnemo);
5836 }
5837
5838 // Instruction not supported on any GPU. Probably a typo.
5839 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5840 return Error(IDLoc, "invalid instruction" + Suggestion);
5841}
5842
5843static bool isInvalidVOPDY(const OperandVector &Operands,
5844 uint64_t InvalidOprIdx) {
5845 assert(InvalidOprIdx < Operands.size());
5846 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5847 if (Op.isToken() && InvalidOprIdx > 1) {
5848 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5849 return PrevOp.isToken() && PrevOp.getToken() == "::";
5850 }
5851 return false;
5852}
5853
5854bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5855 OperandVector &Operands,
5856 MCStreamer &Out,
5857 uint64_t &ErrorInfo,
5858 bool MatchingInlineAsm) {
5859 MCInst Inst;
5860 Inst.setLoc(IDLoc);
5861 unsigned Result = Match_Success;
5862 for (auto Variant : getMatchedVariants()) {
5863 uint64_t EI;
5864 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5865 Variant);
5866 // We order match statuses from least to most specific. We use most specific
5867 // status as resulting
5868 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5869 if (R == Match_Success || R == Match_MissingFeature ||
5870 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5871 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5872 Result != Match_MissingFeature)) {
5873 Result = R;
5874 ErrorInfo = EI;
5875 }
5876 if (R == Match_Success)
5877 break;
5878 }
5879
5880 if (Result == Match_Success) {
5881 if (!validateInstruction(Inst, IDLoc, Operands)) {
5882 return true;
5883 }
5884 Out.emitInstruction(Inst, getSTI());
5885 return false;
5886 }
5887
5888 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5889 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5890 return true;
5891 }
5892
5893 switch (Result) {
5894 default: break;
5895 case Match_MissingFeature:
5896 // It has been verified that the specified instruction
5897 // mnemonic is valid. A match was found but it requires
5898 // features which are not supported on this GPU.
5899 return Error(IDLoc, "operands are not valid for this GPU or mode");
5900
5901 case Match_InvalidOperand: {
5902 SMLoc ErrorLoc = IDLoc;
5903 if (ErrorInfo != ~0ULL) {
5904 if (ErrorInfo >= Operands.size()) {
5905 return Error(IDLoc, "too few operands for instruction");
5906 }
5907 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5908 if (ErrorLoc == SMLoc())
5909 ErrorLoc = IDLoc;
5910
5911 if (isInvalidVOPDY(Operands, ErrorInfo))
5912 return Error(ErrorLoc, "invalid VOPDY instruction");
5913 }
5914 return Error(ErrorLoc, "invalid operand for instruction");
5915 }
5916
5917 case Match_MnemonicFail:
5918 llvm_unreachable("Invalid instructions should have been handled already");
5919 }
5920 llvm_unreachable("Implement any new match types added!");
5921}
5922
5923bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5924 int64_t Tmp = -1;
5925 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5926 return true;
5927 }
5928 if (getParser().parseAbsoluteExpression(Tmp)) {
5929 return true;
5930 }
5931 Ret = static_cast<uint32_t>(Tmp);
5932 return false;
5933}
5934
5935bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5936 if (!getSTI().getTargetTriple().isAMDGCN())
5937 return TokError("directive only supported for amdgcn architecture");
5938
5939 std::string TargetIDDirective;
5940 SMLoc TargetStart = getTok().getLoc();
5941 if (getParser().parseEscapedString(TargetIDDirective))
5942 return true;
5943
5944 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5945 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5946 return getParser().Error(TargetRange.Start,
5947 (Twine(".amdgcn_target directive's target id ") +
5948 Twine(TargetIDDirective) +
5949 Twine(" does not match the specified target id ") +
5950 Twine(getTargetStreamer().getTargetID()->toString())).str());
5951
5952 return false;
5953}
5954
5955bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5956 return Error(Range.Start, "value out of range", Range);
5957}
5958
// Build the MCExprs for the VGPR and SGPR block counts from the next-free
// register expressions and write them to VGPRBlocks / SGPRBlocks. Returns
// true (via OutOfRangeError on SGPRRange) when an SGPR count that can be
// evaluated exceeds the addressable maximum, and false on success.
// NOTE(review): this listing is missing lines 5974 and 5994 (see the gaps in
// the embedded numbering); the comments below describe only what is visible.
5959bool AMDGPUAsmParser::calculateGPRBlocks(
5960 const FeatureBitset &Features, const MCExpr *VCCUsed,
5961 const MCExpr *FlatScrUsed, bool XNACKUsed,
5962 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5963 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5964 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5965 // TODO(scott.linder): These calculations are duplicated from
5966 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5967 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5968 MCContext &Ctx = getContext();
5969
5970 const MCExpr *NumSGPRs = NextFreeSGPR;
5971 int64_t EvaluatedSGPRs;
5972
// NOTE(review): the statement executed for Version.Major >= 10 (listing line
// 5974) is missing from this view.
5973 if (Version.Major >= 10)
5975 else {
5976 unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(getSTI());
5977
// For major version 8+ without FeatureSGPRInitBug, the limit is checked
// before the extra SGPRs are added.
5978 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5979 !Features.test(FeatureSGPRInitBug) &&
5980 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5981 return OutOfRangeError(SGPRRange);
5982
// Add the extra SGPRs derived from the VCC / flat scratch / XNACK usage.
5983 const MCExpr *ExtraSGPRs =
5984 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5985 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5986
// For major version <= 7, or with FeatureSGPRInitBug, the limit is checked
// after the extra SGPRs are added.
5987 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5988 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5989 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5990 return OutOfRangeError(SGPRRange);
5991
// NOTE(review): the right-hand side of this assignment (listing line 5994) is
// missing from this view.
5992 if (Features.test(FeatureSGPRInitBug))
5993 NumSGPRs =
5995 }
5996
5997 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5998 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
5999 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
6000 unsigned Granule) -> const MCExpr * {
6001 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
6002 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
6003 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
6004 const MCExpr *AlignToGPR =
6005 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
6006 const MCExpr *DivGPR =
6007 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
6008 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
6009 return SubGPR;
6010 };
6011
// The VGPR granule depends on the (optional) wavefront-size setting; the SGPR
// granule depends only on the subtarget. VGPRRange is not used in the visible
// code.
6012 VGPRBlocks = GetNumGPRBlocks(
6013 NextFreeVGPR,
6014 IsaInfo::getVGPREncodingGranule(getSTI(), EnableWavefrontSize32));
6015 SGPRBlocks =
6016 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(getSTI()));
6017
6018 return false;
6019}
6020
// Parses a `.amdhsa_kernel <name>` block: reads `.amdhsa_*` directives until
// `.end_amdhsa_kernel`, records each value in the kernel descriptor KD (or in
// a local used for later validation), runs the cross-directive checks below
// the loop, and finally hands the descriptor to the target streamer.
//
// Returns true whenever a diagnostic was emitted, false on success.
//
// NOTE(review): the embedded listing numbers skip values in many places
// (e.g. 6033, 6107, 6112, 6125, 6129, 6160, 6447, 6451, 6511, 6521). The
// corresponding source lines are not visible here, so a number of statements
// below start or end abruptly (typically the first line of a
// PARSE_BITS_ENTRY(...) / bits_set(...) call or an `if` condition). Restore
// them from the upstream file before building.
6021bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
6022 if (!getSTI().getTargetTriple().isAMDGCN())
6023 return TokError("directive only supported for amdgcn architecture");
6024
6025 if (!isHsaAbi(getSTI()))
6026 return TokError("directive only supported for amdhsa OS");
6027
6028 StringRef KernelName;
6029 if (getParser().parseIdentifier(KernelName))
6030 return true;
6031
6032 AMDGPU::MCKernelDescriptor KD =
6034 &getSTI(), getContext());
6035
  // Names of the directives already parsed in this block; used both to reject
  // repetitions and to check for mandatory directives after the loop.
6036 StringSet<> Seen;
6037
6038 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
6039
6040 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
6041 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
6042
6043 SMRange VGPRRange;
6044 const MCExpr *NextFreeVGPR = ZeroExpr;
6045 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
6046 const MCExpr *NamedBarCnt = ZeroExpr;
6047 uint64_t SharedVGPRCount = 0;
6048 uint64_t PreloadLength = 0;
6049 uint64_t PreloadOffset = 0;
6050 SMRange SGPRRange;
6051 const MCExpr *NextFreeSGPR = ZeroExpr;
6052
6053 // Count the number of user SGPRs implied from the enabled feature bits.
6054 unsigned ImpliedUserSGPRCount = 0;
6055
6056 // Track if the asm explicitly contains the directive for the user SGPR
6057 // count.
6058 std::optional<unsigned> ExplicitUserSGPRCount;
6059 const MCExpr *ReserveVCC = OneExpr;
6060 const MCExpr *ReserveFlatScr = OneExpr;
6061 std::optional<bool> EnableWavefrontSize32;
6062
6063 while (true) {
6064 while (trySkipToken(AsmToken::EndOfStatement));
6065
6066 StringRef ID;
6067 SMRange IDRange = getTok().getLocRange();
6068 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
6069 return true;
6070
6071 if (ID == ".end_amdhsa_kernel")
6072 break;
6073
  // Each directive may appear at most once inside a kernel block.
6074 if (!Seen.insert(ID).second)
6075 return TokError(".amdhsa_ directives cannot be repeated");
6076
6077 SMLoc ValStart = getLoc();
6078 const MCExpr *ExprVal;
6079 if (getParser().parseExpression(ExprVal))
6080 return true;
6081 SMLoc ValEnd = getLoc();
6082 SMRange ValRange = SMRange(ValStart, ValEnd);
6083
  // Val carries the operand only when the expression folds to a constant; a
  // negative constant is rejected. For a non-evaluatable expression Val stays
  // 0 and EvaluatableExpr is false, so range checks on Val pass trivially.
6084 int64_t IVal = 0;
6085 uint64_t Val = IVal;
6086 bool EvaluatableExpr;
6087 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6088 if (IVal < 0)
6089 return OutOfRangeError(ValRange);
6090 Val = IVal;
6091 }
6092
  // PARSE_BITS_ENTRY range-checks Val against ENTRY##_WIDTH and then stores
  // VALUE into FIELD via MCKernelDescriptor::bits_set. It is #undef'd at the
  // end of each loop iteration.
6093#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6094 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6095 return OutOfRangeError(RANGE); \
6096 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6097 getContext());
6098
6099// Some fields use the parsed value immediately which requires the expression to
6100// be solvable.
  // NOTE(review): no matching #undef for EXPR_RESOLVE_OR_ERROR is visible in
  // this listing, unlike PARSE_BITS_ENTRY.
6101#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6102 if (!(RESOLVED)) \
6103 return Error(IDRange.Start, "directive should have resolvable expression", \
6104 IDRange);
6105
6106 if (ID == ".amdhsa_group_segment_fixed_size") {
6108 CHAR_BIT>(Val))
6109 return OutOfRangeError(ValRange);
6110 KD.group_segment_fixed_size = ExprVal;
6111 } else if (ID == ".amdhsa_private_segment_fixed_size") {
6113 CHAR_BIT>(Val))
6114 return OutOfRangeError(ValRange);
6115 KD.private_segment_fixed_size = ExprVal;
6116 } else if (ID == ".amdhsa_kernarg_size") {
6117 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
6118 return OutOfRangeError(ValRange);
6119 KD.kernarg_size = ExprVal;
6120 } else if (ID == ".amdhsa_user_sgpr_count") {
6121 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6122 ExplicitUserSGPRCount = Val;
6123 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
6124 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6126 return Error(IDRange.Start,
6127 "directive is not supported with architected flat scratch",
6128 IDRange);
6130 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6131 ExprVal, ValRange);
6132 if (Val)
6133 ImpliedUserSGPRCount += 4;
6134 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
6135 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6136 if (!hasKernargPreload())
6137 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6138
6139 if (Val > getMaxNumUserSGPRs())
6140 return OutOfRangeError(ValRange);
6141 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
6142 ValRange);
6143 if (Val) {
6144 ImpliedUserSGPRCount += Val;
6145 PreloadLength = Val;
6146 }
6147 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
6148 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6149 if (!hasKernargPreload())
6150 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6151
6152 if (Val >= 1024)
6153 return OutOfRangeError(ValRange);
6154 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6155 ValRange);
6156 if (Val)
6157 PreloadOffset = Val;
6158 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6159 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6161 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6162 ValRange);
6163 if (Val)
6164 ImpliedUserSGPRCount += 2;
6165 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6166 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6168 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6169 ValRange);
6170 if (Val)
6171 ImpliedUserSGPRCount += 2;
6172 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6173 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6175 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6176 ExprVal, ValRange);
6177 if (Val)
6178 ImpliedUserSGPRCount += 2;
6179 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6180 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6182 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6183 ValRange);
6184 if (Val)
6185 ImpliedUserSGPRCount += 2;
6186 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6188 return Error(IDRange.Start,
6189 "directive is not supported with architected flat scratch",
6190 IDRange);
6191 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6193 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6194 ExprVal, ValRange);
6195 if (Val)
6196 ImpliedUserSGPRCount += 2;
6197 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6198 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6200 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6201 ExprVal, ValRange);
6202 if (Val)
6203 ImpliedUserSGPRCount += 1;
6204 } else if (ID == ".amdhsa_wavefront_size32") {
6205 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6206 if (IVersion.Major < 10)
6207 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6208 EnableWavefrontSize32 = Val;
6210 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6211 ValRange);
6212 } else if (ID == ".amdhsa_uses_dynamic_stack") {
6214 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6215 ValRange);
6216 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6218 return Error(IDRange.Start,
6219 "directive is not supported with architected flat scratch",
6220 IDRange);
6222 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6223 ValRange);
6224 } else if (ID == ".amdhsa_enable_private_segment") {
6226 return Error(
6227 IDRange.Start,
6228 "directive is not supported without architected flat scratch",
6229 IDRange);
6231 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6232 ValRange);
6233 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6235 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6236 ValRange);
6237 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6239 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6240 ValRange);
6241 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6243 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6244 ValRange);
6245 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6247 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6248 ValRange);
6249 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6251 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6252 ValRange);
6253 } else if (ID == ".amdhsa_next_free_vgpr") {
6254 VGPRRange = ValRange;
6255 NextFreeVGPR = ExprVal;
6256 } else if (ID == ".amdhsa_next_free_sgpr") {
6257 SGPRRange = ValRange;
6258 NextFreeSGPR = ExprVal;
6259 } else if (ID == ".amdhsa_accum_offset") {
6260 if (!isGFX90A())
6261 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6262 AccumOffset = ExprVal;
6263 } else if (ID == ".amdhsa_named_barrier_count") {
6264 if (!isGFX1250Plus())
6265 return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
6266 NamedBarCnt = ExprVal;
6267 } else if (ID == ".amdhsa_reserve_vcc") {
6268 if (EvaluatableExpr && !isUInt<1>(Val))
6269 return OutOfRangeError(ValRange);
6270 ReserveVCC = ExprVal;
6271 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6272 if (IVersion.Major < 7)
6273 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
6275 return Error(IDRange.Start,
6276 "directive is not supported with architected flat scratch",
6277 IDRange);
6278 if (EvaluatableExpr && !isUInt<1>(Val))
6279 return OutOfRangeError(ValRange);
6280 ReserveFlatScr = ExprVal;
6281 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6282 if (IVersion.Major < 8)
6283 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
6284 if (!isUInt<1>(Val))
6285 return OutOfRangeError(ValRange);
6286 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6287 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
6288 IDRange);
6289 } else if (ID == ".amdhsa_float_round_mode_32") {
6291 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6292 ValRange);
6293 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6295 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6296 ValRange);
6297 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6299 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6300 ValRange);
6301 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6303 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6304 ValRange);
6305 } else if (ID == ".amdhsa_dx10_clamp") {
6306 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6307 return Error(IDRange.Start, "directive unsupported on gfx1170+",
6308 IDRange);
6310 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6311 ValRange);
6312 } else if (ID == ".amdhsa_ieee_mode") {
6313 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6314 return Error(IDRange.Start, "directive unsupported on gfx1170+",
6315 IDRange);
6317 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6318 ValRange);
6319 } else if (ID == ".amdhsa_fp16_overflow") {
6320 if (IVersion.Major < 9)
6321 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
6323 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6324 ValRange);
6325 } else if (ID == ".amdhsa_tg_split") {
6326 if (!isGFX90A())
6327 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6328 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6329 ExprVal, ValRange);
6330 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6331 if (!supportsWGP(getSTI()))
6332 return Error(IDRange.Start,
6333 "directive unsupported on " + getSTI().getCPU(), IDRange);
6335 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6336 ValRange);
6337 } else if (ID == ".amdhsa_memory_ordered") {
6338 if (IVersion.Major < 10)
6339 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6341 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6342 ValRange);
6343 } else if (ID == ".amdhsa_forward_progress") {
6344 if (IVersion.Major < 10)
6345 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6347 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6348 ValRange);
6349 } else if (ID == ".amdhsa_shared_vgpr_count") {
6350 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6351 if (IVersion.Major < 10 || IVersion.Major >= 12)
6352 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
6353 IDRange);
6354 SharedVGPRCount = Val;
6356 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6357 ValRange);
6358 } else if (ID == ".amdhsa_inst_pref_size") {
6359 if (IVersion.Major < 11)
6360 return Error(IDRange.Start, "directive requires gfx11+", IDRange);
6361 if (IVersion.Major == 11) {
6363 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6364 ValRange);
6365 } else {
6367 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6368 ValRange);
6369 }
6370 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6373 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6374 ExprVal, ValRange);
6375 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6377 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6378 ExprVal, ValRange);
6379 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6382 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6383 ExprVal, ValRange);
6384 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6386 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6387 ExprVal, ValRange);
6388 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6390 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6391 ExprVal, ValRange);
6392 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6394 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6395 ExprVal, ValRange);
6396 } else if (ID == ".amdhsa_exception_int_div_zero") {
6398 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6399 ExprVal, ValRange);
6400 } else if (ID == ".amdhsa_round_robin_scheduling") {
6401 if (IVersion.Major < 12)
6402 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
6404 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6405 ValRange);
6406 } else {
6407 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
6408 }
6409
6410#undef PARSE_BITS_ENTRY
6411 }
6412
  // Both next-free register directives are mandatory.
6413 if (!Seen.contains(".amdhsa_next_free_vgpr"))
6414 return TokError(".amdhsa_next_free_vgpr directive is required");
6415
6416 if (!Seen.contains(".amdhsa_next_free_sgpr"))
6417 return TokError(".amdhsa_next_free_sgpr directive is required");
6418
  // An explicit .amdhsa_user_sgpr_count takes precedence over the count
  // accumulated from the individual user-SGPR enable directives.
6419 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6420 if (UserSGPRCount > getMaxNumUserSGPRs())
6421 return TokError("too many user SGPRs enabled, found " +
6422 Twine(UserSGPRCount) + ", but only " +
6423 Twine(getMaxNumUserSGPRs()) + " are supported.");
6424
6425 // Consider the case where the total number of UserSGPRs with trailing
6426 // allocated preload SGPRs, is greater than the number of explicitly
6427 // referenced SGPRs.
6428 if (PreloadLength) {
6429 MCContext &Ctx = getContext();
6430 NextFreeSGPR = AMDGPUMCExpr::createMax(
6431 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6432 }
6433
6434 const MCExpr *VGPRBlocks;
6435 const MCExpr *SGPRBlocks;
6436 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6437 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6438 EnableWavefrontSize32, NextFreeVGPR,
6439 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6440 SGPRBlocks))
6441 return true;
6442
  // The block counts are range-checked only when they fold to constants.
  // EvaluatedVGPRBlocks is read again in the shared-VGPR check further down,
  // guarded there by VGPRBlocksEvaluatable.
6443 int64_t EvaluatedVGPRBlocks;
6444 bool VGPRBlocksEvaluatable =
6445 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6446 if (VGPRBlocksEvaluatable &&
6448 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6449 return OutOfRangeError(VGPRRange);
6450 }
6452 KD.compute_pgm_rsrc1, VGPRBlocks,
6453 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6454 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6455
6456 int64_t EvaluatedSGPRBlocks;
6457 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6459 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6460 return OutOfRangeError(SGPRRange);
6462 KD.compute_pgm_rsrc1, SGPRBlocks,
6463 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6464 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6465
  // NOTE(review): the message below names "amdgpu_user_sgpr_count", whereas
  // the directive handled in the loop is ".amdhsa_user_sgpr_count".
6466 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6467 return TokError("amdgpu_user_sgpr_count smaller than implied by "
6468 "enabled user SGPRs");
6469
  // The user SGPR count is written with a different shift/mask pair on
  // gfx1250+ than on the other targets.
6470 if (isGFX1250Plus()) {
6473 MCConstantExpr::create(UserSGPRCount, getContext()),
6474 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6475 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
6476 } else {
6479 MCConstantExpr::create(UserSGPRCount, getContext()),
6480 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6481 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
6482 }
6483
  // The kernarg size must fold to a constant here. PreloadLength and
  // PreloadOffset are each scaled by 4 before being compared against it
  // (presumably dword counts versus a byte size -- TODO confirm).
6484 int64_t IVal = 0;
6485 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6486 return TokError("Kernarg size should be resolvable");
6487 uint64_t kernarg_size = IVal;
6488 if (PreloadLength && kernarg_size &&
6489 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6490 return TokError("Kernarg preload length + offset is larger than the "
6491 "kernarg segment size");
6492
6493 if (isGFX90A()) {
6494 if (!Seen.contains(".amdhsa_accum_offset"))
6495 return TokError(".amdhsa_accum_offset directive is required");
  // NOTE(review): EvaluatedAccum has no initializer and is copied into
  // UEvaluatedAccum before AccumEvaluatable is tested; whether
  // evaluateAsAbsolute writes it on failure is not visible here -- confirm.
6496 int64_t EvaluatedAccum;
6497 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6498 uint64_t UEvaluatedAccum = EvaluatedAccum;
6499 if (AccumEvaluatable &&
6500 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6501 return TokError("accum_offset should be in range [4..256] in "
6502 "increments of 4");
6503
6504 int64_t EvaluatedNumVGPR;
6505 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6506 AccumEvaluatable &&
6507 UEvaluatedAccum >
6508 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6509 return TokError("accum_offset exceeds total VGPR allocation");
6510 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6512 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
6515 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6516 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6517 getContext());
6518 }
6519
6520 if (isGFX1250Plus())
6522 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6523 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6524 getContext());
6525
6526 if (IVersion.Major >= 10 && IVersion.Major < 12) {
  // NOTE(review): the comment below says PARSE_ENTRY_BITS; the macro defined
  // in this function is PARSE_BITS_ENTRY.
6527 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
6528 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6529 return TokError("shared_vgpr_count directive not valid on "
6530 "wavefront size 32");
6531 }
6532
  // NOTE(review): this message ends in "\n", unlike the other diagnostics in
  // this function.
6533 if (VGPRBlocksEvaluatable &&
6534 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6535 63)) {
6536 return TokError("shared_vgpr_count*2 + "
6537 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6538 "exceed 63\n");
6539 }
6540 }
6541
6542 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6543 NextFreeVGPR, NextFreeSGPR,
6544 ReserveVCC, ReserveFlatScr);
6545 return false;
6546}
6547
6548bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6549 uint32_t Version;
6550 if (ParseAsAbsoluteExpression(Version))
6551 return true;
6552
6553 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6554 return false;
6555}
6556
6557bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6558 AMDGPUMCKernelCodeT &C) {
6559 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6560 // assembly for backwards compatibility.
6561 if (ID == "max_scratch_backing_memory_byte_size") {
6562 Parser.eatToEndOfStatement();
6563 return false;
6564 }
6565
6566 SmallString<40> ErrStr;
6567 raw_svector_ostream Err(ErrStr);
6568 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6569 return TokError(Err.str());
6570 }
6571 Lex();
6572
6573 if (ID == "enable_wavefront_size32") {
6574 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6575 if (!isGFX10Plus())
6576 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6577 if (!isWave32())
6578 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6579 } else {
6580 if (!isWave64())
6581 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6582 }
6583 }
6584
6585 if (ID == "wavefront_size") {
6586 if (C.wavefront_size == 5) {
6587 if (!isGFX10Plus())
6588 return TokError("wavefront_size=5 is only allowed on GFX10+");
6589 if (!isWave32())
6590 return TokError("wavefront_size=5 requires +WavefrontSize32");
6591 } else if (C.wavefront_size == 6) {
6592 if (!isWave64())
6593 return TokError("wavefront_size=6 requires +WavefrontSize64");
6594 }
6595 }
6596
6597 return false;
6598}
6599
6600bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6601 AMDGPUMCKernelCodeT KernelCode;
6602 KernelCode.initDefault(getSTI(), getContext());
6603
6604 while (true) {
6605 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6606 // will set the current token to EndOfStatement.
6607 while(trySkipToken(AsmToken::EndOfStatement));
6608
6609 StringRef ID;
6610 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6611 return true;
6612
6613 if (ID == ".end_amd_kernel_code_t")
6614 break;
6615
6616 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6617 return true;
6618 }
6619
6620 KernelCode.validate(&getSTI(), getContext());
6621 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6622
6623 return false;
6624}
6625
// Parses the symbol name operand of the directive, asks the target streamer
// to emit a symbol type for it, and re-initializes KernelScope.
// Returns true if no symbol name could be parsed, false otherwise.
//
// NOTE(review): the embedded listing numbers skip 6632, so the second
// argument of EmitAMDGPUSymbolType(...) and the closing `);` are not visible
// in this listing. Restore them from the upstream file before building.
6626bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6627 StringRef KernelName;
6628 if (!parseId(KernelName, "expected symbol name"))
6629 return true;
6630
6631 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6633
6634 KernelScope.initialize(getContext());
6635 return false;
6636}
6637
6638bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6639 if (!getSTI().getTargetTriple().isAMDGCN()) {
6640 return Error(getLoc(),
6641 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6642 "architectures");
6643 }
6644
6645 auto TargetIDDirective = getLexer().getTok().getStringContents();
6646 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6647 return Error(getParser().getTok().getLoc(), "target id must match options");
6648
6649 getTargetStreamer().EmitISAVersion();
6650 Lex();
6651
6652 return false;
6653}
6654
6655bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6656 assert(isHsaAbi(getSTI()));
6657
6658 std::string HSAMetadataString;
6659 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6660 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6661 return true;
6662
6663 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6664 return Error(getLoc(), "invalid HSA metadata");
6665
6666 return false;
6667}
6668
6669/// Common code to parse out a block of text (typically YAML) between start and
6670/// end directives.
6671bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6672 const char *AssemblerDirectiveEnd,
6673 std::string &CollectString) {
6674
6675 raw_string_ostream CollectStream(CollectString);
6676
6677 getLexer().setSkipSpace(false);
6678
6679 bool FoundEnd = false;
6680 while (!isToken(AsmToken::Eof)) {
6681 while (isToken(AsmToken::Space)) {
6682 CollectStream << getTokenStr();
6683 Lex();
6684 }
6685
6686 if (trySkipId(AssemblerDirectiveEnd)) {
6687 FoundEnd = true;
6688 break;
6689 }
6690
6691 CollectStream << Parser.parseStringToEndOfStatement()
6692 << getContext().getAsmInfo().getSeparatorString();
6693
6694 Parser.eatToEndOfStatement();
6695 }
6696
6697 getLexer().setSkipSpace(true);
6698
6699 if (isToken(AsmToken::Eof) && !FoundEnd) {
6700 return TokError(Twine("expected directive ") +
6701 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6702 }
6703
6704 return false;
6705}
6706
6707/// Parse the assembler directive for new MsgPack-format PAL metadata.
// Collects the text up to the end directive with ParseToEndDirective() and
// passes it to the PAL metadata object's setFromString(). Returns true if the
// text could not be collected or setFromString() rejected it.
//
// NOTE(review): the embedded listing numbers skip 6711, so the remaining
// arguments of ParseToEndDirective(...) and the end of the `if` condition are
// not visible in this listing. Restore them from the upstream file before
// building.
6708bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6709 std::string String;
6710 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6712 return true;
6713
6714 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6715 if (!PALMetadata->setFromString(String))
6716 return Error(getLoc(), "invalid PAL metadata");
6717 return false;
6718}
6719
6720/// Parse the assembler directive for old linear-format PAL metadata.
// The operand list is a comma-separated sequence of key, value pairs. Each
// pair is parsed as two absolute expressions and stored with setRegister().
// Parsing stops after the first pair that is not followed by a comma.
// Returns true if a diagnostic was emitted, false otherwise.
//
// NOTE(review): the embedded listing numbers skip 6734, 6738 and 6742, so the
// second operand of each `Twine(...) +` expression in the three TokError
// calls is not visible in this listing. Restore them from the upstream file
// before building.
6721bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6722 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6723 return Error(getLoc(),
6724 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6725 "not available on non-amdpal OSes")).str());
6726 }
6727
6728 auto *PALMetadata = getTargetStreamer().getPALMetadata();
  // Switch the metadata object to the legacy format before adding registers.
6729 PALMetadata->setLegacy();
6730 for (;;) {
6731 uint32_t Key, Value;
6732 if (ParseAsAbsoluteExpression(Key)) {
6733 return TokError(Twine("invalid value in ") +
6735 }
  // A key must always be followed by a comma and its value.
6736 if (!trySkipToken(AsmToken::Comma)) {
6737 return TokError(Twine("expected an even number of values in ") +
6739 }
6740 if (ParseAsAbsoluteExpression(Value)) {
6741 return TokError(Twine("invalid value in ") +
6743 }
6744 PALMetadata->setRegister(Key, Value);
6745 if (!trySkipToken(AsmToken::Comma))
6746 break;
6747 }
6748 return false;
6749}
6750
6751/// ParseDirectiveAMDGPULDS
6752/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6753bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6754 if (getParser().checkForValidSection())
6755 return true;
6756
6757 StringRef Name;
6758 SMLoc NameLoc = getLoc();
6759 if (getParser().parseIdentifier(Name))
6760 return TokError("expected identifier in directive");
6761
6762 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6763 if (getParser().parseComma())
6764 return true;
6765
6766 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(getSTI());
6767
6768 int64_t Size;
6769 SMLoc SizeLoc = getLoc();
6770 if (getParser().parseAbsoluteExpression(Size))
6771 return true;
6772 if (Size < 0)
6773 return Error(SizeLoc, "size must be non-negative");
6774 if (Size > LocalMemorySize)
6775 return Error(SizeLoc, "size is too large");
6776
6777 int64_t Alignment = 4;
6778 if (trySkipToken(AsmToken::Comma)) {
6779 SMLoc AlignLoc = getLoc();
6780 if (getParser().parseAbsoluteExpression(Alignment))
6781 return true;
6782 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6783 return Error(AlignLoc, "alignment must be a power of two");
6784
6785 // Alignment larger than the size of LDS is possible in theory, as long
6786 // as the linker manages to place to symbol at address 0, but we do want
6787 // to make sure the alignment fits nicely into a 32-bit integer.
6788 if (Alignment >= 1u << 31)
6789 return Error(AlignLoc, "alignment is too large");
6790 }
6791
6792 if (parseEOL())
6793 return true;
6794
6795 Symbol->redefineIfPossible();
6796 if (!Symbol->isUndefined())
6797 return Error(NameLoc, "invalid symbol redefinition");
6798
6799 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6800 return false;
6801}
6802
6803bool AMDGPUAsmParser::ParseDirectiveAMDGPUInfo() {
6804 if (getParser().checkForValidSection())
6805 return true;
6806
6807 StringRef FuncName;
6808 if (getParser().parseIdentifier(FuncName))
6809 return TokError("expected symbol name after .amdgpu_info");
6810
6811 MCSymbol *FuncSym = getContext().getOrCreateSymbol(FuncName);
6812 AMDGPU::InfoSectionData ParsedInfoData;
6813 AMDGPU::FuncInfo FI;
6814 FI.Sym = FuncSym;
6815 bool HasScalarAttrs = false;
6816
6817 while (true) {
6818 while (trySkipToken(AsmToken::EndOfStatement))
6819 ;
6820
6821 StringRef ID;
6822 SMLoc IDLoc = getLoc();
6823 if (!parseId(ID, "expected directive or .end_amdgpu_info"))
6824 return true;
6825
6826 if (ID == ".end_amdgpu_info")
6827 break;
6828
6829 // Every per-entry directive shares the `.amdgpu_` namespace prefix; strip
6830 // it once and dispatch on the distinguishing suffix below. The unstripped
6831 // ID is preserved for diagnostics.
6832 StringRef Dir = ID;
6833 if (!Dir.consume_front(".amdgpu_"))
6834 return Error(IDLoc, "unknown .amdgpu_info directive '" + ID + "'");
6835
6836 if (Dir == "flags") {
6837 int64_t Val;
6838 if (getParser().parseAbsoluteExpression(Val))
6839 return true;
6840 auto Flags = static_cast<AMDGPU::FuncInfoFlags>(Val);
6841 FI.UsesVCC = !!(Flags & AMDGPU::FuncInfoFlags::FUNC_USES_VCC);
6842 FI.UsesFlatScratch =
6843 !!(Flags & AMDGPU::FuncInfoFlags::FUNC_USES_FLAT_SCRATCH);
6844 FI.HasDynStack = !!(Flags & AMDGPU::FuncInfoFlags::FUNC_HAS_DYN_STACK);
6845 HasScalarAttrs = true;
6846 } else if (Dir == "num_sgpr") {
6847 int64_t Val;
6848 if (getParser().parseAbsoluteExpression(Val))
6849 return true;
6850 FI.NumSGPR = static_cast<uint32_t>(Val);
6851 HasScalarAttrs = true;
6852 } else if (Dir == "num_vgpr") {
6853 int64_t Val;
6854 if (getParser().parseAbsoluteExpression(Val))
6855 return true;
6856 FI.NumArchVGPR = static_cast<uint32_t>(Val);
6857 HasScalarAttrs = true;
6858 } else if (Dir == "num_agpr") {
6859 int64_t Val;
6860 if (getParser().parseAbsoluteExpression(Val))
6861 return true;
6862 FI.NumAccVGPR = static_cast<uint32_t>(Val);
6863 HasScalarAttrs = true;
6864 } else if (Dir == "private_segment_size") {
6865 int64_t Val;
6866 if (getParser().parseAbsoluteExpression(Val))
6867 return true;
6868 FI.PrivateSegmentSize = static_cast<uint32_t>(Val);
6869 HasScalarAttrs = true;
6870 } else if (Dir == "use") {
6871 StringRef ResName;
6872 if (getParser().parseIdentifier(ResName))
6873 return TokError("expected resource symbol for .amdgpu_use");
6874 ParsedInfoData.Uses.push_back(
6875 {FuncSym, getContext().getOrCreateSymbol(ResName)});
6876 } else if (Dir == "call") {
6877 StringRef DstName;
6878 if (getParser().parseIdentifier(DstName))
6879 return TokError("expected callee symbol for .amdgpu_call");
6880 ParsedInfoData.Calls.push_back(
6881 {FuncSym, getContext().getOrCreateSymbol(DstName)});
6882 } else if (Dir == "indirect_call") {
6883 std::string TypeId;
6884 if (getParser().parseEscapedString(TypeId))
6885 return TokError("expected type ID string for .amdgpu_indirect_call");
6886 ParsedInfoData.IndirectCalls.push_back({FuncSym, std::move(TypeId)});
6887 } else if (Dir == "typeid") {
6888 std::string TypeId;
6889 if (getParser().parseEscapedString(TypeId))
6890 return TokError("expected type ID string for .amdgpu_typeid");
6891 ParsedInfoData.TypeIds.push_back({FuncSym, std::move(TypeId)});
6892 } else {
6893 return Error(IDLoc, "unknown .amdgpu_info directive '" + ID + "'");
6894 }
6895 }
6896
6897 if (HasScalarAttrs)
6898 ParsedInfoData.Funcs.push_back(std::move(FI));
6899
6900 AMDGPU::InfoSectionData &Data = InfoData ? *InfoData : InfoData.emplace();
6901 for (AMDGPU::FuncInfo &Func : ParsedInfoData.Funcs)
6902 Data.Funcs.push_back(std::move(Func));
6903 for (std::pair<MCSymbol *, MCSymbol *> &Use : ParsedInfoData.Uses)
6904 Data.Uses.push_back(Use);
6905 for (std::pair<MCSymbol *, MCSymbol *> &Call : ParsedInfoData.Calls)
6906 Data.Calls.push_back(Call);
6907 for (std::pair<MCSymbol *, std::string> &IndirectCall :
6908 ParsedInfoData.IndirectCalls)
6909 Data.IndirectCalls.push_back(std::move(IndirectCall));
6910 for (std::pair<MCSymbol *, std::string> &TypeId : ParsedInfoData.TypeIds)
6911 Data.TypeIds.push_back(std::move(TypeId));
6912
6913 return false;
6914}
6915
6916void AMDGPUAsmParser::onEndOfFile() {
6917 if (InfoData)
6918 getTargetStreamer().emitAMDGPUInfo(*InfoData);
6919}
6920
// Dispatches an assembler directive, identified by the spelling of
// DirectiveID, to the matching AMDGPU directive parser and returns that
// parser's result. When no directive name matches, `true` is returned.
//
// The first group of directives depends on whether isHsaAbi() holds for the
// current subtarget; the remaining ones are checked regardless.
6921bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6922 StringRef IDVal = DirectiveID.getString();
6923
6924 if (isHsaAbi(getSTI())) {
6925 if (IDVal == ".amdhsa_kernel")
6926 return ParseDirectiveAMDHSAKernel();
6927
6928 if (IDVal == ".amdhsa_code_object_version")
6929 return ParseDirectiveAMDHSACodeObjectVersion();
6930
6931 // TODO: Restructure/combine with PAL metadata directive.
 // NOTE(review): the embedded listing numbers jump from 6931 to 6933, so the
 // condition guarding the return below appears to be missing from this copy
 // -- restore it from the upstream file before relying on this code.
6933 return ParseDirectiveHSAMetadata();
6934 } else {
6935 if (IDVal == ".amd_kernel_code_t")
6936 return ParseDirectiveAMDKernelCodeT();
6937
6938 if (IDVal == ".amdgpu_hsa_kernel")
6939 return ParseDirectiveAMDGPUHsaKernel();
6940
6941 if (IDVal == ".amd_amdgpu_isa")
6942 return ParseDirectiveISAVersion();
6943
 // NOTE(review): listing line 6944 is missing as well; the unmatched closing
 // brace after the Error() call suggests an `if (...) {` guard belonged here
 // (presumably a comparison against HSAMD::AssemblerDirectiveBegin -- confirm).
6945 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6946 Twine(" directive is "
6947 "not available on non-amdhsa OSes"))
6948 .str());
6949 }
6950 }
6951
 // Directives that do not depend on the HSA ABI check above.
6952 if (IDVal == ".amdgcn_target")
6953 return ParseDirectiveAMDGCNTarget();
6954
6955 if (IDVal == ".amdgpu_lds")
6956 return ParseDirectiveAMDGPULDS();
6957
6958 if (IDVal == ".amdgpu_info")
6959 return ParseDirectiveAMDGPUInfo();
6960
6961 if (IDVal == PALMD::AssemblerDirectiveBegin)
6962 return ParseDirectivePALMetadataBegin();
6963
6964 if (IDVal == PALMD::AssemblerDirective)
6965 return ParseDirectivePALMetadata();
6966
 // No AMDGPU-specific directive matched.
6967 return true;
6968}
6969
6970bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6971 MCRegister Reg) {
6972 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
6973 return isGFX9Plus();
6974
6975 // GFX10+ has 2 more SGPRs 104 and 105.
6976 if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
6977 return hasSGPR104_SGPR105();
6978
6979 switch (Reg.id()) {
6980 case SRC_SHARED_BASE_LO:
6981 case SRC_SHARED_BASE:
6982 case SRC_SHARED_LIMIT_LO:
6983 case SRC_SHARED_LIMIT:
6984 case SRC_PRIVATE_BASE_LO:
6985 case SRC_PRIVATE_BASE:
6986 case SRC_PRIVATE_LIMIT_LO:
6987 case SRC_PRIVATE_LIMIT:
6988 return isGFX9Plus();
6989 case SRC_FLAT_SCRATCH_BASE_LO:
6990 case SRC_FLAT_SCRATCH_BASE_HI:
6991 return hasGloballyAddressableScratch();
6992 case SRC_POPS_EXITING_WAVE_ID:
6993 return isGFX9Plus() && !isGFX11Plus();
6994 case TBA:
6995 case TBA_LO:
6996 case TBA_HI:
6997 case TMA:
6998 case TMA_LO:
6999 case TMA_HI:
7000 return !isGFX9Plus();
7001 case XNACK_MASK:
7002 case XNACK_MASK_LO:
7003 case XNACK_MASK_HI:
7004 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
7005 case SGPR_NULL:
7006 return isGFX10Plus();
7007 case SRC_EXECZ:
7008 case SRC_VCCZ:
7009 return !isGFX11Plus();
7010 default:
7011 break;
7012 }
7013
7014 if (isCI())
7015 return true;
7016
7017 if (isSI() || isGFX10Plus()) {
7018 // No flat_scr on SI.
7019 // On GFX10Plus flat scratch is not a valid register operand and can only be
7020 // accessed with s_setreg/s_getreg.
7021 switch (Reg.id()) {
7022 case FLAT_SCR:
7023 case FLAT_SCR_LO:
7024 case FLAT_SCR_HI:
7025 return false;
7026 default:
7027 return true;
7028 }
7029 }
7030
7031 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
7032 // SI/CI have.
7033 if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
7034 return hasSGPR102_SGPR103();
7035
7036 return true;
7037}
7038
7039ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
7040 StringRef Mnemonic,
7041 OperandMode Mode) {
7042 ParseStatus Res = parseVOPD(Operands);
7043 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
7044 return Res;
7045
7046 // Try to parse with a custom parser
7047 Res = MatchOperandParserImpl(Operands, Mnemonic);
7048
7049 // If we successfully parsed the operand or if there as an error parsing,
7050 // we are done.
7051 //
7052 // If we are parsing after we reach EndOfStatement then this means we
7053 // are appending default values to the Operands list. This is only done
7054 // by custom parser, so we shouldn't continue on to the generic parsing.
7055 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
7056 return Res;
7057
7058 SMLoc RBraceLoc;
7059 SMLoc LBraceLoc = getLoc();
7060 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
7061 unsigned Prefix = Operands.size();
7062
7063 for (;;) {
7064 auto Loc = getLoc();
7065 Res = parseReg(Operands);
7066 if (Res.isNoMatch())
7067 Error(Loc, "expected a register");
7068 if (!Res.isSuccess())
7069 return ParseStatus::Failure;
7070
7071 RBraceLoc = getLoc();
7072 if (trySkipToken(AsmToken::RBrac))
7073 break;
7074
7075 if (!skipToken(AsmToken::Comma,
7076 "expected a comma or a closing square bracket"))
7077 return ParseStatus::Failure;
7078 }
7079
7080 if (Operands.size() - Prefix > 1) {
7081 Operands.insert(Operands.begin() + Prefix,
7082 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
7083 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
7084 }
7085
7086 return ParseStatus::Success;
7087 }
7088
7089 return parseRegOrImm(Operands);
7090}
7091
7092StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
7093 // Clear any forced encodings from the previous instruction.
7094 setForcedEncodingSize(0);
7095 setForcedDPP(false);
7096 setForcedSDWA(false);
7097
7098 if (Name.consume_back("_e64_dpp")) {
7099 setForcedDPP(true);
7100 setForcedEncodingSize(64);
7101 return Name;
7102 }
7103 if (Name.consume_back("_e64")) {
7104 setForcedEncodingSize(64);
7105 return Name;
7106 }
7107 if (Name.consume_back("_e32")) {
7108 setForcedEncodingSize(32);
7109 return Name;
7110 }
7111 if (Name.consume_back("_dpp")) {
7112 setForcedDPP(true);
7113 return Name;
7114 }
7115 if (Name.consume_back("_sdwa")) {
7116 setForcedSDWA(true);
7117 return Name;
7118 }
7119 return Name;
7120}
7121
// Forward declaration; the definition is not part of this section of the
// file. parseInstruction() below calls it with the mnemonic (passed by
// reference), the available feature set, and variant ID 0.
7122static void applyMnemonicAliases(StringRef &Mnemonic,
7123 const FeatureBitset &Features,
7124 unsigned VariantID);
7125
7126bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
7127 StringRef Name, SMLoc NameLoc,
7128 OperandVector &Operands) {
7129 // Add the instruction mnemonic
7130 Name = parseMnemonicSuffix(Name);
7131
7132 // If the target architecture uses MnemonicAlias, call it here to parse
7133 // operands correctly.
7134 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
7135
7136 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
7137
7138 bool IsMIMG = Name.starts_with("image_");
7139
7140 while (!trySkipToken(AsmToken::EndOfStatement)) {
7141 OperandMode Mode = OperandMode_Default;
7142 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
7143 Mode = OperandMode_NSA;
7144 ParseStatus Res = parseOperand(Operands, Name, Mode);
7145
7146 if (!Res.isSuccess()) {
7147 checkUnsupportedInstruction(Name, NameLoc);
7148 if (!Parser.hasPendingError()) {
7149 // FIXME: use real operand location rather than the current location.
7150 StringRef Msg = Res.isFailure() ? "failed parsing operand."
7151 : "not a valid operand.";
7152 Error(getLoc(), Msg);
7153 }
7154 while (!trySkipToken(AsmToken::EndOfStatement)) {
7155 lex();
7156 }
7157 return true;
7158 }
7159
7160 // Eat the comma or space if there is one.
7161 trySkipToken(AsmToken::Comma);
7162 }
7163
7164 return false;
7165}
7166
7167//===----------------------------------------------------------------------===//
7168// Utility functions
7169//===----------------------------------------------------------------------===//
7170
7171ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
7172 OperandVector &Operands) {
7173 SMLoc S = getLoc();
7174 if (!trySkipId(Name))
7175 return ParseStatus::NoMatch;
7176
7177 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
7178 return ParseStatus::Success;
7179}
7180
7181ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
7182 int64_t &IntVal) {
7183
7184 if (!trySkipId(Prefix, AsmToken::Colon))
7185 return ParseStatus::NoMatch;
7186
7188}
7189
7190ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
7191 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7192 std::function<bool(int64_t &)> ConvertResult) {
7193 SMLoc S = getLoc();
7194 int64_t Value = 0;
7195
7196 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
7197 if (!Res.isSuccess())
7198 return Res;
7199
7200 if (ConvertResult && !ConvertResult(Value)) {
7201 Error(S, "invalid " + StringRef(Prefix) + " value.");
7202 }
7203
7204 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
7205 return ParseStatus::Success;
7206}
7207
7208ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7209 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7210 bool (*ConvertResult)(int64_t &)) {
7211 SMLoc S = getLoc();
7212 if (!trySkipId(Prefix, AsmToken::Colon))
7213 return ParseStatus::NoMatch;
7214
7215 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
7216 return ParseStatus::Failure;
7217
7218 unsigned Val = 0;
7219 const unsigned MaxSize = 4;
7220
7221 // FIXME: How to verify the number of elements matches the number of src
7222 // operands?
7223 for (int I = 0; ; ++I) {
7224 int64_t Op;
7225 SMLoc Loc = getLoc();
7226 if (!parseExpr(Op))
7227 return ParseStatus::Failure;
7228
7229 if (Op != 0 && Op != 1)
7230 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
7231
7232 Val |= (Op << I);
7233
7234 if (trySkipToken(AsmToken::RBrac))
7235 break;
7236
7237 if (I + 1 == MaxSize)
7238 return Error(getLoc(), "expected a closing square bracket");
7239
7240 if (!skipToken(AsmToken::Comma, "expected a comma"))
7241 return ParseStatus::Failure;
7242 }
7243
7244 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
7245 return ParseStatus::Success;
7246}
7247
7248ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7249 OperandVector &Operands,
7250 AMDGPUOperand::ImmTy ImmTy,
7251 bool IgnoreNegative) {
7252 int64_t Bit;
7253 SMLoc S = getLoc();
7254
7255 if (trySkipId(Name)) {
7256 Bit = 1;
7257 } else if (trySkipId("no", Name)) {
7258 if (IgnoreNegative)
7259 return ParseStatus::Success;
7260 Bit = 0;
7261 } else {
7262 return ParseStatus::NoMatch;
7263 }
7264
7265 if (Name == "r128" && !hasMIMG_R128())
7266 return Error(S, "r128 modifier is not supported on this GPU");
7267 if (Name == "a16" && !hasA16())
7268 return Error(S, "a16 modifier is not supported on this GPU");
7269
7270 if (Bit == 0 && Name == "gds") {
7271 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7272 if (Mnemo.starts_with("ds_gws"))
7273 return Error(S, "nogds is not allowed");
7274 }
7275
7276 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7277 ImmTy = AMDGPUOperand::ImmTyR128A16;
7278
7279 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
7280 return ParseStatus::Success;
7281}
7282
7283unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7284 bool &Disabling) const {
7285 Disabling = Id.consume_front("no");
7286
7287 if (isGFX940() && !Mnemo.starts_with("s_")) {
7288 return StringSwitch<unsigned>(Id)
7289 .Case("nt", AMDGPU::CPol::NT)
7290 .Case("sc0", AMDGPU::CPol::SC0)
7291 .Case("sc1", AMDGPU::CPol::SC1)
7292 .Default(0);
7293 }
7294
7295 return StringSwitch<unsigned>(Id)
7296 .Case("dlc", AMDGPU::CPol::DLC)
7297 .Case("glc", AMDGPU::CPol::GLC)
7298 .Case("scc", AMDGPU::CPol::SCC)
7299 .Case("slc", AMDGPU::CPol::SLC)
7300 .Default(0);
7301}
7302
7303ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
7304 if (isGFX12Plus()) {
7305 SMLoc StringLoc = getLoc();
7306
7307 int64_t CPolVal = 0;
7308 ParseStatus ResTH = ParseStatus::NoMatch;
7309 ParseStatus ResScope = ParseStatus::NoMatch;
7310 ParseStatus ResNV = ParseStatus::NoMatch;
7311 ParseStatus ResScal = ParseStatus::NoMatch;
7312
7313 for (;;) {
7314 if (ResTH.isNoMatch()) {
7315 int64_t TH;
7316 ResTH = parseTH(Operands, TH);
7317 if (ResTH.isFailure())
7318 return ResTH;
7319 if (ResTH.isSuccess()) {
7320 CPolVal |= TH;
7321 continue;
7322 }
7323 }
7324
7325 if (ResScope.isNoMatch()) {
7326 int64_t Scope;
7327 ResScope = parseScope(Operands, Scope);
7328 if (ResScope.isFailure())
7329 return ResScope;
7330 if (ResScope.isSuccess()) {
7331 CPolVal |= Scope;
7332 continue;
7333 }
7334 }
7335
7336 // NV bit exists on GFX12+, but does something starting from GFX1250.
7337 // Allow parsing on all GFX12 and fail on validation for better
7338 // diagnostics.
7339 if (ResNV.isNoMatch()) {
7340 if (trySkipId("nv")) {
7341 ResNV = ParseStatus::Success;
7342 CPolVal |= CPol::NV;
7343 continue;
7344 } else if (trySkipId("no", "nv")) {
7345 ResNV = ParseStatus::Success;
7346 continue;
7347 }
7348 }
7349
7350 if (ResScal.isNoMatch()) {
7351 if (trySkipId("scale_offset")) {
7352 ResScal = ParseStatus::Success;
7353 CPolVal |= CPol::SCAL;
7354 continue;
7355 } else if (trySkipId("no", "scale_offset")) {
7356 ResScal = ParseStatus::Success;
7357 continue;
7358 }
7359 }
7360
7361 break;
7362 }
7363
7364 if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
7365 ResScal.isNoMatch())
7366 return ParseStatus::NoMatch;
7367
7368 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
7369 AMDGPUOperand::ImmTyCPol));
7370 return ParseStatus::Success;
7371 }
7372
7373 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7374 SMLoc OpLoc = getLoc();
7375 unsigned Enabled = 0, Seen = 0;
7376 for (;;) {
7377 SMLoc S = getLoc();
7378 bool Disabling;
7379 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7380 if (!CPol)
7381 break;
7382
7383 lex();
7384
7385 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
7386 return Error(S, "dlc modifier is not supported on this GPU");
7387
7388 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
7389 return Error(S, "scc modifier is not supported on this GPU");
7390
7391 if (Seen & CPol)
7392 return Error(S, "duplicate cache policy modifier");
7393
7394 if (!Disabling)
7395 Enabled |= CPol;
7396
7397 Seen |= CPol;
7398 }
7399
7400 if (!Seen)
7401 return ParseStatus::NoMatch;
7402
7403 Operands.push_back(
7404 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7405 return ParseStatus::Success;
7406}
7407
7408ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7409 int64_t &Scope) {
7410 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7412
7413 ParseStatus Res = parseStringOrIntWithPrefix(
7414 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7415 Scope);
7416
7417 if (Res.isSuccess())
7418 Scope = Scopes[Scope];
7419
7420 return Res;
7421}
7422
// Parses a "th:<name>" modifier and stores the resulting encoding in TH.
//
// TH is first set to CPol::TH_RT. If parseStringWithPrefix("th", ...) does not
// succeed, its status is returned unchanged. Unrecognised names are diagnosed
// with "invalid th value".
//
// NOTE(review): several lines of this function are missing from this copy (the
// embedded listing numbers skip 7433, 7438, 7440, 7442, 7448, 7451, 7458 and
// 7461). The bodies of most branches below are therefore not visible here and
// must be restored from the upstream file; the comments describe only what is
// shown.
7423ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
7424 TH = AMDGPU::CPol::TH_RT; // default
7425
7426 StringRef Value;
7427 SMLoc StringLoc;
7428 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
7429 if (!Res.isSuccess())
7430 return Res;
7431
 // Classify the name. The TH_ATOMIC_/TH_LOAD_/TH_STORE_ prefixes are stripped
 // from Value by consume_front(), leaving only the suffix for the lookups
 // further down.
7432 if (Value == "TH_DEFAULT")
7434 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
7435 Value == "TH_LOAD_NT_WB") {
 // These three spellings are rejected explicitly.
7436 return Error(StringLoc, "invalid th value");
7437 } else if (Value.consume_front("TH_ATOMIC_")) {
7439 } else if (Value.consume_front("TH_LOAD_")) {
7441 } else if (Value.consume_front("TH_STORE_")) {
7443 } else {
7444 return Error(StringLoc, "invalid th value");
7445 }
7446
7447 if (Value == "BYPASS")
7449
7450 if (TH != 0) {
 // NOTE(review): the condition selecting between the two lookup tables below
 // (listing line 7451) is missing; the first table carries the TH_ATOMIC_*
 // encodings, the second the remaining ones.
7452 TH |= StringSwitch<int64_t>(Value)
7453 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7454 .Case("RT", AMDGPU::CPol::TH_RT)
7455 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7456 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
7457 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
7459 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
7460 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
 // 0xffffffff marks an unknown suffix; it is tested after the lookup.
7462 .Default(0xffffffff);
7463 else
7464 TH |= StringSwitch<int64_t>(Value)
7465 .Case("RT", AMDGPU::CPol::TH_RT)
7466 .Case("NT", AMDGPU::CPol::TH_NT)
7467 .Case("HT", AMDGPU::CPol::TH_HT)
7468 .Case("LU", AMDGPU::CPol::TH_LU)
7469 .Case("WB", AMDGPU::CPol::TH_WB)
7470 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
7471 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
7472 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
7473 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
7474 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
7475 .Default(0xffffffff);
7476 }
7477
 // The lookup tables use 0xffffffff as their "no such name" value.
7478 if (TH == 0xffffffff)
7479 return Error(StringLoc, "invalid th value");
7480
7481 return ParseStatus::Success;
7482}
7483
7484static void
7486 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7487 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7488 std::optional<unsigned> InsertAt = std::nullopt) {
7489 auto i = OptionalIdx.find(ImmT);
7490 if (i != OptionalIdx.end()) {
7491 unsigned Idx = i->second;
7492 const AMDGPUOperand &Op =
7493 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7494 if (InsertAt)
7495 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
7496 else
7497 Op.addImmOperands(Inst, 1);
7498 } else {
7499 if (InsertAt.has_value())
7500 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
7501 else
7503 }
7504}
7505
7506ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7507 StringRef &Value,
7508 SMLoc &StringLoc) {
7509 if (!trySkipId(Prefix, AsmToken::Colon))
7510 return ParseStatus::NoMatch;
7511
7512 StringLoc = getLoc();
7513 return parseId(Value, "expected an identifier") ? ParseStatus::Success
7515}
7516
7517ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7518 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7519 int64_t &IntVal) {
7520 if (!trySkipId(Name, AsmToken::Colon))
7521 return ParseStatus::NoMatch;
7522
7523 SMLoc StringLoc = getLoc();
7524
7525 StringRef Value;
7526 if (isToken(AsmToken::Identifier)) {
7527 Value = getTokenStr();
7528 lex();
7529
7530 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7531 if (Value == Ids[IntVal])
7532 break;
7533 } else if (!parseExpr(IntVal))
7534 return ParseStatus::Failure;
7535
7536 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7537 return Error(StringLoc, "invalid " + Twine(Name) + " value");
7538
7539 return ParseStatus::Success;
7540}
7541
7542ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7543 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7544 AMDGPUOperand::ImmTy Type) {
7545 SMLoc S = getLoc();
7546 int64_t IntVal;
7547
7548 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7549 if (Res.isSuccess())
7550 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7551
7552 return Res;
7553}
7554
7555//===----------------------------------------------------------------------===//
7556// MTBUF format
7557//===----------------------------------------------------------------------===//
7558
7559bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7560 int64_t MaxVal,
7561 int64_t &Fmt) {
7562 int64_t Val;
7563 SMLoc Loc = getLoc();
7564
7565 auto Res = parseIntWithPrefix(Pref, Val);
7566 if (Res.isFailure())
7567 return false;
7568 if (Res.isNoMatch())
7569 return true;
7570
7571 if (Val < 0 || Val > MaxVal) {
7572 Error(Loc, Twine("out of range ", StringRef(Pref)));
7573 return false;
7574 }
7575
7576 Fmt = Val;
7577 return true;
7578}
7579
7580ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7581 AMDGPUOperand::ImmTy ImmTy) {
7582 const char *Pref = "index_key";
7583 int64_t ImmVal = 0;
7584 SMLoc Loc = getLoc();
7585 auto Res = parseIntWithPrefix(Pref, ImmVal);
7586 if (!Res.isSuccess())
7587 return Res;
7588
7589 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7590 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7591 (ImmVal < 0 || ImmVal > 1))
7592 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7593
7594 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7595 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7596
7597 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7598 return ParseStatus::Success;
7599}
7600
7601ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
7602 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7603}
7604
7605ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
7606 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7607}
7608
7609ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
7610 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7611}
7612
7613ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
7614 StringRef Name,
7615 AMDGPUOperand::ImmTy Type) {
7616 return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixFmt,
7617 Type);
7618}
7619
7620ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
7621 return tryParseMatrixFMT(Operands, "matrix_a_fmt",
7622 AMDGPUOperand::ImmTyMatrixAFMT);
7623}
7624
7625ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
7626 return tryParseMatrixFMT(Operands, "matrix_b_fmt",
7627 AMDGPUOperand::ImmTyMatrixBFMT);
7628}
7629
7630ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
7631 StringRef Name,
7632 AMDGPUOperand::ImmTy Type) {
7633 return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixScale,
7634 Type);
7635}
7636
7637ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
7638 return tryParseMatrixScale(Operands, "matrix_a_scale",
7639 AMDGPUOperand::ImmTyMatrixAScale);
7640}
7641
7642ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
7643 return tryParseMatrixScale(Operands, "matrix_b_scale",
7644 AMDGPUOperand::ImmTyMatrixBScale);
7645}
7646
7647ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
7648 StringRef Name,
7649 AMDGPUOperand::ImmTy Type) {
7650 return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixScaleFmt,
7651 Type);
7652}
7653
7654ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
7655 return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
7656 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7657}
7658
7659ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
7660 return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
7661 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7662}
7663
7664// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7665// values to live in a joint format operand in the MCInst encoding.
7666ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
7667 using namespace llvm::AMDGPU::MTBUFFormat;
7668
7669 int64_t Dfmt = DFMT_UNDEF;
7670 int64_t Nfmt = NFMT_UNDEF;
7671
7672 // dfmt and nfmt can appear in either order, and each is optional.
7673 for (int I = 0; I < 2; ++I) {
7674 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
7675 return ParseStatus::Failure;
7676
7677 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
7678 return ParseStatus::Failure;
7679
7680 // Skip optional comma between dfmt/nfmt
7681 // but guard against 2 commas following each other.
7682 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7683 !peekToken().is(AsmToken::Comma)) {
7684 trySkipToken(AsmToken::Comma);
7685 }
7686 }
7687
7688 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7689 return ParseStatus::NoMatch;
7690
7691 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7692 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7693
7694 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7695 return ParseStatus::Success;
7696}
7697
7698ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7699 using namespace llvm::AMDGPU::MTBUFFormat;
7700
7701 int64_t Fmt = UFMT_UNDEF;
7702
7703 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7704 return ParseStatus::Failure;
7705
7706 if (Fmt == UFMT_UNDEF)
7707 return ParseStatus::NoMatch;
7708
7709 Format = Fmt;
7710 return ParseStatus::Success;
7711}
7712
7713bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7714 int64_t &Nfmt,
7715 StringRef FormatStr,
7716 SMLoc Loc) {
7717 using namespace llvm::AMDGPU::MTBUFFormat;
7718 int64_t Format;
7719
7720 Format = getDfmt(FormatStr);
7721 if (Format != DFMT_UNDEF) {
7722 Dfmt = Format;
7723 return true;
7724 }
7725
7726 Format = getNfmt(FormatStr, getSTI());
7727 if (Format != NFMT_UNDEF) {
7728 Nfmt = Format;
7729 return true;
7730 }
7731
7732 Error(Loc, "unsupported format");
7733 return false;
7734}
7735
7736ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7737 SMLoc FormatLoc,
7738 int64_t &Format) {
7739 using namespace llvm::AMDGPU::MTBUFFormat;
7740
7741 int64_t Dfmt = DFMT_UNDEF;
7742 int64_t Nfmt = NFMT_UNDEF;
7743 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7744 return ParseStatus::Failure;
7745
7746 if (trySkipToken(AsmToken::Comma)) {
7747 StringRef Str;
7748 SMLoc Loc = getLoc();
7749 if (!parseId(Str, "expected a format string") ||
7750 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7751 return ParseStatus::Failure;
7752 if (Dfmt == DFMT_UNDEF)
7753 return Error(Loc, "duplicate numeric format");
7754 if (Nfmt == NFMT_UNDEF)
7755 return Error(Loc, "duplicate data format");
7756 }
7757
7758 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7759 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7760
7761 if (isGFX10Plus()) {
7762 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
7763 if (Ufmt == UFMT_UNDEF)
7764 return Error(FormatLoc, "unsupported format");
7765 Format = Ufmt;
7766 } else {
7767 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7768 }
7769
7770 return ParseStatus::Success;
7771}
7772
7773ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7774 SMLoc Loc,
7775 int64_t &Format) {
7776 using namespace llvm::AMDGPU::MTBUFFormat;
7777
7778 auto Id = getUnifiedFormat(FormatStr, getSTI());
7779 if (Id == UFMT_UNDEF)
7780 return ParseStatus::NoMatch;
7781
7782 if (!isGFX10Plus())
7783 return Error(Loc, "unified format is not supported on this GPU");
7784
7785 Format = Id;
7786 return ParseStatus::Success;
7787}
7788
7789ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7790 using namespace llvm::AMDGPU::MTBUFFormat;
7791 SMLoc Loc = getLoc();
7792
7793 if (!parseExpr(Format))
7794 return ParseStatus::Failure;
7795 if (!isValidFormatEncoding(Format, getSTI()))
7796 return Error(Loc, "out of range format");
7797
7798 return ParseStatus::Success;
7799}
7800
7801ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7802 using namespace llvm::AMDGPU::MTBUFFormat;
7803
7804 if (!trySkipId("format", AsmToken::Colon))
7805 return ParseStatus::NoMatch;
7806
7807 if (trySkipToken(AsmToken::LBrac)) {
7808 StringRef FormatStr;
7809 SMLoc Loc = getLoc();
7810 if (!parseId(FormatStr, "expected a format string"))
7811 return ParseStatus::Failure;
7812
7813 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7814 if (Res.isNoMatch())
7815 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7816 if (!Res.isSuccess())
7817 return Res;
7818
7819 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7820 return ParseStatus::Failure;
7821
7822 return ParseStatus::Success;
7823 }
7824
7825 return parseNumericFormat(Format);
7826}
7827
7828ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
7829 using namespace llvm::AMDGPU::MTBUFFormat;
7830
7831 int64_t Format = getDefaultFormatEncoding(getSTI());
7832 ParseStatus Res;
7833 SMLoc Loc = getLoc();
7834
7835 // Parse legacy format syntax.
7836 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
7837 if (Res.isFailure())
7838 return Res;
7839
7840 bool FormatFound = Res.isSuccess();
7841
7842 Operands.push_back(
7843 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7844
7845 if (FormatFound)
7846 trySkipToken(AsmToken::Comma);
7847
7848 if (isToken(AsmToken::EndOfStatement)) {
7849 // We are expecting an soffset operand,
7850 // but let matcher handle the error.
7851 return ParseStatus::Success;
7852 }
7853
7854 // Parse soffset.
7855 Res = parseRegOrImm(Operands);
7856 if (!Res.isSuccess())
7857 return Res;
7858
7859 trySkipToken(AsmToken::Comma);
7860
7861 if (!FormatFound) {
7862 Res = parseSymbolicOrNumericFormat(Format);
7863 if (Res.isFailure())
7864 return Res;
7865 if (Res.isSuccess()) {
7866 auto Size = Operands.size();
7867 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7868 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7869 Op.setImm(Format);
7870 }
7871 return ParseStatus::Success;
7872 }
7873
7874 if (isId("format") && peekToken().is(AsmToken::Colon))
7875 return Error(getLoc(), "duplicate format");
7876 return ParseStatus::Success;
7877}
7878
7879ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7880 ParseStatus Res =
7881 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7882 if (Res.isNoMatch()) {
7883 Res = parseIntWithPrefix("inst_offset", Operands,
7884 AMDGPUOperand::ImmTyInstOffset);
7885 }
7886 return Res;
7887}
7888
7889ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7890 ParseStatus Res =
7891 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7892 if (Res.isNoMatch())
7893 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7894 return Res;
7895}
7896
7897ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7898 ParseStatus Res =
7899 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7900 if (Res.isNoMatch()) {
7901 Res =
7902 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7903 }
7904 return Res;
7905}
7906
7907//===----------------------------------------------------------------------===//
7908// Exp
7909//===----------------------------------------------------------------------===//
7910
7911void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7912 OptionalImmIndexMap OptionalIdx;
7913
7914 unsigned OperandIdx[4];
7915 unsigned EnMask = 0;
7916 int SrcIdx = 0;
7917
7918 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7919 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7920
7921 // Add the register arguments
7922 if (Op.isReg()) {
7923 assert(SrcIdx < 4);
7924 OperandIdx[SrcIdx] = Inst.size();
7925 Op.addRegOperands(Inst, 1);
7926 ++SrcIdx;
7927 continue;
7928 }
7929
7930 if (Op.isOff()) {
7931 assert(SrcIdx < 4);
7932 OperandIdx[SrcIdx] = Inst.size();
7933 Inst.addOperand(MCOperand::createReg(MCRegister()));
7934 ++SrcIdx;
7935 continue;
7936 }
7937
7938 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7939 Op.addImmOperands(Inst, 1);
7940 continue;
7941 }
7942
7943 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7944 continue;
7945
7946 // Handle optional arguments
7947 OptionalIdx[Op.getImmTy()] = i;
7948 }
7949
7950 assert(SrcIdx == 4);
7951
7952 bool Compr = false;
7953 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7954 Compr = true;
7955 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7956 Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
7957 Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
7958 }
7959
7960 for (auto i = 0; i < SrcIdx; ++i) {
7961 if (Inst.getOperand(OperandIdx[i]).getReg()) {
7962 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7963 }
7964 }
7965
7966 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7967 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7968
7969 Inst.addOperand(MCOperand::createImm(EnMask));
7970}
7971
7972//===----------------------------------------------------------------------===//
7973// s_waitcnt
7974//===----------------------------------------------------------------------===//
7975
7976static bool
7978 const AMDGPU::IsaVersion ISA,
7979 int64_t &IntVal,
7980 int64_t CntVal,
7981 bool Saturate,
7982 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7983 unsigned (*decode)(const IsaVersion &Version, unsigned))
7984{
7985 bool Failed = false;
7986
7987 IntVal = encode(ISA, IntVal, CntVal);
7988 if (CntVal != decode(ISA, IntVal)) {
7989 if (Saturate) {
7990 IntVal = encode(ISA, IntVal, -1);
7991 } else {
7992 Failed = true;
7993 }
7994 }
7995 return Failed;
7996}
7997
7998bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7999
8000 SMLoc CntLoc = getLoc();
8001 StringRef CntName = getTokenStr();
8002
8003 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
8004 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
8005 return false;
8006
8007 int64_t CntVal;
8008 SMLoc ValLoc = getLoc();
8009 if (!parseExpr(CntVal))
8010 return false;
8011
8012 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
8013
8014 bool Failed = true;
8015 bool Sat = CntName.ends_with("_sat");
8016
8017 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
8018 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
8019 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
8020 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
8021 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
8022 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
8023 } else {
8024 Error(CntLoc, "invalid counter name " + CntName);
8025 return false;
8026 }
8027
8028 if (Failed) {
8029 Error(ValLoc, "too large value for " + CntName);
8030 return false;
8031 }
8032
8033 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8034 return false;
8035
8036 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
8037 if (isToken(AsmToken::EndOfStatement)) {
8038 Error(getLoc(), "expected a counter name");
8039 return false;
8040 }
8041 }
8042
8043 return true;
8044}
8045
8046ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
8047 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
8048 int64_t Waitcnt = getWaitcntBitMask(ISA);
8049 SMLoc S = getLoc();
8050
8051 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8052 while (!isToken(AsmToken::EndOfStatement)) {
8053 if (!parseCnt(Waitcnt))
8054 return ParseStatus::Failure;
8055 }
8056 } else {
8057 if (!parseExpr(Waitcnt))
8058 return ParseStatus::Failure;
8059 }
8060
8061 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
8062 return ParseStatus::Success;
8063}
8064
8065bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
8066 SMLoc FieldLoc = getLoc();
8067 StringRef FieldName = getTokenStr();
8068 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
8069 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
8070 return false;
8071
8072 SMLoc ValueLoc = getLoc();
8073 StringRef ValueName = getTokenStr();
8074 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
8075 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
8076 return false;
8077
8078 unsigned Shift;
8079 if (FieldName == "instid0") {
8080 Shift = 0;
8081 } else if (FieldName == "instskip") {
8082 Shift = 4;
8083 } else if (FieldName == "instid1") {
8084 Shift = 7;
8085 } else {
8086 Error(FieldLoc, "invalid field name " + FieldName);
8087 return false;
8088 }
8089
8090 int Value;
8091 if (Shift == 4) {
8092 // Parse values for instskip.
8093 Value = StringSwitch<int>(ValueName)
8094 .Case("SAME", 0)
8095 .Case("NEXT", 1)
8096 .Case("SKIP_1", 2)
8097 .Case("SKIP_2", 3)
8098 .Case("SKIP_3", 4)
8099 .Case("SKIP_4", 5)
8100 .Default(-1);
8101 } else {
8102 // Parse values for instid0 and instid1.
8103 Value = StringSwitch<int>(ValueName)
8104 .Case("NO_DEP", 0)
8105 .Case("VALU_DEP_1", 1)
8106 .Case("VALU_DEP_2", 2)
8107 .Case("VALU_DEP_3", 3)
8108 .Case("VALU_DEP_4", 4)
8109 .Case("TRANS32_DEP_1", 5)
8110 .Case("TRANS32_DEP_2", 6)
8111 .Case("TRANS32_DEP_3", 7)
8112 .Case("FMA_ACCUM_CYCLE_1", 8)
8113 .Case("SALU_CYCLE_1", 9)
8114 .Case("SALU_CYCLE_2", 10)
8115 .Case("SALU_CYCLE_3", 11)
8116 .Default(-1);
8117 }
8118 if (Value < 0) {
8119 Error(ValueLoc, "invalid value name " + ValueName);
8120 return false;
8121 }
8122
8123 Delay |= Value << Shift;
8124 return true;
8125}
8126
8127ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
8128 int64_t Delay = 0;
8129 SMLoc S = getLoc();
8130
8131 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8132 do {
8133 if (!parseDelay(Delay))
8134 return ParseStatus::Failure;
8135 } while (trySkipToken(AsmToken::Pipe));
8136 } else {
8137 if (!parseExpr(Delay))
8138 return ParseStatus::Failure;
8139 }
8140
8141 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
8142 return ParseStatus::Success;
8143}
8144
8145bool
8146AMDGPUOperand::isSWaitCnt() const {
8147 return isImm();
8148}
8149
8150bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
8151
8152//===----------------------------------------------------------------------===//
8153// DepCtr
8154//===----------------------------------------------------------------------===//
8155
8156void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
8157 StringRef DepCtrName) {
8158 switch (ErrorId) {
8159 case OPR_ID_UNKNOWN:
8160 Error(Loc, Twine("invalid counter name ", DepCtrName));
8161 return;
8162 case OPR_ID_UNSUPPORTED:
8163 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
8164 return;
8165 case OPR_ID_DUPLICATE:
8166 Error(Loc, Twine("duplicate counter name ", DepCtrName));
8167 return;
8168 case OPR_VAL_INVALID:
8169 Error(Loc, Twine("invalid value for ", DepCtrName));
8170 return;
8171 default:
8172 assert(false);
8173 }
8174}
8175
8176bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
8177
8178 using namespace llvm::AMDGPU::DepCtr;
8179
8180 SMLoc DepCtrLoc = getLoc();
8181 StringRef DepCtrName = getTokenStr();
8182
8183 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
8184 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
8185 return false;
8186
8187 int64_t ExprVal;
8188 if (!parseExpr(ExprVal))
8189 return false;
8190
8191 unsigned PrevOprMask = UsedOprMask;
8192 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
8193
8194 if (CntVal < 0) {
8195 depCtrError(DepCtrLoc, CntVal, DepCtrName);
8196 return false;
8197 }
8198
8199 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8200 return false;
8201
8202 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
8203 if (isToken(AsmToken::EndOfStatement)) {
8204 Error(getLoc(), "expected a counter name");
8205 return false;
8206 }
8207 }
8208
8209 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
8210 DepCtr = (DepCtr & ~CntValMask) | CntVal;
8211 return true;
8212}
8213
8214ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
8215 using namespace llvm::AMDGPU::DepCtr;
8216
8217 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
8218 SMLoc Loc = getLoc();
8219
8220 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8221 unsigned UsedOprMask = 0;
8222 while (!isToken(AsmToken::EndOfStatement)) {
8223 if (!parseDepCtr(DepCtr, UsedOprMask))
8224 return ParseStatus::Failure;
8225 }
8226 } else {
8227 if (!parseExpr(DepCtr))
8228 return ParseStatus::Failure;
8229 }
8230
8231 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
8232 return ParseStatus::Success;
8233}
8234
8235bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8236
8237//===----------------------------------------------------------------------===//
8238// hwreg
8239//===----------------------------------------------------------------------===//
8240
8241ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8242 OperandInfoTy &Offset,
8243 OperandInfoTy &Width) {
8244 using namespace llvm::AMDGPU::Hwreg;
8245
8246 if (!trySkipId("hwreg", AsmToken::LParen))
8247 return ParseStatus::NoMatch;
8248
8249 // The register may be specified by name or using a numeric code
8250 HwReg.Loc = getLoc();
8251 if (isToken(AsmToken::Identifier) &&
8252 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8253 HwReg.IsSymbolic = true;
8254 lex(); // skip register name
8255 } else if (!parseExpr(HwReg.Val, "a register name")) {
8256 return ParseStatus::Failure;
8257 }
8258
8259 if (trySkipToken(AsmToken::RParen))
8260 return ParseStatus::Success;
8261
8262 // parse optional params
8263 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
8264 return ParseStatus::Failure;
8265
8266 Offset.Loc = getLoc();
8267 if (!parseExpr(Offset.Val))
8268 return ParseStatus::Failure;
8269
8270 if (!skipToken(AsmToken::Comma, "expected a comma"))
8271 return ParseStatus::Failure;
8272
8273 Width.Loc = getLoc();
8274 if (!parseExpr(Width.Val) ||
8275 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8276 return ParseStatus::Failure;
8277
8278 return ParseStatus::Success;
8279}
8280
8281ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8282 using namespace llvm::AMDGPU::Hwreg;
8283
8284 int64_t ImmVal = 0;
8285 SMLoc Loc = getLoc();
8286
8287 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8288 HwregId::Default);
8289 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8290 HwregOffset::Default);
8291 struct : StructuredOpField {
8292 using StructuredOpField::StructuredOpField;
8293 bool validate(AMDGPUAsmParser &Parser) const override {
8294 if (!isUIntN(Width, Val - 1))
8295 return Error(Parser, "only values from 1 to 32 are legal");
8296 return true;
8297 }
8298 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8299 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
8300
8301 if (Res.isNoMatch())
8302 Res = parseHwregFunc(HwReg, Offset, Width);
8303
8304 if (Res.isSuccess()) {
8305 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8306 return ParseStatus::Failure;
8307 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8308 }
8309
8310 if (Res.isNoMatch() &&
8311 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8313
8314 if (!Res.isSuccess())
8315 return ParseStatus::Failure;
8316
8317 if (!isUInt<16>(ImmVal))
8318 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8319 Operands.push_back(
8320 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8321 return ParseStatus::Success;
8322}
8323
8324bool AMDGPUOperand::isHwreg() const {
8325 return isImmTy(ImmTyHwreg);
8326}
8327
8328//===----------------------------------------------------------------------===//
8329// sendmsg
8330//===----------------------------------------------------------------------===//
8331
8332bool
8333AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8334 OperandInfoTy &Op,
8335 OperandInfoTy &Stream) {
8336 using namespace llvm::AMDGPU::SendMsg;
8337
8338 Msg.Loc = getLoc();
8339 if (isToken(AsmToken::Identifier) &&
8340 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8341 Msg.IsSymbolic = true;
8342 lex(); // skip message name
8343 } else if (!parseExpr(Msg.Val, "a message name")) {
8344 return false;
8345 }
8346
8347 if (trySkipToken(AsmToken::Comma)) {
8348 Op.IsDefined = true;
8349 Op.Loc = getLoc();
8350 if (isToken(AsmToken::Identifier) &&
8351 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8353 lex(); // skip operation name
8354 } else if (!parseExpr(Op.Val, "an operation name")) {
8355 return false;
8356 }
8357
8358 if (trySkipToken(AsmToken::Comma)) {
8359 Stream.IsDefined = true;
8360 Stream.Loc = getLoc();
8361 if (!parseExpr(Stream.Val))
8362 return false;
8363 }
8364 }
8365
8366 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
8367}
8368
8369bool
8370AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
8371 const OperandInfoTy &Op,
8372 const OperandInfoTy &Stream) {
8373 using namespace llvm::AMDGPU::SendMsg;
8374
8375 // Validation strictness depends on whether message is specified
8376 // in a symbolic or in a numeric form. In the latter case
8377 // only encoding possibility is checked.
8378 bool Strict = Msg.IsSymbolic;
8379
8380 if (Strict) {
8381 if (Msg.Val == OPR_ID_UNSUPPORTED) {
8382 Error(Msg.Loc, "specified message id is not supported on this GPU");
8383 return false;
8384 }
8385 } else {
8386 if (!isValidMsgId(Msg.Val, getSTI())) {
8387 Error(Msg.Loc, "invalid message id");
8388 return false;
8389 }
8390 }
8391 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
8392 if (Op.IsDefined) {
8393 Error(Op.Loc, "message does not support operations");
8394 } else {
8395 Error(Msg.Loc, "missing message operation");
8396 }
8397 return false;
8398 }
8399 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
8400 if (Op.Val == OPR_ID_UNSUPPORTED)
8401 Error(Op.Loc, "specified operation id is not supported on this GPU");
8402 else
8403 Error(Op.Loc, "invalid operation id");
8404 return false;
8405 }
8406 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
8407 Stream.IsDefined) {
8408 Error(Stream.Loc, "message operation does not support streams");
8409 return false;
8410 }
8411 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
8412 Error(Stream.Loc, "invalid message stream id");
8413 return false;
8414 }
8415 return true;
8416}
8417
8418ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
8419 using namespace llvm::AMDGPU::SendMsg;
8420
8421 int64_t ImmVal = 0;
8422 SMLoc Loc = getLoc();
8423
8424 if (trySkipId("sendmsg", AsmToken::LParen)) {
8425 OperandInfoTy Msg(OPR_ID_UNKNOWN);
8426 OperandInfoTy Op(OP_NONE_);
8427 OperandInfoTy Stream(STREAM_ID_NONE_);
8428 if (parseSendMsgBody(Msg, Op, Stream) &&
8429 validateSendMsg(Msg, Op, Stream)) {
8430 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
8431 } else {
8432 return ParseStatus::Failure;
8433 }
8434 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
8435 if (ImmVal < 0 || !isUInt<16>(ImmVal))
8436 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8437 } else {
8438 return ParseStatus::Failure;
8439 }
8440
8441 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8442 return ParseStatus::Success;
8443}
8444
8445bool AMDGPUOperand::isSendMsg() const {
8446 return isImmTy(ImmTySendMsg);
8447}
8448
8449ParseStatus AMDGPUAsmParser::parseWaitEvent(OperandVector &Operands) {
8450 using namespace llvm::AMDGPU::WaitEvent;
8451
8452 SMLoc Loc = getLoc();
8453 int64_t ImmVal = 0;
8454
8455 StructuredOpField DontWaitExportReady("dont_wait_export_ready", "bit value",
8456 1, 0);
8457 StructuredOpField ExportReady("export_ready", "bit value", 1, 0);
8458
8459 StructuredOpField *TargetBitfield =
8460 isGFX11() ? &DontWaitExportReady : &ExportReady;
8461
8462 ParseStatus Res = parseStructuredOpFields({TargetBitfield});
8463 if (Res.isNoMatch() && parseExpr(ImmVal, "structured immediate"))
8465 else if (Res.isSuccess()) {
8466 if (!validateStructuredOpFields({TargetBitfield}))
8467 return ParseStatus::Failure;
8468 ImmVal = TargetBitfield->Val;
8469 }
8470
8471 if (!Res.isSuccess())
8472 return ParseStatus::Failure;
8473
8474 if (!isUInt<16>(ImmVal))
8475 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8476
8477 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc,
8478 AMDGPUOperand::ImmTyWaitEvent));
8479 return ParseStatus::Success;
8480}
8481
8482bool AMDGPUOperand::isWaitEvent() const { return isImmTy(ImmTyWaitEvent); }
8483
8484//===----------------------------------------------------------------------===//
8485// v_interp
8486//===----------------------------------------------------------------------===//
8487
8488ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8489 StringRef Str;
8490 SMLoc S = getLoc();
8491
8492 if (!parseId(Str))
8493 return ParseStatus::NoMatch;
8494
8495 int Slot = StringSwitch<int>(Str)
8496 .Case("p10", 0)
8497 .Case("p20", 1)
8498 .Case("p0", 2)
8499 .Default(-1);
8500
8501 if (Slot == -1)
8502 return Error(S, "invalid interpolation slot");
8503
8504 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8505 AMDGPUOperand::ImmTyInterpSlot));
8506 return ParseStatus::Success;
8507}
8508
8509ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8510 StringRef Str;
8511 SMLoc S = getLoc();
8512
8513 if (!parseId(Str))
8514 return ParseStatus::NoMatch;
8515
8516 if (!Str.starts_with("attr"))
8517 return Error(S, "invalid interpolation attribute");
8518
8519 StringRef Chan = Str.take_back(2);
8520 int AttrChan = StringSwitch<int>(Chan)
8521 .Case(".x", 0)
8522 .Case(".y", 1)
8523 .Case(".z", 2)
8524 .Case(".w", 3)
8525 .Default(-1);
8526 if (AttrChan == -1)
8527 return Error(S, "invalid or missing interpolation attribute channel");
8528
8529 Str = Str.drop_back(2).drop_front(4);
8530
8531 uint8_t Attr;
8532 if (Str.getAsInteger(10, Attr))
8533 return Error(S, "invalid or missing interpolation attribute number");
8534
8535 if (Attr > 32)
8536 return Error(S, "out of bounds interpolation attribute number");
8537
8538 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
8539
8540 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
8541 AMDGPUOperand::ImmTyInterpAttr));
8542 Operands.push_back(AMDGPUOperand::CreateImm(
8543 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8544 return ParseStatus::Success;
8545}
8546
8547//===----------------------------------------------------------------------===//
8548// exp
8549//===----------------------------------------------------------------------===//
8550
8551ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8552 using namespace llvm::AMDGPU::Exp;
8553
8554 StringRef Str;
8555 SMLoc S = getLoc();
8556
8557 if (!parseId(Str))
8558 return ParseStatus::NoMatch;
8559
8560 unsigned Id = getTgtId(Str);
8561 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
8562 return Error(S, (Id == ET_INVALID)
8563 ? "invalid exp target"
8564 : "exp target is not supported on this GPU");
8565
8566 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
8567 AMDGPUOperand::ImmTyExpTgt));
8568 return ParseStatus::Success;
8569}
8570
8571//===----------------------------------------------------------------------===//
8572// parser helpers
8573//===----------------------------------------------------------------------===//
8574
8575bool
8576AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8577 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8578}
8579
8580bool
8581AMDGPUAsmParser::isId(const StringRef Id) const {
8582 return isId(getToken(), Id);
8583}
8584
8585bool
8586AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8587 return getTokenKind() == Kind;
8588}
8589
8590StringRef AMDGPUAsmParser::getId() const {
8591 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8592}
8593
8594bool
8595AMDGPUAsmParser::trySkipId(const StringRef Id) {
8596 if (isId(Id)) {
8597 lex();
8598 return true;
8599 }
8600 return false;
8601}
8602
8603bool
8604AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8605 if (isToken(AsmToken::Identifier)) {
8606 StringRef Tok = getTokenStr();
8607 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8608 lex();
8609 return true;
8610 }
8611 }
8612 return false;
8613}
8614
8615bool
8616AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8617 if (isId(Id) && peekToken().is(Kind)) {
8618 lex();
8619 lex();
8620 return true;
8621 }
8622 return false;
8623}
8624
8625bool
8626AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8627 if (isToken(Kind)) {
8628 lex();
8629 return true;
8630 }
8631 return false;
8632}
8633
8634bool
8635AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8636 const StringRef ErrMsg) {
8637 if (!trySkipToken(Kind)) {
8638 Error(getLoc(), ErrMsg);
8639 return false;
8640 }
8641 return true;
8642}
8643
8644bool
8645AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8646 SMLoc S = getLoc();
8647
8648 const MCExpr *Expr;
8649 if (Parser.parseExpression(Expr))
8650 return false;
8651
8652 if (Expr->evaluateAsAbsolute(Imm))
8653 return true;
8654
8655 if (Expected.empty()) {
8656 Error(S, "expected absolute expression");
8657 } else {
8658 Error(S, Twine("expected ", Expected) +
8659 Twine(" or an absolute expression"));
8660 }
8661 return false;
8662}
8663
8664bool
8665AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8666 SMLoc S = getLoc();
8667
8668 const MCExpr *Expr;
8669 if (Parser.parseExpression(Expr))
8670 return false;
8671
8672 int64_t IntVal;
8673 if (Expr->evaluateAsAbsolute(IntVal)) {
8674 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8675 } else {
8676 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8677 }
8678 return true;
8679}
8680
8681bool
8682AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8683 if (isToken(AsmToken::String)) {
8684 Val = getToken().getStringContents();
8685 lex();
8686 return true;
8687 }
8688 Error(getLoc(), ErrMsg);
8689 return false;
8690}
8691
8692bool
8693AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8694 if (isToken(AsmToken::Identifier)) {
8695 Val = getTokenStr();
8696 lex();
8697 return true;
8698 }
8699 if (!ErrMsg.empty())
8700 Error(getLoc(), ErrMsg);
8701 return false;
8702}
8703
8704AsmToken
8705AMDGPUAsmParser::getToken() const {
8706 return Parser.getTok();
8707}
8708
8709AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8710 return isToken(AsmToken::EndOfStatement)
8711 ? getToken()
8712 : getLexer().peekTok(ShouldSkipSpace);
8713}
8714
8715void
8716AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8717 auto TokCount = getLexer().peekTokens(Tokens);
8718
8719 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8720 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8721}
8722
8724AMDGPUAsmParser::getTokenKind() const {
8725 return getLexer().getKind();
8726}
8727
8728SMLoc
8729AMDGPUAsmParser::getLoc() const {
8730 return getToken().getLoc();
8731}
8732
8733StringRef
8734AMDGPUAsmParser::getTokenStr() const {
8735 return getToken().getString();
8736}
8737
8738void
8739AMDGPUAsmParser::lex() {
8740 Parser.Lex();
8741}
8742
8743const AMDGPUOperand &
8744AMDGPUAsmParser::findMCOperand(const OperandVector &Operands,
8745 int MCOpIdx) const {
8746 for (const auto &Op : Operands) {
8747 const AMDGPUOperand &TargetOp = static_cast<AMDGPUOperand &>(*Op);
8748 if (TargetOp.getMCOpIdx() == MCOpIdx)
8749 return TargetOp;
8750 }
8751 llvm_unreachable("no such MC operand!");
8752}
8753
8754SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8755 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8756}
8757
8758// Returns one of the given locations that comes later in the source.
8759SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8760 return a.getPointer() < b.getPointer() ? b : a;
8761}
8762
8763SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
8764 int MCOpIdx) const {
8765 return findMCOperand(Operands, MCOpIdx).getStartLoc();
8766}
8767
8768SMLoc
8769AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8770 const OperandVector &Operands) const {
8771 for (unsigned i = Operands.size() - 1; i > 0; --i) {
8772 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8773 if (Test(Op))
8774 return Op.getStartLoc();
8775 }
8776 return getInstLoc(Operands);
8777}
8778
8779SMLoc
8780AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8781 const OperandVector &Operands) const {
8782 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8783 return getOperandLoc(Test, Operands);
8784}
8785
8786ParseStatus
8787AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
8788 if (!trySkipToken(AsmToken::LCurly))
8789 return ParseStatus::NoMatch;
8790
8791 bool First = true;
8792 while (!trySkipToken(AsmToken::RCurly)) {
8793 if (!First &&
8794 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
8795 return ParseStatus::Failure;
8796
8797 StringRef Id = getTokenStr();
8798 SMLoc IdLoc = getLoc();
8799 if (!skipToken(AsmToken::Identifier, "field name expected") ||
8800 !skipToken(AsmToken::Colon, "colon expected"))
8801 return ParseStatus::Failure;
8802
8803 const auto *I =
8804 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
8805 if (I == Fields.end())
8806 return Error(IdLoc, "unknown field");
8807 if ((*I)->IsDefined)
8808 return Error(IdLoc, "duplicate field");
8809
8810 // TODO: Support symbolic values.
8811 (*I)->Loc = getLoc();
8812 if (!parseExpr((*I)->Val))
8813 return ParseStatus::Failure;
8814 (*I)->IsDefined = true;
8815
8816 First = false;
8817 }
8818 return ParseStatus::Success;
8819}
8820
8821bool AMDGPUAsmParser::validateStructuredOpFields(
8823 return all_of(Fields, [this](const StructuredOpField *F) {
8824 return F->validate(*this);
8825 });
8826}
8827
8828//===----------------------------------------------------------------------===//
8829// swizzle
8830//===----------------------------------------------------------------------===//
8831
8833static unsigned
8834encodeBitmaskPerm(const unsigned AndMask,
8835 const unsigned OrMask,
8836 const unsigned XorMask) {
8837 using namespace llvm::AMDGPU::Swizzle;
8838
8839 return BITMASK_PERM_ENC |
8840 (AndMask << BITMASK_AND_SHIFT) |
8841 (OrMask << BITMASK_OR_SHIFT) |
8842 (XorMask << BITMASK_XOR_SHIFT);
8843}
8844
8845bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8846 const unsigned MaxVal,
8847 const Twine &ErrMsg, SMLoc &Loc) {
8848 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8849 return false;
8850 }
8851 Loc = getLoc();
8852 if (!parseExpr(Op)) {
8853 return false;
8854 }
8855 if (Op < MinVal || Op > MaxVal) {
8856 Error(Loc, ErrMsg);
8857 return false;
8858 }
8859
8860 return true;
8861}
8862
8863bool
8864AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8865 const unsigned MinVal,
8866 const unsigned MaxVal,
8867 const StringRef ErrMsg) {
8868 SMLoc Loc;
8869 for (unsigned i = 0; i < OpNum; ++i) {
8870 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8871 return false;
8872 }
8873
8874 return true;
8875}
8876
8877bool
8878AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8879 using namespace llvm::AMDGPU::Swizzle;
8880
8881 int64_t Lane[LANE_NUM];
8882 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8883 "expected a 2-bit lane id")) {
8885 for (unsigned I = 0; I < LANE_NUM; ++I) {
8886 Imm |= Lane[I] << (LANE_SHIFT * I);
8887 }
8888 return true;
8889 }
8890 return false;
8891}
8892
8893bool
8894AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8895 using namespace llvm::AMDGPU::Swizzle;
8896
8897 SMLoc Loc;
8898 int64_t GroupSize;
8899 int64_t LaneIdx;
8900
8901 if (!parseSwizzleOperand(GroupSize,
8902 2, 32,
8903 "group size must be in the interval [2,32]",
8904 Loc)) {
8905 return false;
8906 }
8907 if (!isPowerOf2_64(GroupSize)) {
8908 Error(Loc, "group size must be a power of two");
8909 return false;
8910 }
8911 if (parseSwizzleOperand(LaneIdx,
8912 0, GroupSize - 1,
8913 "lane id must be in the interval [0,group size - 1]",
8914 Loc)) {
8915 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8916 return true;
8917 }
8918 return false;
8919}
8920
8921bool
8922AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8923 using namespace llvm::AMDGPU::Swizzle;
8924
8925 SMLoc Loc;
8926 int64_t GroupSize;
8927
8928 if (!parseSwizzleOperand(GroupSize,
8929 2, 32,
8930 "group size must be in the interval [2,32]",
8931 Loc)) {
8932 return false;
8933 }
8934 if (!isPowerOf2_64(GroupSize)) {
8935 Error(Loc, "group size must be a power of two");
8936 return false;
8937 }
8938
8939 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8940 return true;
8941}
8942
8943bool
8944AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8945 using namespace llvm::AMDGPU::Swizzle;
8946
8947 SMLoc Loc;
8948 int64_t GroupSize;
8949
8950 if (!parseSwizzleOperand(GroupSize,
8951 1, 16,
8952 "group size must be in the interval [1,16]",
8953 Loc)) {
8954 return false;
8955 }
8956 if (!isPowerOf2_64(GroupSize)) {
8957 Error(Loc, "group size must be a power of two");
8958 return false;
8959 }
8960
8961 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8962 return true;
8963}
8964
8965bool
8966AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8967 using namespace llvm::AMDGPU::Swizzle;
8968
8969 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8970 return false;
8971 }
8972
8973 StringRef Ctl;
8974 SMLoc StrLoc = getLoc();
8975 if (!parseString(Ctl)) {
8976 return false;
8977 }
8978 if (Ctl.size() != BITMASK_WIDTH) {
8979 Error(StrLoc, "expected a 5-character mask");
8980 return false;
8981 }
8982
8983 unsigned AndMask = 0;
8984 unsigned OrMask = 0;
8985 unsigned XorMask = 0;
8986
8987 for (size_t i = 0; i < Ctl.size(); ++i) {
8988 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8989 switch(Ctl[i]) {
8990 default:
8991 Error(StrLoc, "invalid mask");
8992 return false;
8993 case '0':
8994 break;
8995 case '1':
8996 OrMask |= Mask;
8997 break;
8998 case 'p':
8999 AndMask |= Mask;
9000 break;
9001 case 'i':
9002 AndMask |= Mask;
9003 XorMask |= Mask;
9004 break;
9005 }
9006 }
9007
9008 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
9009 return true;
9010}
9011
9012bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
9013 using namespace llvm::AMDGPU::Swizzle;
9014
9015 if (!AMDGPU::isGFX9Plus(getSTI())) {
9016 Error(getLoc(), "FFT mode swizzle not supported on this GPU");
9017 return false;
9018 }
9019
9020 int64_t Swizzle;
9021 SMLoc Loc;
9022 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
9023 "FFT swizzle must be in the interval [0," +
9024 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
9025 Loc))
9026 return false;
9027
9028 Imm = FFT_MODE_ENC | Swizzle;
9029 return true;
9030}
9031
9032bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
9033 using namespace llvm::AMDGPU::Swizzle;
9034
9035 if (!AMDGPU::isGFX9Plus(getSTI())) {
9036 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
9037 return false;
9038 }
9039
9040 SMLoc Loc;
9041 int64_t Direction;
9042
9043 if (!parseSwizzleOperand(Direction, 0, 1,
9044 "direction must be 0 (left) or 1 (right)", Loc))
9045 return false;
9046
9047 int64_t RotateSize;
9048 if (!parseSwizzleOperand(
9049 RotateSize, 0, ROTATE_MAX_SIZE,
9050 "number of threads to rotate must be in the interval [0," +
9051 Twine(ROTATE_MAX_SIZE) + Twine(']'),
9052 Loc))
9053 return false;
9054
9056 (RotateSize << ROTATE_SIZE_SHIFT);
9057 return true;
9058}
9059
9060bool
9061AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
9062
9063 SMLoc OffsetLoc = getLoc();
9064
9065 if (!parseExpr(Imm, "a swizzle macro")) {
9066 return false;
9067 }
9068 if (!isUInt<16>(Imm)) {
9069 Error(OffsetLoc, "expected a 16-bit offset");
9070 return false;
9071 }
9072 return true;
9073}
9074
9075bool
9076AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
9077 using namespace llvm::AMDGPU::Swizzle;
9078
9079 if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
9080
9081 SMLoc ModeLoc = getLoc();
9082 bool Ok = false;
9083
9084 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
9085 Ok = parseSwizzleQuadPerm(Imm);
9086 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
9087 Ok = parseSwizzleBitmaskPerm(Imm);
9088 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
9089 Ok = parseSwizzleBroadcast(Imm);
9090 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
9091 Ok = parseSwizzleSwap(Imm);
9092 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
9093 Ok = parseSwizzleReverse(Imm);
9094 } else if (trySkipId(IdSymbolic[ID_FFT])) {
9095 Ok = parseSwizzleFFT(Imm);
9096 } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
9097 Ok = parseSwizzleRotate(Imm);
9098 } else {
9099 Error(ModeLoc, "expected a swizzle mode");
9100 }
9101
9102 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
9103 }
9104
9105 return false;
9106}
9107
// Parses an "offset:" swizzle operand. After the "offset" keyword and a colon,
// the value is either a "swizzle" macro (parseSwizzleMacro) or a plain offset
// (parseSwizzleOffset). An ImmTySwizzle immediate is pushed even when parsing
// the value failed. Returns NoMatch when the "offset" keyword is absent.
9108ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
9109 SMLoc S = getLoc();
9110 int64_t Imm = 0;
9111
9112 if (trySkipId("offset")) {
9113
9114 bool Ok = false;
9115 if (skipToken(AsmToken::Colon, "expected a colon")) {
9116 if (trySkipId("swizzle")) {
9117 Ok = parseSwizzleMacro(Imm);
9118 } else {
9119 Ok = parseSwizzleOffset(Imm);
9120 }
9121 }
9122
9123 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
9124
// NOTE(review): listing line 9125 is missing from this copy; presumably it
// returns a status derived from Ok -- confirm against the real source.
9126 }
9127 return ParseStatus::NoMatch;
9128}
9129
// True when this operand is an immediate tagged ImmTySwizzle.
9130bool
9131AMDGPUOperand::isSwizzle() const {
9132 return isImmTy(ImmTySwizzle);
9133}
9134
9135//===----------------------------------------------------------------------===//
9136// VGPR Index Mode
9137//===----------------------------------------------------------------------===//
9138
9139int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
9140
9141 using namespace llvm::AMDGPU::VGPRIndexMode;
9142
9143 if (trySkipToken(AsmToken::RParen)) {
9144 return OFF;
9145 }
9146
9147 int64_t Imm = 0;
9148
9149 while (true) {
9150 unsigned Mode = 0;
9151 SMLoc S = getLoc();
9152
9153 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
9154 if (trySkipId(IdSymbolic[ModeId])) {
9155 Mode = 1 << ModeId;
9156 break;
9157 }
9158 }
9159
9160 if (Mode == 0) {
9161 Error(S, (Imm == 0)?
9162 "expected a VGPR index mode or a closing parenthesis" :
9163 "expected a VGPR index mode");
9164 return UNDEF;
9165 }
9166
9167 if (Imm & Mode) {
9168 Error(S, "duplicate VGPR index mode");
9169 return UNDEF;
9170 }
9171 Imm |= Mode;
9172
9173 if (trySkipToken(AsmToken::RParen))
9174 break;
9175 if (!skipToken(AsmToken::Comma,
9176 "expected a comma or a closing parenthesis"))
9177 return UNDEF;
9178 }
9179
9180 return Imm;
9181}
9182
9183ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
9184
9185 using namespace llvm::AMDGPU::VGPRIndexMode;
9186
9187 int64_t Imm = 0;
9188 SMLoc S = getLoc();
9189
9190 if (trySkipId("gpr_idx", AsmToken::LParen)) {
9191 Imm = parseGPRIdxMacro();
9192 if (Imm == UNDEF)
9193 return ParseStatus::Failure;
9194 } else {
9195 if (getParser().parseAbsoluteExpression(Imm))
9196 return ParseStatus::Failure;
9197 if (Imm < 0 || !isUInt<4>(Imm))
9198 return Error(S, "invalid immediate: only 4-bit values are legal");
9199 }
9200
9201 Operands.push_back(
9202 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
9203 return ParseStatus::Success;
9204}
9205
// True when this operand is an immediate tagged ImmTyGprIdxMode.
9206bool AMDGPUOperand::isGPRIdxMode() const {
9207 return isImmTy(ImmTyGprIdxMode);
9208}
9209
9210//===----------------------------------------------------------------------===//
9211// sopp branch targets
9212//===----------------------------------------------------------------------===//
9213
9214ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
9215
9216 // Make sure we are not parsing something
9217 // that looks like a label or an expression but is not.
9218 // This will improve error messages.
9219 if (isRegister() || isModifier())
9220 return ParseStatus::NoMatch;
9221
9222 if (!parseExpr(Operands))
9223 return ParseStatus::Failure;
9224
9225 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
9226 assert(Opr.isImm() || Opr.isExpr());
9227 SMLoc Loc = Opr.getStartLoc();
9228
9229 // Currently we do not support arbitrary expressions as branch targets.
9230 // Only labels and absolute expressions are accepted.
9231 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
9232 Error(Loc, "expected an absolute expression or a label");
9233 } else if (Opr.isImm() && !Opr.isS16Imm()) {
9234 Error(Loc, "expected a 16-bit signed jump offset");
9235 }
9236
9237 return ParseStatus::Success;
9238}
9239
9240//===----------------------------------------------------------------------===//
9241// Boolean holding registers
9242//===----------------------------------------------------------------------===//
9243
// Boolean-holding register operands are parsed exactly like any other
// register: this simply forwards to parseReg.
9244ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
9245 return parseReg(Operands);
9246}
9247
9248//===----------------------------------------------------------------------===//
9249// mubuf
9250//===----------------------------------------------------------------------===//
9251
// Converts the parsed operands of a MUBUF instruction into MCInst operands.
// Registers and untyped immediates are appended in parse order, tokens are
// skipped, and typed immediates are recorded in OptionalIdx so they can be
// appended afterwards in a fixed order (offset, then cpol).
// IsAtomic makes the function consult the opcode's TSFlags to decide whether
// the first register must be appended twice (tied source for the returned
// destination).
9252void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9253 const OperandVector &Operands,
9254 bool IsAtomic) {
9255 OptionalImmIndexMap OptionalIdx;
9256 unsigned FirstOperandIdx = 1;
9257 bool IsAtomicReturn = false;
9258
9259 if (IsAtomic) {
9260 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
// NOTE(review): listing line 9261 (the flag mask ending this expression) is
// missing from this copy -- restore it from the real source.
9262 }
9263
9264 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
9265 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9266
9267 // Add the register arguments
9268 if (Op.isReg()) {
9269 Op.addRegOperands(Inst, 1);
9270 // Insert a tied src for atomic return dst.
9271 // This cannot be postponed as subsequent calls to
9272 // addImmOperands rely on correct number of MC operands.
9273 if (IsAtomicReturn && i == FirstOperandIdx)
9274 Op.addRegOperands(Inst, 1);
9275 continue;
9276 }
9277
9278 // Handle the case where soffset is an immediate
9279 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9280 Op.addImmOperands(Inst, 1);
9281 continue;
9282 }
9283
9284 // Handle tokens like 'offen' which are sometimes hard-coded into the
9285 // asm string. There are no MCInst operands for these.
9286 if (Op.isToken()) {
9287 continue;
9288 }
9289 assert(Op.isImm());
9290
9291 // Handle optional arguments
9292 OptionalIdx[Op.getImmTy()] = i;
9293 }
9294
9295 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
9296 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
9297 // Parse a dummy operand as a placeholder for the SWZ operand. This enforces
9298 // agreement between MCInstrDesc.getNumOperands and MCInst.getNumOperands.
// NOTE(review): listing line 9299 (the statement the comment above describes)
// is missing from this copy -- restore it from the real source.
9300}
9301
9302//===----------------------------------------------------------------------===//
9303// smrd
9304//===----------------------------------------------------------------------===//
9305
// True for a literal immediate whose value fits in 8 unsigned bits.
9306bool AMDGPUOperand::isSMRDOffset8() const {
9307 return isImmLiteral() && isUInt<8>(getImm());
9308}
9309
// True for any literal immediate; no range is enforced at this point.
9310bool AMDGPUOperand::isSMEMOffset() const {
9311 // Offset range is checked later by validator.
9312 return isImmLiteral();
9313}
9314
// True for a literal immediate that needs more than 8 bits but still fits in
// 32 unsigned bits.
9315bool AMDGPUOperand::isSMRDLiteralOffset() const {
9316 // 32-bit literals are only supported on CI and we only want to use them
9317 // when the offset is > 8-bits.
9318 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
9319}
9320
9321//===----------------------------------------------------------------------===//
9322// vop3
9323//===----------------------------------------------------------------------===//
9324
/// Convert a "mul:N" output-modifier value in place.
///
/// Only 1, 2 and 4 are accepted; they are rewritten to 0, 1 and 2
/// respectively (the value shifted right by one).
/// \returns false, leaving \p Mul untouched, for any other value.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    Mul >>= 1;
    return true;
  default:
    return false;
  }
}
9332
/// Convert a "div:N" output-modifier value in place.
///
/// Only 1 and 2 are accepted; they are rewritten to 0 and 3 respectively.
/// \returns false, leaving \p Div untouched, for any other value.
static bool ConvertOmodDiv(int64_t &Div) {
  const bool IsOne = Div == 1;
  const bool IsTwo = Div == 2;
  if (!IsOne && !IsTwo)
    return false;

  Div = IsOne ? 0 : 3;
  return true;
}
9346
9347// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9348// This is intentional and ensures compatibility with sp3.
9349// See bug 35397 for details.
9350bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9351 if (BoundCtrl == 0 || BoundCtrl == 1) {
9352 if (!isGFX11Plus())
9353 BoundCtrl = 1;
9354 return true;
9355 }
9356 return false;
9357}
9358
9359void AMDGPUAsmParser::onBeginOfFile() {
9360 if (!getParser().getStreamer().getTargetStreamer() ||
9361 getSTI().getTargetTriple().getArch() == Triple::r600)
9362 return;
9363
9364 if (!getTargetStreamer().getTargetID())
9365 getTargetStreamer().initializeTargetID(getSTI(),
9366 getSTI().getFeatureString());
9367
9368 if (isHsaAbi(getSTI()))
9369 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9370}
9371
9372/// Parse AMDGPU specific expressions.
9373///
9374/// expr ::= or(expr, ...) |
9375/// max(expr, ...)
9376///
// The StringSwitch below also recognises extrasgprs, totalnumvgprs, alignto,
// occupancy and instprefsize. A recognised name is treated as a function-like
// expression only when directly followed by '('; everything else is delegated
// to the generic parser's parsePrimaryExpr.
// Returns true on error (after emitting a diagnostic), false on success.
9377bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9378 using AGVK = AMDGPUMCExpr::VariantKind;
9379
9380 if (isToken(AsmToken::Identifier)) {
9381 StringRef TokenId = getTokenStr();
9382 AGVK VK = StringSwitch<AGVK>(TokenId)
9383 .Case("max", AGVK::AGVK_Max)
9384 .Case("or", AGVK::AGVK_Or)
9385 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
9386 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9387 .Case("alignto", AGVK::AGVK_AlignTo)
9388 .Case("occupancy", AGVK::AGVK_Occupancy)
9389 .Case("instprefsize", AGVK::AGVK_InstPrefSize)
9390 .Default(AGVK::AGVK_None);
9391
9392 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
// NOTE(review): listing line 9393 is missing from this copy; it must declare
// the Exprs container used below -- restore it from the real source.
9394 uint64_t CommaCount = 0;
9395 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
9396 lex(); // Eat '('
9397 while (true) {
// A ')' ends the argument list; the list must be non-empty and contain
// exactly one comma fewer than arguments (i.e. no trailing comma).
9398 if (trySkipToken(AsmToken::RParen)) {
9399 if (Exprs.empty()) {
9400 Error(getToken().getLoc(),
9401 "empty " + Twine(TokenId) + " expression");
9402 return true;
9403 }
9404 if (CommaCount + 1 != Exprs.size()) {
9405 Error(getToken().getLoc(),
9406 "mismatch of commas in " + Twine(TokenId) + " expression");
9407 return true;
9408 }
9409 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
9410 return false;
9411 }
9412 const MCExpr *Expr;
9413 if (getParser().parseExpression(Expr, EndLoc))
9414 return true;
9415 Exprs.push_back(Expr);
// After each argument only ',' or ')' may follow.
9416 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
9417 if (LastTokenWasComma)
9418 CommaCount++;
9419 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
9420 Error(getToken().getLoc(),
9421 "unexpected token in " + Twine(TokenId) + " expression");
9422 return true;
9423 }
9424 }
9425 }
9426 }
9427 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
9428}
9429
9430ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9431 StringRef Name = getTokenStr();
9432 if (Name == "mul") {
9433 return parseIntWithPrefix("mul", Operands,
9434 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
9435 }
9436
9437 if (Name == "div") {
9438 return parseIntWithPrefix("div", Operands,
9439 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
9440 }
9441
9442 return ParseStatus::NoMatch;
9443}
9444
9445// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
9446// the number of src operands present, then copies that bit into src0_modifiers.
9447static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
9448 int Opc = Inst.getOpcode();
9449 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9450 if (OpSelIdx == -1)
9451 return;
9452
9453 int SrcNum;
9454 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9455 AMDGPU::OpName::src2};
9456 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
9457 ++SrcNum)
9458 ;
9459 assert(SrcNum > 0);
9460
9461 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9462
9463 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
9464 if (DstIdx == -1)
9465 return;
9466
9467 const MCOperand &DstOp = Inst.getOperand(DstIdx);
9468 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
9469 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9470 if (DstOp.isReg() &&
9471 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
9472 if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
9473 ModVal |= SISrcMods::DST_OP_SEL;
9474 } else {
9475 if ((OpSel & (1 << SrcNum)) != 0)
9476 ModVal |= SISrcMods::DST_OP_SEL;
9477 }
9478 Inst.getOperand(ModIdx).setImm(ModVal);
9479}
9480
// Runs the VOP3P conversion, then folds the destination op_sel bit into
// src0_modifiers via cvtVOP3DstOpSelOnly.
9481void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9482 const OperandVector &Operands) {
9483 cvtVOP3P(Inst, Operands);
9484 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9485}
9486
// Same as the two-argument overload, but uses the caller-supplied optional
// operand index map when invoking cvtVOP3P.
9487void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
9488 OptionalImmIndexMap &OptionalIdx) {
9489 cvtVOP3P(Inst, Operands, OptionalIdx);
9490 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9491}
9492
// Tells whether operand OpNum of Desc is an input-modifiers operand that is
// immediately followed by the source operand it modifies; the individual
// conditions are listed inline below.
9493static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9494 return
9495 // 1. This operand is input modifiers
9496 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9497 // 2. This is not last operand
9498 && Desc.NumOperands > (OpNum + 1)
9499 // 3. Next operand is register class
9500 && Desc.operands()[OpNum + 1].RegClass != -1
9501 // 4. Next register is not tied to any other operand
9502 && Desc.getOperandConstraint(OpNum + 1,
// NOTE(review): listing line 9503 (the rest of this call and the comparison
// that ends the return expression) is missing from this copy -- restore it
// from the real source.
9504}
9505
9506void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
9507 unsigned Opc = Inst.getOpcode();
9508 constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9509 AMDGPU::OpName::src2};
9510 constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9511 AMDGPU::OpName::src1_modifiers,
9512 AMDGPU::OpName::src2_modifiers};
9513 for (int J = 0; J < 3; ++J) {
9514 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9515 if (OpIdx == -1)
9516 // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but
9517 // no src1. So continue instead of break.
9518 continue;
9519
9520 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9521 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9522
9523 if ((OpSel & (1 << J)) != 0)
9524 ModVal |= SISrcMods::OP_SEL_0;
9525 // op_sel[3] is encoded in src0_modifiers.
9526 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
9527 ModVal |= SISrcMods::DST_OP_SEL;
9528
9529 Inst.getOperand(ModIdx).setImm(ModVal);
9530 }
9531}
9532
// Converts parsed operands of a VOP3-encoded interpolation instruction into
// MCInst operands: defs first, then sources / interp slot, attr and channel
// immediates in parse order, then the optional high, clamp, omod and op_sel
// operands that the opcode defines. Finally op_sel bits are folded into the
// source modifiers via cvtOpSelHelper.
9533void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9534{
9535 OptionalImmIndexMap OptionalIdx;
9536 unsigned Opc = Inst.getOpcode();
9537
// Parsed operands are consumed starting at index 1; the defs come first.
9538 unsigned I = 1;
9539 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9540 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9541 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9542 }
9543
9544 for (unsigned E = Operands.size(); I != E; ++I) {
9545 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// NOTE(review): listing line 9546 (the opening `if` of this chain) is missing
// from this copy -- restore it from the real source.
9547 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9548 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
9549 Op.isInterpAttrChan()) {
9550 Inst.addOperand(MCOperand::createImm(Op.getImm()));
9551 } else if (Op.isImmModifier()) {
9552 OptionalIdx[Op.getImmTy()] = I;
9553 } else {
9554 llvm_unreachable("unhandled operand type");
9555 }
9556 }
9557
9558 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
9559 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9560 AMDGPUOperand::ImmTyHigh);
9561
9562 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9563 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9564 AMDGPUOperand::ImmTyClamp);
9565
9566 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9567 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9568 AMDGPUOperand::ImmTyOModSI);
9569
9570 // Some v_interp instructions use op_sel[3] for dst.
9571 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9572 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9573 AMDGPUOperand::ImmTyOpSel);
9574 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9575 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9576
9577 cvtOpSelHelper(Inst, OpSel);
9578 }
9579}
9580
// Converts parsed operands of a VINTERP instruction into MCInst operands:
// defs, then sources in parse order, then clamp, op_sel (only if the opcode
// defines it) and wait_exp. When op_sel exists its bits are folded into the
// source modifiers via cvtOpSelHelper.
9581void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9582{
9583 OptionalImmIndexMap OptionalIdx;
9584 unsigned Opc = Inst.getOpcode();
9585
9586 unsigned I = 1;
9587 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9588 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9589 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9590 }
9591
9592 for (unsigned E = Operands.size(); I != E; ++I) {
9593 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// NOTE(review): listing line 9594 (the opening `if` of this chain) is missing
// from this copy -- restore it from the real source.
9595 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9596 } else if (Op.isImmModifier()) {
9597 OptionalIdx[Op.getImmTy()] = I;
9598 } else {
9599 llvm_unreachable("unhandled operand type");
9600 }
9601 }
9602
9603 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
9604
9605 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9606 if (OpSelIdx != -1)
9607 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9608
9609 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
9610
// Nothing to fold into the modifiers when the opcode has no op_sel.
9611 if (OpSelIdx == -1)
9612 return;
9613
9614 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9615 cvtOpSelHelper(Inst, OpSel);
9616}
9617
// Converts parsed operands of a scaled MFMA instruction into MCInst operands.
// Defs and sources are appended in parse order, cbsz/blgp values are patched
// into their MCInst slots if they were written, and op_sel / op_sel_hi bits
// are finally encoded into src0_modifiers and src1_modifiers.
9618void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9619 const OperandVector &Operands) {
9620 OptionalImmIndexMap OptionalIdx;
9621 unsigned Opc = Inst.getOpcode();
9622 unsigned I = 1;
9623 int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
9624
9625 const MCInstrDesc &Desc = MII.get(Opc);
9626
9627 for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
9628 static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
9629
9630 for (unsigned E = Operands.size(); I != E; ++I) {
9631 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
9632 int NumOperands = Inst.getNumOperands();
9633 // The order of operands in MCInst and parsed operands are different.
9634 // Adding dummy cbsz and blgp operands at corresponding MCInst operand
9635 // indices for parsing scale values correctly.
9636 if (NumOperands == CbszOpIdx) {
// NOTE(review): listing lines 9637-9638 (the body of this `if`, presumably
// the two dummy operands described above) are missing from this copy --
// restore them from the real source.
9639 }
9640 if (isRegOrImmWithInputMods(Desc, NumOperands)) {
9641 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9642 } else if (Op.isImmModifier()) {
9643 OptionalIdx[Op.getImmTy()] = I;
9644 } else {
9645 Op.addRegOrImmOperands(Inst, 1);
9646 }
9647 }
9648
9649 // Insert CBSZ and BLGP operands for F8F6F4 variants
9650 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9651 if (CbszIdx != OptionalIdx.end()) {
9652 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
9653 Inst.getOperand(CbszOpIdx).setImm(CbszVal);
9654 }
9655
9656 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
9657 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9658 if (BlgpIdx != OptionalIdx.end()) {
9659 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
9660 Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
9661 }
9662
9663 // Add dummy src_modifiers
// NOTE(review): listing lines 9664-9665 (the statements for the comment
// above) are missing from this copy -- restore them from the real source.
9666
9667 // Handle op_sel fields
9668
9669 unsigned OpSel = 0;
9670 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9671 if (OpselIdx != OptionalIdx.end()) {
9672 OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
9673 .getImm();
9674 }
9675
9676 unsigned OpSelHi = 0;
9677 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9678 if (OpselHiIdx != OptionalIdx.end()) {
9679 OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
9680 .getImm();
9681 }
9682 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9683 AMDGPU::OpName::src1_modifiers};
9684
// Bit J of op_sel / op_sel_hi becomes OP_SEL_0 / OP_SEL_1 of srcJ_modifiers;
// the modifier operand is overwritten, not OR-ed.
9685 for (unsigned J = 0; J < 2; ++J) {
9686 unsigned ModVal = 0;
9687 if (OpSel & (1 << J))
9688 ModVal |= SISrcMods::OP_SEL_0;
9689 if (OpSelHi & (1 << J))
9690 ModVal |= SISrcMods::OP_SEL_1;
9691
9692 const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9693 Inst.getOperand(ModIdx).setImm(ModVal);
9694 }
9695}
9696
// Converts parsed operands of a VOP3 instruction into MCInst operands.
// Defs and sources are appended in parse order; immediate modifiers are
// recorded in OptionalIdx (which the caller can inspect afterwards) and then
// appended in the fixed order scale_sel, clamp, [vdst_in] byte_sel, omod for
// whichever of those the opcode defines. MAC-style opcodes additionally get
// a zero src2_modifiers and a copy of the destination inserted as src2.
9697void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9698 OptionalImmIndexMap &OptionalIdx) {
9699 unsigned Opc = Inst.getOpcode();
9700
9701 unsigned I = 1;
9702 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9703 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9704 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9705 }
9706
9707 for (unsigned E = Operands.size(); I != E; ++I) {
9708 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// NOTE(review): listing line 9709 (the opening `if` of this chain) is missing
// from this copy -- restore it from the real source.
9710 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9711 } else if (Op.isImmModifier()) {
9712 OptionalIdx[Op.getImmTy()] = I;
9713 } else {
9714 Op.addRegOrImmOperands(Inst, 1);
9715 }
9716 }
9717
9718 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
9719 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9720 AMDGPUOperand::ImmTyScaleSel);
9721
9722 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9723 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9724 AMDGPUOperand::ImmTyClamp);
9725
9726 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
// vdst_in, when present, is a copy of operand 0 placed before byte_sel.
9727 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
9728 Inst.addOperand(Inst.getOperand(0));
9729 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9730 AMDGPUOperand::ImmTyByteSel);
9731 }
9732
9733 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9734 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9735 AMDGPUOperand::ImmTyOModSI);
9736
9737 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9738 // it has src2 register operand that is tied to dst operand
9739 // we don't allow modifiers for this operand in assembler so src2_modifiers
9740 // should be 0.
9741 if (isMAC(Opc)) {
9742 auto *it = Inst.begin();
9743 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
9744 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
9745 ++it;
9746 // Copy the operand to ensure it's not invalidated when Inst grows.
9747 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
9748 }
9749}
9750
// Convenience overload: runs the VOP3 conversion with a throw-away optional
// operand index map.
9751void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9752 OptionalImmIndexMap OptionalIdx;
9753 cvtVOP3(Inst, Operands, OptionalIdx);
9754}
9755
// Second stage of VOP3P-style conversion. Expects the defs/sources to be in
// Inst already (see the two-argument overload, which runs cvtVOP3 first) and
// OptIdx to map each parsed immediate modifier type to its index in Operands.
// Appends, in this order and only where the opcode defines them: a tied
// src2_mods/vdst pair for a few conversion opcodes, vdst_in, bitop3, op_sel,
// op_sel_hi, the matrix_* operands, neg_lo and neg_hi. Then the op_sel,
// op_sel_hi, neg_lo and neg_hi bits are OR-ed into the per-source modifier
// operands. The append order is significant because later code reads the
// operands back by their named index.
9756void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
9757 OptionalImmIndexMap &OptIdx) {
9758 const int Opc = Inst.getOpcode();
9759 const MCInstrDesc &Desc = MII.get(Opc);
9760
9761 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
9762
9763 if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9764 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9765 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9766 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9767 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx11 ||
9768 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx11 ||
9769 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9770 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12 ||
9771 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx13 ||
9772 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx13) {
9773 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
9774 Inst.addOperand(Inst.getOperand(0));
9775 }
9776
9777 // Append vdst_in only if a previous converter (cvtVOP3DPP for DPP variants,
9778 // cvtVOP3 for byte_sel variants) hasn't already placed it. Use the position
9779 // of the named operand to detect that, the same way cvtVOP3DPP does
9780 // internally.
9781 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
9782 if (VdstInIdx != -1 && VdstInIdx == static_cast<int>(Inst.getNumOperands()))
9783 Inst.addOperand(Inst.getOperand(0));
9784
9785 int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
9786 if (BitOp3Idx != -1) {
9787 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9788 }
9789
9790 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
9791 // instruction, and then figure out where to actually put the modifiers
9792
9793 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9794 if (OpSelIdx != -1) {
9795 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
9796 }
9797
9798 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
9799 if (OpSelHiIdx != -1) {
// An omitted op_sel_hi defaults to all ones for packed instructions and to
// zero otherwise.
9800 int DefaultVal = IsPacked ? -1 : 0;
9801 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
9802 DefaultVal);
9803 }
9804
9805 int MatrixAFMTIdx =
9806 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
9807 if (MatrixAFMTIdx != -1) {
9808 addOptionalImmOperand(Inst, Operands, OptIdx,
9809 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9810 }
9811
9812 int MatrixBFMTIdx =
9813 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
9814 if (MatrixBFMTIdx != -1) {
9815 addOptionalImmOperand(Inst, Operands, OptIdx,
9816 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9817 }
9818
9819 int MatrixAScaleIdx =
9820 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
9821 if (MatrixAScaleIdx != -1) {
9822 addOptionalImmOperand(Inst, Operands, OptIdx,
9823 AMDGPUOperand::ImmTyMatrixAScale, 0);
9824 }
9825
9826 int MatrixBScaleIdx =
9827 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
9828 if (MatrixBScaleIdx != -1) {
9829 addOptionalImmOperand(Inst, Operands, OptIdx,
9830 AMDGPUOperand::ImmTyMatrixBScale, 0);
9831 }
9832
9833 int MatrixAScaleFmtIdx =
9834 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9835 if (MatrixAScaleFmtIdx != -1) {
9836 addOptionalImmOperand(Inst, Operands, OptIdx,
9837 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9838 }
9839
9840 int MatrixBScaleFmtIdx =
9841 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9842 if (MatrixBScaleFmtIdx != -1) {
9843 addOptionalImmOperand(Inst, Operands, OptIdx,
9844 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9845 }
9846
9847 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
9848 addOptionalImmOperand(Inst, Operands, OptIdx,
9849 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9850
9851 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
9852 addOptionalImmOperand(Inst, Operands, OptIdx,
9853 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9854
9855 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
9856 if (NegLoIdx != -1)
9857 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
9858
9859 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
9860 if (NegHiIdx != -1)
9861 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
9862
9863 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9864 AMDGPU::OpName::src2};
9865 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9866 AMDGPU::OpName::src1_modifiers,
9867 AMDGPU::OpName::src2_modifiers};
9868
9869 unsigned OpSel = 0;
9870 unsigned OpSelHi = 0;
9871 unsigned NegLo = 0;
9872 unsigned NegHi = 0;
9873
// Read back the values just appended (or their defaults).
9874 if (OpSelIdx != -1)
9875 OpSel = Inst.getOperand(OpSelIdx).getImm();
9876
9877 if (OpSelHiIdx != -1)
9878 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
9879
9880 if (NegLoIdx != -1)
9881 NegLo = Inst.getOperand(NegLoIdx).getImm();
9882
9883 if (NegHiIdx != -1)
9884 NegHi = Inst.getOperand(NegHiIdx).getImm();
9885
// Unlike cvtOpSelHelper, the first missing source ends this walk (break),
// while a source without a modifiers operand is merely skipped (continue).
9886 for (int J = 0; J < 3; ++J) {
9887 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9888 if (OpIdx == -1)
9889 break;
9890
9891 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9892
9893 if (ModIdx == -1)
9894 continue;
9895
9896 uint32_t ModVal = 0;
9897
// For a source register in the VGPR_16 class, the register itself (hi or lo
// half) decides OP_SEL_0 and the op_sel bit is ignored.
9898 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
9899 if (SrcOp.isReg() && getMRI()
9900 ->getRegClass(AMDGPU::VGPR_16RegClassID)
9901 .contains(SrcOp.getReg())) {
9902 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
9903 if (VGPRSuffixIsHi)
9904 ModVal |= SISrcMods::OP_SEL_0;
9905 } else {
9906 if ((OpSel & (1 << J)) != 0)
9907 ModVal |= SISrcMods::OP_SEL_0;
9908 }
9909
9910 if ((OpSelHi & (1 << J)) != 0)
9911 ModVal |= SISrcMods::OP_SEL_1;
9912
9913 if ((NegLo & (1 << J)) != 0)
9914 ModVal |= SISrcMods::NEG;
9915
9916 if ((NegHi & (1 << J)) != 0)
9917 ModVal |= SISrcMods::NEG_HI;
9918
// OR into the existing value so modifiers set earlier are preserved.
9919 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
9920 }
9921}
9922
// Full VOP3P conversion: the generic VOP3 pass fills in defs, sources and the
// optional-operand index map, which the VOP3P-specific pass then consumes.
9923void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9924 OptionalImmIndexMap OptIdx;
9925 cvtVOP3(Inst, Operands, OptIdx);
9926 cvtVOP3P(Inst, Operands, OptIdx);
9927}
9928
// NOTE(review): listing line 9929 (the start of this function's signature) is
// missing from this copy; judging by the calls in cvtSWMMAC this is
// addSrcModifiersAndSrc(Inst, Operands, i, Opc, OpName) -- restore the
// signature from the real source.
// Appends parsed operand i to Inst: as a modifiers+source pair when the
// opcode defines the named (modifiers) operand, as a bare register otherwise.
9930 unsigned i, unsigned Opc,
9931 AMDGPU::OpName OpName) {
9932 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
9933 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9934 else
9935 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
9936}
9937
9938void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
9939 unsigned Opc = Inst.getOpcode();
9940
9941 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9942 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
9943 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
9944 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
9945 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
9946
9947 OptionalImmIndexMap OptIdx;
9948 for (unsigned i = 5; i < Operands.size(); ++i) {
9949 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9950 OptIdx[Op.getImmTy()] = i;
9951 }
9952
9953 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
9954 addOptionalImmOperand(Inst, Operands, OptIdx,
9955 AMDGPUOperand::ImmTyIndexKey8bit);
9956
9957 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
9958 addOptionalImmOperand(Inst, Operands, OptIdx,
9959 AMDGPUOperand::ImmTyIndexKey16bit);
9960
9961 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
9962 addOptionalImmOperand(Inst, Operands, OptIdx,
9963 AMDGPUOperand::ImmTyIndexKey32bit);
9964
9965 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9966 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
9967
9968 cvtVOP3P(Inst, Operands, OptIdx);
9969}
9970
9971//===----------------------------------------------------------------------===//
9972// VOPD
9973//===----------------------------------------------------------------------===//
9974
9975ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9976 if (!hasVOPD(getSTI()))
9977 return ParseStatus::NoMatch;
9978
9979 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
9980 SMLoc S = getLoc();
9981 lex();
9982 lex();
9983 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
9984 SMLoc OpYLoc = getLoc();
9985 StringRef OpYName;
9986 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
9987 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
9988 return ParseStatus::Success;
9989 }
9990 return Error(OpYLoc, "expected a VOPDY instruction after ::");
9991 }
9992 return ParseStatus::NoMatch;
9993}
9994
9995// Create VOPD MCInst operands using parsed assembler operands.
// Both destinations are appended first, then each component's sources (plus
// its destination again when the component has a src2 accumulator), and
// finally the optional bitop3 immediate for opcodes that define it.
9996void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9997 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9998
// Appends the parsed operand at ParsedOprIdx to Inst in whichever form
// (modifiers+source, register, immediate) applies.
9999 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
10000 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
// NOTE(review): listing line 10001 (the `if` that opens this first case) is
// missing from this copy -- restore it from the real source.
10002 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10003 return;
10004 }
10005 if (Op.isReg()) {
10006 Op.addRegOperands(Inst, 1);
10007 return;
10008 }
10009 if (Op.isImm()) {
10010 Op.addImmOperands(Inst, 1);
10011 return;
10012 }
10013 llvm_unreachable("Unhandled operand type in cvtVOPD");
10014 };
10015
10016 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
10017
10018 // MCInst operands are ordered as follows:
10019 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
10020
10021 for (auto CompIdx : VOPD::COMPONENTS) {
10022 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
10023 }
10024
10025 for (auto CompIdx : VOPD::COMPONENTS) {
10026 const auto &CInfo = InstInfo[CompIdx];
10027 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
10028 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
10029 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
10030 if (CInfo.hasSrc2Acc())
10031 addOp(CInfo.getIndexOfDstInParsedOperands());
10032 }
10033
10034 int BitOp3Idx =
10035 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
10036 if (BitOp3Idx != -1) {
// Only the last parsed operand is considered as a candidate for bitop3.
10037 OptionalImmIndexMap OptIdx;
10038 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
10039 if (Op.isImm())
10040 OptIdx[Op.getImmTy()] = Operands.size() - 1;
10041
10042 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
10043 }
10044}
10045
10046//===----------------------------------------------------------------------===//
10047// dpp
10048//===----------------------------------------------------------------------===//
10049
10050bool AMDGPUOperand::isDPP8() const {
10051 return isImmTy(ImmTyDPP8);
10052}
10053
10054bool AMDGPUOperand::isDPPCtrl() const {
10055 using namespace AMDGPU::DPP;
10056
10057 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
10058 if (result) {
10059 int64_t Imm = getImm();
10060 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
10061 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
10062 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
10063 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
10064 (Imm == DppCtrl::WAVE_SHL1) ||
10065 (Imm == DppCtrl::WAVE_ROL1) ||
10066 (Imm == DppCtrl::WAVE_SHR1) ||
10067 (Imm == DppCtrl::WAVE_ROR1) ||
10068 (Imm == DppCtrl::ROW_MIRROR) ||
10069 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
10070 (Imm == DppCtrl::BCAST15) ||
10071 (Imm == DppCtrl::BCAST31) ||
10072 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
10073 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
10074 }
10075 return false;
10076}
10077
10078//===----------------------------------------------------------------------===//
10079// mAI
10080//===----------------------------------------------------------------------===//
10081
10082bool AMDGPUOperand::isBLGP() const {
10083 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
10084}
10085
10086bool AMDGPUOperand::isS16Imm() const {
10087 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
10088}
10089
10090bool AMDGPUOperand::isU16Imm() const {
10091 return isImmLiteral() && isUInt<16>(getImm());
10092}
10093
10094//===----------------------------------------------------------------------===//
10095// dim
10096//===----------------------------------------------------------------------===//
10097
10098bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
10099 // We want to allow "dim:1D" etc.,
10100 // but the initial 1 is tokenized as an integer.
10101 std::string Token;
10102 if (isToken(AsmToken::Integer)) {
10103 SMLoc Loc = getToken().getEndLoc();
10104 Token = std::string(getTokenStr());
10105 lex();
10106 if (getLoc() != Loc)
10107 return false;
10108 }
10109
10110 StringRef Suffix;
10111 if (!parseId(Suffix))
10112 return false;
10113 Token += Suffix;
10114
10115 StringRef DimId = Token;
10116 DimId.consume_front("SQ_RSRC_IMG_");
10117
10118 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
10119 if (!DimInfo)
10120 return false;
10121
10122 Encoding = DimInfo->Encoding;
10123 return true;
10124}
10125
10126ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
10127 if (!isGFX10Plus())
10128 return ParseStatus::NoMatch;
10129
10130 SMLoc S = getLoc();
10131
10132 if (!trySkipId("dim", AsmToken::Colon))
10133 return ParseStatus::NoMatch;
10134
10135 unsigned Encoding;
10136 SMLoc Loc = getLoc();
10137 if (!parseDimId(Encoding))
10138 return Error(Loc, "invalid dim value");
10139
10140 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
10141 AMDGPUOperand::ImmTyDim));
10142 return ParseStatus::Success;
10143}
10144
10145//===----------------------------------------------------------------------===//
10146// dpp
10147//===----------------------------------------------------------------------===//
10148
10149ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
10150 SMLoc S = getLoc();
10151
10152 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
10153 return ParseStatus::NoMatch;
10154
10155 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
10156
10157 int64_t Sels[8];
10158
10159 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
10160 return ParseStatus::Failure;
10161
10162 for (size_t i = 0; i < 8; ++i) {
10163 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
10164 return ParseStatus::Failure;
10165
10166 SMLoc Loc = getLoc();
10167 if (getParser().parseAbsoluteExpression(Sels[i]))
10168 return ParseStatus::Failure;
10169 if (0 > Sels[i] || 7 < Sels[i])
10170 return Error(Loc, "expected a 3-bit value");
10171 }
10172
10173 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10174 return ParseStatus::Failure;
10175
10176 unsigned DPP8 = 0;
10177 for (size_t i = 0; i < 8; ++i)
10178 DPP8 |= (Sels[i] << (i * 3));
10179
10180 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
10181 return ParseStatus::Success;
10182}
10183
10184bool
10185AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
10186 const OperandVector &Operands) {
10187 if (Ctrl == "row_newbcast")
10188 return isGFX90A();
10189
10190 if (Ctrl == "row_share" ||
10191 Ctrl == "row_xmask")
10192 return isGFX10Plus();
10193
10194 if (Ctrl == "wave_shl" ||
10195 Ctrl == "wave_shr" ||
10196 Ctrl == "wave_rol" ||
10197 Ctrl == "wave_ror" ||
10198 Ctrl == "row_bcast")
10199 return isVI() || isGFX9();
10200
10201 return Ctrl == "row_mirror" ||
10202 Ctrl == "row_half_mirror" ||
10203 Ctrl == "quad_perm" ||
10204 Ctrl == "row_shl" ||
10205 Ctrl == "row_shr" ||
10206 Ctrl == "row_ror";
10207}
10208
10209int64_t
10210AMDGPUAsmParser::parseDPPCtrlPerm() {
10211 // quad_perm:[%d,%d,%d,%d]
10212
10213 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
10214 return -1;
10215
10216 int64_t Val = 0;
10217 for (int i = 0; i < 4; ++i) {
10218 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
10219 return -1;
10220
10221 int64_t Temp;
10222 SMLoc Loc = getLoc();
10223 if (getParser().parseAbsoluteExpression(Temp))
10224 return -1;
10225 if (Temp < 0 || Temp > 3) {
10226 Error(Loc, "expected a 2-bit value");
10227 return -1;
10228 }
10229
10230 Val += (Temp << i * 2);
10231 }
10232
10233 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10234 return -1;
10235
10236 return Val;
10237}
10238
10239int64_t
10240AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10241 using namespace AMDGPU::DPP;
10242
10243 // sel:%d
10244
10245 int64_t Val;
10246 SMLoc Loc = getLoc();
10247
10248 if (getParser().parseAbsoluteExpression(Val))
10249 return -1;
10250
10251 struct DppCtrlCheck {
10252 int64_t Ctrl;
10253 int Lo;
10254 int Hi;
10255 };
10256
10257 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
10258 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10259 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10260 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10261 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10262 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10263 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10264 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10265 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10266 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10267 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10268 .Default({-1, 0, 0});
10269
10270 bool Valid;
10271 if (Check.Ctrl == -1) {
10272 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
10273 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
10274 } else {
10275 Valid = Check.Lo <= Val && Val <= Check.Hi;
10276 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
10277 }
10278
10279 if (!Valid) {
10280 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
10281 return -1;
10282 }
10283
10284 return Val;
10285}
10286
10287ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10288 using namespace AMDGPU::DPP;
10289
10290 if (!isToken(AsmToken::Identifier) ||
10291 !isSupportedDPPCtrl(getTokenStr(), Operands))
10292 return ParseStatus::NoMatch;
10293
10294 SMLoc S = getLoc();
10295 int64_t Val = -1;
10296 StringRef Ctrl;
10297
10298 parseId(Ctrl);
10299
10300 if (Ctrl == "row_mirror") {
10301 Val = DppCtrl::ROW_MIRROR;
10302 } else if (Ctrl == "row_half_mirror") {
10303 Val = DppCtrl::ROW_HALF_MIRROR;
10304 } else {
10305 if (skipToken(AsmToken::Colon, "expected a colon")) {
10306 if (Ctrl == "quad_perm") {
10307 Val = parseDPPCtrlPerm();
10308 } else {
10309 Val = parseDPPCtrlSel(Ctrl);
10310 }
10311 }
10312 }
10313
10314 if (Val == -1)
10315 return ParseStatus::Failure;
10316
10317 Operands.push_back(
10318 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10319 return ParseStatus::Success;
10320}
10321
// Builds the MCInst operand list of a VOP3 DPP instruction from the parsed
// operands. Defs are added first, then the remaining parsed operands are
// walked in order: register/immediate operands are appended as they are met,
// while other immediates are remembered in OptionalIdx and appended at the
// end in a fixed order. IsDPP8 selects the dpp8 tail (dpp8 selector + FI
// immediate) instead of the dpp_ctrl / row_mask / bank_mask / bound_ctrl
// (/ fi) tail.
//
// NOTE(review): the embedded listing numbers skip 10363, 10373 and 10379, and
// the corresponding statements below are visibly truncated (an empty branch
// after "Add unused dummy src2_modifiers", and an unterminated
// getOperandConstraint call). Those lines appear to be missing from this
// listing; restore them from the upstream file before relying on this text.
10322void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
10323 bool IsDPP8) {
10324 OptionalImmIndexMap OptionalIdx;
10325 unsigned Opc = Inst.getOpcode();
10326 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10327
10328 // MAC instructions are special because they have 'old'
10329 // operand which is not tied to dst (but assumed to be).
10330 // They also have dummy unused src2_modifiers.
10331 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
10332 int Src2ModIdx =
10333 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
10334 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10335 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
10336
// Parsed operand 0 is never converted here; conversion starts at index 1
// with one register operand per def.
10337 unsigned I = 1;
10338 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10339 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10340 }
10341
// FI value seen while scanning a DPP8 instruction; emitted after the loop.
10342 int Fi = 0;
10343 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
10344 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10345 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx13 ||
10346 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10347 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx13 ||
10348 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10349 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx13 ||
10350 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
10351 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx13;
10352
10353 for (unsigned E = Operands.size(); I != E; ++I) {
10354
// Before consuming the next parsed operand, fill any MCInst slot that has no
// parsed counterpart. Each check compares a named operand index against the
// number of operands added so far, i.e. the index of the next slot.
10355 if (IsMAC) {
10356 int NumOperands = Inst.getNumOperands();
10357 if (OldIdx == NumOperands) {
10358 // Handle old operand
10359 constexpr int DST_IDX = 0;
10360 Inst.addOperand(Inst.getOperand(DST_IDX));
10361 } else if (Src2ModIdx == NumOperands) {
10362 // Add unused dummy src2_modifiers
10364 }
10365 }
10366
// vdst_in is filled with a copy of MCInst operand 0.
10367 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
10368 Inst.addOperand(Inst.getOperand(0));
10369 }
10370
10371 if (IsVOP3CvtSrDpp) {
10372 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
10374 Inst.addOperand(MCOperand::createReg(MCRegister()));
10375 }
10376 }
10377
10378 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10380 if (TiedTo != -1) {
10381 assert((unsigned)TiedTo < Inst.getNumOperands());
10382 // handle tied old or src2 for MAC instructions
10383 Inst.addOperand(Inst.getOperand(TiedTo));
10384 }
10385 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10386 // Add the register arguments
10387 if (IsDPP8 && Op.isDppFI()) {
10388 Fi = Op.getImm();
10389 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10390 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10391 } else if (Op.isReg()) {
10392 Op.addRegOperands(Inst, 1);
10393 } else if (Op.isImm() &&
10394 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
10395 Op.addImmOperands(Inst, 1);
10396 } else if (Op.isImm()) {
// Any other immediate is recorded by type and emitted after the loop.
10397 OptionalIdx[Op.getImmTy()] = I;
10398 } else {
10399 llvm_unreachable("unhandled operand type");
10400 }
10401 }
10402
// Optional immediates are appended in this fixed order, each only when the
// opcode has the corresponding named operand.
10403 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
10404 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10405 AMDGPUOperand::ImmTyClamp);
10406
10407 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
10408 if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
10409 Inst.addOperand(Inst.getOperand(0));
10410 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10411 AMDGPUOperand::ImmTyByteSel);
10412 }
10413
10414 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10415 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
10416
10417 if (Desc.TSFlags & SIInstrFlags::VOP3P)
10418 cvtVOP3P(Inst, Operands, OptionalIdx);
10419 else if (Desc.TSFlags & SIInstrFlags::VOP3)
10420 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10421 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
10422 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
10423 }
10424
10425 if (IsDPP8) {
10426 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
10427 using namespace llvm::AMDGPU::DPP;
10428 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10429 } else {
// The trailing literal is the value used when the operand was not written
// (0xe4 for dpp_ctrl, 0xf for row_mask and bank_mask).
10430 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
10431 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10432 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10433 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10434
10435 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
10436 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10437 AMDGPUOperand::ImmTyDppFI);
10438 }
10439}
10440
// Builds the MCInst operand list of a DPP instruction from the parsed
// operands. Defs are added first; the remaining parsed operands are then
// walked in order. IsDPP8 selects the dpp8 handling (dpp8 selector added
// inline, FI immediate appended last) over the regular DPP handling
// (dpp_ctrl added inline, then row_mask / bank_mask / bound_ctrl (/ fi)).
//
// NOTE(review): the embedded listing numbers skip 10453 and 10480, leaving an
// unterminated getOperandConstraint call and an `else {` branch whose first
// statement has no visible `if`. Those lines appear to be missing from this
// listing; restore them from the upstream file before relying on this text.
10441void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
10442 OptionalImmIndexMap OptionalIdx;
10443
// Parsed operand 0 is never converted here; conversion starts at index 1
// with one register operand per def.
10444 unsigned I = 1;
10445 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10446 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10447 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10448 }
10449
// FI value seen while scanning a DPP8 instruction; emitted after the loop.
10450 int Fi = 0;
10451 for (unsigned E = Operands.size(); I != E; ++I) {
10452 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10454 if (TiedTo != -1) {
10455 assert((unsigned)TiedTo < Inst.getNumOperands());
10456 // handle tied old or src2 for MAC instructions
10457 Inst.addOperand(Inst.getOperand(TiedTo));
10458 }
10459 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10460 // Add the register arguments
10461 if (Op.isReg() && validateVccOperand(Op.getReg())) {
10462 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
10463 // Skip it.
10464 continue;
10465 }
10466
10467 if (IsDPP8) {
10468 if (Op.isDPP8()) {
10469 Op.addImmOperands(Inst, 1);
10470 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10471 Op.addRegWithFPInputModsOperands(Inst, 2);
10472 } else if (Op.isDppFI()) {
10473 Fi = Op.getImm();
10474 } else if (Op.isReg()) {
10475 Op.addRegOperands(Inst, 1);
10476 } else {
10477 llvm_unreachable("Invalid operand type");
10478 }
10479 } else {
10481 Op.addRegWithFPInputModsOperands(Inst, 2);
10482 } else if (Op.isReg()) {
10483 Op.addRegOperands(Inst, 1);
10484 } else if (Op.isDPPCtrl()) {
10485 Op.addImmOperands(Inst, 1);
10486 } else if (Op.isImm()) {
10487 // Handle optional arguments
10488 OptionalIdx[Op.getImmTy()] = I;
10489 } else {
10490 llvm_unreachable("Invalid operand type");
10491 }
10492 }
10493 }
10494
10495 if (IsDPP8) {
10496 using namespace llvm::AMDGPU::DPP;
10497 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10498 } else {
// The trailing literal is the value used when the operand was not written
// (0xf for row_mask and bank_mask).
10499 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10500 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10501 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10502 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
10503 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10504 AMDGPUOperand::ImmTyDppFI);
10505 }
10506 }
10507}
10508
10509//===----------------------------------------------------------------------===//
10510// sdwa
10511//===----------------------------------------------------------------------===//
10512
10513ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
10514 StringRef Prefix,
10515 AMDGPUOperand::ImmTy Type) {
10516 return parseStringOrIntWithPrefix(
10517 Operands, Prefix,
10518 {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
10519 Type);
10520}
10521
10522ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
10523 return parseStringOrIntWithPrefix(
10524 Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
10525 AMDGPUOperand::ImmTySDWADstUnused);
10526}
10527
10528void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
10529 cvtSDWA(Inst, Operands, SDWAInstType::VOP1);
10530}
10531
10532void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
10533 cvtSDWA(Inst, Operands, SDWAInstType::VOP2);
10534}
10535
10536void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
10537 cvtSDWA(Inst, Operands, SDWAInstType::VOP2, true, true);
10538}
10539
10540void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
10541 cvtSDWA(Inst, Operands, SDWAInstType::VOP2, false, true);
10542}
10543
10544void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
10545 cvtSDWA(Inst, Operands, SDWAInstType::VOPC, isVI());
10546}
10547
// Builds the MCInst operand list of an SDWA instruction from the parsed
// operands. Defs are added first; the remaining parsed operands are walked in
// order, optionally dropping a "vcc" register operand at specific positions
// (controlled by SkipDstVcc / SkipSrcVcc and BasicInstType). Immediates that
// are not consumed inline are remembered in OptionalIdx and appended
// afterwards in an order that depends on BasicInstType. Finally, for
// V_MAC_F32_sdwa_vi / V_MAC_F16_sdwa_vi a copy of operand 0 is inserted at
// the src2 position.
//
// NOTE(review): the embedded listing numbers skip 10583, and the statement
// right after the vcc-skipping block has no visible `if` in front of its
// `} else if`. That line appears to be missing from this listing; restore it
// from the upstream file before relying on this text.
10548void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
10549 SDWAInstType BasicInstType, bool SkipDstVcc,
10550 bool SkipSrcVcc) {
10551 using namespace llvm::AMDGPU::SDWA;
10552
10553 OptionalImmIndexMap OptionalIdx;
10554 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
// Set when the previous iteration dropped a vcc operand; cleared whenever an
// operand is actually consumed.
10555 bool SkippedVcc = false;
10556
// Parsed operand 0 is never converted here; conversion starts at index 1
// with one register operand per def.
10557 unsigned I = 1;
10558 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10559 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10560 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10561 }
10562
10563 for (unsigned E = Operands.size(); I != E; ++I) {
10564 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10565 if (SkipVcc && !SkippedVcc && Op.isReg() &&
10566 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
10567 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
10568 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
10569 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
10570 // Skip VCC only if we didn't skip it on previous iteration.
10571 // Note that src0 and src1 occupy 2 slots each because of modifiers.
10572 if (BasicInstType == SDWAInstType::VOP2 &&
10573 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
10574 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
10575 SkippedVcc = true;
10576 continue;
10577 }
// For VOPC the vcc operand is dropped only while the MCInst is still empty.
10578 if (BasicInstType == SDWAInstType::VOPC && Inst.getNumOperands() == 0) {
10579 SkippedVcc = true;
10580 continue;
10581 }
10582 }
10584 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10585 } else if (Op.isImm()) {
10586 // Handle optional arguments
10587 OptionalIdx[Op.getImmTy()] = I;
10588 } else {
10589 llvm_unreachable("Invalid operand type");
10590 }
10591 SkippedVcc = false;
10592 }
10593
10594 const unsigned Opc = Inst.getOpcode();
10595 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10596 Opc != AMDGPU::V_NOP_sdwa_vi) {
10597 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
// The trailing argument of each call below is the value used when the
// operand was not written in the source.
10598 switch (BasicInstType) {
10599 case SDWAInstType::VOP1:
10600 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10601 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10602 AMDGPUOperand::ImmTyClamp, 0);
10603
10604 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10605 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10606 AMDGPUOperand::ImmTyOModSI, 0);
10607
10608 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
10609 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10610 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10611
10612 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
10613 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10614 AMDGPUOperand::ImmTySDWADstUnused,
10615 DstUnused::UNUSED_PRESERVE);
10616
10617 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10618 break;
10619
10620 case SDWAInstType::VOP2:
10621 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10622 AMDGPUOperand::ImmTyClamp, 0);
10623
10624 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
10625 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
10626
10627 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10628 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10629 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10630 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10631 break;
10632
10633 case SDWAInstType::VOPC:
10634 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
10635 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10636 AMDGPUOperand::ImmTyClamp, 0);
10637 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10638 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10639 break;
10640 }
10641 }
10642
10643 // special case v_mac_{f16, f32}:
10644 // it has src2 register operand that is tied to dst operand
10645 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10646 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10647 auto *it = Inst.begin();
10648 std::advance(
10649 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
10650 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
10651 }
10652}
10653
10654/// Force static initialization.
10655extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
10660
10661#define GET_MATCHER_IMPLEMENTATION
10662#define GET_MNEMONIC_SPELL_CHECKER
10663#define GET_MNEMONIC_CHECKER
10664#include "AMDGPUGenAsmMatcher.inc"
10665
10666ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10667 unsigned MCK) {
10668 switch (MCK) {
10669 case MCK_addr64:
10670 return parseTokenOp("addr64", Operands);
10671 case MCK_done:
10672 return parseNamedBit("done", Operands, AMDGPUOperand::ImmTyDone, true);
10673 case MCK_idxen:
10674 return parseTokenOp("idxen", Operands);
10675 case MCK_lds:
10676 return parseNamedBit("lds", Operands, AMDGPUOperand::ImmTyLDS,
10677 /*IgnoreNegative=*/true);
10678 case MCK_offen:
10679 return parseTokenOp("offen", Operands);
10680 case MCK_off:
10681 return parseTokenOp("off", Operands);
10682 case MCK_row_95_en:
10683 return parseNamedBit("row_en", Operands, AMDGPUOperand::ImmTyRowEn, true);
10684 case MCK_gds:
10685 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10686 case MCK_tfe:
10687 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10688 }
10689 return tryCustomParseOperand(Operands, MCK);
10690}
10691
10692// This function should be defined after auto-generated include so that we have
10693// MatchClassKind enum defined
10694unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10695 unsigned Kind) {
10696 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10697 // But MatchInstructionImpl() expects to meet token and fails to validate
10698 // operand. This method checks if we are given immediate operand but expect to
10699 // get corresponding token.
10700 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10701 switch (Kind) {
10702 case MCK_addr64:
10703 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10704 case MCK_gds:
10705 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10706 case MCK_lds:
10707 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10708 case MCK_idxen:
10709 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10710 case MCK_offen:
10711 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10712 case MCK_tfe:
10713 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10714 case MCK_done:
10715 return Operand.isDone() ? Match_Success : Match_InvalidOperand;
10716 case MCK_row_95_en:
10717 return Operand.isRowEn() ? Match_Success : Match_InvalidOperand;
10718 case MCK_SSrc_b32:
10719 // When operands have expression values, they will return true for isToken,
10720 // because it is not possible to distinguish between a token and an
10721 // expression at parse time. MatchInstructionImpl() will always try to
10722 // match an operand as a token, when isToken returns true, and when the
10723 // name of the expression is not a valid token, the match will fail,
10724 // so we need to handle it here.
10725 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10726 case MCK_SSrc_f32:
10727 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10728 case MCK_SOPPBrTarget:
10729 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10730 case MCK_VReg32OrOff:
10731 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10732 case MCK_InterpSlot:
10733 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10734 case MCK_InterpAttr:
10735 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10736 case MCK_InterpAttrChan:
10737 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10738 case MCK_SReg_64:
10739 case MCK_SReg_64_XEXEC:
10740 // Null is defined as a 32-bit register but
10741 // it should also be enabled with 64-bit operands or larger.
10742 // The following code enables it for SReg_64 and larger operands
10743 // used as source and destination. Remaining source
10744 // operands are handled in isInlinableImm.
10745 case MCK_SReg_96:
10746 case MCK_SReg_128:
10747 case MCK_SReg_256:
10748 case MCK_SReg_512:
10749 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10750 default:
10751 return Match_InvalidOperand;
10752 }
10753}
10754
10755//===----------------------------------------------------------------------===//
10756// endpgm
10757//===----------------------------------------------------------------------===//
10758
10759ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10760 SMLoc S = getLoc();
10761 int64_t Imm = 0;
10762
10763 if (!parseExpr(Imm)) {
10764 // The operand is optional, if not present default to 0
10765 Imm = 0;
10766 }
10767
10768 if (!isUInt<16>(Imm))
10769 return Error(S, "expected a 16-bit value");
10770
10771 Operands.push_back(
10772 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10773 return ParseStatus::Success;
10774}
10775
10776bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10777
10778//===----------------------------------------------------------------------===//
10779// Split Barrier
10780//===----------------------------------------------------------------------===//
10781
10782bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
#define Success
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
SmallVector< int16_t, MAX_SRC_OPERANDS_NUM > OperandIndices
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, AMDGPU::OpName OpName)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name, const MCRegisterInfo *MRI)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static bool IsRevOpcode(const unsigned Opcode)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static MCRegister getSpecialRegForName(StringRef RegName)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0, std::optional< unsigned > InsertAt=std::nullopt)
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
AMDGPU metadata definitions and in-memory representations.
Enums shared between the AMDGPU backend (LLVM) and the ELF linker (LLD) for the .amdgpu....
AMDHSA kernel descriptor definitions.
static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value, raw_ostream &Err)
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_READNONE
Definition Compiler.h:315
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
@ Default
#define Check(C,...)
static llvm::Expected< InlineInfo > decode(GsymDataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
Loop::LoopBounds::Direction Direction
Definition LoopInfo.cpp:253
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(PassOpts->AAPipeline)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
This file implements the SmallBitVector class.
StringSet - A set-like wrapper for the StringMap.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
BinaryOperator * Mul
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & BFloat()
Definition APFloat.h:295
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5912
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:130
size_t size() const
Get the array size.
Definition ArrayRef.h:141
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition MCAsmMacro.h:103
bool is(TokenKind K) const
Definition MCAsmMacro.h:75
Register getReg() const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:353
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Look up the symbol with the specified Name.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
SMLoc getLoc() const
Definition MCInst.h:208
void setLoc(SMLoc loc)
Definition MCInst.h:207
unsigned getOpcode() const
Definition MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition MCInst.h:232
void addOperand(const MCOperand Op)
Definition MCInst.h:215
iterator begin()
Definition MCInst.h:227
size_t size() const
Definition MCInst.h:226
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Definition MCInstrInfo.h:80
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
void setImm(int64_t Val)
Definition MCInst.h:89
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
bool isImm() const
Definition MCInst.h:66
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
const MCExpr * getExpr() const
Definition MCInst.h:118
bool isExpr() const
Definition MCInst.h:69
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
bool regsOverlap(MCRegister RegA, MCRegister RegB) const
Returns true if the two registers are equal or alias each other.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Idx" for physical register Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr bool isValid() const
Definition MCRegister.h:84
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
void setRedefinable(bool Value)
Mark this symbol as redefinable.
Definition MCSymbol.h:210
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCTargetAsmParser - Generic interface to target specific assembly parsers.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Definition Register.h:100
Represents a location in source code.
Definition SMLoc.h:22
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:35
constexpr const char * getPointer() const
Definition SMLoc.h:33
constexpr bool isValid() const
Definition SMLoc.h:28
SMLoc Start
Definition SMLoc.h:49
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition StringRef.h:685
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:138
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:270
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:655
bool contains(StringRef key) const
Check if the set contains the given key.
Definition StringSet.h:60
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:212
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
static constexpr CustomOperand Operands[]
unsigned getAddressableNumSGPRs(const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI)
unsigned getLocalMemorySize(const MCSubtargetInfo &STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
constexpr const char *const ModMatrixFmt[]
constexpr const char *const ModMatrixScaleFmt[]
constexpr const char *const ModMatrixScale[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlineValue(MCRegister Reg)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
FuncInfoFlags
Per-function flags packed into INFO_FLAGS entries.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY bool isLitExpr(const MCExpr *Expr)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
LLVM_READONLY int64_t getLitValue(const MCExpr *Expr)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:234
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:204
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:211
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:227
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:213
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:203
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:209
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:212
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:221
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:219
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:214
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:228
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:239
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:240
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:215
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:206
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:224
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:220
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:226
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:216
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:241
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:223
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:205
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:231
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
bool isGFX1250Plus(const MCSubtargetInfo &STI)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1433
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ OPERAND_IMMEDIATE
Definition MCInstrDesc.h:61
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
@ Valid
The data is already valid.
Context & getContext() const
Definition BasicBlock.h:99
bool isNull(StringRef S)
Definition YAMLTraits.h:571
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
Definition Error.h:1129
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
StringMapEntry< Value * > ValueName
Definition Value.h:56
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition Alignment.h:206
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
void PrintError(const Twine &Msg)
Definition Error.cpp:104
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
Definition InstrProf.h:299
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:362
Op::Description Desc
Target & getTheR600Target()
The target for R600 GPUs.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:31
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
#define N
RegisterKind Kind
StringLiteral Name
void initDefault(const MCSubtargetInfo &STI, MCContext &Ctx, bool InitMCExpr=true)
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
SmallVector< std::pair< MCSymbol *, std::string >, 4 > IndirectCalls
SmallVector< std::pair< MCSymbol *, MCSymbol * >, 8 > Calls
SmallVector< FuncInfo, 8 > Funcs
SmallVector< std::pair< MCSymbol *, std::string >, 4 > TypeIds
SmallVector< std::pair< MCSymbol *, MCSymbol * >, 4 > Uses
Instruction set architecture version.
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...