LLVM 23.0.0git
AMDGPUAsmParser.cpp
Go to the documentation of this file.
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
38#include "llvm/MC/MCSymbol.h"
47#include <optional>
48
49using namespace llvm;
50using namespace llvm::AMDGPU;
51using namespace llvm::amdhsa;
52
53namespace {
54
55class AMDGPUAsmParser;
56
// Register kinds used by this file. The enumerators are unscoped and keep
// their implicit values (IS_UNKNOWN == 0, each following one is +1).
enum RegisterKind {
  IS_UNKNOWN,
  IS_VGPR,
  IS_SGPR,
  IS_AGPR,
  IS_TTMP,
  IS_SPECIAL
};
58
59//===----------------------------------------------------------------------===//
60// Operand
61//===----------------------------------------------------------------------===//
62
63class AMDGPUOperand : public MCParsedAsmOperand {
// Discriminator for the operand payload. Each enumerator is what one of
// isToken(), isImm(), isRegKind() and isExpr() compares Kind against.
enum KindTy { Token, Immediate, Register, Expression };
KindTy Kind;
70
71 SMLoc StartLoc, EndLoc;
72 const AMDGPUAsmParser *AsmParser;
73
74public:
75 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
76 : Kind(Kind_), AsmParser(AsmParser_) {}
77
78 using Ptr = std::unique_ptr<AMDGPUOperand>;
79
80 struct Modifiers {
81 bool Abs = false;
82 bool Neg = false;
83 bool Sext = false;
84 LitModifier Lit = LitModifier::None;
85
86 bool hasFPModifiers() const { return Abs || Neg; }
87 bool hasIntModifiers() const { return Sext; }
88 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
89 bool isForcedLit() const { return Lit == LitModifier::Lit; }
90 bool isForcedLit64() const { return Lit == LitModifier::Lit64; }
91
92 int64_t getFPModifiersOperand() const {
93 int64_t Operand = 0;
94 Operand |= Abs ? SISrcMods::ABS : 0u;
95 Operand |= Neg ? SISrcMods::NEG : 0u;
96 return Operand;
97 }
98
99 int64_t getIntModifiersOperand() const {
100 int64_t Operand = 0;
101 Operand |= Sext ? SISrcMods::SEXT : 0u;
102 return Operand;
103 }
104
105 int64_t getModifiersOperand() const {
106 assert(!(hasFPModifiers() && hasIntModifiers())
107 && "fp and int modifiers should not be used simultaneously");
108 if (hasFPModifiers())
109 return getFPModifiersOperand();
110 if (hasIntModifiers())
111 return getIntModifiersOperand();
112 return 0;
113 }
114
115 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
116 };
117
// Tag stored in ImmOp::Type. ImmTyNone is the value isImmLiteral() checks
// for; an immediate with any other tag satisfies isImmModifier(). The
// enumerators keep their implicit values, so their order must not change.
enum ImmTy {
  ImmTyNone,
  ImmTyGDS,
  ImmTyLDS,
  ImmTyOffen,
  ImmTyIdxen,
  ImmTyAddr64,
  ImmTyOffset,
  ImmTyInstOffset,
  ImmTyOffset0,
  ImmTyOffset1,
  ImmTySMEMOffsetMod,
  ImmTyCPol,
  ImmTyTFE,
  ImmTyIsAsync,
  ImmTyD16,
  ImmTyClamp,
  ImmTyOModSI,
  ImmTySDWADstSel,
  ImmTySDWASrc0Sel,
  ImmTySDWASrc1Sel,
  ImmTySDWADstUnused,
  ImmTyDMask,
  ImmTyDim,
  ImmTyUNorm,
  ImmTyDA,
  ImmTyR128A16,
  ImmTyA16,
  ImmTyLWE,
  ImmTyExpTgt,
  ImmTyExpCompr,
  ImmTyExpVM,
  ImmTyDone,
  ImmTyRowEn,
  ImmTyFORMAT,
  ImmTyHwreg,
  ImmTyOff,
  ImmTySendMsg,
  ImmTyWaitEvent,
  ImmTyInterpSlot,
  ImmTyInterpAttr,
  ImmTyInterpAttrChan,
  ImmTyOpSel,
  ImmTyOpSelHi,
  ImmTyNegLo,
  ImmTyNegHi,
  ImmTyIndexKey8bit,
  ImmTyIndexKey16bit,
  ImmTyIndexKey32bit,
  ImmTyDPP8,
  ImmTyDppCtrl,
  ImmTyDppRowMask,
  ImmTyDppBankMask,
  ImmTyDppBoundCtrl,
  ImmTyDppFI,
  ImmTySwizzle,
  ImmTyGprIdxMode,
  ImmTyHigh,
  ImmTyBLGP,
  ImmTyCBSZ,
  ImmTyABID,
  ImmTyEndpgm,
  ImmTyWaitVDST,
  ImmTyWaitEXP,
  ImmTyWaitVAVDst,
  ImmTyWaitVMVSrc,
  ImmTyBitOp3,
  ImmTyMatrixAFMT,
  ImmTyMatrixBFMT,
  ImmTyMatrixAScale,
  ImmTyMatrixBScale,
  ImmTyMatrixAScaleFmt,
  ImmTyMatrixBScaleFmt,
  ImmTyMatrixAReuse,
  ImmTyMatrixBReuse,
  ImmTyScaleSel,
  ImmTyByteSel,
};
196
197private:
// Payload of a Token operand: a pointer/length pair that getToken() turns
// into StringRef(Data, Length).
struct TokOp {
  const char *Data;
  unsigned Length;
};
202
203 struct ImmOp {
204 int64_t Val;
205 ImmTy Type;
206 bool IsFPImm;
207 Modifiers Mods;
208 };
209
210 struct RegOp {
211 MCRegister RegNo;
212 Modifiers Mods;
213 };
214
215 union {
216 TokOp Tok;
217 ImmOp Imm;
218 RegOp Reg;
219 const MCExpr *Expr;
220 };
221
222 // The index of the associated MCInst operand.
223 mutable int MCOpIdx = -1;
224
225public:
226 bool isToken() const override { return Kind == Token; }
227
228 bool isSymbolRefExpr() const {
229 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230 }
231
232 bool isImm() const override {
233 return Kind == Immediate;
234 }
235
236 bool isInlinableImm(MVT type) const;
237 bool isLiteralImm(MVT type) const;
238
239 bool isRegKind() const {
240 return Kind == Register;
241 }
242
243 bool isReg() const override {
244 return isRegKind() && !hasModifiers();
245 }
246
247 bool isRegOrInline(unsigned RCID, MVT type) const {
248 return isRegClass(RCID) || isInlinableImm(type);
249 }
250
251 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
252 return isRegOrInline(RCID, type) || isLiteralImm(type);
253 }
254
255 bool isRegOrImmWithInt16InputMods() const {
256 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
257 }
258
259 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
261 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
262 }
263
264 bool isRegOrImmWithInt32InputMods() const {
265 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
266 }
267
268 bool isRegOrInlineImmWithInt16InputMods() const {
269 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
270 }
271
272 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
273 return isRegOrInline(
274 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
275 }
276
277 bool isRegOrInlineImmWithInt32InputMods() const {
278 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
279 }
280
281 bool isRegOrImmWithInt64InputMods() const {
282 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
283 }
284
285 bool isRegOrImmWithFP16InputMods() const {
286 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
287 }
288
289 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
291 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
292 }
293
294 bool isRegOrImmWithFP32InputMods() const {
295 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
296 }
297
298 bool isRegOrImmWithFP64InputMods() const {
299 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
300 }
301
302 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
303 return isRegOrInline(
304 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
305 }
306
307 bool isRegOrInlineImmWithFP32InputMods() const {
308 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
309 }
310
311 bool isRegOrInlineImmWithFP64InputMods() const {
312 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
313 }
314
315 bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
316
317 bool isVRegWithFP32InputMods() const {
318 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
319 }
320
321 bool isVRegWithFP64InputMods() const {
322 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
323 }
324
325 bool isPackedFP16InputMods() const {
326 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
327 }
328
329 bool isPackedVGPRFP32InputMods() const {
330 return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
331 }
332
333 bool isVReg() const {
334 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
335 isRegClass(AMDGPU::VReg_64RegClassID) ||
336 isRegClass(AMDGPU::VReg_96RegClassID) ||
337 isRegClass(AMDGPU::VReg_128RegClassID) ||
338 isRegClass(AMDGPU::VReg_160RegClassID) ||
339 isRegClass(AMDGPU::VReg_192RegClassID) ||
340 isRegClass(AMDGPU::VReg_256RegClassID) ||
341 isRegClass(AMDGPU::VReg_512RegClassID) ||
342 isRegClass(AMDGPU::VReg_1024RegClassID);
343 }
344
345 bool isVReg32() const {
346 return isRegClass(AMDGPU::VGPR_32RegClassID);
347 }
348
349 bool isVReg32OrOff() const {
350 return isOff() || isVReg32();
351 }
352
353 bool isNull() const {
354 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
355 }
356
357 bool isAV_LdSt_32_Align2_RegOp() const {
358 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
359 isRegClass(AMDGPU::AGPR_32RegClassID);
360 }
361
362 bool isVRegWithInputMods() const;
363 template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
364 template <bool IsFake16> bool isT16VRegWithInputMods() const;
365
366 bool isSDWAOperand(MVT type) const;
367 bool isSDWAFP16Operand() const;
368 bool isSDWAFP32Operand() const;
369 bool isSDWAInt16Operand() const;
370 bool isSDWAInt32Operand() const;
371
372 bool isImmTy(ImmTy ImmT) const {
373 return isImm() && Imm.Type == ImmT;
374 }
375
376 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
377
378 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
379
380 bool isImmModifier() const {
381 return isImm() && Imm.Type != ImmTyNone;
382 }
383
384 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
385 bool isDim() const { return isImmTy(ImmTyDim); }
386 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
387 bool isOff() const { return isImmTy(ImmTyOff); }
388 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
389 bool isOffen() const { return isImmTy(ImmTyOffen); }
390 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
391 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
392 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
393 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
394 bool isGDS() const { return isImmTy(ImmTyGDS); }
395 bool isLDS() const { return isImmTy(ImmTyLDS); }
396 bool isCPol() const { return isImmTy(ImmTyCPol); }
397 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
398 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
399 bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
400 bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
401 bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
402 bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
403 bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
404 bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
405 bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
406 bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
407 bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
408 bool isTFE() const { return isImmTy(ImmTyTFE); }
409 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
410 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
411 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
412 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
413 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
414 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
415 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
416 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
417 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
418 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
419 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
420 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
421 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
422 bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
423 bool isDone() const { return isImmTy(ImmTyDone); }
424 bool isRowEn() const { return isImmTy(ImmTyRowEn); }
425
426 bool isRegOrImm() const {
427 return isReg() || isImm();
428 }
429
430 bool isRegClass(unsigned RCID) const;
431
432 bool isInlineValue() const;
433
434 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
435 return isRegOrInline(RCID, type) && !hasModifiers();
436 }
437
438 bool isSCSrcB16() const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
440 }
441
442 bool isSCSrcV2B16() const {
443 return isSCSrcB16();
444 }
445
446 bool isSCSrc_b32() const {
447 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
448 }
449
450 bool isSCSrc_b64() const {
451 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
452 }
453
454 bool isBoolReg() const;
455
456 bool isSCSrcF16() const {
457 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
458 }
459
460 bool isSCSrcV2F16() const {
461 return isSCSrcF16();
462 }
463
464 bool isSCSrcF32() const {
465 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
466 }
467
468 bool isSCSrcF64() const {
469 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
470 }
471
472 bool isSSrc_b32() const {
473 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
474 }
475
476 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
477
478 bool isSSrcV2B16() const {
479 llvm_unreachable("cannot happen");
480 return isSSrc_b16();
481 }
482
483 bool isSSrc_b64() const {
484 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
485 // See isVSrc64().
486 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
487 (((const MCTargetAsmParser *)AsmParser)
488 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
489 isExpr());
490 }
491
492 bool isSSrc_f32() const {
493 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
494 }
495
496 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
497
498 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
499
500 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
501
502 bool isSSrcV2F16() const {
503 llvm_unreachable("cannot happen");
504 return isSSrc_f16();
505 }
506
507 bool isSSrcV2FP32() const {
508 llvm_unreachable("cannot happen");
509 return isSSrc_f32();
510 }
511
512 bool isSCSrcV2FP32() const {
513 llvm_unreachable("cannot happen");
514 return isSCSrcF32();
515 }
516
517 bool isSSrcV2INT32() const {
518 llvm_unreachable("cannot happen");
519 return isSSrc_b32();
520 }
521
522 bool isSCSrcV2INT32() const {
523 llvm_unreachable("cannot happen");
524 return isSCSrc_b32();
525 }
526
527 bool isSSrcOrLds_b32() const {
528 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
529 isLiteralImm(MVT::i32) || isExpr();
530 }
531
532 bool isVCSrc_b32() const {
533 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
534 }
535
536 bool isVCSrc_b32_Lo256() const {
537 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
538 }
539
540 bool isVCSrc_b64_Lo256() const {
541 return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
542 }
543
544 bool isVCSrc_b64() const {
545 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
546 }
547
548 bool isVCSrcT_b16() const {
549 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
550 }
551
552 bool isVCSrcTB16_Lo128() const {
553 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
554 }
555
556 bool isVCSrcFake16B16_Lo128() const {
557 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
558 }
559
560 bool isVCSrc_b16() const {
561 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
562 }
563
564 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
565
566 bool isVCSrc_f32() const {
567 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
568 }
569
570 bool isVCSrc_f64() const {
571 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
572 }
573
574 bool isVCSrcTBF16() const {
575 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
576 }
577
578 bool isVCSrcT_f16() const {
579 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
580 }
581
582 bool isVCSrcT_bf16() const {
583 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
584 }
585
586 bool isVCSrcTBF16_Lo128() const {
587 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
588 }
589
590 bool isVCSrcTF16_Lo128() const {
591 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
592 }
593
594 bool isVCSrcFake16BF16_Lo128() const {
595 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
596 }
597
598 bool isVCSrcFake16F16_Lo128() const {
599 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
600 }
601
602 bool isVCSrc_bf16() const {
603 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
604 }
605
606 bool isVCSrc_f16() const {
607 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
608 }
609
610 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
611
612 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
613
614 bool isVSrc_b32() const {
615 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
616 }
617
618 bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
619
620 bool isVSrc_v2b64() const {
621 return isRegOrInlineNoMods(AMDGPU::VS_128RegClassID, MVT::i64) ||
622 isLiteralImm(MVT::i64);
623 }
624
625 bool isVSrc_v2f64() const {
626 return isRegOrInlineNoMods(AMDGPU::VS_128RegClassID, MVT::f64) ||
627 isLiteralImm(MVT::f64);
628 }
629
630 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
631
632 bool isVSrcT_b16_Lo128() const {
633 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
634 }
635
636 bool isVSrcFake16_b16_Lo128() const {
637 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
638 }
639
640 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
641
642 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
643
644 bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
645
646 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
647
648 bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
649
650 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
651
652 bool isVSrc_f32() const {
653 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
654 }
655
656 bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
657
658 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
659
660 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
661
662 bool isVSrcT_bf16_Lo128() const {
663 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
664 }
665
666 bool isVSrcT_f16_Lo128() const {
667 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
668 }
669
670 bool isVSrcFake16_bf16_Lo128() const {
671 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
672 }
673
674 bool isVSrcFake16_f16_Lo128() const {
675 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
676 }
677
678 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
679
680 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
681
682 bool isVSrc_v2bf16() const {
683 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
684 }
685
686 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
687
688 bool isVSrc_v2f16_splat() const { return isVSrc_v2f16(); }
689
690 bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
691
692 bool isVISrcB32() const {
693 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
694 }
695
696 bool isVISrcB16() const {
697 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
698 }
699
700 bool isVISrcV2B16() const {
701 return isVISrcB16();
702 }
703
704 bool isVISrcF32() const {
705 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
706 }
707
708 bool isVISrcF16() const {
709 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
710 }
711
712 bool isVISrcV2F16() const {
713 return isVISrcF16() || isVISrcB32();
714 }
715
716 bool isVISrc_64_bf16() const {
717 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
718 }
719
720 bool isVISrc_64_f16() const {
721 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
722 }
723
724 bool isVISrc_64_b32() const {
725 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
726 }
727
728 bool isVISrc_64B64() const {
729 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
730 }
731
732 bool isVISrc_64_f64() const {
733 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
734 }
735
736 bool isVISrc_64V2FP32() const {
737 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
738 }
739
740 bool isVISrc_64V2INT32() const {
741 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
742 }
743
744 bool isVISrc_256_b32() const {
745 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
746 }
747
748 bool isVISrc_256_f32() const {
749 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
750 }
751
752 bool isVISrc_256B64() const {
753 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
754 }
755
756 bool isVISrc_256_f64() const {
757 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
758 }
759
760 bool isVISrc_512_f64() const {
761 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
762 }
763
764 bool isVISrc_128B16() const {
765 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
766 }
767
768 bool isVISrc_128V2B16() const {
769 return isVISrc_128B16();
770 }
771
772 bool isVISrc_128_b32() const {
773 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
774 }
775
776 bool isVISrc_128_f32() const {
777 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
778 }
779
780 bool isVISrc_256V2FP32() const {
781 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
782 }
783
784 bool isVISrc_256V2INT32() const {
785 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
786 }
787
788 bool isVISrc_512_b32() const {
789 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
790 }
791
792 bool isVISrc_512B16() const {
793 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
794 }
795
796 bool isVISrc_512V2B16() const {
797 return isVISrc_512B16();
798 }
799
800 bool isVISrc_512_f32() const {
801 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
802 }
803
804 bool isVISrc_512F16() const {
805 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
806 }
807
808 bool isVISrc_512V2F16() const {
809 return isVISrc_512F16() || isVISrc_512_b32();
810 }
811
812 bool isVISrc_1024_b32() const {
813 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
814 }
815
816 bool isVISrc_1024B16() const {
817 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
818 }
819
820 bool isVISrc_1024V2B16() const {
821 return isVISrc_1024B16();
822 }
823
824 bool isVISrc_1024_f32() const {
825 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
826 }
827
828 bool isVISrc_1024F16() const {
829 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
830 }
831
832 bool isVISrc_1024V2F16() const {
833 return isVISrc_1024F16() || isVISrc_1024_b32();
834 }
835
836 bool isAISrcB32() const {
837 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
838 }
839
840 bool isAISrcB16() const {
841 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
842 }
843
844 bool isAISrcV2B16() const {
845 return isAISrcB16();
846 }
847
848 bool isAISrcF32() const {
849 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
850 }
851
852 bool isAISrcF16() const {
853 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
854 }
855
856 bool isAISrcV2F16() const {
857 return isAISrcF16() || isAISrcB32();
858 }
859
860 bool isAISrc_64B64() const {
861 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
862 }
863
864 bool isAISrc_64_f64() const {
865 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
866 }
867
868 bool isAISrc_128_b32() const {
869 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
870 }
871
872 bool isAISrc_128B16() const {
873 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
874 }
875
876 bool isAISrc_128V2B16() const {
877 return isAISrc_128B16();
878 }
879
880 bool isAISrc_128_f32() const {
881 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
882 }
883
884 bool isAISrc_128F16() const {
885 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
886 }
887
888 bool isAISrc_128V2F16() const {
889 return isAISrc_128F16() || isAISrc_128_b32();
890 }
891
892 bool isVISrc_128_bf16() const {
893 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
894 }
895
896 bool isVISrc_128_f16() const {
897 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
898 }
899
900 bool isVISrc_128V2F16() const {
901 return isVISrc_128_f16() || isVISrc_128_b32();
902 }
903
904 bool isAISrc_256B64() const {
905 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
906 }
907
908 bool isAISrc_256_f64() const {
909 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
910 }
911
912 bool isAISrc_512_b32() const {
913 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
914 }
915
916 bool isAISrc_512B16() const {
917 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
918 }
919
920 bool isAISrc_512V2B16() const {
921 return isAISrc_512B16();
922 }
923
924 bool isAISrc_512_f32() const {
925 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
926 }
927
928 bool isAISrc_512F16() const {
929 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
930 }
931
932 bool isAISrc_512V2F16() const {
933 return isAISrc_512F16() || isAISrc_512_b32();
934 }
935
936 bool isAISrc_1024_b32() const {
937 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
938 }
939
940 bool isAISrc_1024B16() const {
941 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
942 }
943
944 bool isAISrc_1024V2B16() const {
945 return isAISrc_1024B16();
946 }
947
948 bool isAISrc_1024_f32() const {
949 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
950 }
951
952 bool isAISrc_1024F16() const {
953 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
954 }
955
956 bool isAISrc_1024V2F16() const {
957 return isAISrc_1024F16() || isAISrc_1024_b32();
958 }
959
960 bool isKImmFP32() const {
961 return isLiteralImm(MVT::f32);
962 }
963
964 bool isKImmFP16() const {
965 return isLiteralImm(MVT::f16);
966 }
967
968 bool isKImmFP64() const { return isLiteralImm(MVT::f64); }
969
970 bool isMem() const override {
971 return false;
972 }
973
974 bool isExpr() const {
975 return Kind == Expression;
976 }
977
978 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
979
980 bool isSWaitCnt() const;
981 bool isDepCtr() const;
982 bool isSDelayALU() const;
983 bool isHwreg() const;
984 bool isSendMsg() const;
985 bool isWaitEvent() const;
986 bool isSplitBarrier() const;
987 bool isSwizzle() const;
988 bool isSMRDOffset8() const;
989 bool isSMEMOffset() const;
990 bool isSMRDLiteralOffset() const;
991 bool isDPP8() const;
992 bool isDPPCtrl() const;
993 bool isBLGP() const;
994 bool isGPRIdxMode() const;
995 bool isS16Imm() const;
996 bool isU16Imm() const;
997 bool isEndpgm() const;
998
999 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
1000 return [this, P]() { return P(*this); };
1001 }
1002
1003 StringRef getToken() const {
1004 assert(isToken());
1005 return StringRef(Tok.Data, Tok.Length);
1006 }
1007
1008 int64_t getImm() const {
1009 assert(isImm());
1010 return Imm.Val;
1011 }
1012
1013 void setImm(int64_t Val) {
1014 assert(isImm());
1015 Imm.Val = Val;
1016 }
1017
1018 ImmTy getImmTy() const {
1019 assert(isImm());
1020 return Imm.Type;
1021 }
1022
1023 MCRegister getReg() const override {
1024 assert(isRegKind());
1025 return Reg.RegNo;
1026 }
1027
1028 SMLoc getStartLoc() const override {
1029 return StartLoc;
1030 }
1031
1032 SMLoc getEndLoc() const override {
1033 return EndLoc;
1034 }
1035
1036 SMRange getLocRange() const {
1037 return SMRange(StartLoc, EndLoc);
1038 }
1039
1040 int getMCOpIdx() const { return MCOpIdx; }
1041
1042 Modifiers getModifiers() const {
1043 assert(isRegKind() || isImmTy(ImmTyNone));
1044 return isRegKind() ? Reg.Mods : Imm.Mods;
1045 }
1046
1047 void setModifiers(Modifiers Mods) {
1048 assert(isRegKind() || isImmTy(ImmTyNone));
1049 if (isRegKind())
1050 Reg.Mods = Mods;
1051 else
1052 Imm.Mods = Mods;
1053 }
1054
1055 bool hasModifiers() const {
1056 return getModifiers().hasModifiers();
1057 }
1058
1059 bool hasFPModifiers() const {
1060 return getModifiers().hasFPModifiers();
1061 }
1062
1063 bool hasIntModifiers() const {
1064 return getModifiers().hasIntModifiers();
1065 }
1066
1067 bool isForcedLit() const {
1068 return isImmLiteral() && getModifiers().isForcedLit();
1069 }
1070
1071 bool isForcedLit64() const {
1072 return isImmLiteral() && getModifiers().isForcedLit64();
1073 }
1074
1075 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1076
1077 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1078
1079 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1080
1081 void addRegOperands(MCInst &Inst, unsigned N) const;
1082
1083 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1084 if (isRegKind())
1085 addRegOperands(Inst, N);
1086 else
1087 addImmOperands(Inst, N);
1088 }
1089
1090 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1091 Modifiers Mods = getModifiers();
1092 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1093 if (isRegKind()) {
1094 addRegOperands(Inst, N);
1095 } else {
1096 addImmOperands(Inst, N, false);
1097 }
1098 }
1099
1100 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1101 assert(!hasIntModifiers());
1102 addRegOrImmWithInputModsOperands(Inst, N);
1103 }
1104
1105 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1106 assert(!hasFPModifiers());
1107 addRegOrImmWithInputModsOperands(Inst, N);
1108 }
1109
1110 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1111 Modifiers Mods = getModifiers();
1112 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1113 assert(isRegKind());
1114 addRegOperands(Inst, N);
1115 }
1116
1117 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1118 assert(!hasIntModifiers());
1119 addRegWithInputModsOperands(Inst, N);
1120 }
1121
1122 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1123 assert(!hasFPModifiers());
1124 addRegWithInputModsOperands(Inst, N);
1125 }
1126
1127 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1128 // clang-format off
1129 switch (Type) {
1130 case ImmTyNone: OS << "None"; break;
1131 case ImmTyGDS: OS << "GDS"; break;
1132 case ImmTyLDS: OS << "LDS"; break;
1133 case ImmTyOffen: OS << "Offen"; break;
1134 case ImmTyIdxen: OS << "Idxen"; break;
1135 case ImmTyAddr64: OS << "Addr64"; break;
1136 case ImmTyOffset: OS << "Offset"; break;
1137 case ImmTyInstOffset: OS << "InstOffset"; break;
1138 case ImmTyOffset0: OS << "Offset0"; break;
1139 case ImmTyOffset1: OS << "Offset1"; break;
1140 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1141 case ImmTyCPol: OS << "CPol"; break;
1142 case ImmTyIndexKey8bit: OS << "index_key"; break;
1143 case ImmTyIndexKey16bit: OS << "index_key"; break;
1144 case ImmTyIndexKey32bit: OS << "index_key"; break;
1145 case ImmTyTFE: OS << "TFE"; break;
1146 case ImmTyIsAsync: OS << "IsAsync"; break;
1147 case ImmTyD16: OS << "D16"; break;
1148 case ImmTyFORMAT: OS << "FORMAT"; break;
1149 case ImmTyClamp: OS << "Clamp"; break;
1150 case ImmTyOModSI: OS << "OModSI"; break;
1151 case ImmTyDPP8: OS << "DPP8"; break;
1152 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1153 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1154 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1155 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1156 case ImmTyDppFI: OS << "DppFI"; break;
1157 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1158 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1159 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1160 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1161 case ImmTyDMask: OS << "DMask"; break;
1162 case ImmTyDim: OS << "Dim"; break;
1163 case ImmTyUNorm: OS << "UNorm"; break;
1164 case ImmTyDA: OS << "DA"; break;
1165 case ImmTyR128A16: OS << "R128A16"; break;
1166 case ImmTyA16: OS << "A16"; break;
1167 case ImmTyLWE: OS << "LWE"; break;
1168 case ImmTyOff: OS << "Off"; break;
1169 case ImmTyExpTgt: OS << "ExpTgt"; break;
1170 case ImmTyExpCompr: OS << "ExpCompr"; break;
1171 case ImmTyExpVM: OS << "ExpVM"; break;
1172 case ImmTyDone: OS << "Done"; break;
1173 case ImmTyRowEn: OS << "RowEn"; break;
1174 case ImmTyHwreg: OS << "Hwreg"; break;
1175 case ImmTySendMsg: OS << "SendMsg"; break;
1176 case ImmTyWaitEvent: OS << "WaitEvent"; break;
1177 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1178 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1179 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1180 case ImmTyOpSel: OS << "OpSel"; break;
1181 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1182 case ImmTyNegLo: OS << "NegLo"; break;
1183 case ImmTyNegHi: OS << "NegHi"; break;
1184 case ImmTySwizzle: OS << "Swizzle"; break;
1185 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1186 case ImmTyHigh: OS << "High"; break;
1187 case ImmTyBLGP: OS << "BLGP"; break;
1188 case ImmTyCBSZ: OS << "CBSZ"; break;
1189 case ImmTyABID: OS << "ABID"; break;
1190 case ImmTyEndpgm: OS << "Endpgm"; break;
1191 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1192 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1193 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1194 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1195 case ImmTyBitOp3: OS << "BitOp3"; break;
1196 case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
1197 case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
1198 case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
1199 case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
1200 case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
1201 case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
1202 case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
1203 case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
1204 case ImmTyScaleSel: OS << "ScaleSel" ; break;
1205 case ImmTyByteSel: OS << "ByteSel" ; break;
1206 }
1207 // clang-format on
1208 }
1209
1210 void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
1211 switch (Kind) {
1212 case Register:
1213 OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
1214 << " mods: " << Reg.Mods << '>';
1215 break;
1216 case Immediate:
1217 OS << '<' << getImm();
1218 if (getImmTy() != ImmTyNone) {
1219 OS << " type: "; printImmTy(OS, getImmTy());
1220 }
1221 OS << " mods: " << Imm.Mods << '>';
1222 break;
1223 case Token:
1224 OS << '\'' << getToken() << '\'';
1225 break;
1226 case Expression:
1227 OS << "<expr ";
1228 MAI.printExpr(OS, *Expr);
1229 OS << '>';
1230 break;
1231 }
1232 }
1233
1234 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1235 int64_t Val, SMLoc Loc,
1236 ImmTy Type = ImmTyNone,
1237 bool IsFPImm = false) {
1238 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1239 Op->Imm.Val = Val;
1240 Op->Imm.IsFPImm = IsFPImm;
1241 Op->Imm.Type = Type;
1242 Op->Imm.Mods = Modifiers();
1243 Op->StartLoc = Loc;
1244 Op->EndLoc = Loc;
1245 return Op;
1246 }
1247
1248 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1249 StringRef Str, SMLoc Loc,
1250 bool HasExplicitEncodingSize = true) {
1251 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1252 Res->Tok.Data = Str.data();
1253 Res->Tok.Length = Str.size();
1254 Res->StartLoc = Loc;
1255 Res->EndLoc = Loc;
1256 return Res;
1257 }
1258
1259 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1260 MCRegister Reg, SMLoc S, SMLoc E) {
1261 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1262 Op->Reg.RegNo = Reg;
1263 Op->Reg.Mods = Modifiers();
1264 Op->StartLoc = S;
1265 Op->EndLoc = E;
1266 return Op;
1267 }
1268
1269 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1270 const class MCExpr *Expr, SMLoc S) {
1271 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1272 Op->Expr = Expr;
1273 Op->StartLoc = S;
1274 Op->EndLoc = S;
1275 return Op;
1276 }
1277};
1278
1279raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1280 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1281 return OS;
1282}
1283
1284//===----------------------------------------------------------------------===//
1285// AsmParser
1286//===----------------------------------------------------------------------===//
1287
1288// TODO: define GET_SUBTARGET_FEATURE_NAME
1289#define GET_REGISTER_MATCHER
1290#include "AMDGPUGenAsmMatcher.inc"
1291#undef GET_REGISTER_MATCHER
1292#undef GET_SUBTARGET_FEATURE_NAME
1293
1294// Holds info related to the current kernel, e.g. count of SGPRs used.
1295// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1296// .amdgpu_hsa_kernel or at EOF.
1297class KernelScopeInfo {
1298 int SgprIndexUnusedMin = -1;
1299 int VgprIndexUnusedMin = -1;
1300 int AgprIndexUnusedMin = -1;
1301 MCContext *Ctx = nullptr;
1302 MCSubtargetInfo const *MSTI = nullptr;
1303
1304 void usesSgprAt(int i) {
1305 if (i >= SgprIndexUnusedMin) {
1306 SgprIndexUnusedMin = ++i;
1307 if (Ctx) {
1308 MCSymbol* const Sym =
1309 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1310 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1311 }
1312 }
1313 }
1314
1315 void usesVgprAt(int i) {
1316 if (i >= VgprIndexUnusedMin) {
1317 VgprIndexUnusedMin = ++i;
1318 if (Ctx) {
1319 MCSymbol* const Sym =
1320 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1321 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1322 VgprIndexUnusedMin);
1323 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1324 }
1325 }
1326 }
1327
1328 void usesAgprAt(int i) {
1329 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1330 if (!hasMAIInsts(*MSTI))
1331 return;
1332
1333 if (i >= AgprIndexUnusedMin) {
1334 AgprIndexUnusedMin = ++i;
1335 if (Ctx) {
1336 MCSymbol* const Sym =
1337 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1338 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1339
1340 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1341 MCSymbol* const vSym =
1342 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1343 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1344 VgprIndexUnusedMin);
1345 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1346 }
1347 }
1348 }
1349
1350public:
1351 KernelScopeInfo() = default;
1352
1353 void initialize(MCContext &Context) {
1354 Ctx = &Context;
1355 MSTI = Ctx->getSubtargetInfo();
1356
1357 usesSgprAt(SgprIndexUnusedMin = -1);
1358 usesVgprAt(VgprIndexUnusedMin = -1);
1359 if (hasMAIInsts(*MSTI)) {
1360 usesAgprAt(AgprIndexUnusedMin = -1);
1361 }
1362 }
1363
1364 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1365 unsigned RegWidth) {
1366 switch (RegKind) {
1367 case IS_SGPR:
1368 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1369 break;
1370 case IS_AGPR:
1371 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1372 break;
1373 case IS_VGPR:
1374 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1375 break;
1376 default:
1377 break;
1378 }
1379 }
1380};
1381
1382class AMDGPUAsmParser : public MCTargetAsmParser {
1383 MCAsmParser &Parser;
1384
1385 unsigned ForcedEncodingSize = 0;
1386 bool ForcedDPP = false;
1387 bool ForcedSDWA = false;
1388 KernelScopeInfo KernelScope;
1389 const unsigned HwMode;
1390
1391 /// @name Auto-generated Match Functions
1392 /// {
1393
1394#define GET_ASSEMBLER_HEADER
1395#include "AMDGPUGenAsmMatcher.inc"
1396
1397 /// }
1398
1399 /// Get size of register operand
1400 unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
1401 assert(OpNo < Desc.NumOperands);
1402 int16_t RCID = MII.getOpRegClassID(Desc.operands()[OpNo], HwMode);
1403 return getRegBitWidth(RCID) / 8;
1404 }
1405
1406 std::optional<AMDGPU::InfoSectionData> InfoData;
1407
1408private:
1409 void createConstantSymbol(StringRef Id, int64_t Val);
1410
1411 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1412 bool OutOfRangeError(SMRange Range);
1413 /// Calculate VGPR/SGPR blocks required for given target, reserved
1414 /// registers, and user-specified NextFreeXGPR values.
1415 ///
1416 /// \param Features [in] Target features, used for bug corrections.
1417 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1418 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1419 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1420 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1421 /// descriptor field, if valid.
1422 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1423 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1424 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1425 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1426 /// \param VGPRBlocks [out] Result VGPR block count.
1427 /// \param SGPRBlocks [out] Result SGPR block count.
1428 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1429 const MCExpr *FlatScrUsed, bool XNACKUsed,
1430 std::optional<bool> EnableWavefrontSize32,
1431 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1432 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1433 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
1434 bool ParseDirectiveAMDGCNTarget();
1435 bool ParseDirectiveAMDHSACodeObjectVersion();
1436 bool ParseDirectiveAMDHSAKernel();
1437 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1438 bool ParseDirectiveAMDKernelCodeT();
1439 // TODO: Possibly make subtargetHasRegister const.
1440 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1441 bool ParseDirectiveAMDGPUHsaKernel();
1442
1443 bool ParseDirectiveISAVersion();
1444 bool ParseDirectiveHSAMetadata();
1445 bool ParseDirectivePALMetadataBegin();
1446 bool ParseDirectivePALMetadata();
1447 bool ParseDirectiveAMDGPULDS();
1448 bool ParseDirectiveAMDGPUInfo();
1449
1450 /// Common code to parse out a block of text (typically YAML) between start and
1451 /// end directives.
1452 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1453 const char *AssemblerDirectiveEnd,
1454 std::string &CollectString);
1455
1456 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1457 RegisterKind RegKind, MCRegister Reg1,
1458 RegisterKind RegKind1, SMLoc Loc);
1459 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1460 unsigned &RegNum, unsigned &RegWidth,
1461 bool RestoreOnFailure = false);
1462 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1463 unsigned &RegNum, unsigned &RegWidth,
1464 SmallVectorImpl<AsmToken> &Tokens);
1465 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1466 unsigned &RegWidth,
1467 SmallVectorImpl<AsmToken> &Tokens);
1468 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1469 unsigned &RegWidth,
1470 SmallVectorImpl<AsmToken> &Tokens);
1471 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1472 unsigned &RegWidth,
1473 SmallVectorImpl<AsmToken> &Tokens);
1474 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1475 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1476 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1477
1478 bool isRegister();
1479 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1480 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1481 void initializeGprCountSymbol(RegisterKind RegKind);
1482 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1483 unsigned RegWidth);
1484 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1485 bool IsAtomic);
1486
1487public:
1488 enum OperandMode {
1489 OperandMode_Default,
1490 OperandMode_NSA,
1491 };
1492
1493 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1494
// Constructs the target assembly parser. The visible body publishes the
// available-feature set, defines the ISA-version assembler symbols, sets up
// register-usage tracking and defines the UC_VERSION_* symbols.
// NOTE(review): the embedded listing numbers jump from 1498 to 1500 right
// after the opening brace, so a statement may be missing from this view of
// the body -- confirm against the upstream file before editing.
1495 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1496 const MCInstrInfo &MII)
1497 : MCTargetAsmParser(STI, MII), Parser(_Parser),
1498 HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
1500
1501 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1502
// Publish the ISA version as constant symbols. The symbol names differ
// depending on whether the HSA ABI is in use on ISA major version >= 6.
1503 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1504 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1505 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1506 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1507 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1508 } else {
1509 createConstantSymbol(".option.machine_version_major", ISA.Major);
1510 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1511 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1512 }
// Same condition as above: choose between the GPR-count symbols and the
// KernelScope tracker for recording register usage.
1513 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1514 initializeGprCountSymbol(IS_VGPR);
1515 initializeGprCountSymbol(IS_SGPR);
1516 } else
1517 KernelScope.initialize(getContext());
1518
// Define one constant symbol per entry returned by getGFXVersions().
1519 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1520 createConstantSymbol(Symbol, Code);
1521
1522 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1523 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1524 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1525 }
1526
1527 bool hasMIMG_R128() const {
1528 return AMDGPU::hasMIMG_R128(getSTI());
1529 }
1530
1531 bool hasPackedD16() const {
1532 return AMDGPU::hasPackedD16(getSTI());
1533 }
1534
1535 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1536
1537 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1538
1539 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1540
1541 bool isSI() const {
1542 return AMDGPU::isSI(getSTI());
1543 }
1544
1545 bool isCI() const {
1546 return AMDGPU::isCI(getSTI());
1547 }
1548
1549 bool isVI() const {
1550 return AMDGPU::isVI(getSTI());
1551 }
1552
1553 bool isGFX9() const {
1554 return AMDGPU::isGFX9(getSTI());
1555 }
1556
1557 // TODO: isGFX90A is also true for GFX940. We need to clean it.
1558 bool isGFX90A() const {
1559 return AMDGPU::isGFX90A(getSTI());
1560 }
1561
1562 bool isGFX940() const {
1563 return AMDGPU::isGFX940(getSTI());
1564 }
1565
1566 bool isGFX9Plus() const {
1567 return AMDGPU::isGFX9Plus(getSTI());
1568 }
1569
1570 bool isGFX10() const {
1571 return AMDGPU::isGFX10(getSTI());
1572 }
1573
1574 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1575
1576 bool isGFX11() const {
1577 return AMDGPU::isGFX11(getSTI());
1578 }
1579
1580 bool isGFX11Plus() const {
1581 return AMDGPU::isGFX11Plus(getSTI());
1582 }
1583
1584 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1585
1586 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1587
1588 bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
1589
1590 bool isGFX1250Plus() const { return AMDGPU::isGFX1250Plus(getSTI()); }
1591
1592 bool isGFX13() const { return AMDGPU::isGFX13(getSTI()); }
1593
1594 bool isGFX13Plus() const { return AMDGPU::isGFX13Plus(getSTI()); }
1595
1596 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1597
1598 bool isGFX10_BEncoding() const {
1599 return AMDGPU::isGFX10_BEncoding(getSTI());
1600 }
1601
1602 bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }
1603
1604 bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }
1605
1606 bool hasInv2PiInlineImm() const {
1607 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1608 }
1609
1610 bool has64BitLiterals() const {
1611 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1612 }
1613
1614 bool hasFlatOffsets() const {
1615 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1616 }
1617
1618 bool hasTrue16Insts() const {
1619 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1620 }
1621
1622 bool hasArchitectedFlatScratch() const {
1623 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1624 }
1625
1626 bool hasSGPR102_SGPR103() const {
1627 return !isVI() && !isGFX9();
1628 }
1629
1630 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1631
1632 bool hasIntClamp() const {
1633 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1634 }
1635
1636 bool hasPartialNSAEncoding() const {
1637 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1638 }
1639
1640 bool hasGloballyAddressableScratch() const {
1641 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
1642 }
1643
1644 unsigned getNSAMaxSize(bool HasSampler = false) const {
1645 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1646 }
1647
1648 unsigned getMaxNumUserSGPRs() const {
1649 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1650 }
1651
1652 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1653
1654 AMDGPUTargetStreamer &getTargetStreamer() {
1655 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1656 return static_cast<AMDGPUTargetStreamer &>(TS);
1657 }
1658
1659 MCContext &getContext() const {
1660 // We need this const_cast because for some reason getContext() is not const
1661 // in MCAsmParser.
1662 return const_cast<AMDGPUAsmParser *>(this)->MCTargetAsmParser::getContext();
1663 }
1664
1665 const MCRegisterInfo *getMRI() const {
1666 return getContext().getRegisterInfo();
1667 }
1668
1669 const MCInstrInfo *getMII() const {
1670 return &MII;
1671 }
1672
1673 // FIXME: This should not be used. Instead, should use queries derived from
1674 // getAvailableFeatures().
1675 const FeatureBitset &getFeatureBits() const {
1676 return getSTI().getFeatureBits();
1677 }
1678
1679 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1680 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1681 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1682
1683 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1684 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1685 bool isForcedDPP() const { return ForcedDPP; }
1686 bool isForcedSDWA() const { return ForcedSDWA; }
1687 ArrayRef<unsigned> getMatchedVariants() const;
1688 StringRef getMatchedVariantName() const;
1689
1690 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1691 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1692 bool RestoreOnFailure);
1693 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1694 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1695 SMLoc &EndLoc) override;
1696 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1697 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1698 unsigned Kind) override;
1699 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1700 OperandVector &Operands, MCStreamer &Out,
1701 uint64_t &ErrorInfo,
1702 bool MatchingInlineAsm) override;
1703 bool ParseDirective(AsmToken DirectiveID) override;
1704 void onEndOfFile() override;
1705 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1706 OperandMode Mode = OperandMode_Default);
1707 StringRef parseMnemonicSuffix(StringRef Name);
1708 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1709 SMLoc NameLoc, OperandVector &Operands) override;
1710 //bool ProcessInstruction(MCInst &Inst);
1711
1712 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1713
1714 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1715
1716 ParseStatus
1717 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1718 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1719 std::function<bool(int64_t &)> ConvertResult = nullptr);
1720
1721 ParseStatus parseOperandArrayWithPrefix(
1722 const char *Prefix, OperandVector &Operands,
1723 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1724 bool (*ConvertResult)(int64_t &) = nullptr);
1725
1726 ParseStatus
1727 parseNamedBit(StringRef Name, OperandVector &Operands,
1728 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1729 bool IgnoreNegative = false);
1730 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1731 ParseStatus parseCPol(OperandVector &Operands);
1732 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1733 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1734 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1735 SMLoc &StringLoc);
1736 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1737 StringRef Name,
1738 ArrayRef<const char *> Ids,
1739 int64_t &IntVal);
1740 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1741 StringRef Name,
1742 ArrayRef<const char *> Ids,
1743 AMDGPUOperand::ImmTy Type);
1744
1745 bool isModifier();
1746 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1747 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1748 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1749 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1750 bool parseSP3NegModifier();
1751 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1752 LitModifier Lit = LitModifier::None);
1753 ParseStatus parseReg(OperandVector &Operands);
1754 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1755 LitModifier Lit = LitModifier::None);
1756 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1757 bool AllowImm = true);
1758 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1759 bool AllowImm = true);
1760 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1761 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1762 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1763 ParseStatus tryParseIndexKey(OperandVector &Operands,
1764 AMDGPUOperand::ImmTy ImmTy);
1765 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1766 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1767 ParseStatus parseIndexKey32bit(OperandVector &Operands);
1768 ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
1769 AMDGPUOperand::ImmTy Type);
1770 ParseStatus parseMatrixAFMT(OperandVector &Operands);
1771 ParseStatus parseMatrixBFMT(OperandVector &Operands);
1772 ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
1773 AMDGPUOperand::ImmTy Type);
1774 ParseStatus parseMatrixAScale(OperandVector &Operands);
1775 ParseStatus parseMatrixBScale(OperandVector &Operands);
1776 ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
1777 AMDGPUOperand::ImmTy Type);
1778 ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
1779 ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
1780
1781 ParseStatus parseDfmtNfmt(int64_t &Format);
1782 ParseStatus parseUfmt(int64_t &Format);
1783 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1784 int64_t &Format);
1785 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1786 int64_t &Format);
1787 ParseStatus parseFORMAT(OperandVector &Operands);
1788 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1789 ParseStatus parseNumericFormat(int64_t &Format);
1790 ParseStatus parseFlatOffset(OperandVector &Operands);
1791 ParseStatus parseR128A16(OperandVector &Operands);
1792 ParseStatus parseBLGP(OperandVector &Operands);
1793 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1794 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1795
1796 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1797
1798 bool parseCnt(int64_t &IntVal);
1799 ParseStatus parseSWaitCnt(OperandVector &Operands);
1800
1801 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1802 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1803 ParseStatus parseDepCtr(OperandVector &Operands);
1804
1805 bool parseDelay(int64_t &Delay);
1806 ParseStatus parseSDelayALU(OperandVector &Operands);
1807
1808 ParseStatus parseHwreg(OperandVector &Operands);
1809
1810private:
1811 struct OperandInfoTy {
1812 SMLoc Loc;
1813 int64_t Val;
1814 bool IsSymbolic = false;
1815 bool IsDefined = false;
1816
1817 constexpr OperandInfoTy(int64_t Val) : Val(Val) {}
1818 };
1819
1820 struct StructuredOpField : OperandInfoTy {
1821 StringLiteral Id;
1822 StringLiteral Desc;
1823 unsigned Width;
1824 bool IsDefined = false;
1825
1826 constexpr StructuredOpField(StringLiteral Id, StringLiteral Desc,
1827 unsigned Width, int64_t Default)
1828 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1829 virtual ~StructuredOpField() = default;
1830
1831 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1832 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1833 return false;
1834 }
1835
1836 virtual bool validate(AMDGPUAsmParser &Parser) const {
1837 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1838 return Error(Parser, "not supported on this GPU");
1839 if (!isUIntN(Width, Val))
1840 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1841 return true;
1842 }
1843 };
1844
1845 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1846 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1847
1848 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1849 bool validateSendMsg(const OperandInfoTy &Msg,
1850 const OperandInfoTy &Op,
1851 const OperandInfoTy &Stream);
1852
1853 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1854 OperandInfoTy &Width);
1855
1856 const AMDGPUOperand &findMCOperand(const OperandVector &Operands,
1857 int MCOpIdx) const;
1858
1859 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1860
1861 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1862 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1863 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1864
1865 SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
1866 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1867 const OperandVector &Operands) const;
1868 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
1869 const OperandVector &Operands) const;
1870 SMLoc getInstLoc(const OperandVector &Operands) const;
1871
1872 bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
1873 const OperandVector &Operands);
1874 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1875 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1876 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1877 bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1878 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1879 std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1880 bool AsVOPD3);
1881 bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1882 bool tryVOPD(const MCInst &Inst);
1883 bool tryVOPD3(const MCInst &Inst);
1884 bool tryAnotherVOPDEncoding(const MCInst &Inst);
1885
1886 bool validateIntClampSupported(const MCInst &Inst);
1887 bool validateMIMGAtomicDMask(const MCInst &Inst);
1888 bool validateMIMGGatherDMask(const MCInst &Inst);
1889 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1890 bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
1891 bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
1892 bool validateMIMGD16(const MCInst &Inst);
1893 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1894 bool validateTensorR128(const MCInst &Inst);
1895 bool validateMIMGMSAA(const MCInst &Inst);
1896 bool validateOpSel(const MCInst &Inst);
1897 bool validateTrue16OpSel(const MCInst &Inst);
1898 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1899 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1900 bool validateVccOperand(MCRegister Reg) const;
1901 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1902 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1903 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1904 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1905 bool validateAGPRLdSt(const MCInst &Inst) const;
1906 bool validateVGPRAlign(const MCInst &Inst) const;
1907 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1908 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1909 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1910 bool validateDivScale(const MCInst &Inst);
1911 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1912 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1913 SMLoc IDLoc);
1914 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1915 const unsigned CPol);
1916 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1917 bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
1918 bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1919 unsigned getConstantBusLimit(unsigned Opcode) const;
1920 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1921 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1922 MCRegister findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1923
1924 bool isSupportedMnemo(StringRef Mnemo,
1925 const FeatureBitset &FBS);
1926 bool isSupportedMnemo(StringRef Mnemo,
1927 const FeatureBitset &FBS,
1928 ArrayRef<unsigned> Variants);
1929 bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
1930
1931 bool isId(const StringRef Id) const;
1932 bool isId(const AsmToken &Token, const StringRef Id) const;
1933 bool isToken(const AsmToken::TokenKind Kind) const;
1934 StringRef getId() const;
1935 bool trySkipId(const StringRef Id);
1936 bool trySkipId(const StringRef Pref, const StringRef Id);
1937 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1938 bool trySkipToken(const AsmToken::TokenKind Kind);
1939 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1940 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1941 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1942
1943 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1944 AsmToken::TokenKind getTokenKind() const;
1945 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1946 bool parseExpr(OperandVector &Operands);
1947 StringRef getTokenStr() const;
1948 AsmToken peekToken(bool ShouldSkipSpace = true);
1949 AsmToken getToken() const;
1950 SMLoc getLoc() const;
1951 void lex();
1952
1953public:
1954 void onBeginOfFile() override;
1955 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1956
1957 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1958
1959 ParseStatus parseExpTgt(OperandVector &Operands);
1960 ParseStatus parseSendMsg(OperandVector &Operands);
1961 ParseStatus parseWaitEvent(OperandVector &Operands);
1962 ParseStatus parseInterpSlot(OperandVector &Operands);
1963 ParseStatus parseInterpAttr(OperandVector &Operands);
1964 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1965 ParseStatus parseBoolReg(OperandVector &Operands);
1966
1967 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1968 const unsigned MaxVal, const Twine &ErrMsg,
1969 SMLoc &Loc);
1970 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1971 const unsigned MinVal,
1972 const unsigned MaxVal,
1973 const StringRef ErrMsg);
1974 ParseStatus parseSwizzle(OperandVector &Operands);
1975 bool parseSwizzleOffset(int64_t &Imm);
1976 bool parseSwizzleMacro(int64_t &Imm);
1977 bool parseSwizzleQuadPerm(int64_t &Imm);
1978 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1979 bool parseSwizzleBroadcast(int64_t &Imm);
1980 bool parseSwizzleSwap(int64_t &Imm);
1981 bool parseSwizzleReverse(int64_t &Imm);
1982 bool parseSwizzleFFT(int64_t &Imm);
1983 bool parseSwizzleRotate(int64_t &Imm);
1984
1985 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1986 int64_t parseGPRIdxMacro();
1987
1988 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1989 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1990
1991 ParseStatus parseOModSI(OperandVector &Operands);
1992
1993 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1994 OptionalImmIndexMap &OptionalIdx);
1995 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1996 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1997 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1998 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1999 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
2000
2001 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
2002 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
2003 OptionalImmIndexMap &OptionalIdx);
2004 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
2005 OptionalImmIndexMap &OptionalIdx);
2006
2007 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
2008 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
2009 void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);
2010
2011 bool parseDimId(unsigned &Encoding);
2012 ParseStatus parseDim(OperandVector &Operands);
2013 bool convertDppBoundCtrl(int64_t &BoundCtrl);
2014 ParseStatus parseDPP8(OperandVector &Operands);
2015 ParseStatus parseDPPCtrl(OperandVector &Operands);
2016 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
2017 int64_t parseDPPCtrlSel(StringRef Ctrl);
2018 int64_t parseDPPCtrlPerm();
2019 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
2020 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
2021 cvtDPP(Inst, Operands, true);
2022 }
2023 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
2024 bool IsDPP8 = false);
2025 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
2026 cvtVOP3DPP(Inst, Operands, true);
2027 }
2028
2029 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
2030 AMDGPUOperand::ImmTy Type);
2031 ParseStatus parseSDWADstUnused(OperandVector &Operands);
2032 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
2033 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
2034 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
2035 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
2036 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
2037
2038 enum class SDWAInstType : unsigned { VOP1 = 0, VOP2 = 1, VOPC = 2 };
2039
2040 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
2041 SDWAInstType BasicInstType, bool SkipDstVcc = false,
2042 bool SkipSrcVcc = false);
2043
2044 ParseStatus parseEndpgm(OperandVector &Operands);
2045
2046 ParseStatus parseVOPD(OperandVector &Operands);
2047};
2048
2049} // end anonymous namespace
2050
2051// May be called with integer type with equivalent bitwidth.
2052static const fltSemantics *getFltSemantics(unsigned Size) {
2053 switch (Size) {
2054 case 4:
2055 return &APFloat::IEEEsingle();
2056 case 8:
2057 return &APFloat::IEEEdouble();
2058 case 2:
2059 return &APFloat::IEEEhalf();
2060 default:
2061 llvm_unreachable("unsupported fp type");
2062 }
2063}
2064
2066 return getFltSemantics(VT.getScalarSizeInBits() / 8);
2067}
2068
2070 switch (OperandType) {
2071 // When floating-point immediate is used as operand of type i16, the 32-bit
2072 // representation of the constant truncated to the 16 LSBs should be used.
2087 return &APFloat::IEEEsingle();
2096 return &APFloat::IEEEdouble();
2104 return &APFloat::IEEEhalf();
2109 return &APFloat::BFloat();
2110 default:
2111 llvm_unreachable("unsupported fp type");
2112 }
2113}
2114
2115//===----------------------------------------------------------------------===//
2116// Operand
2117//===----------------------------------------------------------------------===//
2118
// Reports whether FPLiteral is acceptable for an operand of type VT.
// The conversion status is inspected below: a result that lost information
// *and* signalled overflow or underflow is rejected; every other outcome,
// including a merely inexact conversion, is accepted.
2119 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2120 bool Lost;
2121
2122 // Convert literal to single precision
2125 &Lost);
2126 // We allow precision loss but not overflow or underflow
2127 if (Status != APFloat::opOK &&
2128 Lost &&
2129 ((Status & APFloat::opOverflow) != 0 ||
2130 (Status & APFloat::opUnderflow) != 0)) {
2131 return false;
2132 }
2133
2134 return true;
2135 }
2136
2137static bool isSafeTruncation(int64_t Val, unsigned Size) {
2138 return isUIntN(Size, Val) || isIntN(Size, Val);
2139}
2140
2141static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2142 if (VT.getScalarType() == MVT::i16)
2143 return isInlinableLiteral32(Val, HasInv2Pi);
2144
2145 if (VT.getScalarType() == MVT::f16)
2146 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2147
2148 assert(VT.getScalarType() == MVT::bf16);
2149
2150 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2151}
2152
// Reports whether this operand can be used as an inline constant for an
// operand of the given type. Named inline values are always accepted;
// immediates that are not plain (ImmTyNone) or that carry a lit modifier are
// always rejected. Floating-point and integer literal tokens are then checked
// separately.
2153 bool AMDGPUOperand::isInlinableImm(MVT type) const {
2154
2155 // This is a hack to enable named inline values like
2156 // shared_base with both 32-bit and 64-bit operands.
2157 // Note that these values are defined as
2158 // 32-bit operands only.
2159 if (isInlineValue()) {
2160 return true;
2161 }
2162
2163 if (!isImmTy(ImmTyNone)) {
2164 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2165 return false;
2166 }
2167
2168 if (getModifiers().Lit != LitModifier::None)
2169 return false;
2170
2171 // TODO: We should avoid using host float here. It would be better to
2172 // check the float bit values which is what a few other places do.
2173 // We've had bot failures before due to weird NaN support on mips hosts.
2174
2175 APInt Literal(64, Imm.Val);
2176
2177 if (Imm.IsFPImm) { // We got fp literal token
2178 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2180 AsmParser->hasInv2PiInlineImm());
2181 }
2182
2183 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2184 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2185 return false;
2186
2187 if (type.getScalarSizeInBits() == 16) {
2188 bool Lost = false;
2189 switch (type.getScalarType().SimpleTy) {
2190 default:
2191 llvm_unreachable("unknown 16-bit type");
2192 case MVT::bf16:
2193 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2194 &Lost);
2195 break;
2196 case MVT::f16:
2197 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2198 &Lost);
2199 break;
2200 case MVT::i16:
2201 FPLiteral.convert(APFloatBase::IEEEsingle(),
2202 APFloat::rmNearestTiesToEven, &Lost);
2203 break;
2204 }
2205 // We need to use 32-bit representation here because when a floating-point
2206 // inline constant is used as an i16 operand, its 32-bit representation
2207 // will be used. We will need the 32-bit value to check if
2208 // it is FP inline constant.
2209 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2210 return isInlineableLiteralOp16(ImmVal, type,
2211 AsmParser->hasInv2PiInlineImm());
2212 }
2213
2214 // Check if single precision literal is inlinable
2216 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2217 AsmParser->hasInv2PiInlineImm());
2218 }
2219
2220 // We got int literal token.
2221 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2223 AsmParser->hasInv2PiInlineImm());
2224 }
2225
2226 // An integer that fits the operand width neither as signed nor as unsigned
2227 // value cannot be an inline constant.
2226 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2227 return false;
2228 }
2229
2230 if (type.getScalarSizeInBits() == 16) {
2232 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2233 type, AsmParser->hasInv2PiInlineImm());
2234 }
2235
2237 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2238 AsmParser->hasInv2PiInlineImm());
2239 }
2240
2241bool AMDGPUOperand::isLiteralImm(MVT type) const {
2242 // Check that this immediate can be added as literal
2243 if (!isImmTy(ImmTyNone)) {
2244 return false;
2245 }
2246
2247 bool Allow64Bit =
2248 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2249
2250 if (!Imm.IsFPImm) {
2251 // We got int literal token.
2252
2253 if (type == MVT::f64 && hasFPModifiers()) {
2254 // Cannot apply fp modifiers to int literals preserving the same semantics
2255 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2256 // disable these cases.
2257 return false;
2258 }
2259
2260 unsigned Size = type.getSizeInBits();
2261 if (Size == 64) {
2262 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2263 return true;
2264 Size = 32;
2265 }
2266
2267 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2268 // types.
2269 return isSafeTruncation(Imm.Val, Size);
2270 }
2271
2272 // We got fp literal token
2273 if (type == MVT::f64) { // Expected 64-bit fp operand
2274 // We would set low 64-bits of literal to zeroes but we accept this literals
2275 return true;
2276 }
2277
2278 if (type == MVT::i64) { // Expected 64-bit int operand
2279 // We don't allow fp literals in 64-bit integer instructions. It is
2280 // unclear how we should encode them.
2281 return false;
2282 }
2283
2284 // We allow fp literals with f16x2 operands assuming that the specified
2285 // literal goes into the lower half and the upper half is zero. We also
2286 // require that the literal may be losslessly converted to f16.
2287 //
2288 // For i16x2 operands, we assume that the specified literal is encoded as a
2289 // single-precision float. This is pretty odd, but it matches SP3 and what
2290 // happens in hardware.
2291 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2292 : (type == MVT::v2i16) ? MVT::f32
2293 : (type == MVT::v2f32) ? MVT::f32
2294 : type;
2295
2296 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2297 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2298}
2299
2300bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2301 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2302}
2303
2304bool AMDGPUOperand::isVRegWithInputMods() const {
2305 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2306 // GFX90A allows DPP on 64-bit operands.
2307 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2308 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2309}
2310
2311template <bool IsFake16>
2312bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2313 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2314 : AMDGPU::VGPR_16_Lo128RegClassID);
2315}
2316
2317template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2318 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2319 : AMDGPU::VGPR_16RegClassID);
2320}
2321
2322bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2323 if (AsmParser->isVI())
2324 return isVReg32();
2325 if (AsmParser->isGFX9Plus())
2326 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2327 return false;
2328}
2329
2330bool AMDGPUOperand::isSDWAFP16Operand() const {
2331 return isSDWAOperand(MVT::f16);
2332}
2333
2334bool AMDGPUOperand::isSDWAFP32Operand() const {
2335 return isSDWAOperand(MVT::f32);
2336}
2337
2338bool AMDGPUOperand::isSDWAInt16Operand() const {
2339 return isSDWAOperand(MVT::i16);
2340}
2341
2342bool AMDGPUOperand::isSDWAInt32Operand() const {
2343 return isSDWAOperand(MVT::i32);
2344}
2345
2346bool AMDGPUOperand::isBoolReg() const {
2347 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2348 (AsmParser->isWave32() && isSCSrc_b32()));
2349}
2350
2351uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2352{
2353 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2354 assert(Size == 2 || Size == 4 || Size == 8);
2355
2356 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2357
2358 if (Imm.Mods.Abs) {
2359 Val &= ~FpSignMask;
2360 }
2361 if (Imm.Mods.Neg) {
2362 Val ^= FpSignMask;
2363 }
2364
2365 return Val;
2366}
2367
// Appends this immediate operand to Inst. The index the operand will occupy is
// recorded in MCOpIdx first. If the target slot is an SI source operand the
// value goes through addLiteralImmOperand(), which may fold in the abs/neg
// modifiers; otherwise the operand must not carry modifiers.
2368 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2369 MCOpIdx = Inst.getNumOperands();
2370
2371 if (isExpr()) {
2373 return;
2374 }
2375
2376 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2377 Inst.getNumOperands())) {
// NOTE(review): the '&' below is a bitwise AND and binds tighter than '&&'.
// On bool operands the result equals a logical AND, but '&&' was presumably
// intended - confirm.
2378 addLiteralImmOperand(Inst, Imm.Val,
2379 ApplyModifiers &
2380 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2381 } else {
2382 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2384 }
2385 }
2386
// Appends Val to Inst as the immediate for the source operand at index
// Inst.getNumOperands(). When ApplyModifiers is set, the abs/neg input
// modifiers are folded into Val first. Floating-point and integer literal
// tokens are then handled separately, each dispatching on the operand type of
// the target slot.
2387 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2388 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2389 auto OpNum = Inst.getNumOperands();
2390 // Check that this operand accepts literals
2391 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2392
2393 if (ApplyModifiers) {
2394 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
// An fp literal token is held as a double; an integer token uses the
// operand's own size.
2395 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2396 Val = applyInputFPModifiers(Val, Size);
2397 }
2398
2399 APInt Literal(64, Val);
2400 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2401
// 64-bit literals require target support and are not used with VOP3/VOP3P
// encodings.
2402 bool CanUse64BitLiterals =
2403 AsmParser->has64BitLiterals() &&
2404 !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
2405 LitModifier Lit = getModifiers().Lit;
2406 MCContext &Ctx = AsmParser->getContext();
2407
2408 if (Imm.IsFPImm) { // We got fp literal token
2409 switch (OpTy) {
2417 if (Lit == LitModifier::None &&
2419 AsmParser->hasInv2PiInlineImm())) {
2420 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2421 return;
2422 }
2423
2424 // Non-inlineable
2425 if (AMDGPU::isSISrcFPOperand(InstDesc,
2426 OpNum)) { // Expected 64-bit fp operand
2427 bool HasMandatoryLiteral =
2428 AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
2429 // For fp operands we check if low 32 bits are zeros
2430 if (Literal.getLoBits(32) != 0 &&
2431 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2432 !HasMandatoryLiteral) {
2433 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
2434 Inst.getLoc(),
2435 "Can't encode literal as exact 64-bit floating-point operand. "
2436 "Low 32-bits will be set to zero");
2437 Val &= 0xffffffff00000000u;
2438 }
2439
2440 if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 ||
2443 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2444 (isInt<32>(Val) || isUInt<32>(Val))) {
2445 // The floating-point operand will be verbalized as an
2446 // integer one. If that integer happens to fit 32 bits, on
2447 // re-assembling it will be interpreted as the high half of
2448 // the actual value, so we have to wrap it into lit64().
2449 Lit = LitModifier::Lit64;
2450 } else if (Lit == LitModifier::Lit) {
2451 // For FP64 operands lit() specifies the high half of the value.
2452 Val = Hi_32(Val);
2453 }
2454 }
2455 break;
2456 }
2457
2458 // We don't allow fp literals in 64-bit integer instructions. It is
2459 // unclear how we should encode them. This case should be checked earlier
2460 // in predicate methods (isLiteralImm())
2461 llvm_unreachable("fp literal in 64-bit integer instruction.");
2462
2464 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2465 (isInt<32>(Val) || isUInt<32>(Val)))
2466 Lit = LitModifier::Lit64;
2467 break;
2468
2473 if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
2474 Literal == 0x3fc45f306725feed) {
2475 // This is the 1/(2*pi) which is going to be truncated to bf16 with the
2476 // loss of precision. The constant represents idiomatic fp32 value of
2477 // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
2478 // bits. Prevent rounding below.
2479 Inst.addOperand(MCOperand::createImm(0x3e22));
2480 return;
2481 }
2482 [[fallthrough]];
2483
2505 bool lost;
2506 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2507 // Convert literal to the floating-point format of the operand type
2508 FPLiteral.convert(*getOpFltSemantics(OpTy),
2509 APFloat::rmNearestTiesToEven, &lost);
2510 // We allow precision loss but not overflow or underflow. This should be
2511 // checked earlier in isLiteralImm()
2512
2513 Val = FPLiteral.bitcastToAPInt().getZExtValue();
2514 break;
2515 }
2516 default:
2517 llvm_unreachable("invalid operand size");
2518 }
2519
2520 if (Lit != LitModifier::None) {
2521 Inst.addOperand(
2523 } else {
2525 }
2526 return;
2527 }
2528
2529 // We got int literal token.
2530 // Only sign extend inline immediates.
2531 switch (OpTy) {
2546 break;
2547
2551 if (Lit == LitModifier::None &&
2552 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2554 return;
2555 }
2556
2557 // When the 32 MSBs are not zero (effectively means it can't be safely
2558 // truncated to uint32_t), if the target doesn't support 64-bit literals, or
2559 // the lit modifier is explicitly used, we need to truncate it to the 32
2560 // LSBs.
2561 if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit)
2562 Val = Lo_32(Val);
2563 break;
2564
2569 if (Lit == LitModifier::None &&
2570 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2572 return;
2573 }
2574
2575 // If the target doesn't support 64-bit literals, we need to use the
2576 // constant as the high 32 MSBs of a double-precision floating point value.
2577 if (!AsmParser->has64BitLiterals()) {
2578 Val = static_cast<uint64_t>(Val) << 32;
2579 } else {
2580 // Now the target does support 64-bit literals, there are two cases
2581 // where we still want to use src_literal encoding:
2582 // 1) explicitly forced by using lit modifier;
2583 // 2) the value is a valid 32-bit representation (signed or unsigned),
2584 // meanwhile not forced by lit64 modifier.
2585 if (Lit == LitModifier::Lit ||
2586 (Lit != LitModifier::Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
2587 Val = static_cast<uint64_t>(Val) << 32;
2588 }
2589
2590 // For FP64 operands lit() specifies the high half of the value.
2591 if (Lit == LitModifier::Lit)
2592 Val = Hi_32(Val);
2593 break;
2594
2606 break;
2607
2609 if ((isInt<32>(Val) || isUInt<32>(Val)) && Lit != LitModifier::Lit64)
2610 Val <<= 32;
2611 break;
2612
2613 default:
2614 llvm_unreachable("invalid operand type");
2615 }
2616
2617 if (Lit != LitModifier::None) {
2618 Inst.addOperand(
2620 } else {
2622 }
2623 }
2624
2625void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2626 MCOpIdx = Inst.getNumOperands();
2627 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2628}
2629
2630bool AMDGPUOperand::isInlineValue() const {
2631 return isRegKind() && ::isInlineValue(getReg());
2632}
2633
2634//===----------------------------------------------------------------------===//
2635// AsmParser
2636//===----------------------------------------------------------------------===//
2637
// Fetches, creating it on first use, the MCContext symbol named Id.
// NOTE(review): presumably the symbol is then given the constant value Val -
// confirm.
2638 void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2639 // TODO: make those pre-defined variables read-only.
2640 // Currently there is no suitable machinery in the core llvm-mc for this.
2641 // MCSymbol::isRedefinable is intended for another purpose, and
2642 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
2643 MCContext &Ctx = getContext();
2644 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2646 }
2647
2648static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2649 if (Is == IS_VGPR) {
2650 switch (RegWidth) {
2651 default: return -1;
2652 case 32:
2653 return AMDGPU::VGPR_32RegClassID;
2654 case 64:
2655 return AMDGPU::VReg_64RegClassID;
2656 case 96:
2657 return AMDGPU::VReg_96RegClassID;
2658 case 128:
2659 return AMDGPU::VReg_128RegClassID;
2660 case 160:
2661 return AMDGPU::VReg_160RegClassID;
2662 case 192:
2663 return AMDGPU::VReg_192RegClassID;
2664 case 224:
2665 return AMDGPU::VReg_224RegClassID;
2666 case 256:
2667 return AMDGPU::VReg_256RegClassID;
2668 case 288:
2669 return AMDGPU::VReg_288RegClassID;
2670 case 320:
2671 return AMDGPU::VReg_320RegClassID;
2672 case 352:
2673 return AMDGPU::VReg_352RegClassID;
2674 case 384:
2675 return AMDGPU::VReg_384RegClassID;
2676 case 512:
2677 return AMDGPU::VReg_512RegClassID;
2678 case 1024:
2679 return AMDGPU::VReg_1024RegClassID;
2680 }
2681 } else if (Is == IS_TTMP) {
2682 switch (RegWidth) {
2683 default: return -1;
2684 case 32:
2685 return AMDGPU::TTMP_32RegClassID;
2686 case 64:
2687 return AMDGPU::TTMP_64RegClassID;
2688 case 128:
2689 return AMDGPU::TTMP_128RegClassID;
2690 case 256:
2691 return AMDGPU::TTMP_256RegClassID;
2692 case 512:
2693 return AMDGPU::TTMP_512RegClassID;
2694 }
2695 } else if (Is == IS_SGPR) {
2696 switch (RegWidth) {
2697 default: return -1;
2698 case 32:
2699 return AMDGPU::SGPR_32RegClassID;
2700 case 64:
2701 return AMDGPU::SGPR_64RegClassID;
2702 case 96:
2703 return AMDGPU::SGPR_96RegClassID;
2704 case 128:
2705 return AMDGPU::SGPR_128RegClassID;
2706 case 160:
2707 return AMDGPU::SGPR_160RegClassID;
2708 case 192:
2709 return AMDGPU::SGPR_192RegClassID;
2710 case 224:
2711 return AMDGPU::SGPR_224RegClassID;
2712 case 256:
2713 return AMDGPU::SGPR_256RegClassID;
2714 case 288:
2715 return AMDGPU::SGPR_288RegClassID;
2716 case 320:
2717 return AMDGPU::SGPR_320RegClassID;
2718 case 352:
2719 return AMDGPU::SGPR_352RegClassID;
2720 case 384:
2721 return AMDGPU::SGPR_384RegClassID;
2722 case 512:
2723 return AMDGPU::SGPR_512RegClassID;
2724 }
2725 } else if (Is == IS_AGPR) {
2726 switch (RegWidth) {
2727 default: return -1;
2728 case 32:
2729 return AMDGPU::AGPR_32RegClassID;
2730 case 64:
2731 return AMDGPU::AReg_64RegClassID;
2732 case 96:
2733 return AMDGPU::AReg_96RegClassID;
2734 case 128:
2735 return AMDGPU::AReg_128RegClassID;
2736 case 160:
2737 return AMDGPU::AReg_160RegClassID;
2738 case 192:
2739 return AMDGPU::AReg_192RegClassID;
2740 case 224:
2741 return AMDGPU::AReg_224RegClassID;
2742 case 256:
2743 return AMDGPU::AReg_256RegClassID;
2744 case 288:
2745 return AMDGPU::AReg_288RegClassID;
2746 case 320:
2747 return AMDGPU::AReg_320RegClassID;
2748 case 352:
2749 return AMDGPU::AReg_352RegClassID;
2750 case 384:
2751 return AMDGPU::AReg_384RegClassID;
2752 case 512:
2753 return AMDGPU::AReg_512RegClassID;
2754 case 1024:
2755 return AMDGPU::AReg_1024RegClassID;
2756 }
2757 }
2758 return -1;
2759}
2760
2763 .Case("exec", AMDGPU::EXEC)
2764 .Case("vcc", AMDGPU::VCC)
2765 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2766 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2767 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2768 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2769 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2770 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2771 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2772 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2773 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2774 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2775 .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2776 .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2777 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2778 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2779 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2780 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2781 .Case("m0", AMDGPU::M0)
2782 .Case("vccz", AMDGPU::SRC_VCCZ)
2783 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2784 .Case("execz", AMDGPU::SRC_EXECZ)
2785 .Case("src_execz", AMDGPU::SRC_EXECZ)
2786 .Case("scc", AMDGPU::SRC_SCC)
2787 .Case("src_scc", AMDGPU::SRC_SCC)
2788 .Case("tba", AMDGPU::TBA)
2789 .Case("tma", AMDGPU::TMA)
2790 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2791 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2792 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2793 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2794 .Case("vcc_lo", AMDGPU::VCC_LO)
2795 .Case("vcc_hi", AMDGPU::VCC_HI)
2796 .Case("exec_lo", AMDGPU::EXEC_LO)
2797 .Case("exec_hi", AMDGPU::EXEC_HI)
2798 .Case("tma_lo", AMDGPU::TMA_LO)
2799 .Case("tma_hi", AMDGPU::TMA_HI)
2800 .Case("tba_lo", AMDGPU::TBA_LO)
2801 .Case("tba_hi", AMDGPU::TBA_HI)
2802 .Case("pc", AMDGPU::PC_REG)
2803 .Case("null", AMDGPU::SGPR_NULL)
2804 .Default(AMDGPU::NoRegister);
2805}
2806
2807bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2808 SMLoc &EndLoc, bool RestoreOnFailure) {
2809 auto R = parseRegister();
2810 if (!R) return true;
2811 assert(R->isReg());
2812 RegNo = R->getReg();
2813 StartLoc = R->getStartLoc();
2814 EndLoc = R->getEndLoc();
2815 return false;
2816}
2817
2818bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2819 SMLoc &EndLoc) {
2820 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2821}
2822
2823ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2824 SMLoc &EndLoc) {
2825 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2826 bool PendingErrors = getParser().hasPendingError();
2827 getParser().clearPendingErrors();
2828 if (PendingErrors)
2829 return ParseStatus::Failure;
2830 if (Result)
2831 return ParseStatus::NoMatch;
2832 return ParseStatus::Success;
2833}
2834
2835bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2836 RegisterKind RegKind,
2837 MCRegister Reg1,
2838 RegisterKind RegKind1, SMLoc Loc) {
2839 // Allow VCC_LO/HI at the end of SGPR lists.
2840 if (RegKind == IS_SGPR) {
2841 unsigned RegIdx = (Reg - AMDGPU::SGPR0) + RegWidth / 32;
2842 if ((RegIdx == 106 && Reg1 == AMDGPU::VCC_LO) ||
2843 (RegIdx == 107 && Reg1 == AMDGPU::VCC_HI)) {
2844 RegWidth += 32;
2845 return true;
2846 }
2847 }
2848
2849 if (RegKind != RegKind1) {
2850 Error(Loc, "registers in a list must be of the same kind");
2851 return MCRegister();
2852 }
2853
2854 switch (RegKind) {
2855 case IS_SPECIAL:
2856 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2857 Reg = AMDGPU::EXEC;
2858 RegWidth = 64;
2859 return true;
2860 }
2861 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2862 Reg = AMDGPU::FLAT_SCR;
2863 RegWidth = 64;
2864 return true;
2865 }
2866 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2867 Reg = AMDGPU::XNACK_MASK;
2868 RegWidth = 64;
2869 return true;
2870 }
2871 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2872 Reg = AMDGPU::VCC;
2873 RegWidth = 64;
2874 return true;
2875 }
2876 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2877 Reg = AMDGPU::TBA;
2878 RegWidth = 64;
2879 return true;
2880 }
2881 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2882 Reg = AMDGPU::TMA;
2883 RegWidth = 64;
2884 return true;
2885 }
2886 Error(Loc, "register does not fit in the list");
2887 return false;
2888 case IS_VGPR:
2889 case IS_SGPR:
2890 case IS_AGPR:
2891 case IS_TTMP:
2892 if (Reg1 != Reg + RegWidth / 32) {
2893 Error(Loc, "registers in a list must have consecutive indices");
2894 return false;
2895 }
2896 RegWidth += 32;
2897 return true;
2898 default:
2899 llvm_unreachable("unexpected register kind");
2900 }
2901}
2902
2903struct RegInfo {
2905 RegisterKind Kind;
2906};
2907
// Name prefixes of the regular register files and the kind each one denotes.
// The table is scanned front to back with a starts_with() test, so a longer
// prefix must come before any shorter prefix it begins with ("acc" before
// "a").
2908 static constexpr RegInfo RegularRegisters[] = {
2909 {{"v"}, IS_VGPR},
2910 {{"s"}, IS_SGPR},
2911 {{"ttmp"}, IS_TTMP},
2912 {{"acc"}, IS_AGPR},
2913 {{"a"}, IS_AGPR},
2914 };
2915
2916static bool isRegularReg(RegisterKind Kind) {
2917 return Kind == IS_VGPR ||
2918 Kind == IS_SGPR ||
2919 Kind == IS_TTMP ||
2920 Kind == IS_AGPR;
2921}
2922
2924 for (const RegInfo &Reg : RegularRegisters)
2925 if (Str.starts_with(Reg.Name))
2926 return &Reg;
2927 return nullptr;
2928}
2929
2930static bool getRegNum(StringRef Str, unsigned& Num) {
2931 return !Str.getAsInteger(10, Num);
2932}
2933
2934bool
2935AMDGPUAsmParser::isRegister(const AsmToken &Token,
2936 const AsmToken &NextToken) const {
2937
2938 // A list of consecutive registers: [s0,s1,s2,s3]
2939 if (Token.is(AsmToken::LBrac))
2940 return true;
2941
2942 if (!Token.is(AsmToken::Identifier))
2943 return false;
2944
2945 // A single register like s0 or a range of registers like s[0:1]
2946
2947 StringRef Str = Token.getString();
2948 const RegInfo *Reg = getRegularRegInfo(Str);
2949 if (Reg) {
2950 StringRef RegName = Reg->Name;
2951 StringRef RegSuffix = Str.substr(RegName.size());
2952 if (!RegSuffix.empty()) {
2953 RegSuffix.consume_back(".l");
2954 RegSuffix.consume_back(".h");
2955 unsigned Num;
2956 // A single register with an index: rXX
2957 if (getRegNum(RegSuffix, Num))
2958 return true;
2959 } else {
2960 // A range of registers: r[XX:YY].
2961 if (NextToken.is(AsmToken::LBrac))
2962 return true;
2963 }
2964 }
2965
2966 return getSpecialRegForName(Str).isValid();
2967}
2968
2969bool
2970AMDGPUAsmParser::isRegister()
2971{
2972 return isRegister(getToken(), peekToken());
2973}
2974
2975MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2976 unsigned SubReg, unsigned RegWidth,
2977 SMLoc Loc) {
2978 assert(isRegularReg(RegKind));
2979
2980 unsigned AlignSize = 1;
2981 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2982 // SGPR and TTMP registers must be aligned.
2983 // Max required alignment is 4 dwords.
2984 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2985 }
2986
2987 if (RegNum % AlignSize != 0) {
2988 Error(Loc, "invalid register alignment");
2989 return MCRegister();
2990 }
2991
2992 unsigned RegIdx = RegNum / AlignSize;
2993 int RCID = getRegClass(RegKind, RegWidth);
2994 if (RCID == -1) {
2995 Error(Loc, "invalid or unsupported register size");
2996 return MCRegister();
2997 }
2998
2999 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3000 const MCRegisterClass RC = TRI->getRegClass(RCID);
3001 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
3002 Error(Loc, "register index is out of range");
3003 return AMDGPU::NoRegister;
3004 }
3005
3006 if (RegKind == IS_VGPR && !isGFX1250Plus() && RegIdx + RegWidth / 32 > 256) {
3007 Error(Loc, "register index is out of range");
3008 return MCRegister();
3009 }
3010
3011 MCRegister Reg = RC.getRegister(RegIdx);
3012
3013 if (SubReg) {
3014 Reg = TRI->getSubReg(Reg, SubReg);
3015
3016 // Currently all regular registers have their .l and .h subregisters, so
3017 // we should never need to generate an error here.
3018 assert(Reg && "Invalid subregister!");
3019 }
3020
3021 return Reg;
3022}
3023
3024bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
3025 unsigned &SubReg) {
3026 int64_t RegLo, RegHi;
3027 if (!skipToken(AsmToken::LBrac, "missing register index"))
3028 return false;
3029
3030 SMLoc FirstIdxLoc = getLoc();
3031 SMLoc SecondIdxLoc;
3032
3033 if (!parseExpr(RegLo))
3034 return false;
3035
3036 if (trySkipToken(AsmToken::Colon)) {
3037 SecondIdxLoc = getLoc();
3038 if (!parseExpr(RegHi))
3039 return false;
3040 } else {
3041 RegHi = RegLo;
3042 }
3043
3044 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
3045 return false;
3046
3047 if (!isUInt<32>(RegLo)) {
3048 Error(FirstIdxLoc, "invalid register index");
3049 return false;
3050 }
3051
3052 if (!isUInt<32>(RegHi)) {
3053 Error(SecondIdxLoc, "invalid register index");
3054 return false;
3055 }
3056
3057 if (RegLo > RegHi) {
3058 Error(FirstIdxLoc, "first register index should not exceed second index");
3059 return false;
3060 }
3061
3062 if (RegHi == RegLo) {
3063 StringRef RegSuffix = getTokenStr();
3064 if (RegSuffix == ".l") {
3065 SubReg = AMDGPU::lo16;
3066 lex();
3067 } else if (RegSuffix == ".h") {
3068 SubReg = AMDGPU::hi16;
3069 lex();
3070 }
3071 }
3072
3073 Num = static_cast<unsigned>(RegLo);
3074 RegWidth = 32 * ((RegHi - RegLo) + 1);
3075
3076 return true;
3077}
3078
3079MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3080 unsigned &RegNum,
3081 unsigned &RegWidth,
3082 SmallVectorImpl<AsmToken> &Tokens) {
3083 assert(isToken(AsmToken::Identifier));
3084 MCRegister Reg = getSpecialRegForName(getTokenStr());
3085 if (Reg) {
3086 RegNum = 0;
3087 RegWidth = 32;
3088 RegKind = IS_SPECIAL;
3089 Tokens.push_back(getToken());
3090 lex(); // skip register name
3091 }
3092 return Reg;
3093}
3094
3095MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3096 unsigned &RegNum,
3097 unsigned &RegWidth,
3098 SmallVectorImpl<AsmToken> &Tokens) {
3099 assert(isToken(AsmToken::Identifier));
3100 StringRef RegName = getTokenStr();
3101 auto Loc = getLoc();
3102
3103 const RegInfo *RI = getRegularRegInfo(RegName);
3104 if (!RI) {
3105 Error(Loc, "invalid register name");
3106 return MCRegister();
3107 }
3108
3109 Tokens.push_back(getToken());
3110 lex(); // skip register name
3111
3112 RegKind = RI->Kind;
3113 StringRef RegSuffix = RegName.substr(RI->Name.size());
3114 unsigned SubReg = NoSubRegister;
3115 bool IsRange = false;
3116 if (!RegSuffix.empty()) {
3117 if (RegSuffix.consume_back(".l"))
3118 SubReg = AMDGPU::lo16;
3119 else if (RegSuffix.consume_back(".h"))
3120 SubReg = AMDGPU::hi16;
3121
3122 // Single 32-bit register: vXX.
3123 if (!getRegNum(RegSuffix, RegNum)) {
3124 Error(Loc, "invalid register index");
3125 return MCRegister();
3126 }
3127 RegWidth = 32;
3128 } else {
3129 // Range of registers: v[XX:YY]. ":YY" is optional.
3130 IsRange = true;
3131 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3132 return MCRegister();
3133 }
3134
3135 // Do not allow vcc_lo/hi be referred as s106/107.
3136 MCRegister Reg = getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3137 const MCRegisterInfo &TRI = *getContext().getRegisterInfo();
3138 if (RegKind == IS_SGPR && IsRange
3139 ? (TRI.isSubRegister(Reg, VCC_LO) || TRI.isSubRegister(Reg, VCC_HI))
3140 : (Reg == VCC_LO || Reg == VCC_HI)) {
3141 Error(Loc, "register index is out of range");
3142 return MCRegister();
3143 }
3144
3145 return Reg;
3146}
3147
3148MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3149 unsigned &RegNum, unsigned &RegWidth,
3150 SmallVectorImpl<AsmToken> &Tokens) {
3151 MCRegister Reg;
3152 auto ListLoc = getLoc();
3153
3154 if (!skipToken(AsmToken::LBrac,
3155 "expected a register or a list of registers")) {
3156 return MCRegister();
3157 }
3158
3159 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3160
3161 auto Loc = getLoc();
3162 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3163 return MCRegister();
3164 if (RegWidth != 32) {
3165 Error(Loc, "expected a single 32-bit register");
3166 return MCRegister();
3167 }
3168
3169 for (; trySkipToken(AsmToken::Comma); ) {
3170 RegisterKind NextRegKind;
3171 MCRegister NextReg;
3172 unsigned NextRegNum, NextRegWidth;
3173 Loc = getLoc();
3174
3175 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3176 NextRegNum, NextRegWidth,
3177 Tokens)) {
3178 return MCRegister();
3179 }
3180 if (NextRegWidth != 32) {
3181 Error(Loc, "expected a single 32-bit register");
3182 return MCRegister();
3183 }
3184 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, NextRegKind,
3185 Loc))
3186 return MCRegister();
3187 }
3188
3189 if (!skipToken(AsmToken::RBrac,
3190 "expected a comma or a closing square bracket")) {
3191 return MCRegister();
3192 }
3193
3194 if (isRegularReg(RegKind))
3195 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3196
3197 return Reg;
3198}
3199
// Parses one register operand starting at the current token: a special
// register, a regular register, or (when the current token is not an
// identifier) a bracketed register list. The tokens consumed by the helper
// parsers are passed through in Tokens.
//
// Returns true when a register was parsed and is available on the current
// subtarget; returns false otherwise, with a diagnostic pending.
3200bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3201 MCRegister &Reg, unsigned &RegNum,
3202 unsigned &RegWidth,
3203 SmallVectorImpl<AsmToken> &Tokens) {
3204 auto Loc = getLoc();
3205 Reg = MCRegister();
3206
3207 if (isToken(AsmToken::Identifier)) {
// A special register name takes precedence over a regular register.
3208 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3209 if (!Reg)
3210 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3211 } else {
3212 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3213 }
3214
3215 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3216 if (!Reg) {
// The helper parsers are expected to have reported the error already.
3217 assert(Parser.hasPendingError());
3218 return false;
3219 }
3220
3221 if (!subtargetHasRegister(*TRI, Reg)) {
3222 if (Reg == AMDGPU::SGPR_NULL) {
3223 Error(Loc, "'null' operand is not supported on this GPU");
3224 } else {
// NOTE(review): listing line 3225 is absent here; presumably it opens the
// Error(...) call that the string below completes. Confirm against the
// original file.
3226 " register not available on this GPU");
3227 }
3228 return false;
3229 }
3230
3231 return true;
3232}
3233
// Convenience overload of ParseAMDGPURegister that forwards to the overload
// taking a token list and returns that overload's result.
//
// NOTE(review): the un-lexing below runs on the path where the forwarded
// call returned true, even though the flag is named RestoreOnFailure.
// Verify that this is the intended behavior.
3234bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3235 MCRegister &Reg, unsigned &RegNum,
3236 unsigned &RegWidth,
3237 bool RestoreOnFailure /*=false*/) {
3238 Reg = MCRegister();
3239
// NOTE(review): listing line 3240 is absent here; presumably it declares the
// local Tokens container used below. Confirm against the original file.
3241 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3242 if (RestoreOnFailure) {
// Push the recorded tokens back to the lexer, last one first.
3243 while (!Tokens.empty()) {
3244 getLexer().UnLex(Tokens.pop_back_val());
3245 }
3246 }
3247 return true;
3248 }
3249 return false;
3250}
3251
3252std::optional<StringRef>
3253AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3254 switch (RegKind) {
3255 case IS_VGPR:
3256 return StringRef(".amdgcn.next_free_vgpr");
3257 case IS_SGPR:
3258 return StringRef(".amdgcn.next_free_sgpr");
3259 default:
3260 return std::nullopt;
3261 }
3262}
3263
// Creates (or looks up) the register-count symbol for RegKind and marks it
// redefinable. RegKind must be a kind that has such a symbol.
3264void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3265 auto SymbolName = getGprCountSymbolName(RegKind);
3266 assert(SymbolName && "initializing invalid register kind");
3267 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
// NOTE(review): listing line 3268 is absent here; presumably it gives the
// symbol its initial value. Confirm against the original file.
3269 Sym->setRedefinable(true);
3270}
3271
// Checks the register-count symbol for RegKind against a register use that
// starts at DwordRegIndex and spans RegWidth bits.
//
// Returns true when there is nothing to do or the symbol was handled;
// returns false after reporting an error when the symbol is not a variable
// or its value is not an absolute expression.
3272bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3273 unsigned DwordRegIndex,
3274 unsigned RegWidth) {
3275 // Symbols are only defined for GCN targets
3276 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3277 return true;
3278
3279 auto SymbolName = getGprCountSymbolName(RegKind);
// Register kinds without a count symbol need no bookkeeping.
3280 if (!SymbolName)
3281 return true;
3282 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3283
// Highest dword register index touched by this use.
3284 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3285 int64_t OldCount;
3286
3287 if (!Sym->isVariable())
3288 return !Error(getLoc(),
3289 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3290 if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
3291 return !Error(
3292 getLoc(),
3293 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3294
3295 if (OldCount <= NewMax)
// NOTE(review): listing line 3296 is absent here; presumably it is the
// statement guarded by the condition above, raising the symbol value past
// NewMax. Confirm against the original file.
3297
3298 return true;
3299}
3300
3301std::unique_ptr<AMDGPUOperand>
3302AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3303 const auto &Tok = getToken();
3304 SMLoc StartLoc = Tok.getLoc();
3305 SMLoc EndLoc = Tok.getEndLoc();
3306 RegisterKind RegKind;
3307 MCRegister Reg;
3308 unsigned RegNum, RegWidth;
3309
3310 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3311 return nullptr;
3312 }
3313 if (isHsaAbi(getSTI())) {
3314 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3315 return nullptr;
3316 } else
3317 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3318 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3319}
3320
3321ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3322 bool HasSP3AbsModifier, LitModifier Lit) {
3323 // TODO: add syntactic sugar for 1/(2*PI)
3324
3325 if (isRegister() || isModifier())
3326 return ParseStatus::NoMatch;
3327
3328 if (Lit == LitModifier::None) {
3329 if (trySkipId("lit"))
3330 Lit = LitModifier::Lit;
3331 else if (trySkipId("lit64"))
3332 Lit = LitModifier::Lit64;
3333
3334 if (Lit != LitModifier::None) {
3335 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3336 return ParseStatus::Failure;
3337 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
3338 if (S.isSuccess() &&
3339 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3340 return ParseStatus::Failure;
3341 return S;
3342 }
3343 }
3344
3345 const auto& Tok = getToken();
3346 const auto& NextTok = peekToken();
3347 bool IsReal = Tok.is(AsmToken::Real);
3348 SMLoc S = getLoc();
3349 bool Negate = false;
3350
3351 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3352 lex();
3353 IsReal = true;
3354 Negate = true;
3355 }
3356
3357 AMDGPUOperand::Modifiers Mods;
3358 Mods.Lit = Lit;
3359
3360 if (IsReal) {
3361 // Floating-point expressions are not supported.
3362 // Can only allow floating-point literals with an
3363 // optional sign.
3364
3365 StringRef Num = getTokenStr();
3366 lex();
3367
3368 APFloat RealVal(APFloat::IEEEdouble());
3369 auto roundMode = APFloat::rmNearestTiesToEven;
3370 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3371 return ParseStatus::Failure;
3372 if (Negate)
3373 RealVal.changeSign();
3374
3375 Operands.push_back(
3376 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3377 AMDGPUOperand::ImmTyNone, true));
3378 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3379 Op.setModifiers(Mods);
3380
3381 return ParseStatus::Success;
3382
3383 } else {
3384 int64_t IntVal;
3385 const MCExpr *Expr;
3386 SMLoc S = getLoc();
3387
3388 if (HasSP3AbsModifier) {
3389 // This is a workaround for handling expressions
3390 // as arguments of SP3 'abs' modifier, for example:
3391 // |1.0|
3392 // |-1|
3393 // |1+x|
3394 // This syntax is not compatible with syntax of standard
3395 // MC expressions (due to the trailing '|').
3396 SMLoc EndLoc;
3397 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3398 return ParseStatus::Failure;
3399 } else {
3400 if (Parser.parseExpression(Expr))
3401 return ParseStatus::Failure;
3402 }
3403
3404 if (Expr->evaluateAsAbsolute(IntVal)) {
3405 if (Lit == LitModifier::Lit && !isInt<32>(IntVal) && !isUInt<32>(IntVal))
3406 return Error(S, "literal value out of range");
3407 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3408 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3409 Op.setModifiers(Mods);
3410 } else {
3411 if (Lit != LitModifier::None)
3412 return ParseStatus::NoMatch;
3413 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3414 }
3415
3416 return ParseStatus::Success;
3417 }
3418
3419 return ParseStatus::NoMatch;
3420}
3421
3422ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3423 if (!isRegister())
3424 return ParseStatus::NoMatch;
3425
3426 if (auto R = parseRegister()) {
3427 assert(R->isReg());
3428 Operands.push_back(std::move(R));
3429 return ParseStatus::Success;
3430 }
3431 return ParseStatus::Failure;
3432}
3433
3434ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3435 bool HasSP3AbsMod, LitModifier Lit) {
3436 ParseStatus Res = parseReg(Operands);
3437 if (!Res.isNoMatch())
3438 return Res;
3439 if (isModifier())
3440 return ParseStatus::NoMatch;
3441 return parseImm(Operands, HasSP3AbsMod, Lit);
3442}
3443
3444bool
3445AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3446 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3447 const auto &str = Token.getString();
3448 return str == "abs" || str == "neg" || str == "sext";
3449 }
3450 return false;
3451}
3452
3453bool
3454AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3455 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3456}
3457
3458bool
3459AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3460 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3461}
3462
3463bool
3464AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3465 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3466}
3467
3468// Check if this is an operand modifier or an opcode modifier
3469// which may look like an expression but it is not. We should
3470// avoid parsing these modifiers as expressions. Currently
3471// recognized sequences are:
3472// |...|
3473// abs(...)
3474// neg(...)
3475// sext(...)
3476// -reg
3477// -|...|
3478// -abs(...)
3479// name:...
3480//
3481bool
3482AMDGPUAsmParser::isModifier() {
3483
3484 AsmToken Tok = getToken();
3485 AsmToken NextToken[2];
3486 peekTokens(NextToken);
3487
3488 return isOperandModifier(Tok, NextToken[0]) ||
3489 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3490 isOpcodeModifierWithVal(Tok, NextToken[0]);
3491}
3492
3493// Check if the current token is an SP3 'neg' modifier.
3494// Currently this modifier is allowed in the following context:
3495//
3496// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3497// 2. Before an 'abs' modifier: -abs(...)
3498// 3. Before an SP3 'abs' modifier: -|...|
3499//
3500// In all other cases "-" is handled as a part
3501// of an expression that follows the sign.
3502//
3503// Note: When "-" is followed by an integer literal,
3504// this is interpreted as integer negation rather
3505// than a floating-point NEG modifier applied to N.
3506// Besides being counter-intuitive, such use of floating-point
3507// NEG modifier would have resulted in different meaning
3508// of integer literals used with VOP1/2/C and VOP3,
3509// for example:
3510// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3511// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3512// Negative fp literals with preceding "-" are
3513// handled likewise for uniformity
3514//
3515bool
3516AMDGPUAsmParser::parseSP3NegModifier() {
3517
3518 AsmToken NextToken[2];
3519 peekTokens(NextToken);
3520
3521 if (isToken(AsmToken::Minus) &&
3522 (isRegister(NextToken[0], NextToken[1]) ||
3523 NextToken[0].is(AsmToken::Pipe) ||
3524 isId(NextToken[0], "abs"))) {
3525 lex();
3526 return true;
3527 }
3528
3529 return false;
3530}
3531
// Parses a register (or, when AllowImm is set, a register or immediate)
// together with optional floating-point input modifiers: neg(...) or a
// leading SP3 '-', abs(...) or SP3 '|...|', and lit(...) / lit64(...).
// On success the parsed modifiers are attached to the operand that was
// appended to Operands.
3532ParseStatus
3533AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3534 bool AllowImm) {
3535 bool Neg, SP3Neg;
3536 bool Abs, SP3Abs;
3537 SMLoc Loc;
3538
3539 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3540 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3541 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3542
3543 SP3Neg = parseSP3NegModifier();
3544
3545 Loc = getLoc();
3546 Neg = trySkipId("neg");
// The SP3 '-' form and the named neg(...) form cannot be combined.
3547 if (Neg && SP3Neg)
3548 return Error(Loc, "expected register or immediate");
3549 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3550 return ParseStatus::Failure;
3551
3552 Abs = trySkipId("abs");
3553 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3554 return ParseStatus::Failure;
3555
3556 LitModifier Lit = LitModifier::None;
3557 if (trySkipId("lit")) {
3558 Lit = LitModifier::Lit;
3559 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3560 return ParseStatus::Failure;
3561 } else if (trySkipId("lit64")) {
3562 Lit = LitModifier::Lit64;
3563 if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
3564 return ParseStatus::Failure;
3565 if (!has64BitLiterals())
3566 return Error(Loc, "lit64 is not supported on this GPU");
3567 }
3568
3569 Loc = getLoc();
3570 SP3Abs = trySkipToken(AsmToken::Pipe);
// The SP3 '|' form and the named abs(...) form cannot be combined.
3571 if (Abs && SP3Abs)
3572 return Error(Loc, "expected register or immediate");
3573
3574 ParseStatus Res;
3575 if (AllowImm) {
3576 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3577 } else {
3578 Res = parseReg(Operands);
3579 }
3580 if (!Res.isSuccess())
// NOTE(review): listing line 3582 is absent here; presumably it is the
// "true" arm of this conditional (a failure status once any modifier has
// been consumed). Confirm against the original file.
3581 return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
3583 : Res;
3584
// NOTE(review): the result of Error() is not returned here, so parsing
// continues after the diagnostic. Confirm this is intended.
3585 if (Lit != LitModifier::None && !Operands.back()->isImm())
3586 Error(Loc, "expected immediate with lit modifier");
3587
// Consume the closing delimiter of every modifier that was opened above.
3588 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3589 return ParseStatus::Failure;
3590 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3591 return ParseStatus::Failure;
3592 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3593 return ParseStatus::Failure;
3594 if (Lit != LitModifier::None &&
3595 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3596 return ParseStatus::Failure;
3597
3598 AMDGPUOperand::Modifiers Mods;
3599 Mods.Abs = Abs || SP3Abs;
3600 Mods.Neg = Neg || SP3Neg;
3601 Mods.Lit = Lit;
3602
3603 if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
3604 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
// Modifiers are rejected on an operand that is still an expression.
3605 if (Op.isExpr())
3606 return Error(Op.getStartLoc(), "expected an absolute expression");
3607 Op.setModifiers(Mods);
3608 }
3609 return ParseStatus::Success;
3610}
3611
3612ParseStatus
3613AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3614 bool AllowImm) {
3615 bool Sext = trySkipId("sext");
3616 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3617 return ParseStatus::Failure;
3618
3619 ParseStatus Res;
3620 if (AllowImm) {
3621 Res = parseRegOrImm(Operands);
3622 } else {
3623 Res = parseReg(Operands);
3624 }
3625 if (!Res.isSuccess())
3626 return Sext ? ParseStatus::Failure : Res;
3627
3628 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3629 return ParseStatus::Failure;
3630
3631 AMDGPUOperand::Modifiers Mods;
3632 Mods.Sext = Sext;
3633
3634 if (Mods.hasIntModifiers()) {
3635 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3636 if (Op.isExpr())
3637 return Error(Op.getStartLoc(), "expected an absolute expression");
3638 Op.setModifiers(Mods);
3639 }
3640
3641 return ParseStatus::Success;
3642}
3643
3644ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3645 return parseRegOrImmWithFPInputMods(Operands, false);
3646}
3647
3648ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3649 return parseRegOrImmWithIntInputMods(Operands, false);
3650}
3651
3652ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3653 auto Loc = getLoc();
3654 if (trySkipId("off")) {
3655 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3656 AMDGPUOperand::ImmTyOff, false));
3657 return ParseStatus::Success;
3658 }
3659
3660 if (!isRegister())
3661 return ParseStatus::NoMatch;
3662
3663 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3664 if (Reg) {
3665 Operands.push_back(std::move(Reg));
3666 return ParseStatus::Success;
3667 }
3668
3669 return ParseStatus::Failure;
3670}
3671
3672unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3673 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3674
3675 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3676 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3677 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3678 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3679 return Match_InvalidOperand;
3680
3681 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3682 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3683 // v_mac_f32/16 allow only dst_sel == DWORD;
3684 auto OpNum =
3685 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3686 const auto &Op = Inst.getOperand(OpNum);
3687 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3688 return Match_InvalidOperand;
3689 }
3690 }
3691
3692 // Asm can first try to match VOPD or VOPD3. By failing early here with
3693 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3694 // Checking later during validateInstruction does not give a chance to retry
3695 // parsing as a different encoding.
3696 if (tryAnotherVOPDEncoding(Inst))
3697 return Match_InvalidOperand;
3698
3699 return Match_Success;
3700}
3701
3711
3712// What asm variants we should check
// Returns the assembler variants to try for the current instruction,
// narrowed down by any encoding the user forced. The checks are ordered:
// a forced DPP+VOP3 combination first, then a forced 32-bit encoding,
// VOP3, SDWA and DPP. Without a forced encoding all variants are returned.
3713ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3714 if (isForcedDPP() && isForcedVOP3()) {
3715 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3716 return ArrayRef(Variants);
3717 }
3718 if (getForcedEncodingSize() == 32) {
3719 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3720 return ArrayRef(Variants);
3721 }
3722
3723 if (isForcedVOP3()) {
3724 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3725 return ArrayRef(Variants);
3726 }
3727
3728 if (isForcedSDWA()) {
// NOTE(review): listing line 3730 is absent here; presumably it holds the
// remaining initializer entries and the closing brace of this array.
// Confirm against the original file.
3729 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3731 return ArrayRef(Variants);
3732 }
3733
3734 if (isForcedDPP()) {
3735 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3736 return ArrayRef(Variants);
3737 }
3738
3739 return getAllVariants();
3740}
3741
3742StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3743 if (isForcedDPP() && isForcedVOP3())
3744 return "e64_dpp";
3745
3746 if (getForcedEncodingSize() == 32)
3747 return "e32";
3748
3749 if (isForcedVOP3())
3750 return "e64";
3751
3752 if (isForcedSDWA())
3753 return "sdwa";
3754
3755 if (isForcedDPP())
3756 return "dpp";
3757
3758 return "";
3759}
3760
3761MCRegister
3762AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3763 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3764 for (MCPhysReg Reg : Desc.implicit_uses()) {
3765 switch (Reg) {
3766 case AMDGPU::FLAT_SCR:
3767 case AMDGPU::VCC:
3768 case AMDGPU::VCC_LO:
3769 case AMDGPU::VCC_HI:
3770 case AMDGPU::M0:
3771 return Reg;
3772 default:
3773 break;
3774 }
3775 }
3776 return MCRegister();
3777}
3778
3779// NB: This code is correct only when used to check constant
3780// bus limitations because GFX7 supports no f16 inline constants.
3781// Note that there are no cases when a GFX7 opcode violates
3782// constant bus limitations due to the use of an f16 constant.
// Decides whether operand OpIdx of Inst can be encoded as an inline
// constant, dispatching on the operand size expected by the instruction
// description.
//
// NOTE(review): several lines of this function are absent from this listing
// (see the gaps in the embedded line numbers). The notes below mark each
// gap; confirm against the original file before relying on them.
3783bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3784 unsigned OpIdx) const {
3785 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3786
// NOTE(review): listing lines 3787-3788 are absent; presumably they hold the
// condition that guards the early "return false" below.
3789 return false;
3790 }
3791
3792 const MCOperand &MO = Inst.getOperand(OpIdx);
3793
// Take the value from the immediate, or from the expression otherwise.
3794 int64_t Val = MO.isImm() ? MO.getImm() : getLitValue(MO.getExpr());
3795 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3796
3797 switch (OpSize) { // expected operand size
3798 case 8:
3799 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3800 case 4:
3801 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3802 case 2: {
3803 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
// NOTE(review): the conditions selecting between the returns below (listing
// lines 3804-3805, 3807-3824, 3827-3828 and 3831) are absent; presumably
// they test OperandType against the 16-bit operand type families.
3806 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3807
3811
3815
3818
3822
3825 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3826
3829 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3830
3832 return false;
3833
3834 llvm_unreachable("invalid operand type");
3835 }
3836 default:
3837 llvm_unreachable("invalid operand size");
3838 }
3839}
3840
3841unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3842 if (!isGFX10Plus())
3843 return 1;
3844
3845 switch (Opcode) {
3846 // 64-bit shift instructions can use only one scalar value input
3847 case AMDGPU::V_LSHLREV_B64_e64:
3848 case AMDGPU::V_LSHLREV_B64_gfx10:
3849 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3850 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3851 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3852 case AMDGPU::V_LSHRREV_B64_e64:
3853 case AMDGPU::V_LSHRREV_B64_gfx10:
3854 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3855 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3856 case AMDGPU::V_ASHRREV_I64_e64:
3857 case AMDGPU::V_ASHRREV_I64_gfx10:
3858 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3859 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3860 case AMDGPU::V_LSHL_B64_e64:
3861 case AMDGPU::V_LSHR_B64_e64:
3862 case AMDGPU::V_ASHR_I64_e64:
3863 return 1;
3864 default:
3865 return 2;
3866 }
3867}
3868
// Inline capacity hint for lists of source operand indices.
// NOTE(review): listing line 3870 is absent below; presumably it declares the
// OperandIndices container type using this constant. Confirm against the
// original file.
3869constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3871
3872// Get regular operand indices in the same order as specified
3873// in the instruction (but append mandatory literals to the end).
// Operands the opcode does not have appear as -1 when the named-operand
// lookup yields -1 for them; the literal entries are -1 unless
// AddMandatoryLiterals is set.
// NOTE(review): listing line 3874 with the start of the function signature is
// absent; the name getSrcOperandIndices is inferred from a call site later in
// this file. Confirm against the original file.
3875 bool AddMandatoryLiterals = false) {
3876
3877 int16_t ImmIdx =
3878 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3879
3880 if (isVOPD(Opcode)) {
3881 int16_t ImmXIdx =
3882 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3883
// VOPD: sources of the X component, then of the Y component, then literals.
3884 return {getNamedOperandIdx(Opcode, OpName::src0X),
3885 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3886 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3887 getNamedOperandIdx(Opcode, OpName::src0Y),
3888 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3889 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3890 ImmXIdx,
3891 ImmIdx};
3892 }
3893
3894 return {getNamedOperandIdx(Opcode, OpName::src0),
3895 getNamedOperandIdx(Opcode, OpName::src1),
3896 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3897}
3898
3899bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3900 const MCOperand &MO = Inst.getOperand(OpIdx);
3901 if (MO.isImm())
3902 return !isInlineConstant(Inst, OpIdx);
3903 if (MO.isReg()) {
3904 auto Reg = MO.getReg();
3905 if (!Reg)
3906 return false;
3907 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3908 auto PReg = mc2PseudoReg(Reg);
3909 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3910 }
3911 return true;
3912}
3913
3914// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3915// Writelane is special in that it can use SGPR and M0 (which would normally
3916// count as using the constant bus twice - but in this case it is allowed since
3917// the lane selector doesn't count as a use of the constant bus). However, it is
3918// still required to abide by the 1 SGPR rule.
3919static bool checkWriteLane(const MCInst &Inst) {
3920 const unsigned Opcode = Inst.getOpcode();
3921 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3922 return false;
3923 const MCOperand &LaneSelOp = Inst.getOperand(2);
3924 if (!LaneSelOp.isReg())
3925 return false;
3926 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3927 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3928}
3929
// Checks that Inst does not use more scalar values (distinct SGPRs plus
// literals) than the constant bus limit of its opcode allows.
//
// Returns true when the instruction is acceptable or not subject to the
// check; otherwise reports an error at the offending operand and returns
// false.
3930bool AMDGPUAsmParser::validateConstantBusLimitations(
3931 const MCInst &Inst, const OperandVector &Operands) {
3932 const unsigned Opcode = Inst.getOpcode();
3933 const MCInstrDesc &Desc = MII.get(Opcode);
3934 MCRegister LastSGPR;
3935 unsigned ConstantBusUseCount = 0;
3936 unsigned NumLiterals = 0;
// Only read after NumLiterals has become non-zero, which also sets it.
3937 unsigned LiteralSize;
3938
// NOTE(review): listing lines 3940-3941 are absent here; presumably they list
// the instruction-format flags that make an opcode subject to this check.
// Confirm against the original file.
3939 if (!(Desc.TSFlags &
3942 !isVOPD(Opcode))
3943 return true;
3944
3945 if (checkWriteLane(Inst))
3946 return true;
3947
3948 // Check special imm operands (used by madmk, etc)
3949 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3950 ++NumLiterals;
3951 LiteralSize = 4;
3952 }
3953
3954 SmallDenseSet<MCRegister> SGPRsUsed;
// An implicitly read register counts as one constant bus use up front.
3955 MCRegister SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3956 if (SGPRUsed) {
3957 SGPRsUsed.insert(SGPRUsed);
3958 ++ConstantBusUseCount;
3959 }
3960
3961 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3962
3963 unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
3964
3965 for (int OpIdx : OpIndices) {
// -1 marks a source operand this opcode does not have.
3966 if (OpIdx == -1)
3967 continue;
3968
3969 const MCOperand &MO = Inst.getOperand(OpIdx);
3970 if (usesConstantBus(Inst, OpIdx)) {
3971 if (MO.isReg()) {
3972 LastSGPR = mc2PseudoReg(MO.getReg());
3973 // Pairs of registers with a partial intersections like these
3974 // s0, s[0:1]
3975 // flat_scratch_lo, flat_scratch
3976 // flat_scratch_lo, flat_scratch_hi
3977 // are theoretically valid but they are disabled anyway.
3978 // Note that this code mimics SIInstrInfo::verifyInstruction
// Each distinct register is counted once.
3979 if (SGPRsUsed.insert(LastSGPR).second) {
3980 ++ConstantBusUseCount;
3981 }
3982 } else { // Expression or a literal
3983
3984 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3985 continue; // special operand like VINTERP attr_chan
3986
3987 // An instruction may use only one literal.
3988 // This has been validated on the previous step.
3989 // See validateVOPLiteral.
3990 // This literal may be used as more than one operand.
3991 // If all these operands are of the same size,
3992 // this literal counts as one scalar value.
3993 // Otherwise it counts as 2 scalar values.
3994 // See "GFX10 Shader Programming", section 3.6.2.3.
3995
// NOTE(review): listing line 3996 is absent here; presumably it declares Size
// as the size of this operand. Confirm against the original file.
3997 if (Size < 4)
3998 Size = 4;
3999
4000 if (NumLiterals == 0) {
4001 NumLiterals = 1;
4002 LiteralSize = Size;
4003 } else if (LiteralSize != Size) {
4004 NumLiterals = 2;
4005 }
4006 }
4007 }
4008
// Checked after every operand so the error points at the first operand
// that pushes the total over the limit.
4009 if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
4010 Error(getOperandLoc(Operands, OpIdx),
4011 "invalid operand (violates constant bus restrictions)");
4012 return false;
4013 }
4014 }
4015 return true;
4016}
4017
4018std::optional<unsigned>
4019AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
4020
4021 const unsigned Opcode = Inst.getOpcode();
4022 if (!isVOPD(Opcode))
4023 return {};
4024
4025 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4026
4027 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
4028 const MCOperand &Opr = Inst.getOperand(OperandIdx);
4029 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
4030 ? Opr.getReg()
4031 : MCRegister();
4032 };
4033
4034 // On GFX1170+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
4035 // source-cache.
4036 bool SkipSrc =
4037 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1170 ||
4038 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
4039 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
4040 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx13 ||
4041 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250 ||
4042 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx13;
4043 bool AllowSameVGPR = isGFX12Plus();
4044
4045 if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
4046 for (auto OpName : {OpName::src0X, OpName::src0Y}) {
4047 int I = getNamedOperandIdx(Opcode, OpName);
4048 const MCOperand &Op = Inst.getOperand(I);
4049 if (!Op.isImm())
4050 continue;
4051 int64_t Imm = Op.getImm();
4052 if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
4053 !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
4054 return (unsigned)I;
4055 }
4056
4057 for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
4058 OpName::vsrc2Y, OpName::imm}) {
4059 int I = getNamedOperandIdx(Opcode, OpName);
4060 if (I == -1)
4061 continue;
4062 const MCOperand &Op = Inst.getOperand(I);
4063 if (Op.isImm())
4064 return (unsigned)I;
4065 }
4066 }
4067
4068 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4069 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
4070 getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
4071
4072 return InvalidCompOprIdx;
4073}
4074
4075bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
4076 const OperandVector &Operands) {
4077
4078 unsigned Opcode = Inst.getOpcode();
4079 bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
4080
4081 if (AsVOPD3) {
4082 for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
4083 AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
4084 if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
4085 (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
4086 Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
4087 }
4088 }
4089
4090 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
4091 if (!InvalidCompOprIdx.has_value())
4092 return true;
4093
4094 auto CompOprIdx = *InvalidCompOprIdx;
4095 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4096 auto ParsedIdx =
4097 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
4098 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
4099 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
4100
4101 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4102 if (CompOprIdx == VOPD::Component::DST) {
4103 if (AsVOPD3)
4104 Error(Loc, "dst registers must be distinct");
4105 else
4106 Error(Loc, "one dst register must be even and the other odd");
4107 } else {
4108 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4109 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
4110 " operands must use different VGPR banks");
4111 }
4112
4113 return false;
4114}
4115
4116// \returns true if \p Inst does not satisfy VOPD constraints, but can be
4117// potentially used as VOPD3 with the same operands.
4118bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4119 // First check if it fits VOPD
4120 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
4121 if (!InvalidCompOprIdx.has_value())
4122 return false;
4123
4124 // Then if it fits VOPD3
4125 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
4126 if (InvalidCompOprIdx.has_value()) {
4127 // If failed operand is dst it is better to show error about VOPD3
4128 // instruction as it has more capabilities and error message will be
4129 // more informative. If the dst is not legal for VOPD3, then it is not
4130 // legal for VOPD either.
4131 if (*InvalidCompOprIdx == VOPD::Component::DST)
4132 return true;
4133
4134 // Otherwise prefer VOPD as we may find ourselves in an awkward situation
4135 // with a conflict in tied implicit src2 of fmac and no asm operand to
4136 // to point to.
4137 return false;
4138 }
4139 return true;
4140}
4141
4142// \returns true is a VOPD3 instruction can be also represented as a shorter
4143// VOPD encoding.
4144bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
4145 const unsigned Opcode = Inst.getOpcode();
4146 const auto &II = getVOPDInstInfo(Opcode, &MII);
4147 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
4148 if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
4149 !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
4150 return false;
4151
4152 // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
4153 // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
4154 // be parsed as VOPD which does not accept src2.
4155 if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4156 II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4157 return false;
4158
4159 // If any modifiers are set this cannot be VOPD.
4160 for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4161 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4162 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4163 int I = getNamedOperandIdx(Opcode, OpName);
4164 if (I == -1)
4165 continue;
4166 if (Inst.getOperand(I).getImm())
4167 return false;
4168 }
4169
4170 return !tryVOPD3(Inst);
4171}
4172
4173// VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD
4174// form but switch to VOPD3 otherwise.
4175bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4176 const unsigned Opcode = Inst.getOpcode();
4177 if (!isGFX1250Plus() || !isVOPD(Opcode))
4178 return false;
4179
4180 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4181 return tryVOPD(Inst);
4182 return tryVOPD3(Inst);
4183}
4184
4185bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4186
4187 const unsigned Opc = Inst.getOpcode();
4188 const MCInstrDesc &Desc = MII.get(Opc);
4189
4190 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4191 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4192 assert(ClampIdx != -1);
4193 return Inst.getOperand(ClampIdx).getImm() == 0;
4194 }
4195
4196 return true;
4197}
4198
4201
4202bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) {
4203
4204 const unsigned Opc = Inst.getOpcode();
4205 const MCInstrDesc &Desc = MII.get(Opc);
4206
4207 if ((Desc.TSFlags & MIMGFlags) == 0)
4208 return true;
4209
4210 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
4211 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4212 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
4213
4214 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
4215 return true;
4216
4217 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
4218 return true;
4219
4220 unsigned VDataSize = getRegOperandSize(Desc, VDataIdx);
4221 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
4222 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4223 if (DMask == 0)
4224 DMask = 1;
4225
4226 bool IsPackedD16 = false;
4227 unsigned DataSize =
4228 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
4229 if (hasPackedD16()) {
4230 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4231 IsPackedD16 = D16Idx >= 0;
4232 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
4233 DataSize = (DataSize + 1) / 2;
4234 }
4235
4236 if ((VDataSize / 4) == DataSize + TFESize)
4237 return true;
4238
4239 StringRef Modifiers;
4240 if (isGFX90A())
4241 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
4242 else
4243 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
4244
4245 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
4246 return false;
4247}
4248
// Checks that the address operand size of an image instruction matches the
// size computed by getAddrSizeMIMGOp() from the base opcode, dim and a16.
// The check is skipped for opcodes without MIMGFlags bits and before GFX10.
// On a mismatch an error is reported at IDLoc and false is returned.
4249bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
4250 const unsigned Opc = Inst.getOpcode();
4251 const MCInstrDesc &Desc = MII.get(Opc);
4252
4253 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
4254 return true;
4255
4256 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4257
4258 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
// NOTE(review): the listing numbering skips 4259 here, so the initializer of
// BaseOpcode is missing from this copy (presumably a base-opcode lookup using
// Info — TODO confirm against the upstream file and restore it).
4260 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
// The resource operand is named srsrc for MIMG-flagged opcodes, rsrc otherwise.
4261 AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
4262 ? AMDGPU::OpName::srsrc
4263 : AMDGPU::OpName::rsrc;
4264 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
4265 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4266 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
4267
4268 assert(VAddr0Idx != -1);
4269 assert(SrsrcIdx != -1);
4270 assert(SrsrcIdx > VAddr0Idx);
4271
4272 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
// BVH opcodes only need the a16 operand to agree with the base opcode.
4273 if (BaseOpcode->BVH) {
4274 if (IsA16 == BaseOpcode->A16)
4275 return true;
4276 Error(IDLoc, "image address size does not match a16");
4277 return false;
4278 }
4279
4280 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4281 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
// More than one operand between vaddr0 and the resource operand means the
// address is split over several operands (NSA form); then the operand count
// is the address size, otherwise the vaddr0 register size divided by 4 is.
4282 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4283 unsigned ActualAddrSize =
4284 IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(Desc, VAddr0Idx) / 4;
4285
4286 unsigned ExpectedAddrSize =
4287 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
4288
4289 if (IsNSA) {
4290 if (hasPartialNSAEncoding() &&
4291 ExpectedAddrSize >
// NOTE(review): the listing numbering skips 4292 here; the right-hand side of
// this comparison and the opening brace of the if-body are missing from this
// copy — restore from the upstream file.
// With partial NSA the last address operand may be wider than one register,
// so its size is added instead of counting it as one.
4293 int VAddrLastIdx = SrsrcIdx - 1;
4294 unsigned VAddrLastSize = getRegOperandSize(Desc, VAddrLastIdx) / 4;
4295
4296 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4297 }
4298 } else {
// Non-NSA: expected sizes above 12 are rounded up to 16.
4299 if (ExpectedAddrSize > 12)
4300 ExpectedAddrSize = 16;
4301
4302 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
4303 // This provides backward compatibility for assembly created
4304 // before 160b/192b/224b types were directly supported.
4305 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4306 return true;
4307 }
4308
4309 if (ActualAddrSize == ExpectedAddrSize)
4310 return true;
4311
4312 Error(IDLoc, "image address size does not match dim and a16");
4313 return false;
4314}
4315
4316bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4317
4318 const unsigned Opc = Inst.getOpcode();
4319 const MCInstrDesc &Desc = MII.get(Opc);
4320
4321 if ((Desc.TSFlags & MIMGFlags) == 0)
4322 return true;
4323 if (!Desc.mayLoad() || !Desc.mayStore())
4324 return true; // Not atomic
4325
4326 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4327 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4328
4329 // This is an incomplete check because image_atomic_cmpswap
4330 // may only use 0x3 and 0xf while other atomic operations
4331 // may use 0x1 and 0x3. However these limitations are
4332 // verified when we check that dmask matches dst size.
4333 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4334}
4335
4336bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4337
4338 const unsigned Opc = Inst.getOpcode();
4339 const MCInstrDesc &Desc = MII.get(Opc);
4340
4341 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4342 return true;
4343
4344 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4345 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4346
4347 // GATHER4 instructions use dmask in a different fashion compared to
4348 // other MIMG instructions. The only useful DMASK values are
4349 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4350 // (red,red,red,red) etc.) The ISA document doesn't mention
4351 // this.
4352 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4353}
4354
// Checks that an image instruction on GFX10+ was written with a dim operand.
// Returns true before GFX10 and for opcodes without MIMGFlags bits; otherwise
// the parsed operands (skipping operand 0) are searched for one where isDim()
// holds.
4355bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4356 const OperandVector &Operands) {
4357 if (!isGFX10Plus())
4358 return true;
4359
4360 const unsigned Opc = Inst.getOpcode();
4361 const MCInstrDesc &Desc = MII.get(Opc);
4362
4363 if ((Desc.TSFlags & MIMGFlags) == 0)
4364 return true;
4365
4366 // image_bvh_intersect_ray instructions do not have dim
// NOTE(review): the listing numbering skips 4367 here, so the condition
// guarding the following return is missing from this copy (presumably a BVH
// test on the base opcode — TODO confirm against the upstream file). As
// listed, the return below is unconditional and the loop is unreachable.
4368 return true;
4369
4370 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4371 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4372 if (Op.isDim())
4373 return true;
4374 }
4375 return false;
4376}
4377
// For image instructions whose base opcode is marked MSAA, checks that the
// dim operand selects a dimension that is itself marked MSAA. Returns true
// for opcodes without MIMGFlags bits and for non-MSAA base opcodes.
4378bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4379 const unsigned Opc = Inst.getOpcode();
4380 const MCInstrDesc &Desc = MII.get(Opc);
4381
4382 if ((Desc.TSFlags & MIMGFlags) == 0)
4383 return true;
4384
4385 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4386 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
// NOTE(review): the listing numbering skips 4387 here, so the initializer of
// BaseOpcode is missing from this copy (presumably a base-opcode lookup using
// Info — TODO confirm against the upstream file and restore it).
4388
4389 if (!BaseOpcode->MSAA)
4390 return true;
4391
4392 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4393 assert(DimIdx != -1);
4394
// The dim immediate is an encoding that is mapped to its MIMGDimInfo entry.
4395 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4396 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4397
4398 return DimInfo->MSAA;
4399}
4400
4401static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4402{
4403 switch (Opcode) {
4404 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4405 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4406 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4407 return true;
4408 default:
4409 return false;
4410 }
4411}
4412
4413// movrels* opcodes should only allow VGPRS as src0.
4414// This is specified in .td description for vop1/vop3,
4415// but sdwa is handled differently. See isSDWAOperand.
4416bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4417 const OperandVector &Operands) {
4418
4419 const unsigned Opc = Inst.getOpcode();
4420 const MCInstrDesc &Desc = MII.get(Opc);
4421
4422 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4423 return true;
4424
4425 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4426 assert(Src0Idx != -1);
4427
4428 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4429 if (Src0.isReg()) {
4430 auto Reg = mc2PseudoReg(Src0.getReg());
4431 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4432 if (!isSGPR(Reg, TRI))
4433 return true;
4434 }
4435
4436 Error(getOperandLoc(Operands, Src0Idx), "source operand must be a VGPR");
4437 return false;
4438}
4439
4440bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4441 const OperandVector &Operands) {
4442
4443 const unsigned Opc = Inst.getOpcode();
4444
4445 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4446 return true;
4447
4448 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4449 assert(Src0Idx != -1);
4450
4451 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4452 if (!Src0.isReg())
4453 return true;
4454
4455 auto Reg = mc2PseudoReg(Src0.getReg());
4456 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4457 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4458 Error(getOperandLoc(Operands, Src0Idx),
4459 "source operand must be either a VGPR or an inline constant");
4460 return false;
4461 }
4462
4463 return true;
4464}
4465
4466bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4467 const OperandVector &Operands) {
4468 unsigned Opcode = Inst.getOpcode();
4469 const MCInstrDesc &Desc = MII.get(Opcode);
4470
4471 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4472 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4473 return true;
4474
4475 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4476 if (Src2Idx == -1)
4477 return true;
4478
4479 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4480 Error(getOperandLoc(Operands, Src2Idx),
4481 "inline constants are not allowed for this operand");
4482 return false;
4483 }
4484
4485 return true;
4486}
4487
4488bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4489 const OperandVector &Operands) {
4490 const unsigned Opc = Inst.getOpcode();
4491 const MCInstrDesc &Desc = MII.get(Opc);
4492
4493 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4494 return true;
4495
4496 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4497 if (BlgpIdx != -1) {
4498 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
4499 int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
4500
4501 unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
4502 unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();
4503
4504 // Validate the correct register size was used for the floating point
4505 // format operands
4506
4507 bool Success = true;
4508 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
4509 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4510 Error(getOperandLoc(Operands, Src0Idx),
4511 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4512 Success = false;
4513 }
4514
4515 if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
4516 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4517 Error(getOperandLoc(Operands, Src1Idx),
4518 "wrong register tuple size for blgp value " + Twine(BLGP));
4519 Success = false;
4520 }
4521
4522 return Success;
4523 }
4524 }
4525
4526 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4527 if (Src2Idx == -1)
4528 return true;
4529
4530 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4531 if (!Src2.isReg())
4532 return true;
4533
4534 MCRegister Src2Reg = Src2.getReg();
4535 MCRegister DstReg = Inst.getOperand(0).getReg();
4536 if (Src2Reg == DstReg)
4537 return true;
4538
4539 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4540 if (TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[0], HwMode))
4541 .getSizeInBits() <= 128)
4542 return true;
4543
4544 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4545 Error(getOperandLoc(Operands, Src2Idx),
4546 "source 2 operand must not partially overlap with dst");
4547 return false;
4548 }
4549
4550 return true;
4551}
4552
4553bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4554 switch (Inst.getOpcode()) {
4555 default:
4556 return true;
4557 case V_DIV_SCALE_F32_gfx6_gfx7:
4558 case V_DIV_SCALE_F32_vi:
4559 case V_DIV_SCALE_F32_gfx10:
4560 case V_DIV_SCALE_F64_gfx6_gfx7:
4561 case V_DIV_SCALE_F64_vi:
4562 case V_DIV_SCALE_F64_gfx10:
4563 break;
4564 }
4565
4566 // TODO: Check that src0 = src1 or src2.
4567
4568 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4569 AMDGPU::OpName::src2_modifiers,
4570 AMDGPU::OpName::src2_modifiers}) {
4571 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4572 .getImm() &
4574 return false;
4575 }
4576 }
4577
4578 return true;
4579}
4580
4581bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4582
4583 const unsigned Opc = Inst.getOpcode();
4584 const MCInstrDesc &Desc = MII.get(Opc);
4585
4586 if ((Desc.TSFlags & MIMGFlags) == 0)
4587 return true;
4588
4589 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4590 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4591 if (isCI() || isSI())
4592 return false;
4593 }
4594
4595 return true;
4596}
4597
4598bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4599 const unsigned Opc = Inst.getOpcode();
4600 const MCInstrDesc &Desc = MII.get(Opc);
4601
4602 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4603 return true;
4604
4605 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4606
4607 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4608}
4609
4610static bool IsRevOpcode(const unsigned Opcode)
4611{
4612 switch (Opcode) {
4613 case AMDGPU::V_SUBREV_F32_e32:
4614 case AMDGPU::V_SUBREV_F32_e64:
4615 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4616 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4617 case AMDGPU::V_SUBREV_F32_e32_vi:
4618 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4619 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4620 case AMDGPU::V_SUBREV_F32_e64_vi:
4621
4622 case AMDGPU::V_SUBREV_CO_U32_e32:
4623 case AMDGPU::V_SUBREV_CO_U32_e64:
4624 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4625 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4626
4627 case AMDGPU::V_SUBBREV_U32_e32:
4628 case AMDGPU::V_SUBBREV_U32_e64:
4629 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4630 case AMDGPU::V_SUBBREV_U32_e32_vi:
4631 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4632 case AMDGPU::V_SUBBREV_U32_e64_vi:
4633
4634 case AMDGPU::V_SUBREV_U32_e32:
4635 case AMDGPU::V_SUBREV_U32_e64:
4636 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4637 case AMDGPU::V_SUBREV_U32_e32_vi:
4638 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4639 case AMDGPU::V_SUBREV_U32_e64_vi:
4640
4641 case AMDGPU::V_SUBREV_F16_e32:
4642 case AMDGPU::V_SUBREV_F16_e64:
4643 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4644 case AMDGPU::V_SUBREV_F16_e32_vi:
4645 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4646 case AMDGPU::V_SUBREV_F16_e64_vi:
4647
4648 case AMDGPU::V_SUBREV_U16_e32:
4649 case AMDGPU::V_SUBREV_U16_e64:
4650 case AMDGPU::V_SUBREV_U16_e32_vi:
4651 case AMDGPU::V_SUBREV_U16_e64_vi:
4652
4653 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4654 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4655 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4656
4657 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4658 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4659
4660 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4661 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4662
4663 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4664 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4665
4666 case AMDGPU::V_LSHRREV_B32_e32:
4667 case AMDGPU::V_LSHRREV_B32_e64:
4668 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4669 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4670 case AMDGPU::V_LSHRREV_B32_e32_vi:
4671 case AMDGPU::V_LSHRREV_B32_e64_vi:
4672 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4673 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4674
4675 case AMDGPU::V_ASHRREV_I32_e32:
4676 case AMDGPU::V_ASHRREV_I32_e64:
4677 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4678 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4679 case AMDGPU::V_ASHRREV_I32_e32_vi:
4680 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4681 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4682 case AMDGPU::V_ASHRREV_I32_e64_vi:
4683
4684 case AMDGPU::V_LSHLREV_B32_e32:
4685 case AMDGPU::V_LSHLREV_B32_e64:
4686 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4687 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4688 case AMDGPU::V_LSHLREV_B32_e32_vi:
4689 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4690 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4691 case AMDGPU::V_LSHLREV_B32_e64_vi:
4692
4693 case AMDGPU::V_LSHLREV_B16_e32:
4694 case AMDGPU::V_LSHLREV_B16_e64:
4695 case AMDGPU::V_LSHLREV_B16_e32_vi:
4696 case AMDGPU::V_LSHLREV_B16_e64_vi:
4697 case AMDGPU::V_LSHLREV_B16_gfx10:
4698
4699 case AMDGPU::V_LSHRREV_B16_e32:
4700 case AMDGPU::V_LSHRREV_B16_e64:
4701 case AMDGPU::V_LSHRREV_B16_e32_vi:
4702 case AMDGPU::V_LSHRREV_B16_e64_vi:
4703 case AMDGPU::V_LSHRREV_B16_gfx10:
4704
4705 case AMDGPU::V_ASHRREV_I16_e32:
4706 case AMDGPU::V_ASHRREV_I16_e64:
4707 case AMDGPU::V_ASHRREV_I16_e32_vi:
4708 case AMDGPU::V_ASHRREV_I16_e64_vi:
4709 case AMDGPU::V_ASHRREV_I16_gfx10:
4710
4711 case AMDGPU::V_LSHLREV_B64_e64:
4712 case AMDGPU::V_LSHLREV_B64_gfx10:
4713 case AMDGPU::V_LSHLREV_B64_vi:
4714
4715 case AMDGPU::V_LSHRREV_B64_e64:
4716 case AMDGPU::V_LSHRREV_B64_gfx10:
4717 case AMDGPU::V_LSHRREV_B64_vi:
4718
4719 case AMDGPU::V_ASHRREV_I64_e64:
4720 case AMDGPU::V_ASHRREV_I64_gfx10:
4721 case AMDGPU::V_ASHRREV_I64_vi:
4722
4723 case AMDGPU::V_PK_LSHLREV_B16:
4724 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4725 case AMDGPU::V_PK_LSHLREV_B16_vi:
4726
4727 case AMDGPU::V_PK_LSHRREV_B16:
4728 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4729 case AMDGPU::V_PK_LSHRREV_B16_vi:
4730 case AMDGPU::V_PK_ASHRREV_I16:
4731 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4732 case AMDGPU::V_PK_ASHRREV_I16_vi:
4733 return true;
4734 default:
4735 return false;
4736 }
4737}
4738
4739bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
4740 const OperandVector &Operands) {
4741 using namespace SIInstrFlags;
4742 const unsigned Opcode = Inst.getOpcode();
4743 const MCInstrDesc &Desc = MII.get(Opcode);
4744
4745 // lds_direct register is defined so that it can be used
4746 // with 9-bit operands only. Ignore encodings which do not accept these.
4747 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4748 if ((Desc.TSFlags & Enc) == 0)
4749 return true;
4750
4751 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4752 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4753 if (SrcIdx == -1)
4754 break;
4755 const auto &Src = Inst.getOperand(SrcIdx);
4756 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4757
4758 if (isGFX90A() || isGFX11Plus()) {
4759 Error(getOperandLoc(Operands, SrcIdx),
4760 "lds_direct is not supported on this GPU");
4761 return false;
4762 }
4763
4764 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) {
4765 Error(getOperandLoc(Operands, SrcIdx),
4766 "lds_direct cannot be used with this instruction");
4767 return false;
4768 }
4769
4770 if (SrcName != OpName::src0) {
4771 Error(getOperandLoc(Operands, SrcIdx),
4772 "lds_direct may be used as src0 only");
4773 return false;
4774 }
4775 }
4776 }
4777
4778 return true;
4779}
4780
4781SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4782 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4783 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4784 if (Op.isFlatOffset())
4785 return Op.getStartLoc();
4786 }
4787 return getLoc();
4788}
4789
4790bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4791 const OperandVector &Operands) {
4792 auto Opcode = Inst.getOpcode();
4793 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4794 if (OpNum == -1)
4795 return true;
4796
4797 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4798 if ((TSFlags & SIInstrFlags::FLAT))
4799 return validateFlatOffset(Inst, Operands);
4800
4801 if ((TSFlags & SIInstrFlags::SMRD))
4802 return validateSMEMOffset(Inst, Operands);
4803
4804 const auto &Op = Inst.getOperand(OpNum);
4805 // GFX12+ buffer ops: InstOffset is signed 24, but must not be a negative.
4806 if (isGFX12Plus() &&
4807 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4808 const unsigned OffsetSize = 24;
4809 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
4810 Error(getFlatOffsetLoc(Operands),
4811 Twine("expected a ") + Twine(OffsetSize - 1) +
4812 "-bit unsigned offset for buffer ops");
4813 return false;
4814 }
4815 } else {
4816 const unsigned OffsetSize = 16;
4817 if (!isUIntN(OffsetSize, Op.getImm())) {
4818 Error(getFlatOffsetLoc(Operands),
4819 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4820 return false;
4821 }
4822 }
4823 return true;
4824}
4825
// Validates the offset operand of a FLAT instruction. Returns true for
// non-FLAT opcodes. A non-zero offset is rejected when the subtarget has no
// flat offsets; otherwise the immediate must fit in the number of bits given
// by getNumFlatOffsetBits() and, unless negative offsets are allowed, must
// not be negative. Errors are reported at the flat offset operand.
4826bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4827 const OperandVector &Operands) {
4828 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4829 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4830 return true;
4831
4832 auto Opcode = Inst.getOpcode();
4833 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4834 assert(OpNum != -1);
4835
4836 const auto &Op = Inst.getOperand(OpNum);
4837 if (!hasFlatOffsets() && Op.getImm() != 0) {
4838 Error(getFlatOffsetLoc(Operands),
4839 "flat offset modifier is not supported on this GPU");
4840 return false;
4841 }
4842
4843 // For pre-GFX12 FLAT instructions the offset must be positive;
4844 // MSB is ignored and forced to zero.
4845 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4846 bool AllowNegative =
// NOTE(review): the listing numbering skips 4847 here, so the first part of
// the AllowNegative expression is missing from this copy — restore it from
// the upstream file. Only the trailing isGFX12Plus() term is visible.
4848 isGFX12Plus();
// When negative offsets are not allowed, the message advertises one bit less
// and an unsigned range.
4849 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4850 Error(getFlatOffsetLoc(Operands),
4851 Twine("expected a ") +
4852 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4853 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4854 return false;
4855 }
4856
4857 return true;
4858}
4859
4860SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4861 // Start with second operand because SMEM Offset cannot be dst or src0.
4862 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4863 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4864 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4865 return Op.getStartLoc();
4866 }
4867 return getLoc();
4868}
4869
// Validates the immediate offset of an SMRD-flagged instruction. The check is
// skipped on SI/CI, for opcodes without the SMRD flag, when there is no
// offset operand, and when the offset is not an immediate. On failure an
// error naming the expected offset width is reported at the SMEM offset
// operand and false is returned.
4870bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4871 const OperandVector &Operands) {
4872 if (isCI() || isSI())
4873 return true;
4874
4875 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4876 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4877 return true;
4878
4879 auto Opcode = Inst.getOpcode();
4880 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4881 if (OpNum == -1)
4882 return true;
4883
4884 const auto &Op = Inst.getOperand(OpNum);
4885 if (!Op.isImm())
4886 return true;
4887
4888 uint64_t Offset = Op.getImm();
4889 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
// NOTE(review): the listing numbering skips 4890-4891 here, so the condition
// guarding the following return (the actual legality test on Offset) is
// missing from this copy — restore it from the upstream file. As listed, the
// return below is unconditional.
4892 return true;
4893
// The message depends on GFX12+, on whether the opcode is a buffer access,
// and on isVI().
4894 Error(getSMEMOffsetLoc(Operands),
4895 isGFX12Plus() && IsBuffer
4896 ? "expected a 23-bit unsigned offset for buffer ops"
4897 : isGFX12Plus() ? "expected a 24-bit signed offset"
4898 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4899 : "expected a 21-bit signed offset");
4900
4901 return false;
4902}
4903
// Checks that a SOP2/SOPC instruction uses at most one unique literal across
// src0 and src1. Unresolved expressions each count as a literal; immediates
// (and literal expressions) that are not inline constants count once per
// distinct encoded 32-bit value. On failure an error is reported on src1.
4904bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
4905 const OperandVector &Operands) {
4906 unsigned Opcode = Inst.getOpcode();
4907 const MCInstrDesc &Desc = MII.get(Opcode);
4908 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4909 return true;
4910
4911 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4912 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4913
4914 const int OpIndices[] = { Src0Idx, Src1Idx };
4915
4916 unsigned NumExprs = 0;
4917 unsigned NumLiterals = 0;
// LiteralValue is only read once NumLiterals is non-zero (see the comparison
// below), so it is left uninitialized here.
4918 int64_t LiteralValue;
4919
4920 for (int OpIdx : OpIndices) {
4921 if (OpIdx == -1) break;
4922
4923 const MCOperand &MO = Inst.getOperand(OpIdx);
4924 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
// NOTE(review): the listing numbering skips 4925 here; the statement that
// opens the scope closed by the extra brace near the end of the loop is
// missing from this copy (presumably the check that excludes special imm
// operands — TODO confirm against the upstream file).
4926 bool IsLit = false;
4927 std::optional<int64_t> Imm;
4928 if (MO.isImm()) {
4929 Imm = MO.getImm();
4930 } else if (MO.isExpr()) {
4931 if (isLitExpr(MO.getExpr())) {
4932 IsLit = true;
4933 Imm = getLitValue(MO.getExpr());
4934 }
4935 } else {
4936 continue;
4937 }
4938
// No value means an expression that is not a literal expression.
4939 if (!Imm.has_value()) {
4940 ++NumExprs;
4941 } else if (!isInlineConstant(Inst, OpIdx)) {
4942 auto OpType = static_cast<AMDGPU::OperandType>(
4943 Desc.operands()[OpIdx].OperandType);
4944 int64_t Value = encode32BitLiteral(*Imm, OpType, IsLit);
4945 if (NumLiterals == 0 || LiteralValue != Value) {
// NOTE(review): the listing numbering skips 4946 here; presumably the
// assignment that records Value in LiteralValue — TODO confirm against the
// upstream file. As listed, LiteralValue is never written.
4947 ++NumLiterals;
4948 }
4949 }
4950 }
4951 }
4952
4953 if (NumLiterals + NumExprs <= 1)
4954 return true;
4955
4956 Error(getOperandLoc(Operands, Src1Idx),
4957 "only one unique literal operand is allowed");
4958 return false;
4959}
4960
4961bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4962 const unsigned Opc = Inst.getOpcode();
4963 if (isPermlane16(Opc)) {
4964 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4965 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4966
4967 if (OpSel & ~3)
4968 return false;
4969 }
4970
4971 uint64_t TSFlags = MII.get(Opc).TSFlags;
4972
4973 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4974 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4975 if (OpSelIdx != -1) {
4976 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4977 return false;
4978 }
4979 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4980 if (OpSelHiIdx != -1) {
4981 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4982 return false;
4983 }
4984 }
4985
4986 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4987 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4988 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4989 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4990 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4991 if (OpSel & 3)
4992 return false;
4993 }
4994
4995 // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
4996 // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
4997 // the first SGPR and use it for both the low and high operations.
4998 if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
4999 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
5000 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5001 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5002 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
5003
5004 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
5005 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5006 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5007 unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
5008
5009 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5010
5011 auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
5012 unsigned Mask = 1U << Index;
5013 return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
5014 };
5015
5016 if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
5017 !VerifyOneSGPR(/*Index=*/0))
5018 return false;
5019 if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
5020 !VerifyOneSGPR(/*Index=*/1))
5021 return false;
5022
5023 int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
5024 if (Src2Idx != -1) {
5025 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
5026 if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
5027 !VerifyOneSGPR(/*Index=*/2))
5028 return false;
5029 }
5030 }
5031
5032 return true;
5033}
5034
5035bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
5036 if (!hasTrue16Insts())
5037 return true;
5038 const MCRegisterInfo *MRI = getMRI();
5039 const unsigned Opc = Inst.getOpcode();
5040 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5041 if (OpSelIdx == -1)
5042 return true;
5043 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
5044 // If the value is 0 we could have a default OpSel Operand, so conservatively
5045 // allow it.
5046 if (OpSelOpValue == 0)
5047 return true;
5048 unsigned OpCount = 0;
5049 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
5050 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
5051 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
5052 if (OpIdx == -1)
5053 continue;
5054 const MCOperand &Op = Inst.getOperand(OpIdx);
5055 if (Op.isReg() &&
5056 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
5057 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
5058 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
5059 if (OpSelOpIsHi != VGPRSuffixIsHi)
5060 return false;
5061 }
5062 ++OpCount;
5063 }
5064
5065 return true;
5066}
5067
5068bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
5069 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
5070
5071 const unsigned Opc = Inst.getOpcode();
5072 uint64_t TSFlags = MII.get(Opc).TSFlags;
5073
5074 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
5075 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
5076 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
5077 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
5078 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
5079 !(TSFlags & SIInstrFlags::IsSWMMAC))
5080 return true;
5081
5082 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
5083 if (NegIdx == -1)
5084 return true;
5085
5086 unsigned Neg = Inst.getOperand(NegIdx).getImm();
5087
5088 // Instructions that have neg_lo or neg_hi operand but neg modifier is allowed
5089 // on some src operands but not allowed on other.
5090 // It is convenient that such instructions don't have src_modifiers operand
5091 // for src operands that don't allow neg because they also don't allow opsel.
5092
5093 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
5094 AMDGPU::OpName::src1_modifiers,
5095 AMDGPU::OpName::src2_modifiers};
5096
5097 for (unsigned i = 0; i < 3; ++i) {
5098 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
5099 if (Neg & (1 << i))
5100 return false;
5101 }
5102 }
5103
5104 return true;
5105}
5106
5107bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
5108 const OperandVector &Operands) {
5109 const unsigned Opc = Inst.getOpcode();
5110 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
5111 if (DppCtrlIdx >= 0) {
5112 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
5113
5114 if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
5115 AMDGPU::isDPALU_DPP(MII.get(Opc), MII, getSTI())) {
5116 // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
5117 // only on GFX12.
5118 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5119 Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
5120 : "DP ALU dpp only supports row_newbcast");
5121 return false;
5122 }
5123 }
5124
5125 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
5126 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5127
5128 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
5129 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5130 if (Src1Idx >= 0) {
5131 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5132 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5133 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
5134 Error(getOperandLoc(Operands, Src1Idx),
5135 "invalid operand for instruction");
5136 return false;
5137 }
5138 if (Src1.isImm()) {
5139 Error(getInstLoc(Operands),
5140 "src1 immediate operand invalid for instruction");
5141 return false;
5142 }
5143 }
5144 }
5145
5146 return true;
5147}
5148
5149// Check if VCC register matches wavefront size
5150bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5151 return (Reg == AMDGPU::VCC && isWave64()) ||
5152 (Reg == AMDGPU::VCC_LO && isWave32());
5153}
5154
5155// One unique literal can be used. VOP3 literal is only allowed in GFX10+
//
// Walks the source operands returned by getSrcOperandIndices() and checks
// every immediate/expression operand that is not an inline constant (or that
// carries a forced lit/lit64 modifier). Reports an error and returns false
// when a literal is not encodable, when literals are not supported at all, or
// when a second, different literal is found. Returns true otherwise.
//
// NOTE(review): this copy of the function appears to be missing a few
// continuation lines (see the notes below); confirm against the original
// file before relying on the exact expressions.
5156bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
5157 const OperandVector &Operands) {
5158 unsigned Opcode = Inst.getOpcode();
5159 const MCInstrDesc &Desc = MII.get(Opcode);
// An opcode with a named 'imm' operand is treated as having a mandatory
// literal.
5160 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
// Only VOP3/VOP3P encodings, opcodes with a mandatory literal, and VOPD are
// subject to this check.
5161 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
5162 !HasMandatoryLiteral && !isVOPD(Opcode))
5163 return true;
5164
5165 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
5166
// Index and value of the literal seen so far, if any.
5167 std::optional<unsigned> LiteralOpIdx;
5168 std::optional<uint64_t> LiteralValue;
5169
5170 for (int OpIdx : OpIndices) {
5171 if (OpIdx == -1)
5172 continue;
5173
5174 const MCOperand &MO = Inst.getOperand(OpIdx);
5175 if (!MO.isImm() && !MO.isExpr())
5176 continue;
5177 if (!isSISrcOperand(Desc, OpIdx))
5178 continue;
5179
// Imm stays empty when the operand is an expression that is not a literal
// expression, i.e. its value is not known here.
5180 std::optional<int64_t> Imm;
5181 if (MO.isImm())
5182 Imm = MO.getImm();
5183 else if (MO.isExpr() && isLitExpr(MO.getExpr()))
5184 Imm = getLitValue(MO.getExpr());
5185
5186 bool IsAnotherLiteral = false;
5187 bool IsForcedLit = findMCOperand(Operands, OpIdx).isForcedLit();
5188 bool IsForcedLit64 = findMCOperand(Operands, OpIdx).isForcedLit64();
5189 if (!Imm.has_value()) {
5190 // Literal value not known, so we conservatively assume it's different.
5191 IsAnotherLiteral = true;
5192 } else if (IsForcedLit || IsForcedLit64 || !isInlineConstant(Inst, OpIdx)) {
5193 uint64_t Value = *Imm;
// NOTE(review): the expression below is unbalanced in this copy; a line
// between the two that follow seems to be missing.
5194 bool IsForcedFP64 =
5195 Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
5197 HasMandatoryLiteral);
5198 unsigned OpTy = Desc.operands()[OpIdx].OperandType;
// NOTE(review): same here, a continuation line seems to be missing inside
// the IsFP64 expression.
5199 bool IsFP64 =
5200 (IsForcedFP64 || (AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
5202 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
5203 bool IsValid32Op =
5204 IsForcedLit || AMDGPU::isValid32BitLiteral(Value, IsFP64);
5205
// Reject a value that does not fit a 32-bit literal, or a forced lit64
// without a mandatory literal, unless 64-bit literals are available and the
// instruction descriptor size is 4.
5206 if (((!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
5207 !IsForcedFP64) ||
5208 (IsForcedLit64 && !HasMandatoryLiteral)) &&
5209 (!has64BitLiterals() || Desc.getSize() != 4)) {
5210 Error(getOperandLoc(Operands, OpIdx),
5211 "invalid operand for instruction");
5212 return false;
5213 }
5214
5215 // Only src0 can use lit64 in VOP* encoding.
5216 if (!IsForcedFP64 && (IsForcedLit64 || !IsValid32Op) &&
5217 OpIdx != getNamedOperandIdx(Opcode, OpName::src0)) {
5218 Error(getOperandLoc(Operands, OpIdx),
5219 "invalid operand for instruction");
5220 return false;
5221 }
5222
// For such FP64 operands only the high 32 bits of the value are compared.
5223 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5224 Value = Hi_32(Value);
5225
// A literal equal to the one already seen does not count as another one.
// NOTE(review): no assignment to LiteralValue is visible in this copy; a
// line after the next one seems to be missing.
5226 IsAnotherLiteral = !LiteralValue || *LiteralValue != Value;
5228 }
5229
5230 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5231 !getFeatureBits()[FeatureVOP3Literal]) {
5232 Error(getOperandLoc(Operands, OpIdx),
5233 "literal operands are not supported");
5234 return false;
5235 }
5236
// A second, different literal is an error; it is reported at the later of
// the two operand locations.
5237 if (LiteralOpIdx && IsAnotherLiteral) {
5238 Error(getLaterLoc(getOperandLoc(Operands, OpIdx),
5239 getOperandLoc(Operands, *LiteralOpIdx)),
5240 "only one unique literal operand is allowed");
5241 return false;
5242 }
5243
5244 if (IsAnotherLiteral)
5245 LiteralOpIdx = OpIdx;
5246 }
5247
5248 return true;
5249}
5250
5251// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5252static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5253 const MCRegisterInfo *MRI) {
5254 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5255 if (OpIdx < 0)
5256 return -1;
5257
5258 const MCOperand &Op = Inst.getOperand(OpIdx);
5259 if (!Op.isReg())
5260 return -1;
5261
5262 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5263 auto Reg = Sub ? Sub : Op.getReg();
5264 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5265 return AGPR32.contains(Reg) ? 1 : 0;
5266}
5267
// Check the use of AGPRs as data/destination registers of memory
// instructions. With FeatureGFX90AInsts the data and dst registers must be of
// the same kind (both AGPR or both not); without it neither may be an AGPR.
// Returns true when the instruction passes or is not subject to the check.
5268bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5269 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
// NOTE(review): a line of this flag mask seems to be missing in this copy
// (between FLAT | MUBUF and DS); confirm against the original file.
5270 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
5272 SIInstrFlags::DS)) == 0)
5273 return true;
5274
// DS instructions name their data operand data0; the others use vdata.
5275 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5276 ? AMDGPU::OpName::data0
5277 : AMDGPU::OpName::vdata;
5278
5279 const MCRegisterInfo *MRI = getMRI();
// IsAGPROperand yields -1 (no register operand), 0 (not AGPR) or 1 (AGPR).
5280 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5281 int DataAreg = IsAGPROperand(Inst, DataName, MRI);
5282
// For DS, data0 and data1 (when both are registers) must be of the same kind.
5283 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
5284 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5285 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5286 return false;
5287 }
5288
5289 auto FB = getFeatureBits();
5290 if (FB[AMDGPU::FeatureGFX90AInsts]) {
// Nothing to compare unless both data and dst are registers.
5291 if (DataAreg < 0 || DstAreg < 0)
5292 return true;
5293 return DstAreg == DataAreg;
5294 }
5295
// Without FeatureGFX90AInsts neither operand may be an AGPR.
5296 return DstAreg < 1 && DataAreg < 1;
5297}
5298
5299bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5300 auto FB = getFeatureBits();
5301 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5302 return true;
5303
5304 unsigned Opc = Inst.getOpcode();
5305 const MCRegisterInfo *MRI = getMRI();
5306 // DS_READ_B96_TR_B6 is the only DS instruction in GFX950, that allows
5307 // unaligned VGPR. All others only allow even aligned VGPRs.
5308 if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5309 return true;
5310
5311 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5312 switch (Opc) {
5313 default:
5314 break;
5315 case AMDGPU::DS_LOAD_TR6_B96:
5316 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5317 // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250, that
5318 // allows unaligned VGPR. All others only allow even aligned VGPRs.
5319 return true;
5320 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5321 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5322 // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250, that
5323 // allows unaligned VGPR for vdst, but other operands still only allow
5324 // even aligned VGPRs.
5325 int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
5326 if (VAddrIdx != -1) {
5327 const MCOperand &Op = Inst.getOperand(VAddrIdx);
5328 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5329 if ((Sub - AMDGPU::VGPR0) & 1)
5330 return false;
5331 }
5332 return true;
5333 }
5334 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5335 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5336 return true;
5337 }
5338 }
5339
5340 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5341 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5342 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
5343 const MCOperand &Op = Inst.getOperand(I);
5344 if (!Op.isReg())
5345 continue;
5346
5347 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5348 if (!Sub)
5349 continue;
5350
5351 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
5352 return false;
5353 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
5354 return false;
5355 }
5356
5357 return true;
5358}
5359
5360SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5361 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5362 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5363 if (Op.isBLGP())
5364 return Op.getStartLoc();
5365 }
5366 return SMLoc();
5367}
5368
5369bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5370 const OperandVector &Operands) {
5371 unsigned Opc = Inst.getOpcode();
5372 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5373 if (BlgpIdx == -1)
5374 return true;
5375 SMLoc BLGPLoc = getBLGPLoc(Operands);
5376 if (!BLGPLoc.isValid())
5377 return true;
5378 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5379 auto FB = getFeatureBits();
5380 bool UsesNeg = false;
5381 if (FB[AMDGPU::FeatureGFX940Insts]) {
5382 switch (Opc) {
5383 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5384 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5385 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5386 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5387 UsesNeg = true;
5388 }
5389 }
5390
5391 if (IsNeg == UsesNeg)
5392 return true;
5393
5394 Error(BLGPLoc,
5395 UsesNeg ? "invalid modifier: blgp is not supported"
5396 : "invalid modifier: neg is not supported");
5397
5398 return false;
5399}
5400
5401bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5402 const OperandVector &Operands) {
5403 if (!isGFX11Plus())
5404 return true;
5405
5406 unsigned Opc = Inst.getOpcode();
5407 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5408 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5409 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5410 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5411 return true;
5412
5413 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5414 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5415 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5416 if (Reg == AMDGPU::SGPR_NULL)
5417 return true;
5418
5419 Error(getOperandLoc(Operands, Src0Idx), "src0 must be null");
5420 return false;
5421}
5422
5423bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5424 const OperandVector &Operands) {
5425 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5426 if ((TSFlags & SIInstrFlags::DS) == 0)
5427 return true;
5428 if (TSFlags & SIInstrFlags::GWS)
5429 return validateGWS(Inst, Operands);
5430 // Only validate GDS for non-GWS instructions.
5431 if (hasGDS())
5432 return true;
5433 int GDSIdx =
5434 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5435 if (GDSIdx < 0)
5436 return true;
5437 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5438 if (GDS) {
5439 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5440 Error(S, "gds modifier is not supported on this GPU");
5441 return false;
5442 }
5443 return true;
5444}
5445
5446// gfx90a has an undocumented limitation:
5447// DS_GWS opcodes must use even aligned registers.
5448bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5449 const OperandVector &Operands) {
5450 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5451 return true;
5452
5453 int Opc = Inst.getOpcode();
5454 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5455 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5456 return true;
5457
5458 const MCRegisterInfo *MRI = getMRI();
5459 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5460 int Data0Pos =
5461 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5462 assert(Data0Pos != -1);
5463 auto Reg = Inst.getOperand(Data0Pos).getReg();
5464 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5465 if (RegIdx & 1) {
5466 Error(getOperandLoc(Operands, Data0Pos), "vgpr must be even aligned");
5467 return false;
5468 }
5469
5470 return true;
5471}
5472
// Validate the cache-policy (cpol) operand of an instruction against the
// subtarget and the instruction kind. Returns true when the opcode has no
// cpol operand or no check fails. On GFX12+ the th/scope part is delegated to
// validateTHAndScopeBits().
//
// NOTE(review): this copy of the function appears to be missing a few lines
// (see the notes below); confirm against the original file.
5473bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
5474 const OperandVector &Operands,
5475 SMLoc IDLoc) {
5476 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
5477 AMDGPU::OpName::cpol);
5478 if (CPolPos == -1)
5479 return true;
5480
5481 unsigned CPol = Inst.getOperand(CPolPos).getImm();
5482
// The three checks below report an error but do not return; validation
// continues after them.
5483 if (!isGFX1250Plus()) {
5484 if (CPol & CPol::SCAL) {
5485 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5486 StringRef CStr(S.getPointer());
// Point the diagnostic at the "scale_offset" text following the cpol location.
5487 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5488 Error(S, "scale_offset is not supported on this GPU");
5489 }
5490 if (CPol & CPol::NV) {
5491 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5492 StringRef CStr(S.getPointer());
5493 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
5494 Error(S, "nv is not supported on this GPU");
5495 }
5496 }
5497
5498 if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
5499 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5500 StringRef CStr(S.getPointer());
5501 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5502 Error(S, "scale_offset is not supported for this instruction");
5503 }
5504
// GFX12+ uses th/scope bits; the remaining checks apply to older targets only.
5505 if (isGFX12Plus())
5506 return validateTHAndScopeBits(Inst, Operands, CPol);
5507
5508 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5509 if (TSFlags & SIInstrFlags::SMRD) {
5510 if (CPol && (isSI() || isCI())) {
5511 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5512 Error(S, "cache policy is not supported for SMRD instructions");
5513 return false;
5514 }
// Any bit other than GLC and DLC is rejected here.
5515 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
5516 Error(IDLoc, "invalid cache policy for SMEM instruction");
5517 return false;
5518 }
5519 }
5520
5521 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
// NOTE(review): the rest of this mask definition is missing in this copy.
5522 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
5525 if (!(TSFlags & AllowSCCModifier)) {
5526 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5527 StringRef CStr(S.getPointer());
5528 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
5529 Error(S,
5530 "scc modifier is not supported for this instruction on this GPU");
5531 return false;
5532 }
5533 }
5534
// NOTE(review): the condition guarding the next return is missing in this
// copy; as written it would make the code below unreachable.
5536 return true;
5537
// Instructions flagged IsAtomicRet (other than MIMG) must set GLC (spelled
// sc0 on GFX940); all others reaching this point must not.
5538 if (TSFlags & SIInstrFlags::IsAtomicRet) {
5539 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5540 Error(IDLoc, isGFX940() ? "instruction must use sc0"
5541 : "instruction must use glc");
5542 return false;
5543 }
5544 } else {
5545 if (CPol & CPol::GLC) {
5546 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5547 StringRef CStr(S.getPointer());
// NOTE(review): the start of the next statement is missing in this copy.
5549 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
5550 Error(S, isGFX940() ? "instruction must not use sc0"
5551 : "instruction must not use glc");
5552 return false;
5553 }
5554 }
5555
5556 return true;
5557}
5558
// Validate the th (temporal hint) and scope fields of a cache-policy value.
// Every failure is reported at the cpol operand location via PrintError,
// which always returns false. Returns true when no check fails.
//
// NOTE(review): several conditions in this copy of the function are missing
// continuation lines (see the notes below); confirm against the original
// file.
5559bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5560 const OperandVector &Operands,
5561 const unsigned CPol) {
// Extract the th and scope fields from the combined cpol value.
5562 const unsigned TH = CPol & AMDGPU::CPol::TH;
5563 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5564
5565 const unsigned Opcode = Inst.getOpcode();
5566 const MCInstrDesc &TID = MII.get(Opcode);
5567
// Report Msg at the cpol operand and yield false, so callers can write
// "return PrintError(...)".
5568 auto PrintError = [&](StringRef Msg) {
5569 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5570 Error(S, Msg);
5571 return false;
5572 };
5573
// NOTE(review): the second half of this condition is missing in this copy.
5574 if ((TH & AMDGPU::CPol::TH_ATOMIC_RETURN) &&
5576 return PrintError("th:TH_ATOMIC_RETURN requires a destination operand");
5577
// NOTE(review): the rest of this condition is missing in this copy.
5578 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5581 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5582
// With th == 0 the remaining checks are skipped.
5583 if (TH == 0)
5584 return true;
5585
5586 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5587 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5588 (TH == AMDGPU::CPol::TH_NT_HT)))
5589 return PrintError("invalid th value for SMEM instruction");
5590
5591 if (TH == AMDGPU::CPol::TH_BYPASS) {
// NOTE(review): parts of this scope/th condition are missing in this copy.
5592 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5594 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5596 return PrintError("scope and th combination is not valid");
5597 }
5598
// The cpol value must carry the th-type bit that matches the instruction's
// temporal hint type (atomic, store, or otherwise load).
5599 unsigned THType = AMDGPU::getTemporalHintType(TID);
5600 if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
5601 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5602 return PrintError("invalid th value for atomic instructions");
5603 } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
5604 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5605 return PrintError("invalid th value for store instructions");
5606 } else {
5607 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5608 return PrintError("invalid th value for load instructions");
5609 }
5610
5611 return true;
5612}
5613
// Reject an explicit TFE modifier on store instructions. Returns true when
// the instruction passes; otherwise reports an error and returns false.
5614bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5615 const OperandVector &Operands) {
5616 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
// NOTE(review): the second half of this condition is missing in this copy;
// confirm against the original file.
5617 if (Desc.mayStore() &&
5619 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
// The error is only raised when the TFE location differs from the
// instruction location.
// NOTE(review): presumably getImmLoc falls back to the instruction location
// when no TFE operand was written - confirm.
5620 if (Loc != getInstLoc(Operands)) {
5621 Error(Loc, "TFE modifier has no meaning for store instructions");
5622 return false;
5623 }
5624 }
5625
5626 return true;
5627}
5628
// Validate the matrix format operands of WMMA instructions: the register
// size of src0/src1 against matrix_a_fmt/matrix_b_fmt and, when present, the
// combination of matrix and scale formats. Returns true when the opcode has
// no matrix_a_fmt operand or all checks pass; otherwise reports an error and
// returns false.
5629bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
5630 const OperandVector &Operands) {
5631 unsigned Opc = Inst.getOpcode();
5632 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5633 const MCInstrDesc &Desc = MII.get(Opc);
5634
5635 int AFmtIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
5636 if (AFmtIdx == -1)
5637 return true;
5638 unsigned AFmt = Inst.getOperand(AFmtIdx).getImm();
// matrix_b_fmt is read without a -1 check.
// NOTE(review): presumably it always accompanies matrix_a_fmt - confirm.
5639 int BFmtIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
5640 unsigned BFmt = Inst.getOperand(BFmtIdx).getImm();
5641
// Check one source operand's register class size against its format.
5642 auto validateFmt = [&](unsigned Fmt, AMDGPU::OpName SrcOp) -> bool {
5643 int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
5644 unsigned RegSize =
5645 TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[SrcIdx], HwMode))
5646 .getSizeInBits();
5647
// NOTE(review): the condition comparing RegSize with the size expected for
// Fmt is missing in this copy; confirm against the original file.
5649 return true;
5650
5651 Error(getOperandLoc(Operands, SrcIdx),
5652 "wrong register tuple size for " +
5653 Twine(WMMAMods::ModMatrixFmt[Fmt]));
5654 return false;
5655 };
5656
5657 if (!validateFmt(AFmt, AMDGPU::OpName::src0) ||
5658 !validateFmt(BFmt, AMDGPU::OpName::src1))
5659 return false;
5660
// The scale-format check only applies to opcodes with a matrix_a_scale_fmt
// operand.
5661 int AScaleIdx =
5662 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
5663 if (AScaleIdx == -1)
5664 return true;
5665 unsigned AScale = Inst.getOperand(AScaleIdx).getImm();
5666 int BScaleIdx =
5667 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
5668 unsigned BScale = Inst.getOperand(BScaleIdx).getImm();
5669 if (!isValidWMMAScaleFmtCombination(AFmt, AScale, BFmt, BScale)) {
5670 Error(getImmLoc(AMDGPUOperand::ImmTyMatrixAFMT, Operands),
5671 "invalid matrix and scale format combination");
5672 return false;
5673 }
5674
5675 return true;
5676}
5677
5678bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc,
5679 const OperandVector &Operands) {
5680 if (!validateLdsDirect(Inst, Operands))
5681 return false;
5682 if (!validateTrue16OpSel(Inst)) {
5683 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5684 "op_sel operand conflicts with 16-bit operand suffix");
5685 return false;
5686 }
5687 if (!validateSOPLiteral(Inst, Operands))
5688 return false;
5689 if (!validateVOPLiteral(Inst, Operands)) {
5690 return false;
5691 }
5692 if (!validateConstantBusLimitations(Inst, Operands)) {
5693 return false;
5694 }
5695 if (!validateVOPD(Inst, Operands)) {
5696 return false;
5697 }
5698 if (!validateIntClampSupported(Inst)) {
5699 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5700 "integer clamping is not supported on this GPU");
5701 return false;
5702 }
5703 if (!validateOpSel(Inst)) {
5704 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5705 "invalid op_sel operand");
5706 return false;
5707 }
5708 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5709 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5710 "invalid neg_lo operand");
5711 return false;
5712 }
5713 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5714 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5715 "invalid neg_hi operand");
5716 return false;
5717 }
5718 if (!validateDPP(Inst, Operands)) {
5719 return false;
5720 }
5721 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5722 if (!validateMIMGD16(Inst)) {
5723 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5724 "d16 modifier is not supported on this GPU");
5725 return false;
5726 }
5727 if (!validateMIMGDim(Inst, Operands)) {
5728 Error(IDLoc, "missing dim operand");
5729 return false;
5730 }
5731 if (!validateTensorR128(Inst)) {
5732 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5733 "instruction must set modifier r128=0");
5734 return false;
5735 }
5736 if (!validateMIMGMSAA(Inst)) {
5737 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5738 "invalid dim; must be MSAA type");
5739 return false;
5740 }
5741 if (!validateMIMGDataSize(Inst, IDLoc)) {
5742 return false;
5743 }
5744 if (!validateMIMGAddrSize(Inst, IDLoc))
5745 return false;
5746 if (!validateMIMGAtomicDMask(Inst)) {
5747 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5748 "invalid atomic image dmask");
5749 return false;
5750 }
5751 if (!validateMIMGGatherDMask(Inst)) {
5752 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5753 "invalid image_gather dmask: only one bit must be set");
5754 return false;
5755 }
5756 if (!validateMovrels(Inst, Operands)) {
5757 return false;
5758 }
5759 if (!validateOffset(Inst, Operands)) {
5760 return false;
5761 }
5762 if (!validateMAIAccWrite(Inst, Operands)) {
5763 return false;
5764 }
5765 if (!validateMAISrc2(Inst, Operands)) {
5766 return false;
5767 }
5768 if (!validateMFMA(Inst, Operands)) {
5769 return false;
5770 }
5771 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5772 return false;
5773 }
5774
5775 if (!validateAGPRLdSt(Inst)) {
5776 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5777 ? "invalid register class: data and dst should be all VGPR or AGPR"
5778 : "invalid register class: agpr loads and stores not supported on this GPU"
5779 );
5780 return false;
5781 }
5782 if (!validateVGPRAlign(Inst)) {
5783 Error(IDLoc,
5784 "invalid register class: vgpr tuples must be 64 bit aligned");
5785 return false;
5786 }
5787 if (!validateDS(Inst, Operands)) {
5788 return false;
5789 }
5790
5791 if (!validateBLGP(Inst, Operands)) {
5792 return false;
5793 }
5794
5795 if (!validateDivScale(Inst)) {
5796 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5797 return false;
5798 }
5799 if (!validateWaitCnt(Inst, Operands)) {
5800 return false;
5801 }
5802 if (!validateTFE(Inst, Operands)) {
5803 return false;
5804 }
5805 if (!validateWMMA(Inst, Operands)) {
5806 return false;
5807 }
5808
5809 return true;
5810}
5811
5813 const FeatureBitset &FBS,
5814 unsigned VariantID = 0);
5815
5816static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5817 const FeatureBitset &AvailableFeatures,
5818 unsigned VariantID);
5819
5820bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5821 const FeatureBitset &FBS) {
5822 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5823}
5824
5825bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5826 const FeatureBitset &FBS,
5827 ArrayRef<unsigned> Variants) {
5828 for (auto Variant : Variants) {
5829 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5830 return true;
5831 }
5832
5833 return false;
5834}
5835
5836bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5837 SMLoc IDLoc) {
5838 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5839
5840 // Check if requested instruction variant is supported.
5841 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5842 return false;
5843
5844 // This instruction is not supported.
5845 // Clear any other pending errors because they are no longer relevant.
5846 getParser().clearPendingErrors();
5847
5848 // Requested instruction variant is not supported.
5849 // Check if any other variants are supported.
5850 StringRef VariantName = getMatchedVariantName();
5851 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5852 return Error(IDLoc,
5853 Twine(VariantName,
5854 " variant of this instruction is not supported"));
5855 }
5856
5857 // Check if this instruction may be used with a different wavesize.
5858 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5859 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5860 // FIXME: Use getAvailableFeatures, and do not manually recompute
5861 FeatureBitset FeaturesWS32 = getFeatureBits();
5862 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5863 .flip(AMDGPU::FeatureWavefrontSize32);
5864 FeatureBitset AvailableFeaturesWS32 =
5865 ComputeAvailableFeatures(FeaturesWS32);
5866
5867 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5868 return Error(IDLoc, "instruction requires wavesize=32");
5869 }
5870
5871 // Finally check if this instruction is supported on any other GPU.
5872 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5873 return Error(IDLoc, "instruction not supported on this GPU (" +
5874 getSTI().getCPU() + ")" + ": " + Mnemo);
5875 }
5876
5877 // Instruction not supported on any GPU. Probably a typo.
5878 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5879 return Error(IDLoc, "invalid instruction" + Suggestion);
5880}
5881
5882static bool isInvalidVOPDY(const OperandVector &Operands,
5883 uint64_t InvalidOprIdx) {
5884 assert(InvalidOprIdx < Operands.size());
5885 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5886 if (Op.isToken() && InvalidOprIdx > 1) {
5887 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5888 return PrevOp.isToken() && PrevOp.getToken() == "::";
5889 }
5890 return false;
5891}
5892
5893bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5894 OperandVector &Operands,
5895 MCStreamer &Out,
5896 uint64_t &ErrorInfo,
5897 bool MatchingInlineAsm) {
5898 MCInst Inst;
5899 Inst.setLoc(IDLoc);
5900 unsigned Result = Match_Success;
5901 for (auto Variant : getMatchedVariants()) {
5902 uint64_t EI;
5903 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5904 Variant);
5905 // We order match statuses from least to most specific. We use most specific
5906 // status as resulting
5907 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5908 if (R == Match_Success || R == Match_MissingFeature ||
5909 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5910 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5911 Result != Match_MissingFeature)) {
5912 Result = R;
5913 ErrorInfo = EI;
5914 }
5915 if (R == Match_Success)
5916 break;
5917 }
5918
5919 if (Result == Match_Success) {
5920 if (!validateInstruction(Inst, IDLoc, Operands)) {
5921 return true;
5922 }
5923 Out.emitInstruction(Inst, getSTI());
5924 return false;
5925 }
5926
5927 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5928 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5929 return true;
5930 }
5931
5932 switch (Result) {
5933 default: break;
5934 case Match_MissingFeature:
5935 // It has been verified that the specified instruction
5936 // mnemonic is valid. A match was found but it requires
5937 // features which are not supported on this GPU.
5938 return Error(IDLoc, "operands are not valid for this GPU or mode");
5939
5940 case Match_InvalidOperand: {
5941 SMLoc ErrorLoc = IDLoc;
5942 if (ErrorInfo != ~0ULL) {
5943 if (ErrorInfo >= Operands.size()) {
5944 return Error(IDLoc, "too few operands for instruction");
5945 }
5946 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5947 if (ErrorLoc == SMLoc())
5948 ErrorLoc = IDLoc;
5949
5950 if (isInvalidVOPDY(Operands, ErrorInfo))
5951 return Error(ErrorLoc, "invalid VOPDY instruction");
5952 }
5953 return Error(ErrorLoc, "invalid operand for instruction");
5954 }
5955
5956 case Match_MnemonicFail:
5957 llvm_unreachable("Invalid instructions should have been handled already");
5958 }
5959 llvm_unreachable("Implement any new match types added!");
5960}
5961
5962bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5963 int64_t Tmp = -1;
5964 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5965 return true;
5966 }
5967 if (getParser().parseAbsoluteExpression(Tmp)) {
5968 return true;
5969 }
5970 Ret = static_cast<uint32_t>(Tmp);
5971 return false;
5972}
5973
5974bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5975 if (!getSTI().getTargetTriple().isAMDGCN())
5976 return TokError("directive only supported for amdgcn architecture");
5977
5978 std::string TargetIDDirective;
5979 SMLoc TargetStart = getTok().getLoc();
5980 if (getParser().parseEscapedString(TargetIDDirective))
5981 return true;
5982
5983 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5984 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5985 return getParser().Error(
5986 TargetRange.Start,
5987 (Twine(".amdgcn_target directive's target id ") +
5988 Twine(TargetIDDirective) +
5989 Twine(" does not match the specified target id ") +
5990 Twine(getTargetStreamer().getTargetID()->toString())));
5991
5992 return false;
5993}
5994
5995bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5996 return Error(Range.Start, "value out of range", Range);
5997}
5998
/// Build the MCExpr values for the granulated VGPR and SGPR block counts of a
/// kernel descriptor from the "next free" register expressions.
///
/// \param Features      Subtarget feature bits; only FeatureSGPRInitBug is
///                      tested in the visible code.
/// \param VCCUsed, FlatScrUsed, XNACKUsed
///                      Inputs forwarded to AMDGPUMCExpr::createExtraSGPRs.
/// \param EnableWavefrontSize32
///                      Forwarded to IsaInfo::getVGPREncodingGranule.
/// \param NextFreeVGPR, NextFreeSGPR
///                      Register-count expressions supplied by the caller.
/// \param VGPRRange     Not referenced in the visible body.
/// \param SGPRRange     Source range used when the SGPR count is rejected.
/// \param VGPRBlocks, SGPRBlocks
///                      Outputs; written only on the success path.
/// \returns the result of OutOfRangeError(SGPRRange) when an evaluatable SGPR
///          count exceeds the addressable maximum, false otherwise.
///
/// NOTE(review): listing lines 6014 and 6034 are absent from this extract
/// (the body of the `Version.Major >= 10` branch and the right-hand side of
/// the FeatureSGPRInitBug assignment) — restore them from the upstream file.
5999bool AMDGPUAsmParser::calculateGPRBlocks(
6000 const FeatureBitset &Features, const MCExpr *VCCUsed,
6001 const MCExpr *FlatScrUsed, bool XNACKUsed,
6002 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
6003 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
6004 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
6005 // TODO(scott.linder): These calculations are duplicated from
6006 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
6007 IsaVersion Version = getIsaVersion(getSTI().getCPU());
6008 MCContext &Ctx = getContext();
6009
6010 const MCExpr *NumSGPRs = NextFreeSGPR;
6011 int64_t EvaluatedSGPRs;
6012
6013 if (Version.Major >= 10)
6015 else {
6016 unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(getSTI());
6017
 // On gfx8+ without the SGPR init bug, the limit is checked against the
 // count *before* the extra SGPRs are added.
6018 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
6019 !Features.test(FeatureSGPRInitBug) &&
6020 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
6021 return OutOfRangeError(SGPRRange);
6022
6023 const MCExpr *ExtraSGPRs =
6024 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
6025 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
6026
 // On gfx7 and older, or with the SGPR init bug, the limit is checked against
 // the count *including* the extra SGPRs.
6027 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
6028 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
6029 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
6030 return OutOfRangeError(SGPRRange);
6031
6032 if (Features.test(FeatureSGPRInitBug))
6033 NumSGPRs =
6035 }
6036
6037 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
6038 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
6039 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
6040 unsigned Granule) -> const MCExpr * {
6041 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
6042 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
6043 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
6044 const MCExpr *AlignToGPR =
6045 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
6046 const MCExpr *DivGPR =
6047 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
6048 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
6049 return SubGPR;
6050 };
6051
6052 VGPRBlocks = GetNumGPRBlocks(
6053 NextFreeVGPR,
6054 IsaInfo::getVGPREncodingGranule(getSTI(), EnableWavefrontSize32));
6055 SGPRBlocks =
6056 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(getSTI()));
6057
6058 return false;
6059}
6060
/// Parse the body of a `.amdhsa_kernel <name>` directive up to and including
/// `.end_amdhsa_kernel`, validate each `.amdhsa_*` entry, and hand the
/// resulting kernel descriptor to the target streamer.
///
/// Each entry is an identifier followed by an expression. Entries may not be
/// repeated; `.amdhsa_next_free_vgpr` and `.amdhsa_next_free_sgpr` are
/// mandatory, and `.amdhsa_accum_offset` is mandatory when isGFX90A() holds.
///
/// \returns true if a diagnostic was reported, false on success.
///
/// NOTE(review): this listing has numbering gaps (e.g. 6073, 6147, 6165, ...)
/// where source lines are absent. Most gaps sit directly before a trailing
/// `<ENTRY>, ExprVal, ValRange);` argument list or before a `return Error(`,
/// so they presumably held the opening of a PARSE_BITS_ENTRY(...) call or a
/// feature check — restore them from the upstream file before relying on
/// this text.
6061bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
6062 if (!getSTI().getTargetTriple().isAMDGCN())
6063 return TokError("directive only supported for amdgcn architecture");
6064
6065 if (!isHsaAbi(getSTI()))
6066 return TokError("directive only supported for amdhsa OS");
6067
6068 StringRef KernelName;
6069 if (getParser().parseIdentifier(KernelName))
6070 return true;
6071
6072 AMDGPU::MCKernelDescriptor KD =
6074 &getSTI(), getContext());
6075
 // Names of the entries already parsed; used to reject duplicates and to
 // check for mandatory entries after the loop.
6076 StringSet<> Seen;
6077
6078 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
6079
6080 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
6081 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
6082
6083 SMRange VGPRRange;
6084 const MCExpr *NextFreeVGPR = ZeroExpr;
6085 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
6086 const MCExpr *NamedBarCnt = ZeroExpr;
6087 uint64_t SharedVGPRCount = 0;
6088 uint64_t PreloadLength = 0;
6089 uint64_t PreloadOffset = 0;
6090 SMRange SGPRRange;
6091 const MCExpr *NextFreeSGPR = ZeroExpr;
6092
6093 // Count the number of user SGPRs implied from the enabled feature bits.
6094 unsigned ImpliedUserSGPRCount = 0;
6095
6096 // Track if the asm explicitly contains the directive for the user SGPR
6097 // count.
6098 std::optional<unsigned> ExplicitUserSGPRCount;
6099 const MCExpr *ReserveVCC = OneExpr;
6100 const MCExpr *ReserveFlatScr = OneExpr;
6101 std::optional<bool> EnableWavefrontSize32;
6102
 // One iteration per `.amdhsa_*` entry; the loop ends at `.end_amdhsa_kernel`.
6103 while (true) {
6104 while (trySkipToken(AsmToken::EndOfStatement));
6105
6106 StringRef ID;
6107 SMRange IDRange = getTok().getLocRange();
6108 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
6109 return true;
6110
6111 if (ID == ".end_amdhsa_kernel")
6112 break;
6113
6114 if (!Seen.insert(ID).second)
6115 return TokError(".amdhsa_ directives cannot be repeated");
6116
6117 SMLoc ValStart = getLoc();
6118 const MCExpr *ExprVal;
6119 if (getParser().parseExpression(ExprVal))
6120 return true;
6121 SMLoc ValEnd = getLoc();
6122 SMRange ValRange = SMRange(ValStart, ValEnd);
6123
 // Val stays 0 when the expression cannot be evaluated yet, so the range
 // checks below only reject values that are already resolvable.
6124 int64_t IVal = 0;
6125 uint64_t Val = IVal;
6126 bool EvaluatableExpr;
6127 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6128 if (IVal < 0)
6129 return OutOfRangeError(ValRange);
6130 Val = IVal;
6131 }
6132
 // Range-check Val against the bit width of ENTRY, then store VALUE into
 // FIELD at ENTRY's shift/mask.
6133#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6134 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6135 return OutOfRangeError(RANGE); \
6136 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6137 getContext());
6138
6139// Some fields use the parsed value immediately which requires the expression to
6140// be solvable.
6141#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6142 if (!(RESOLVED)) \
6143 return Error(IDRange.Start, "directive should have resolvable expression", \
6144 IDRange);
6145
6146 if (ID == ".amdhsa_group_segment_fixed_size") {
6148 CHAR_BIT>(Val))
6149 return OutOfRangeError(ValRange);
6150 KD.group_segment_fixed_size = ExprVal;
6151 } else if (ID == ".amdhsa_private_segment_fixed_size") {
6153 CHAR_BIT>(Val))
6154 return OutOfRangeError(ValRange);
6155 KD.private_segment_fixed_size = ExprVal;
6156 } else if (ID == ".amdhsa_kernarg_size") {
6157 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
6158 return OutOfRangeError(ValRange);
6159 KD.kernarg_size = ExprVal;
6160 } else if (ID == ".amdhsa_user_sgpr_count") {
6161 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6162 ExplicitUserSGPRCount = Val;
6163 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
6164 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6166 return Error(IDRange.Start,
6167 "directive is not supported with architected flat scratch",
6168 IDRange);
6170 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6171 ExprVal, ValRange);
 // Each enabled user-SGPR feature adds a fixed number of SGPRs to the
 // implied count (4 here; 2 or 1 for the entries below).
6172 if (Val)
6173 ImpliedUserSGPRCount += 4;
6174 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
6175 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6176 if (!hasKernargPreload())
6177 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6178
6179 if (Val > getMaxNumUserSGPRs())
6180 return OutOfRangeError(ValRange);
6181 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
6182 ValRange);
6183 if (Val) {
6184 ImpliedUserSGPRCount += Val;
6185 PreloadLength = Val;
6186 }
6187 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
6188 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6189 if (!hasKernargPreload())
6190 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6191
6192 if (Val >= 1024)
6193 return OutOfRangeError(ValRange);
6194 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6195 ValRange);
6196 if (Val)
6197 PreloadOffset = Val;
6198 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6199 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6201 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6202 ValRange);
6203 if (Val)
6204 ImpliedUserSGPRCount += 2;
6205 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6206 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6208 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6209 ValRange);
6210 if (Val)
6211 ImpliedUserSGPRCount += 2;
6212 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6213 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6215 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6216 ExprVal, ValRange);
6217 if (Val)
6218 ImpliedUserSGPRCount += 2;
6219 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6220 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6222 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6223 ValRange);
6224 if (Val)
6225 ImpliedUserSGPRCount += 2;
6226 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6228 return Error(IDRange.Start,
6229 "directive is not supported with architected flat scratch",
6230 IDRange);
6231 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6233 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6234 ExprVal, ValRange);
6235 if (Val)
6236 ImpliedUserSGPRCount += 2;
6237 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6238 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6240 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6241 ExprVal, ValRange);
6242 if (Val)
6243 ImpliedUserSGPRCount += 1;
6244 } else if (ID == ".amdhsa_wavefront_size32") {
6245 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6246 if (IVersion.Major < 10)
6247 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6248 EnableWavefrontSize32 = Val;
6250 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6251 ValRange);
6252 } else if (ID == ".amdhsa_uses_dynamic_stack") {
6254 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6255 ValRange);
6256 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6258 return Error(IDRange.Start,
6259 "directive is not supported with architected flat scratch",
6260 IDRange);
6262 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6263 ValRange);
6264 } else if (ID == ".amdhsa_enable_private_segment") {
6266 return Error(
6267 IDRange.Start,
6268 "directive is not supported without architected flat scratch",
6269 IDRange);
6271 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6272 ValRange);
6273 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6275 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6276 ValRange);
6277 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6279 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6280 ValRange);
6281 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6283 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6284 ValRange);
6285 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6287 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6288 ValRange);
6289 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6291 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6292 ValRange);
6293 } else if (ID == ".amdhsa_next_free_vgpr") {
 // Only recorded here; the block count is derived after the loop.
6294 VGPRRange = ValRange;
6295 NextFreeVGPR = ExprVal;
6296 } else if (ID == ".amdhsa_next_free_sgpr") {
6297 SGPRRange = ValRange;
6298 NextFreeSGPR = ExprVal;
6299 } else if (ID == ".amdhsa_accum_offset") {
6300 if (!isGFX90A())
6301 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6302 AccumOffset = ExprVal;
6303 } else if (ID == ".amdhsa_named_barrier_count") {
6304 if (!isGFX1250Plus())
6305 return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
6306 NamedBarCnt = ExprVal;
6307 } else if (ID == ".amdhsa_reserve_vcc") {
6308 if (EvaluatableExpr && !isUInt<1>(Val))
6309 return OutOfRangeError(ValRange);
6310 ReserveVCC = ExprVal;
6311 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6312 if (IVersion.Major < 7)
6313 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
6315 return Error(IDRange.Start,
6316 "directive is not supported with architected flat scratch",
6317 IDRange);
6318 if (EvaluatableExpr && !isUInt<1>(Val))
6319 return OutOfRangeError(ValRange);
6320 ReserveFlatScr = ExprVal;
6321 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6322 if (IVersion.Major < 8)
6323 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
 // NOTE(review): unlike the two reserve_* entries above, this branch does
 // not test EvaluatableExpr, so an unresolved expression is compared as 0.
6324 if (!isUInt<1>(Val))
6325 return OutOfRangeError(ValRange);
6326 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6327 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
6328 IDRange);
6329 } else if (ID == ".amdhsa_float_round_mode_32") {
6331 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6332 ValRange);
6333 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6335 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6336 ValRange);
6337 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6339 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6340 ValRange);
6341 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6343 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6344 ValRange);
6345 } else if (ID == ".amdhsa_dx10_clamp") {
6346 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6347 return Error(IDRange.Start, "directive unsupported on gfx1170+",
6348 IDRange);
6350 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6351 ValRange);
6352 } else if (ID == ".amdhsa_ieee_mode") {
6353 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6354 return Error(IDRange.Start, "directive unsupported on gfx1170+",
6355 IDRange);
6357 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6358 ValRange);
6359 } else if (ID == ".amdhsa_fp16_overflow") {
6360 if (IVersion.Major < 9)
6361 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
6363 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6364 ValRange);
6365 } else if (ID == ".amdhsa_tg_split") {
6366 if (!isGFX90A())
6367 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6368 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6369 ExprVal, ValRange);
6370 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6371 if (!supportsWGP(getSTI()))
6372 return Error(IDRange.Start,
6373 "directive unsupported on " + getSTI().getCPU(), IDRange);
6375 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6376 ValRange);
6377 } else if (ID == ".amdhsa_memory_ordered") {
6378 if (IVersion.Major < 10)
6379 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6381 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6382 ValRange);
6383 } else if (ID == ".amdhsa_forward_progress") {
6384 if (IVersion.Major < 10)
6385 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6387 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6388 ValRange);
6389 } else if (ID == ".amdhsa_shared_vgpr_count") {
6390 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6391 if (IVersion.Major < 10 || IVersion.Major >= 12)
6392 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
6393 IDRange);
6394 SharedVGPRCount = Val;
6396 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6397 ValRange);
6398 } else if (ID == ".amdhsa_inst_pref_size") {
6399 if (IVersion.Major < 11)
6400 return Error(IDRange.Start, "directive requires gfx11+", IDRange);
 // gfx11 and gfx12+ use different field encodings for this entry.
6401 if (IVersion.Major == 11) {
6403 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6404 ValRange);
6405 } else {
6407 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6408 ValRange);
6409 }
6410 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6413 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6414 ExprVal, ValRange);
6415 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6417 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6418 ExprVal, ValRange);
6419 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6422 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6423 ExprVal, ValRange);
6424 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6426 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6427 ExprVal, ValRange);
6428 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6430 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6431 ExprVal, ValRange);
6432 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6434 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6435 ExprVal, ValRange);
6436 } else if (ID == ".amdhsa_exception_int_div_zero") {
6438 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6439 ExprVal, ValRange);
6440 } else if (ID == ".amdhsa_round_robin_scheduling") {
6441 if (IVersion.Major < 12)
6442 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
6444 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6445 ValRange);
6446 } else {
6447 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
6448 }
6449
 // NOTE(review): only PARSE_BITS_ENTRY is undefined here; no matching #undef
 // for EXPR_RESOLVE_OR_ERROR is visible in this function.
6450#undef PARSE_BITS_ENTRY
6451 }
6452
6453 if (!Seen.contains(".amdhsa_next_free_vgpr"))
6454 return TokError(".amdhsa_next_free_vgpr directive is required");
6455
6456 if (!Seen.contains(".amdhsa_next_free_sgpr"))
6457 return TokError(".amdhsa_next_free_sgpr directive is required");
6458
 // An explicit .amdhsa_user_sgpr_count wins over the count implied by the
 // enabled user-SGPR entries.
6459 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6460 if (UserSGPRCount > getMaxNumUserSGPRs())
6461 return TokError("too many user SGPRs enabled, found " +
6462 Twine(UserSGPRCount) + ", but only " +
6463 Twine(getMaxNumUserSGPRs()) + " are supported.");
6464
6465 // Consider the case where the total number of UserSGPRs with trailing
6466 // allocated preload SGPRs, is greater than the number of explicitly
6467 // referenced SGPRs.
6468 if (PreloadLength) {
6469 MCContext &Ctx = getContext();
6470 NextFreeSGPR = AMDGPUMCExpr::createMax(
6471 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6472 }
6473
6474 const MCExpr *VGPRBlocks;
6475 const MCExpr *SGPRBlocks;
6476 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6477 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6478 EnableWavefrontSize32, NextFreeVGPR,
6479 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6480 SGPRBlocks))
6481 return true;
6482
 // EvaluatedVGPRBlocks is only meaningful when VGPRBlocksEvaluatable is true;
 // it is consulted again in the shared-VGPR check near the end.
6483 int64_t EvaluatedVGPRBlocks;
6484 bool VGPRBlocksEvaluatable =
6485 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6486 if (VGPRBlocksEvaluatable &&
6488 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6489 return OutOfRangeError(VGPRRange);
6490 }
6492 KD.compute_pgm_rsrc1, VGPRBlocks,
6493 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6494 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6495
6496 int64_t EvaluatedSGPRBlocks;
6497 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6499 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6500 return OutOfRangeError(SGPRRange);
6502 KD.compute_pgm_rsrc1, SGPRBlocks,
6503 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6504 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6505
6506 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6507 return TokError("amdgpu_user_sgpr_count smaller than implied by "
6508 "enabled user SGPRs");
6509
 // The user SGPR count field uses a different shift/mask on gfx1250+.
6510 if (isGFX1250Plus()) {
6513 MCConstantExpr::create(UserSGPRCount, getContext()),
6514 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6515 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
6516 } else {
6519 MCConstantExpr::create(UserSGPRCount, getContext()),
6520 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6521 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
6522 }
6523
6524 int64_t IVal = 0;
6525 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6526 return TokError("Kernarg size should be resolvable");
6527 uint64_t kernarg_size = IVal;
 // Preload length and offset are multiplied by 4 before being compared with
 // the kernarg size; the check is skipped when kernarg_size is 0.
6528 if (PreloadLength && kernarg_size &&
6529 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6530 return TokError("Kernarg preload length + offset is larger than the "
6531 "kernarg segment size");
6532
6533 if (isGFX90A()) {
6534 if (!Seen.contains(".amdhsa_accum_offset"))
6535 return TokError(".amdhsa_accum_offset directive is required");
6536 int64_t EvaluatedAccum;
6537 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
 // UEvaluatedAccum is only consulted below when AccumEvaluatable is true.
6538 uint64_t UEvaluatedAccum = EvaluatedAccum;
6539 if (AccumEvaluatable &&
6540 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6541 return TokError("accum_offset should be in range [4..256] in "
6542 "increments of 4");
6543
6544 int64_t EvaluatedNumVGPR;
6545 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6546 AccumEvaluatable &&
6547 UEvaluatedAccum >
6548 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6549 return TokError("accum_offset exceeds total VGPR allocation");
6550 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6552 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
6555 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6556 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6557 getContext());
6558 }
6559
6560 if (isGFX1250Plus())
6562 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6563 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6564 getContext());
6565
6566 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6567 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
6568 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6569 return TokError("shared_vgpr_count directive not valid on "
6570 "wavefront size 32");
6571 }
6572
6573 if (VGPRBlocksEvaluatable &&
6574 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6575 63)) {
6576 return TokError("shared_vgpr_count*2 + "
6577 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6578 "exceed 63\n");
6579 }
6580 }
6581
6582 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6583 NextFreeVGPR, NextFreeSGPR,
6584 ReserveVCC, ReserveFlatScr);
6585 return false;
6586}
6587
6588bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6589 uint32_t Version;
6590 if (ParseAsAbsoluteExpression(Version))
6591 return true;
6592
6593 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6594 return false;
6595}
6596
6597bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6598 AMDGPUMCKernelCodeT &C) {
6599 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6600 // assembly for backwards compatibility.
6601 if (ID == "max_scratch_backing_memory_byte_size") {
6602 Parser.eatToEndOfStatement();
6603 return false;
6604 }
6605
6606 SmallString<40> ErrStr;
6607 raw_svector_ostream Err(ErrStr);
6608 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6609 return TokError(Err.str());
6610 }
6611 Lex();
6612
6613 if (ID == "enable_wavefront_size32") {
6614 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6615 if (!isGFX10Plus())
6616 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6617 if (!isWave32())
6618 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6619 } else {
6620 if (!isWave64())
6621 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6622 }
6623 }
6624
6625 if (ID == "wavefront_size") {
6626 if (C.wavefront_size == 5) {
6627 if (!isGFX10Plus())
6628 return TokError("wavefront_size=5 is only allowed on GFX10+");
6629 if (!isWave32())
6630 return TokError("wavefront_size=5 requires +WavefrontSize32");
6631 } else if (C.wavefront_size == 6) {
6632 if (!isWave64())
6633 return TokError("wavefront_size=6 requires +WavefrontSize64");
6634 }
6635 }
6636
6637 return false;
6638}
6639
6640bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6641 AMDGPUMCKernelCodeT KernelCode;
6642 KernelCode.initDefault(getSTI(), getContext());
6643
6644 while (true) {
6645 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6646 // will set the current token to EndOfStatement.
6647 while(trySkipToken(AsmToken::EndOfStatement));
6648
6649 StringRef ID;
6650 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6651 return true;
6652
6653 if (ID == ".end_amd_kernel_code_t")
6654 break;
6655
6656 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6657 return true;
6658 }
6659
6660 KernelCode.validate(&getSTI(), getContext());
6661 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6662
6663 return false;
6664}
6665
/// Parse a kernel symbol name, have the target streamer record a symbol type
/// for it, and re-initialize KernelScope for the new kernel.
///
/// \returns true if no symbol name could be parsed, false otherwise.
///
/// NOTE(review): listing line 6672 (the remaining argument(s) of the
/// EmitAMDGPUSymbolType call) is absent from this extract — restore it from
/// the upstream file.
6666bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6667 StringRef KernelName;
6668 if (!parseId(KernelName, "expected symbol name"))
6669 return true;
6670
6671 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6673
6674 KernelScope.initialize(getContext());
6675 return false;
6676}
6677
6678bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6679 if (!getSTI().getTargetTriple().isAMDGCN()) {
6680 return Error(getLoc(),
6681 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6682 "architectures");
6683 }
6684
6685 auto TargetIDDirective = getLexer().getTok().getStringContents();
6686 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6687 return Error(getParser().getTok().getLoc(), "target id must match options");
6688
6689 getTargetStreamer().EmitISAVersion();
6690 Lex();
6691
6692 return false;
6693}
6694
6695bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6696 assert(isHsaAbi(getSTI()));
6697
6698 std::string HSAMetadataString;
6699 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6700 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6701 return true;
6702
6703 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6704 return Error(getLoc(), "invalid HSA metadata");
6705
6706 return false;
6707}
6708
6709/// Common code to parse out a block of text (typically YAML) between start and
6710/// end directives.
6711bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6712 const char *AssemblerDirectiveEnd,
6713 std::string &CollectString) {
6714
6715 raw_string_ostream CollectStream(CollectString);
6716
6717 getLexer().setSkipSpace(false);
6718
6719 bool FoundEnd = false;
6720 while (!isToken(AsmToken::Eof)) {
6721 while (isToken(AsmToken::Space)) {
6722 CollectStream << getTokenStr();
6723 Lex();
6724 }
6725
6726 if (trySkipId(AssemblerDirectiveEnd)) {
6727 FoundEnd = true;
6728 break;
6729 }
6730
6731 CollectStream << Parser.parseStringToEndOfStatement()
6732 << getContext().getAsmInfo().getSeparatorString();
6733
6734 Parser.eatToEndOfStatement();
6735 }
6736
6737 getLexer().setSkipSpace(true);
6738
6739 if (isToken(AsmToken::Eof) && !FoundEnd) {
6740 return TokError(Twine("expected directive ") +
6741 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6742 }
6743
6744 return false;
6745}
6746
6747/// Parse the assembler directive for new MsgPack-format PAL metadata.
///
/// Collects the text up to the end directive via ParseToEndDirective and
/// hands it to the PAL metadata object's setFromString().
///
/// \returns true if collection failed or setFromString() rejected the text,
/// false otherwise.
///
/// NOTE(review): listing line 6751 (the remaining arguments of the
/// ParseToEndDirective call) is absent from this extract — restore it from
/// the upstream file.
6748bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6749 std::string String;
6750 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6752 return true;
6753
6754 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6755 if (!PALMetadata->setFromString(String))
6756 return Error(getLoc(), "invalid PAL metadata");
6757 return false;
6758}
6759
6760/// Parse the assembler directive for old linear-format PAL metadata.
///
/// The operand list is a comma-separated sequence of key/value pairs, each an
/// absolute expression; every pair is stored with setRegister().
///
/// \returns true if a diagnostic was reported, false otherwise.
///
/// NOTE(review): listing lines 6774, 6778 and 6782 (the tails of the three
/// TokError messages) are absent from this extract — restore them from the
/// upstream file.
6761bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6762 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6763 return Error(getLoc(),
6764 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6765 "not available on non-amdpal OSes")).str());
6766 }
6767
6768 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6769 PALMetadata->setLegacy();
 // One key/value pair per iteration; the loop ends when no comma follows a
 // value.
6770 for (;;) {
6771 uint32_t Key, Value;
6772 if (ParseAsAbsoluteExpression(Key)) {
6773 return TokError(Twine("invalid value in ") +
6775 }
 // A key without a following comma means the list has an odd length.
6776 if (!trySkipToken(AsmToken::Comma)) {
6777 return TokError(Twine("expected an even number of values in ") +
6779 }
6780 if (ParseAsAbsoluteExpression(Value)) {
6781 return TokError(Twine("invalid value in ") +
6783 }
6784 PALMetadata->setRegister(Key, Value);
6785 if (!trySkipToken(AsmToken::Comma))
6786 break;
6787 }
6788 return false;
6789}
6790
6791/// ParseDirectiveAMDGPULDS
6792/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6793bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6794 if (getParser().checkForValidSection())
6795 return true;
6796
6797 StringRef Name;
6798 SMLoc NameLoc = getLoc();
6799 if (getParser().parseIdentifier(Name))
6800 return TokError("expected identifier in directive");
6801
6802 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6803 if (getParser().parseComma())
6804 return true;
6805
6806 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(getSTI());
6807
6808 int64_t Size;
6809 SMLoc SizeLoc = getLoc();
6810 if (getParser().parseAbsoluteExpression(Size))
6811 return true;
6812 if (Size < 0)
6813 return Error(SizeLoc, "size must be non-negative");
6814 if (Size > LocalMemorySize)
6815 return Error(SizeLoc, "size is too large");
6816
6817 int64_t Alignment = 4;
6818 if (trySkipToken(AsmToken::Comma)) {
6819 SMLoc AlignLoc = getLoc();
6820 if (getParser().parseAbsoluteExpression(Alignment))
6821 return true;
6822 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6823 return Error(AlignLoc, "alignment must be a power of two");
6824
6825 // Alignment larger than the size of LDS is possible in theory, as long
6826 // as the linker manages to place to symbol at address 0, but we do want
6827 // to make sure the alignment fits nicely into a 32-bit integer.
6828 if (Alignment >= 1u << 31)
6829 return Error(AlignLoc, "alignment is too large");
6830 }
6831
6832 if (parseEOL())
6833 return true;
6834
6835 Symbol->redefineIfPossible();
6836 if (!Symbol->isUndefined())
6837 return Error(NameLoc, "invalid symbol redefinition");
6838
6839 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6840 return false;
6841}
6842
6843bool AMDGPUAsmParser::ParseDirectiveAMDGPUInfo() {
6844 if (getParser().checkForValidSection())
6845 return true;
6846
6847 StringRef FuncName;
6848 if (getParser().parseIdentifier(FuncName))
6849 return TokError("expected symbol name after .amdgpu_info");
6850
6851 MCSymbol *FuncSym = getContext().getOrCreateSymbol(FuncName);
6852 AMDGPU::InfoSectionData ParsedInfoData;
6853 AMDGPU::FuncInfo FI;
6854 FI.Sym = FuncSym;
6855 bool HasScalarAttrs = false;
6856
6857 while (true) {
6858 while (trySkipToken(AsmToken::EndOfStatement))
6859 ;
6860
6861 StringRef ID;
6862 SMLoc IDLoc = getLoc();
6863 if (!parseId(ID, "expected directive or .end_amdgpu_info"))
6864 return true;
6865
6866 if (ID == ".end_amdgpu_info")
6867 break;
6868
6869 // Every per-entry directive shares the `.amdgpu_` namespace prefix; strip
6870 // it once and dispatch on the distinguishing suffix below. The unstripped
6871 // ID is preserved for diagnostics.
6872 StringRef Dir = ID;
6873 if (!Dir.consume_front(".amdgpu_"))
6874 return Error(IDLoc, "unknown .amdgpu_info directive '" + ID + "'");
6875
6876 if (Dir == "flags") {
6877 int64_t Val;
6878 if (getParser().parseAbsoluteExpression(Val))
6879 return true;
6880 auto Flags = static_cast<AMDGPU::FuncInfoFlags>(Val);
6881 FI.UsesVCC = !!(Flags & AMDGPU::FuncInfoFlags::FUNC_USES_VCC);
6882 FI.UsesFlatScratch =
6883 !!(Flags & AMDGPU::FuncInfoFlags::FUNC_USES_FLAT_SCRATCH);
6884 FI.HasDynStack = !!(Flags & AMDGPU::FuncInfoFlags::FUNC_HAS_DYN_STACK);
6885 HasScalarAttrs = true;
6886 } else if (Dir == "num_sgpr") {
6887 int64_t Val;
6888 if (getParser().parseAbsoluteExpression(Val))
6889 return true;
6890 FI.NumSGPR = static_cast<uint32_t>(Val);
6891 HasScalarAttrs = true;
6892 } else if (Dir == "num_vgpr") {
6893 int64_t Val;
6894 if (getParser().parseAbsoluteExpression(Val))
6895 return true;
6896 FI.NumArchVGPR = static_cast<uint32_t>(Val);
6897 HasScalarAttrs = true;
6898 } else if (Dir == "num_agpr") {
6899 int64_t Val;
6900 if (getParser().parseAbsoluteExpression(Val))
6901 return true;
6902 FI.NumAccVGPR = static_cast<uint32_t>(Val);
6903 HasScalarAttrs = true;
6904 } else if (Dir == "private_segment_size") {
6905 int64_t Val;
6906 if (getParser().parseAbsoluteExpression(Val))
6907 return true;
6908 FI.PrivateSegmentSize = static_cast<uint32_t>(Val);
6909 HasScalarAttrs = true;
6910 } else if (Dir == "use") {
6911 StringRef ResName;
6912 if (getParser().parseIdentifier(ResName))
6913 return TokError("expected resource symbol for .amdgpu_use");
6914 ParsedInfoData.Uses.push_back(
6915 {FuncSym, getContext().getOrCreateSymbol(ResName)});
6916 } else if (Dir == "call") {
6917 StringRef DstName;
6918 if (getParser().parseIdentifier(DstName))
6919 return TokError("expected callee symbol for .amdgpu_call");
6920 ParsedInfoData.Calls.push_back(
6921 {FuncSym, getContext().getOrCreateSymbol(DstName)});
6922 } else if (Dir == "indirect_call") {
6923 std::string TypeId;
6924 if (getParser().parseEscapedString(TypeId))
6925 return TokError("expected type ID string for .amdgpu_indirect_call");
6926 ParsedInfoData.IndirectCalls.push_back({FuncSym, std::move(TypeId)});
6927 } else if (Dir == "typeid") {
6928 std::string TypeId;
6929 if (getParser().parseEscapedString(TypeId))
6930 return TokError("expected type ID string for .amdgpu_typeid");
6931 ParsedInfoData.TypeIds.push_back({FuncSym, std::move(TypeId)});
6932 } else {
6933 return Error(IDLoc, "unknown .amdgpu_info directive '" + ID + "'");
6934 }
6935 }
6936
6937 if (HasScalarAttrs)
6938 ParsedInfoData.Funcs.push_back(std::move(FI));
6939
6940 AMDGPU::InfoSectionData &Data = InfoData ? *InfoData : InfoData.emplace();
6941 for (AMDGPU::FuncInfo &Func : ParsedInfoData.Funcs)
6942 Data.Funcs.push_back(std::move(Func));
6943 for (std::pair<MCSymbol *, MCSymbol *> &Use : ParsedInfoData.Uses)
6944 Data.Uses.push_back(Use);
6945 for (std::pair<MCSymbol *, MCSymbol *> &Call : ParsedInfoData.Calls)
6946 Data.Calls.push_back(Call);
6947 for (std::pair<MCSymbol *, std::string> &IndirectCall :
6948 ParsedInfoData.IndirectCalls)
6949 Data.IndirectCalls.push_back(std::move(IndirectCall));
6950 for (std::pair<MCSymbol *, std::string> &TypeId : ParsedInfoData.TypeIds)
6951 Data.TypeIds.push_back(std::move(TypeId));
6952
6953 return false;
6954}
6955
6956void AMDGPUAsmParser::onEndOfFile() {
6957 if (InfoData)
6958 getTargetStreamer().emitAMDGPUInfo(*InfoData);
6959}
6960
// Dispatches a target-specific assembler directive. The directive spelling is
// taken from DirectiveID and compared against the known directive names; the
// first match forwards to the corresponding ParseDirective* handler and
// returns that handler's result. When nothing matches the function returns
// true.
//
// NOTE(review): this text comes from a numbered source listing in which
// listing lines 6972 and 6984 are absent. Each is presumably an `if` guard
// (the second one opening the brace that is closed on listing line 6989);
// confirm against the upstream file before relying on the control flow shown
// here.
6961bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6962 StringRef IDVal = DirectiveID.getString();
6963
// Directives that are only recognised for the HSA ABI.
6964 if (isHsaAbi(getSTI())) {
6965 if (IDVal == ".amdhsa_kernel")
6966 return ParseDirectiveAMDHSAKernel();
6967
6968 if (IDVal == ".amdhsa_code_object_version")
6969 return ParseDirectiveAMDHSACodeObjectVersion();
6970
6971 // TODO: Restructure/combine with PAL metadata directive.
// NOTE(review): the condition guarding the next return (listing line 6972) is
// missing from this extract.
6973 return ParseDirectiveHSAMetadata();
6974 } else {
// Directives that are only recognised when the HSA ABI is not in use.
6975 if (IDVal == ".amd_kernel_code_t")
6976 return ParseDirectiveAMDKernelCodeT();
6977
6978 if (IDVal == ".amdgpu_hsa_kernel")
6979 return ParseDirectiveAMDGPUHsaKernel();
6980
6981 if (IDVal == ".amd_amdgpu_isa")
6982 return ParseDirectiveISAVersion();
6983
// NOTE(review): the condition opening this block (listing line 6984) is
// missing; the diagnostic below names HSAMD::AssemblerDirectiveBegin, so the
// guard presumably tests for that directive.
6985 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6986 Twine(" directive is "
6987 "not available on non-amdhsa OSes"))
6988 .str());
6989 }
6990 }
6991
// Directives handled the same way on both paths above.
6992 if (IDVal == ".amdgcn_target")
6993 return ParseDirectiveAMDGCNTarget();
6994
6995 if (IDVal == ".amdgpu_lds")
6996 return ParseDirectiveAMDGPULDS();
6997
6998 if (IDVal == ".amdgpu_info")
6999 return ParseDirectiveAMDGPUInfo();
7000
7001 if (IDVal == PALMD::AssemblerDirectiveBegin)
7002 return ParseDirectivePALMetadataBegin();
7003
7004 if (IDVal == PALMD::AssemblerDirective)
7005 return ParseDirectivePALMetadata();
7006
// No directive name matched.
7007 return true;
7008}
7009
7010bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
7011 MCRegister Reg) {
7012 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
7013 return isGFX9Plus();
7014
7015 // GFX10+ has 2 more SGPRs 104 and 105.
7016 if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
7017 return hasSGPR104_SGPR105();
7018
7019 switch (Reg.id()) {
7020 case SRC_SHARED_BASE_LO:
7021 case SRC_SHARED_BASE:
7022 case SRC_SHARED_LIMIT_LO:
7023 case SRC_SHARED_LIMIT:
7024 case SRC_PRIVATE_BASE_LO:
7025 case SRC_PRIVATE_BASE:
7026 case SRC_PRIVATE_LIMIT_LO:
7027 case SRC_PRIVATE_LIMIT:
7028 return isGFX9Plus();
7029 case SRC_FLAT_SCRATCH_BASE_LO:
7030 case SRC_FLAT_SCRATCH_BASE_HI:
7031 return hasGloballyAddressableScratch();
7032 case SRC_POPS_EXITING_WAVE_ID:
7033 return isGFX9Plus() && !isGFX11Plus();
7034 case TBA:
7035 case TBA_LO:
7036 case TBA_HI:
7037 case TMA:
7038 case TMA_LO:
7039 case TMA_HI:
7040 return !isGFX9Plus();
7041 case XNACK_MASK:
7042 case XNACK_MASK_LO:
7043 case XNACK_MASK_HI:
7044 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
7045 case SGPR_NULL:
7046 return isGFX10Plus();
7047 case SRC_EXECZ:
7048 case SRC_VCCZ:
7049 return !isGFX11Plus();
7050 default:
7051 break;
7052 }
7053
7054 if (isCI())
7055 return true;
7056
7057 if (isSI() || isGFX10Plus()) {
7058 // No flat_scr on SI.
7059 // On GFX10Plus flat scratch is not a valid register operand and can only be
7060 // accessed with s_setreg/s_getreg.
7061 switch (Reg.id()) {
7062 case FLAT_SCR:
7063 case FLAT_SCR_LO:
7064 case FLAT_SCR_HI:
7065 return false;
7066 default:
7067 return true;
7068 }
7069 }
7070
7071 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
7072 // SI/CI have.
7073 if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
7074 return hasSGPR102_SGPR103();
7075
7076 return true;
7077}
7078
7079ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
7080 StringRef Mnemonic,
7081 OperandMode Mode) {
7082 ParseStatus Res = parseVOPD(Operands);
7083 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
7084 return Res;
7085
7086 // Try to parse with a custom parser
7087 Res = MatchOperandParserImpl(Operands, Mnemonic);
7088
7089 // If we successfully parsed the operand or if there as an error parsing,
7090 // we are done.
7091 //
7092 // If we are parsing after we reach EndOfStatement then this means we
7093 // are appending default values to the Operands list. This is only done
7094 // by custom parser, so we shouldn't continue on to the generic parsing.
7095 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
7096 return Res;
7097
7098 SMLoc RBraceLoc;
7099 SMLoc LBraceLoc = getLoc();
7100 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
7101 unsigned Prefix = Operands.size();
7102
7103 for (;;) {
7104 auto Loc = getLoc();
7105 Res = parseReg(Operands);
7106 if (Res.isNoMatch())
7107 Error(Loc, "expected a register");
7108 if (!Res.isSuccess())
7109 return ParseStatus::Failure;
7110
7111 RBraceLoc = getLoc();
7112 if (trySkipToken(AsmToken::RBrac))
7113 break;
7114
7115 if (!skipToken(AsmToken::Comma,
7116 "expected a comma or a closing square bracket"))
7117 return ParseStatus::Failure;
7118 }
7119
7120 if (Operands.size() - Prefix > 1) {
7121 Operands.insert(Operands.begin() + Prefix,
7122 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
7123 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
7124 }
7125
7126 return ParseStatus::Success;
7127 }
7128
7129 return parseRegOrImm(Operands);
7130}
7131
7132StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
7133 // Clear any forced encodings from the previous instruction.
7134 setForcedEncodingSize(0);
7135 setForcedDPP(false);
7136 setForcedSDWA(false);
7137
7138 if (Name.consume_back("_e64_dpp")) {
7139 setForcedDPP(true);
7140 setForcedEncodingSize(64);
7141 return Name;
7142 }
7143 if (Name.consume_back("_e64")) {
7144 setForcedEncodingSize(64);
7145 return Name;
7146 }
7147 if (Name.consume_back("_e32")) {
7148 setForcedEncodingSize(32);
7149 return Name;
7150 }
7151 if (Name.consume_back("_dpp")) {
7152 setForcedDPP(true);
7153 return Name;
7154 }
7155 if (Name.consume_back("_sdwa")) {
7156 setForcedSDWA(true);
7157 return Name;
7158 }
7159 return Name;
7160}
7161
7162static void applyMnemonicAliases(StringRef &Mnemonic,
7163 const FeatureBitset &Features,
7164 unsigned VariantID);
7165
7166bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
7167 StringRef Name, SMLoc NameLoc,
7168 OperandVector &Operands) {
7169 // Add the instruction mnemonic
7170 Name = parseMnemonicSuffix(Name);
7171
7172 // If the target architecture uses MnemonicAlias, call it here to parse
7173 // operands correctly.
7174 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
7175
7176 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
7177
7178 bool IsMIMG = Name.starts_with("image_");
7179
7180 while (!trySkipToken(AsmToken::EndOfStatement)) {
7181 OperandMode Mode = OperandMode_Default;
7182 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
7183 Mode = OperandMode_NSA;
7184 ParseStatus Res = parseOperand(Operands, Name, Mode);
7185
7186 if (!Res.isSuccess()) {
7187 checkUnsupportedInstruction(Name, NameLoc);
7188 if (!Parser.hasPendingError()) {
7189 // FIXME: use real operand location rather than the current location.
7190 StringRef Msg = Res.isFailure() ? "failed parsing operand."
7191 : "not a valid operand.";
7192 Error(getLoc(), Msg);
7193 }
7194 while (!trySkipToken(AsmToken::EndOfStatement)) {
7195 lex();
7196 }
7197 return true;
7198 }
7199
7200 // Eat the comma or space if there is one.
7201 trySkipToken(AsmToken::Comma);
7202 }
7203
7204 return false;
7205}
7206
7207//===----------------------------------------------------------------------===//
7208// Utility functions
7209//===----------------------------------------------------------------------===//
7210
7211ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
7212 OperandVector &Operands) {
7213 SMLoc S = getLoc();
7214 if (!trySkipId(Name))
7215 return ParseStatus::NoMatch;
7216
7217 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
7218 return ParseStatus::Success;
7219}
7220
7221ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
7222 int64_t &IntVal) {
7223
7224 if (!trySkipId(Prefix, AsmToken::Colon))
7225 return ParseStatus::NoMatch;
7226
7228}
7229
7230ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
7231 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7232 std::function<bool(int64_t &)> ConvertResult) {
7233 SMLoc S = getLoc();
7234 int64_t Value = 0;
7235
7236 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
7237 if (!Res.isSuccess())
7238 return Res;
7239
7240 if (ConvertResult && !ConvertResult(Value)) {
7241 Error(S, "invalid " + StringRef(Prefix) + " value.");
7242 }
7243
7244 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
7245 return ParseStatus::Success;
7246}
7247
7248ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7249 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7250 bool (*ConvertResult)(int64_t &)) {
7251 SMLoc S = getLoc();
7252 if (!trySkipId(Prefix, AsmToken::Colon))
7253 return ParseStatus::NoMatch;
7254
7255 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
7256 return ParseStatus::Failure;
7257
7258 unsigned Val = 0;
7259 const unsigned MaxSize = 4;
7260
7261 // FIXME: How to verify the number of elements matches the number of src
7262 // operands?
7263 for (int I = 0; ; ++I) {
7264 int64_t Op;
7265 SMLoc Loc = getLoc();
7266 if (!parseExpr(Op))
7267 return ParseStatus::Failure;
7268
7269 if (Op != 0 && Op != 1)
7270 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
7271
7272 Val |= (Op << I);
7273
7274 if (trySkipToken(AsmToken::RBrac))
7275 break;
7276
7277 if (I + 1 == MaxSize)
7278 return Error(getLoc(), "expected a closing square bracket");
7279
7280 if (!skipToken(AsmToken::Comma, "expected a comma"))
7281 return ParseStatus::Failure;
7282 }
7283
7284 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
7285 return ParseStatus::Success;
7286}
7287
7288ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7289 OperandVector &Operands,
7290 AMDGPUOperand::ImmTy ImmTy,
7291 bool IgnoreNegative) {
7292 int64_t Bit;
7293 SMLoc S = getLoc();
7294
7295 if (trySkipId(Name)) {
7296 Bit = 1;
7297 } else if (trySkipId("no", Name)) {
7298 if (IgnoreNegative)
7299 return ParseStatus::Success;
7300 Bit = 0;
7301 } else {
7302 return ParseStatus::NoMatch;
7303 }
7304
7305 if (Name == "r128" && !hasMIMG_R128())
7306 return Error(S, "r128 modifier is not supported on this GPU");
7307 if (Name == "a16" && !hasA16())
7308 return Error(S, "a16 modifier is not supported on this GPU");
7309
7310 if (Bit == 0 && Name == "gds") {
7311 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7312 if (Mnemo.starts_with("ds_gws"))
7313 return Error(S, "nogds is not allowed");
7314 }
7315
7316 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7317 ImmTy = AMDGPUOperand::ImmTyR128A16;
7318
7319 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
7320 return ParseStatus::Success;
7321}
7322
7323unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7324 bool &Disabling) const {
7325 Disabling = Id.consume_front("no");
7326
7327 if (isGFX940() && !Mnemo.starts_with("s_")) {
7328 return StringSwitch<unsigned>(Id)
7329 .Case("nt", AMDGPU::CPol::NT)
7330 .Case("sc0", AMDGPU::CPol::SC0)
7331 .Case("sc1", AMDGPU::CPol::SC1)
7332 .Default(0);
7333 }
7334
7335 return StringSwitch<unsigned>(Id)
7336 .Case("dlc", AMDGPU::CPol::DLC)
7337 .Case("glc", AMDGPU::CPol::GLC)
7338 .Case("scc", AMDGPU::CPol::SCC)
7339 .Case("slc", AMDGPU::CPol::SLC)
7340 .Default(0);
7341}
7342
7343ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
7344 if (isGFX12Plus()) {
7345 SMLoc StringLoc = getLoc();
7346
7347 int64_t CPolVal = 0;
7348 ParseStatus ResTH = ParseStatus::NoMatch;
7349 ParseStatus ResScope = ParseStatus::NoMatch;
7350 ParseStatus ResNV = ParseStatus::NoMatch;
7351 ParseStatus ResScal = ParseStatus::NoMatch;
7352
7353 for (;;) {
7354 if (ResTH.isNoMatch()) {
7355 int64_t TH;
7356 ResTH = parseTH(Operands, TH);
7357 if (ResTH.isFailure())
7358 return ResTH;
7359 if (ResTH.isSuccess()) {
7360 CPolVal |= TH;
7361 continue;
7362 }
7363 }
7364
7365 if (ResScope.isNoMatch()) {
7366 int64_t Scope;
7367 ResScope = parseScope(Operands, Scope);
7368 if (ResScope.isFailure())
7369 return ResScope;
7370 if (ResScope.isSuccess()) {
7371 CPolVal |= Scope;
7372 continue;
7373 }
7374 }
7375
7376 // NV bit exists on GFX12+, but does something starting from GFX1250.
7377 // Allow parsing on all GFX12 and fail on validation for better
7378 // diagnostics.
7379 if (ResNV.isNoMatch()) {
7380 if (trySkipId("nv")) {
7381 ResNV = ParseStatus::Success;
7382 CPolVal |= CPol::NV;
7383 continue;
7384 } else if (trySkipId("no", "nv")) {
7385 ResNV = ParseStatus::Success;
7386 continue;
7387 }
7388 }
7389
7390 if (ResScal.isNoMatch()) {
7391 if (trySkipId("scale_offset")) {
7392 ResScal = ParseStatus::Success;
7393 CPolVal |= CPol::SCAL;
7394 continue;
7395 } else if (trySkipId("no", "scale_offset")) {
7396 ResScal = ParseStatus::Success;
7397 continue;
7398 }
7399 }
7400
7401 break;
7402 }
7403
7404 if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
7405 ResScal.isNoMatch())
7406 return ParseStatus::NoMatch;
7407
7408 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
7409 AMDGPUOperand::ImmTyCPol));
7410 return ParseStatus::Success;
7411 }
7412
7413 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7414 SMLoc OpLoc = getLoc();
7415 unsigned Enabled = 0, Seen = 0;
7416 for (;;) {
7417 SMLoc S = getLoc();
7418 bool Disabling;
7419 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7420 if (!CPol)
7421 break;
7422
7423 lex();
7424
7425 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
7426 return Error(S, "dlc modifier is not supported on this GPU");
7427
7428 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
7429 return Error(S, "scc modifier is not supported on this GPU");
7430
7431 if (Seen & CPol)
7432 return Error(S, "duplicate cache policy modifier");
7433
7434 if (!Disabling)
7435 Enabled |= CPol;
7436
7437 Seen |= CPol;
7438 }
7439
7440 if (!Seen)
7441 return ParseStatus::NoMatch;
7442
7443 Operands.push_back(
7444 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7445 return ParseStatus::Success;
7446}
7447
7448ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7449 int64_t &Scope) {
7450 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7452
7453 ParseStatus Res = parseStringOrIntWithPrefix(
7454 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7455 Scope);
7456
7457 if (Res.isSuccess())
7458 Scope = Scopes[Scope];
7459
7460 return Res;
7461}
7462
// Parses a `th:<identifier>` modifier into TH.
//
// TH is first set to AMDGPU::CPol::TH_RT. If parseStringWithPrefix("th", ...)
// does not succeed, its status is returned unchanged. Otherwise the
// identifier is classified by its TH_ATOMIC_/TH_LOAD_/TH_STORE_ prefix, the
// remainder is looked up in one of two StringSwitch tables, and the result is
// OR-ed into TH. Unrecognised names are diagnosed with "invalid th value".
//
// NOTE(review): this text comes from a numbered source listing in which
// listing lines 7473, 7478, 7480, 7482, 7488, 7491, 7498 and 7501 are absent.
// They presumably hold the statements that assign or adjust TH in each branch
// and the condition selecting the first StringSwitch; confirm against the
// upstream file.
7463ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
7464 TH = AMDGPU::CPol::TH_RT; // default
7465
7466 StringRef Value;
7467 SMLoc StringLoc;
7468 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
7469 if (!Res.isSuccess())
7470 return Res;
7471
// Classify the identifier. consume_front() strips the matched type prefix so
// that only the suffix remains in Value for the lookups below.
// NOTE(review): the body of the TH_DEFAULT branch (listing line 7473) is
// missing here.
7472 if (Value == "TH_DEFAULT")
7474 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
7475 Value == "TH_LOAD_NT_WB") {
7476 return Error(StringLoc, "invalid th value");
7477 } else if (Value.consume_front("TH_ATOMIC_")) {
7479 } else if (Value.consume_front("TH_LOAD_")) {
7481 } else if (Value.consume_front("TH_STORE_")) {
7483 } else {
7484 return Error(StringLoc, "invalid th value");
7485 }
7486
// NOTE(review): the statement executed for "BYPASS" (listing line 7488) is
// missing here.
7487 if (Value == "BYPASS")
7489
7490 if (TH != 0) {
// NOTE(review): the condition choosing between the two tables (listing line
// 7491) is missing; the first table uses TH_ATOMIC_* values.
7492 TH |= StringSwitch<int64_t>(Value)
7493 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7494 .Case("RT", AMDGPU::CPol::TH_RT)
7495 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7496 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
7497 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
7499 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
7500 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
7502 .Default(0xffffffff);
7503 else
7504 TH |= StringSwitch<int64_t>(Value)
7505 .Case("RT", AMDGPU::CPol::TH_RT)
7506 .Case("NT", AMDGPU::CPol::TH_NT)
7507 .Case("HT", AMDGPU::CPol::TH_HT)
7508 .Case("LU", AMDGPU::CPol::TH_LU)
7509 .Case("WB", AMDGPU::CPol::TH_WB)
7510 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
7511 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
7512 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
7513 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
7514 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
7515 .Default(0xffffffff);
7516 }
7517
// 0xffffffff is the Default() of both tables above, i.e. the marker for a
// suffix that matched no Case.
7518 if (TH == 0xffffffff)
7519 return Error(StringLoc, "invalid th value");
7520
7521 return ParseStatus::Success;
7522}
7523
7524static void
7526 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7527 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7528 std::optional<unsigned> InsertAt = std::nullopt) {
7529 auto i = OptionalIdx.find(ImmT);
7530 if (i != OptionalIdx.end()) {
7531 unsigned Idx = i->second;
7532 const AMDGPUOperand &Op =
7533 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7534 if (InsertAt)
7535 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
7536 else
7537 Op.addImmOperands(Inst, 1);
7538 } else {
7539 if (InsertAt.has_value())
7540 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
7541 else
7543 }
7544}
7545
7546ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7547 StringRef &Value,
7548 SMLoc &StringLoc) {
7549 if (!trySkipId(Prefix, AsmToken::Colon))
7550 return ParseStatus::NoMatch;
7551
7552 StringLoc = getLoc();
7553 return parseId(Value, "expected an identifier") ? ParseStatus::Success
7555}
7556
7557ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7558 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7559 int64_t &IntVal) {
7560 if (!trySkipId(Name, AsmToken::Colon))
7561 return ParseStatus::NoMatch;
7562
7563 SMLoc StringLoc = getLoc();
7564
7565 StringRef Value;
7566 if (isToken(AsmToken::Identifier)) {
7567 Value = getTokenStr();
7568 lex();
7569
7570 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7571 if (Value == Ids[IntVal])
7572 break;
7573 } else if (!parseExpr(IntVal))
7574 return ParseStatus::Failure;
7575
7576 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7577 return Error(StringLoc, "invalid " + Twine(Name) + " value");
7578
7579 return ParseStatus::Success;
7580}
7581
7582ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7583 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7584 AMDGPUOperand::ImmTy Type) {
7585 SMLoc S = getLoc();
7586 int64_t IntVal;
7587
7588 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7589 if (Res.isSuccess())
7590 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7591
7592 return Res;
7593}
7594
7595//===----------------------------------------------------------------------===//
7596// MTBUF format
7597//===----------------------------------------------------------------------===//
7598
7599bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7600 int64_t MaxVal,
7601 int64_t &Fmt) {
7602 int64_t Val;
7603 SMLoc Loc = getLoc();
7604
7605 auto Res = parseIntWithPrefix(Pref, Val);
7606 if (Res.isFailure())
7607 return false;
7608 if (Res.isNoMatch())
7609 return true;
7610
7611 if (Val < 0 || Val > MaxVal) {
7612 Error(Loc, Twine("out of range ", StringRef(Pref)));
7613 return false;
7614 }
7615
7616 Fmt = Val;
7617 return true;
7618}
7619
7620ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7621 AMDGPUOperand::ImmTy ImmTy) {
7622 const char *Pref = "index_key";
7623 int64_t ImmVal = 0;
7624 SMLoc Loc = getLoc();
7625 auto Res = parseIntWithPrefix(Pref, ImmVal);
7626 if (!Res.isSuccess())
7627 return Res;
7628
7629 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7630 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7631 (ImmVal < 0 || ImmVal > 1))
7632 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7633
7634 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7635 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7636
7637 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7638 return ParseStatus::Success;
7639}
7640
7641ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
7642 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7643}
7644
7645ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
7646 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7647}
7648
7649ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
7650 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7651}
7652
7653ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
7654 StringRef Name,
7655 AMDGPUOperand::ImmTy Type) {
7656 return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixFmt,
7657 Type);
7658}
7659
7660ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
7661 return tryParseMatrixFMT(Operands, "matrix_a_fmt",
7662 AMDGPUOperand::ImmTyMatrixAFMT);
7663}
7664
7665ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
7666 return tryParseMatrixFMT(Operands, "matrix_b_fmt",
7667 AMDGPUOperand::ImmTyMatrixBFMT);
7668}
7669
7670ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
7671 StringRef Name,
7672 AMDGPUOperand::ImmTy Type) {
7673 return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixScale,
7674 Type);
7675}
7676
7677ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
7678 return tryParseMatrixScale(Operands, "matrix_a_scale",
7679 AMDGPUOperand::ImmTyMatrixAScale);
7680}
7681
7682ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
7683 return tryParseMatrixScale(Operands, "matrix_b_scale",
7684 AMDGPUOperand::ImmTyMatrixBScale);
7685}
7686
7687ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
7688 StringRef Name,
7689 AMDGPUOperand::ImmTy Type) {
7690 return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixScaleFmt,
7691 Type);
7692}
7693
7694ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
7695 return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
7696 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7697}
7698
7699ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
7700 return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
7701 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7702}
7703
7704// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7705// values to live in a joint format operand in the MCInst encoding.
7706ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
7707 using namespace llvm::AMDGPU::MTBUFFormat;
7708
7709 int64_t Dfmt = DFMT_UNDEF;
7710 int64_t Nfmt = NFMT_UNDEF;
7711
7712 // dfmt and nfmt can appear in either order, and each is optional.
7713 for (int I = 0; I < 2; ++I) {
7714 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
7715 return ParseStatus::Failure;
7716
7717 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
7718 return ParseStatus::Failure;
7719
7720 // Skip optional comma between dfmt/nfmt
7721 // but guard against 2 commas following each other.
7722 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7723 !peekToken().is(AsmToken::Comma)) {
7724 trySkipToken(AsmToken::Comma);
7725 }
7726 }
7727
7728 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7729 return ParseStatus::NoMatch;
7730
7731 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7732 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7733
7734 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7735 return ParseStatus::Success;
7736}
7737
7738ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7739 using namespace llvm::AMDGPU::MTBUFFormat;
7740
7741 int64_t Fmt = UFMT_UNDEF;
7742
7743 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7744 return ParseStatus::Failure;
7745
7746 if (Fmt == UFMT_UNDEF)
7747 return ParseStatus::NoMatch;
7748
7749 Format = Fmt;
7750 return ParseStatus::Success;
7751}
7752
7753bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7754 int64_t &Nfmt,
7755 StringRef FormatStr,
7756 SMLoc Loc) {
7757 using namespace llvm::AMDGPU::MTBUFFormat;
7758 int64_t Format;
7759
7760 Format = getDfmt(FormatStr);
7761 if (Format != DFMT_UNDEF) {
7762 Dfmt = Format;
7763 return true;
7764 }
7765
7766 Format = getNfmt(FormatStr, getSTI());
7767 if (Format != NFMT_UNDEF) {
7768 Nfmt = Format;
7769 return true;
7770 }
7771
7772 Error(Loc, "unsupported format");
7773 return false;
7774}
7775
7776ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7777 SMLoc FormatLoc,
7778 int64_t &Format) {
7779 using namespace llvm::AMDGPU::MTBUFFormat;
7780
7781 int64_t Dfmt = DFMT_UNDEF;
7782 int64_t Nfmt = NFMT_UNDEF;
7783 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7784 return ParseStatus::Failure;
7785
7786 if (trySkipToken(AsmToken::Comma)) {
7787 StringRef Str;
7788 SMLoc Loc = getLoc();
7789 if (!parseId(Str, "expected a format string") ||
7790 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7791 return ParseStatus::Failure;
7792 if (Dfmt == DFMT_UNDEF)
7793 return Error(Loc, "duplicate numeric format");
7794 if (Nfmt == NFMT_UNDEF)
7795 return Error(Loc, "duplicate data format");
7796 }
7797
7798 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7799 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7800
7801 if (isGFX10Plus()) {
7802 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
7803 if (Ufmt == UFMT_UNDEF)
7804 return Error(FormatLoc, "unsupported format");
7805 Format = Ufmt;
7806 } else {
7807 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7808 }
7809
7810 return ParseStatus::Success;
7811}
7812
7813ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7814 SMLoc Loc,
7815 int64_t &Format) {
7816 using namespace llvm::AMDGPU::MTBUFFormat;
7817
7818 auto Id = getUnifiedFormat(FormatStr, getSTI());
7819 if (Id == UFMT_UNDEF)
7820 return ParseStatus::NoMatch;
7821
7822 if (!isGFX10Plus())
7823 return Error(Loc, "unified format is not supported on this GPU");
7824
7825 Format = Id;
7826 return ParseStatus::Success;
7827}
7828
7829ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7830 using namespace llvm::AMDGPU::MTBUFFormat;
7831 SMLoc Loc = getLoc();
7832
7833 if (!parseExpr(Format))
7834 return ParseStatus::Failure;
7835 if (!isValidFormatEncoding(Format, getSTI()))
7836 return Error(Loc, "out of range format");
7837
7838 return ParseStatus::Success;
7839}
7840
7841ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7842 using namespace llvm::AMDGPU::MTBUFFormat;
7843
7844 if (!trySkipId("format", AsmToken::Colon))
7845 return ParseStatus::NoMatch;
7846
7847 if (trySkipToken(AsmToken::LBrac)) {
7848 StringRef FormatStr;
7849 SMLoc Loc = getLoc();
7850 if (!parseId(FormatStr, "expected a format string"))
7851 return ParseStatus::Failure;
7852
7853 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7854 if (Res.isNoMatch())
7855 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7856 if (!Res.isSuccess())
7857 return Res;
7858
7859 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7860 return ParseStatus::Failure;
7861
7862 return ParseStatus::Success;
7863 }
7864
7865 return parseNumericFormat(Format);
7866}
7867
7868ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
7869 using namespace llvm::AMDGPU::MTBUFFormat;
7870
7871 int64_t Format = getDefaultFormatEncoding(getSTI());
7872 ParseStatus Res;
7873 SMLoc Loc = getLoc();
7874
7875 // Parse legacy format syntax.
7876 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
7877 if (Res.isFailure())
7878 return Res;
7879
7880 bool FormatFound = Res.isSuccess();
7881
7882 Operands.push_back(
7883 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7884
7885 if (FormatFound)
7886 trySkipToken(AsmToken::Comma);
7887
7888 if (isToken(AsmToken::EndOfStatement)) {
7889 // We are expecting an soffset operand,
7890 // but let matcher handle the error.
7891 return ParseStatus::Success;
7892 }
7893
7894 // Parse soffset.
7895 Res = parseRegOrImm(Operands);
7896 if (!Res.isSuccess())
7897 return Res;
7898
7899 trySkipToken(AsmToken::Comma);
7900
7901 if (!FormatFound) {
7902 Res = parseSymbolicOrNumericFormat(Format);
7903 if (Res.isFailure())
7904 return Res;
7905 if (Res.isSuccess()) {
7906 auto Size = Operands.size();
7907 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7908 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7909 Op.setImm(Format);
7910 }
7911 return ParseStatus::Success;
7912 }
7913
7914 if (isId("format") && peekToken().is(AsmToken::Colon))
7915 return Error(getLoc(), "duplicate format");
7916 return ParseStatus::Success;
7917}
7918
7919ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7920 ParseStatus Res =
7921 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7922 if (Res.isNoMatch()) {
7923 Res = parseIntWithPrefix("inst_offset", Operands,
7924 AMDGPUOperand::ImmTyInstOffset);
7925 }
7926 return Res;
7927}
7928
7929ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7930 ParseStatus Res =
7931 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7932 if (Res.isNoMatch())
7933 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7934 return Res;
7935}
7936
7937ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7938 ParseStatus Res =
7939 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7940 if (Res.isNoMatch()) {
7941 Res =
7942 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7943 }
7944 return Res;
7945}
7946
7947//===----------------------------------------------------------------------===//
7948// Exp
7949//===----------------------------------------------------------------------===//
7950
7951void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7952 OptionalImmIndexMap OptionalIdx;
7953
7954 unsigned OperandIdx[4];
7955 unsigned EnMask = 0;
7956 int SrcIdx = 0;
7957
7958 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7959 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7960
7961 // Add the register arguments
7962 if (Op.isReg()) {
7963 assert(SrcIdx < 4);
7964 OperandIdx[SrcIdx] = Inst.size();
7965 Op.addRegOperands(Inst, 1);
7966 ++SrcIdx;
7967 continue;
7968 }
7969
7970 if (Op.isOff()) {
7971 assert(SrcIdx < 4);
7972 OperandIdx[SrcIdx] = Inst.size();
7973 Inst.addOperand(MCOperand::createReg(MCRegister()));
7974 ++SrcIdx;
7975 continue;
7976 }
7977
7978 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7979 Op.addImmOperands(Inst, 1);
7980 continue;
7981 }
7982
7983 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7984 continue;
7985
7986 // Handle optional arguments
7987 OptionalIdx[Op.getImmTy()] = i;
7988 }
7989
7990 assert(SrcIdx == 4);
7991
7992 bool Compr = false;
7993 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7994 Compr = true;
7995 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7996 Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
7997 Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
7998 }
7999
8000 for (auto i = 0; i < SrcIdx; ++i) {
8001 if (Inst.getOperand(OperandIdx[i]).getReg()) {
8002 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
8003 }
8004 }
8005
8006 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
8007 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
8008
8009 Inst.addOperand(MCOperand::createImm(EnMask));
8010}
8011
8012//===----------------------------------------------------------------------===//
8013// s_waitcnt
8014//===----------------------------------------------------------------------===//
8015
8016static bool
8018 const AMDGPU::IsaVersion ISA,
8019 int64_t &IntVal,
8020 int64_t CntVal,
8021 bool Saturate,
8022 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
8023 unsigned (*decode)(const IsaVersion &Version, unsigned))
8024{
8025 bool Failed = false;
8026
8027 IntVal = encode(ISA, IntVal, CntVal);
8028 if (CntVal != decode(ISA, IntVal)) {
8029 if (Saturate) {
8030 IntVal = encode(ISA, IntVal, -1);
8031 } else {
8032 Failed = true;
8033 }
8034 }
8035 return Failed;
8036}
8037
8038bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
8039
8040 SMLoc CntLoc = getLoc();
8041 StringRef CntName = getTokenStr();
8042
8043 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
8044 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
8045 return false;
8046
8047 int64_t CntVal;
8048 SMLoc ValLoc = getLoc();
8049 if (!parseExpr(CntVal))
8050 return false;
8051
8052 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
8053
8054 bool Failed = true;
8055 bool Sat = CntName.ends_with("_sat");
8056
8057 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
8058 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
8059 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
8060 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
8061 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
8062 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
8063 } else {
8064 Error(CntLoc, "invalid counter name " + CntName);
8065 return false;
8066 }
8067
8068 if (Failed) {
8069 Error(ValLoc, "too large value for " + CntName);
8070 return false;
8071 }
8072
8073 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8074 return false;
8075
8076 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
8077 if (isToken(AsmToken::EndOfStatement)) {
8078 Error(getLoc(), "expected a counter name");
8079 return false;
8080 }
8081 }
8082
8083 return true;
8084}
8085
8086ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
8087 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
8088 int64_t Waitcnt = getWaitcntBitMask(ISA);
8089 SMLoc S = getLoc();
8090
8091 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8092 while (!isToken(AsmToken::EndOfStatement)) {
8093 if (!parseCnt(Waitcnt))
8094 return ParseStatus::Failure;
8095 }
8096 } else {
8097 if (!parseExpr(Waitcnt))
8098 return ParseStatus::Failure;
8099 }
8100
8101 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
8102 return ParseStatus::Success;
8103}
8104
8105bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
8106 SMLoc FieldLoc = getLoc();
8107 StringRef FieldName = getTokenStr();
8108 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
8109 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
8110 return false;
8111
8112 SMLoc ValueLoc = getLoc();
8113 StringRef ValueName = getTokenStr();
8114 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
8115 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
8116 return false;
8117
8118 unsigned Shift;
8119 if (FieldName == "instid0") {
8120 Shift = 0;
8121 } else if (FieldName == "instskip") {
8122 Shift = 4;
8123 } else if (FieldName == "instid1") {
8124 Shift = 7;
8125 } else {
8126 Error(FieldLoc, "invalid field name " + FieldName);
8127 return false;
8128 }
8129
8130 int Value;
8131 if (Shift == 4) {
8132 // Parse values for instskip.
8133 Value = StringSwitch<int>(ValueName)
8134 .Case("SAME", 0)
8135 .Case("NEXT", 1)
8136 .Case("SKIP_1", 2)
8137 .Case("SKIP_2", 3)
8138 .Case("SKIP_3", 4)
8139 .Case("SKIP_4", 5)
8140 .Default(-1);
8141 } else {
8142 // Parse values for instid0 and instid1.
8143 Value = StringSwitch<int>(ValueName)
8144 .Case("NO_DEP", 0)
8145 .Case("VALU_DEP_1", 1)
8146 .Case("VALU_DEP_2", 2)
8147 .Case("VALU_DEP_3", 3)
8148 .Case("VALU_DEP_4", 4)
8149 .Case("TRANS32_DEP_1", 5)
8150 .Case("TRANS32_DEP_2", 6)
8151 .Case("TRANS32_DEP_3", 7)
8152 .Case("FMA_ACCUM_CYCLE_1", 8)
8153 .Case("SALU_CYCLE_1", 9)
8154 .Case("SALU_CYCLE_2", 10)
8155 .Case("SALU_CYCLE_3", 11)
8156 .Default(-1);
8157 }
8158 if (Value < 0) {
8159 Error(ValueLoc, "invalid value name " + ValueName);
8160 return false;
8161 }
8162
8163 Delay |= Value << Shift;
8164 return true;
8165}
8166
8167ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
8168 int64_t Delay = 0;
8169 SMLoc S = getLoc();
8170
8171 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8172 do {
8173 if (!parseDelay(Delay))
8174 return ParseStatus::Failure;
8175 } while (trySkipToken(AsmToken::Pipe));
8176 } else {
8177 if (!parseExpr(Delay))
8178 return ParseStatus::Failure;
8179 }
8180
8181 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
8182 return ParseStatus::Success;
8183}
8184
8185bool
8186AMDGPUOperand::isSWaitCnt() const {
8187 return isImm();
8188}
8189
8190bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
8191
8192//===----------------------------------------------------------------------===//
8193// DepCtr
8194//===----------------------------------------------------------------------===//
8195
8196void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
8197 StringRef DepCtrName) {
8198 switch (ErrorId) {
8199 case OPR_ID_UNKNOWN:
8200 Error(Loc, Twine("invalid counter name ", DepCtrName));
8201 return;
8202 case OPR_ID_UNSUPPORTED:
8203 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
8204 return;
8205 case OPR_ID_DUPLICATE:
8206 Error(Loc, Twine("duplicate counter name ", DepCtrName));
8207 return;
8208 case OPR_VAL_INVALID:
8209 Error(Loc, Twine("invalid value for ", DepCtrName));
8210 return;
8211 default:
8212 assert(false);
8213 }
8214}
8215
8216bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
8217
8218 using namespace llvm::AMDGPU::DepCtr;
8219
8220 SMLoc DepCtrLoc = getLoc();
8221 StringRef DepCtrName = getTokenStr();
8222
8223 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
8224 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
8225 return false;
8226
8227 int64_t ExprVal;
8228 if (!parseExpr(ExprVal))
8229 return false;
8230
8231 unsigned PrevOprMask = UsedOprMask;
8232 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
8233
8234 if (CntVal < 0) {
8235 depCtrError(DepCtrLoc, CntVal, DepCtrName);
8236 return false;
8237 }
8238
8239 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8240 return false;
8241
8242 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
8243 if (isToken(AsmToken::EndOfStatement)) {
8244 Error(getLoc(), "expected a counter name");
8245 return false;
8246 }
8247 }
8248
8249 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
8250 DepCtr = (DepCtr & ~CntValMask) | CntVal;
8251 return true;
8252}
8253
8254ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
8255 using namespace llvm::AMDGPU::DepCtr;
8256
8257 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
8258 SMLoc Loc = getLoc();
8259
8260 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8261 unsigned UsedOprMask = 0;
8262 while (!isToken(AsmToken::EndOfStatement)) {
8263 if (!parseDepCtr(DepCtr, UsedOprMask))
8264 return ParseStatus::Failure;
8265 }
8266 } else {
8267 if (!parseExpr(DepCtr))
8268 return ParseStatus::Failure;
8269 }
8270
8271 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
8272 return ParseStatus::Success;
8273}
8274
8275bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8276
8277//===----------------------------------------------------------------------===//
8278// hwreg
8279//===----------------------------------------------------------------------===//
8280
8281ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8282 OperandInfoTy &Offset,
8283 OperandInfoTy &Width) {
8284 using namespace llvm::AMDGPU::Hwreg;
8285
8286 if (!trySkipId("hwreg", AsmToken::LParen))
8287 return ParseStatus::NoMatch;
8288
8289 // The register may be specified by name or using a numeric code
8290 HwReg.Loc = getLoc();
8291 if (isToken(AsmToken::Identifier) &&
8292 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8293 HwReg.IsSymbolic = true;
8294 lex(); // skip register name
8295 } else if (!parseExpr(HwReg.Val, "a register name")) {
8296 return ParseStatus::Failure;
8297 }
8298
8299 if (trySkipToken(AsmToken::RParen))
8300 return ParseStatus::Success;
8301
8302 // parse optional params
8303 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
8304 return ParseStatus::Failure;
8305
8306 Offset.Loc = getLoc();
8307 if (!parseExpr(Offset.Val))
8308 return ParseStatus::Failure;
8309
8310 if (!skipToken(AsmToken::Comma, "expected a comma"))
8311 return ParseStatus::Failure;
8312
8313 Width.Loc = getLoc();
8314 if (!parseExpr(Width.Val) ||
8315 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8316 return ParseStatus::Failure;
8317
8318 return ParseStatus::Success;
8319}
8320
8321ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8322 using namespace llvm::AMDGPU::Hwreg;
8323
8324 int64_t ImmVal = 0;
8325 SMLoc Loc = getLoc();
8326
8327 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8328 HwregId::Default);
8329 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8330 HwregOffset::Default);
8331 struct : StructuredOpField {
8332 using StructuredOpField::StructuredOpField;
8333 bool validate(AMDGPUAsmParser &Parser) const override {
8334 if (!isUIntN(Width, Val - 1))
8335 return Error(Parser, "only values from 1 to 32 are legal");
8336 return true;
8337 }
8338 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8339 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
8340
8341 if (Res.isNoMatch())
8342 Res = parseHwregFunc(HwReg, Offset, Width);
8343
8344 if (Res.isSuccess()) {
8345 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8346 return ParseStatus::Failure;
8347 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8348 }
8349
8350 if (Res.isNoMatch() &&
8351 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8353
8354 if (!Res.isSuccess())
8355 return ParseStatus::Failure;
8356
8357 if (!isUInt<16>(ImmVal))
8358 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8359 Operands.push_back(
8360 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8361 return ParseStatus::Success;
8362}
8363
8364bool AMDGPUOperand::isHwreg() const {
8365 return isImmTy(ImmTyHwreg);
8366}
8367
8368//===----------------------------------------------------------------------===//
8369// sendmsg
8370//===----------------------------------------------------------------------===//
8371
8372bool
8373AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8374 OperandInfoTy &Op,
8375 OperandInfoTy &Stream) {
8376 using namespace llvm::AMDGPU::SendMsg;
8377
8378 Msg.Loc = getLoc();
8379 if (isToken(AsmToken::Identifier) &&
8380 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8381 Msg.IsSymbolic = true;
8382 lex(); // skip message name
8383 } else if (!parseExpr(Msg.Val, "a message name")) {
8384 return false;
8385 }
8386
8387 if (trySkipToken(AsmToken::Comma)) {
8388 Op.IsDefined = true;
8389 Op.Loc = getLoc();
8390 if (isToken(AsmToken::Identifier) &&
8391 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8393 lex(); // skip operation name
8394 } else if (!parseExpr(Op.Val, "an operation name")) {
8395 return false;
8396 }
8397
8398 if (trySkipToken(AsmToken::Comma)) {
8399 Stream.IsDefined = true;
8400 Stream.Loc = getLoc();
8401 if (!parseExpr(Stream.Val))
8402 return false;
8403 }
8404 }
8405
8406 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
8407}
8408
8409bool
8410AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
8411 const OperandInfoTy &Op,
8412 const OperandInfoTy &Stream) {
8413 using namespace llvm::AMDGPU::SendMsg;
8414
8415 // Validation strictness depends on whether message is specified
8416 // in a symbolic or in a numeric form. In the latter case
8417 // only encoding possibility is checked.
8418 bool Strict = Msg.IsSymbolic;
8419
8420 if (Strict) {
8421 if (Msg.Val == OPR_ID_UNSUPPORTED) {
8422 Error(Msg.Loc, "specified message id is not supported on this GPU");
8423 return false;
8424 }
8425 } else {
8426 if (!isValidMsgId(Msg.Val, getSTI())) {
8427 Error(Msg.Loc, "invalid message id");
8428 return false;
8429 }
8430 }
8431 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
8432 if (Op.IsDefined) {
8433 Error(Op.Loc, "message does not support operations");
8434 } else {
8435 Error(Msg.Loc, "missing message operation");
8436 }
8437 return false;
8438 }
8439 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
8440 if (Op.Val == OPR_ID_UNSUPPORTED)
8441 Error(Op.Loc, "specified operation id is not supported on this GPU");
8442 else
8443 Error(Op.Loc, "invalid operation id");
8444 return false;
8445 }
8446 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
8447 Stream.IsDefined) {
8448 Error(Stream.Loc, "message operation does not support streams");
8449 return false;
8450 }
8451 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
8452 Error(Stream.Loc, "invalid message stream id");
8453 return false;
8454 }
8455 return true;
8456}
8457
8458ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
8459 using namespace llvm::AMDGPU::SendMsg;
8460
8461 int64_t ImmVal = 0;
8462 SMLoc Loc = getLoc();
8463
8464 if (trySkipId("sendmsg", AsmToken::LParen)) {
8465 OperandInfoTy Msg(OPR_ID_UNKNOWN);
8466 OperandInfoTy Op(OP_NONE_);
8467 OperandInfoTy Stream(STREAM_ID_NONE_);
8468 if (parseSendMsgBody(Msg, Op, Stream) &&
8469 validateSendMsg(Msg, Op, Stream)) {
8470 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
8471 } else {
8472 return ParseStatus::Failure;
8473 }
8474 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
8475 if (ImmVal < 0 || !isUInt<16>(ImmVal))
8476 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8477 } else {
8478 return ParseStatus::Failure;
8479 }
8480
8481 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8482 return ParseStatus::Success;
8483}
8484
8485bool AMDGPUOperand::isSendMsg() const {
8486 return isImmTy(ImmTySendMsg);
8487}
8488
8489ParseStatus AMDGPUAsmParser::parseWaitEvent(OperandVector &Operands) {
8490 using namespace llvm::AMDGPU::WaitEvent;
8491
8492 SMLoc Loc = getLoc();
8493 int64_t ImmVal = 0;
8494
8495 StructuredOpField DontWaitExportReady("dont_wait_export_ready", "bit value",
8496 1, 0);
8497 StructuredOpField ExportReady("export_ready", "bit value", 1, 0);
8498
8499 StructuredOpField *TargetBitfield =
8500 isGFX11() ? &DontWaitExportReady : &ExportReady;
8501
8502 ParseStatus Res = parseStructuredOpFields({TargetBitfield});
8503 if (Res.isNoMatch() && parseExpr(ImmVal, "structured immediate"))
8505 else if (Res.isSuccess()) {
8506 if (!validateStructuredOpFields({TargetBitfield}))
8507 return ParseStatus::Failure;
8508 ImmVal = TargetBitfield->Val;
8509 }
8510
8511 if (!Res.isSuccess())
8512 return ParseStatus::Failure;
8513
8514 if (!isUInt<16>(ImmVal))
8515 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8516
8517 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc,
8518 AMDGPUOperand::ImmTyWaitEvent));
8519 return ParseStatus::Success;
8520}
8521
8522bool AMDGPUOperand::isWaitEvent() const { return isImmTy(ImmTyWaitEvent); }
8523
8524//===----------------------------------------------------------------------===//
8525// v_interp
8526//===----------------------------------------------------------------------===//
8527
8528ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8529 StringRef Str;
8530 SMLoc S = getLoc();
8531
8532 if (!parseId(Str))
8533 return ParseStatus::NoMatch;
8534
8535 int Slot = StringSwitch<int>(Str)
8536 .Case("p10", 0)
8537 .Case("p20", 1)
8538 .Case("p0", 2)
8539 .Default(-1);
8540
8541 if (Slot == -1)
8542 return Error(S, "invalid interpolation slot");
8543
8544 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8545 AMDGPUOperand::ImmTyInterpSlot));
8546 return ParseStatus::Success;
8547}
8548
8549ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8550 StringRef Str;
8551 SMLoc S = getLoc();
8552
8553 if (!parseId(Str))
8554 return ParseStatus::NoMatch;
8555
8556 if (!Str.starts_with("attr"))
8557 return Error(S, "invalid interpolation attribute");
8558
8559 StringRef Chan = Str.take_back(2);
8560 int AttrChan = StringSwitch<int>(Chan)
8561 .Case(".x", 0)
8562 .Case(".y", 1)
8563 .Case(".z", 2)
8564 .Case(".w", 3)
8565 .Default(-1);
8566 if (AttrChan == -1)
8567 return Error(S, "invalid or missing interpolation attribute channel");
8568
8569 Str = Str.drop_back(2).drop_front(4);
8570
8571 uint8_t Attr;
8572 if (Str.getAsInteger(10, Attr))
8573 return Error(S, "invalid or missing interpolation attribute number");
8574
8575 if (Attr > 32)
8576 return Error(S, "out of bounds interpolation attribute number");
8577
8578 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
8579
8580 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
8581 AMDGPUOperand::ImmTyInterpAttr));
8582 Operands.push_back(AMDGPUOperand::CreateImm(
8583 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8584 return ParseStatus::Success;
8585}
8586
8587//===----------------------------------------------------------------------===//
8588// exp
8589//===----------------------------------------------------------------------===//
8590
8591ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8592 using namespace llvm::AMDGPU::Exp;
8593
8594 StringRef Str;
8595 SMLoc S = getLoc();
8596
8597 if (!parseId(Str))
8598 return ParseStatus::NoMatch;
8599
8600 unsigned Id = getTgtId(Str);
8601 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
8602 return Error(S, (Id == ET_INVALID)
8603 ? "invalid exp target"
8604 : "exp target is not supported on this GPU");
8605
8606 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
8607 AMDGPUOperand::ImmTyExpTgt));
8608 return ParseStatus::Success;
8609}
8610
8611//===----------------------------------------------------------------------===//
8612// parser helpers
8613//===----------------------------------------------------------------------===//
8614
8615bool
8616AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8617 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8618}
8619
8620bool
8621AMDGPUAsmParser::isId(const StringRef Id) const {
8622 return isId(getToken(), Id);
8623}
8624
8625bool
8626AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8627 return getTokenKind() == Kind;
8628}
8629
8630StringRef AMDGPUAsmParser::getId() const {
8631 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8632}
8633
8634bool
8635AMDGPUAsmParser::trySkipId(const StringRef Id) {
8636 if (isId(Id)) {
8637 lex();
8638 return true;
8639 }
8640 return false;
8641}
8642
8643bool
8644AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8645 if (isToken(AsmToken::Identifier)) {
8646 StringRef Tok = getTokenStr();
8647 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8648 lex();
8649 return true;
8650 }
8651 }
8652 return false;
8653}
8654
8655bool
8656AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8657 if (isId(Id) && peekToken().is(Kind)) {
8658 lex();
8659 lex();
8660 return true;
8661 }
8662 return false;
8663}
8664
8665bool
8666AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8667 if (isToken(Kind)) {
8668 lex();
8669 return true;
8670 }
8671 return false;
8672}
8673
8674bool
8675AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8676 const StringRef ErrMsg) {
8677 if (!trySkipToken(Kind)) {
8678 Error(getLoc(), ErrMsg);
8679 return false;
8680 }
8681 return true;
8682}
8683
8684bool
8685AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8686 SMLoc S = getLoc();
8687
8688 const MCExpr *Expr;
8689 if (Parser.parseExpression(Expr))
8690 return false;
8691
8692 if (Expr->evaluateAsAbsolute(Imm))
8693 return true;
8694
8695 if (Expected.empty()) {
8696 Error(S, "expected absolute expression");
8697 } else {
8698 Error(S, Twine("expected ", Expected) +
8699 Twine(" or an absolute expression"));
8700 }
8701 return false;
8702}
8703
8704bool
8705AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8706 SMLoc S = getLoc();
8707
8708 const MCExpr *Expr;
8709 if (Parser.parseExpression(Expr))
8710 return false;
8711
8712 int64_t IntVal;
8713 if (Expr->evaluateAsAbsolute(IntVal)) {
8714 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8715 } else {
8716 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8717 }
8718 return true;
8719}
8720
8721bool
8722AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8723 if (isToken(AsmToken::String)) {
8724 Val = getToken().getStringContents();
8725 lex();
8726 return true;
8727 }
8728 Error(getLoc(), ErrMsg);
8729 return false;
8730}
8731
8732bool
8733AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8734 if (isToken(AsmToken::Identifier)) {
8735 Val = getTokenStr();
8736 lex();
8737 return true;
8738 }
8739 if (!ErrMsg.empty())
8740 Error(getLoc(), ErrMsg);
8741 return false;
8742}
8743
8744AsmToken
8745AMDGPUAsmParser::getToken() const {
8746 return Parser.getTok();
8747}
8748
8749AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8750 return isToken(AsmToken::EndOfStatement)
8751 ? getToken()
8752 : getLexer().peekTok(ShouldSkipSpace);
8753}
8754
8755void
8756AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8757 auto TokCount = getLexer().peekTokens(Tokens);
8758
8759 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8760 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8761}
8762
8764AMDGPUAsmParser::getTokenKind() const {
8765 return getLexer().getKind();
8766}
8767
8768SMLoc
8769AMDGPUAsmParser::getLoc() const {
8770 return getToken().getLoc();
8771}
8772
8773StringRef
8774AMDGPUAsmParser::getTokenStr() const {
8775 return getToken().getString();
8776}
8777
8778void
8779AMDGPUAsmParser::lex() {
8780 Parser.Lex();
8781}
8782
8783const AMDGPUOperand &
8784AMDGPUAsmParser::findMCOperand(const OperandVector &Operands,
8785 int MCOpIdx) const {
8786 for (const auto &Op : Operands) {
8787 const AMDGPUOperand &TargetOp = static_cast<AMDGPUOperand &>(*Op);
8788 if (TargetOp.getMCOpIdx() == MCOpIdx)
8789 return TargetOp;
8790 }
8791 llvm_unreachable("no such MC operand!");
8792}
8793
8794SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8795 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8796}
8797
8798// Returns one of the given locations that comes later in the source.
8799SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8800 return a.getPointer() < b.getPointer() ? b : a;
8801}
8802
8803SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
8804 int MCOpIdx) const {
8805 return findMCOperand(Operands, MCOpIdx).getStartLoc();
8806}
8807
8808SMLoc
8809AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8810 const OperandVector &Operands) const {
8811 for (unsigned i = Operands.size() - 1; i > 0; --i) {
8812 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8813 if (Test(Op))
8814 return Op.getStartLoc();
8815 }
8816 return getInstLoc(Operands);
8817}
8818
8819SMLoc
8820AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8821 const OperandVector &Operands) const {
8822 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8823 return getOperandLoc(Test, Operands);
8824}
8825
8826ParseStatus
8827AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
8828 if (!trySkipToken(AsmToken::LCurly))
8829 return ParseStatus::NoMatch;
8830
8831 bool First = true;
8832 while (!trySkipToken(AsmToken::RCurly)) {
8833 if (!First &&
8834 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
8835 return ParseStatus::Failure;
8836
8837 StringRef Id = getTokenStr();
8838 SMLoc IdLoc = getLoc();
8839 if (!skipToken(AsmToken::Identifier, "field name expected") ||
8840 !skipToken(AsmToken::Colon, "colon expected"))
8841 return ParseStatus::Failure;
8842
8843 const auto *I =
8844 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
8845 if (I == Fields.end())
8846 return Error(IdLoc, "unknown field");
8847 if ((*I)->IsDefined)
8848 return Error(IdLoc, "duplicate field");
8849
8850 // TODO: Support symbolic values.
8851 (*I)->Loc = getLoc();
8852 if (!parseExpr((*I)->Val))
8853 return ParseStatus::Failure;
8854 (*I)->IsDefined = true;
8855
8856 First = false;
8857 }
8858 return ParseStatus::Success;
8859}
8860
8861bool AMDGPUAsmParser::validateStructuredOpFields(
8863 return all_of(Fields, [this](const StructuredOpField *F) {
8864 return F->validate(*this);
8865 });
8866}
8867
8868//===----------------------------------------------------------------------===//
8869// swizzle
8870//===----------------------------------------------------------------------===//
8871
8873static unsigned
8874encodeBitmaskPerm(const unsigned AndMask,
8875 const unsigned OrMask,
8876 const unsigned XorMask) {
8877 using namespace llvm::AMDGPU::Swizzle;
8878
8879 return BITMASK_PERM_ENC |
8880 (AndMask << BITMASK_AND_SHIFT) |
8881 (OrMask << BITMASK_OR_SHIFT) |
8882 (XorMask << BITMASK_XOR_SHIFT);
8883}
8884
8885bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8886 const unsigned MaxVal,
8887 const Twine &ErrMsg, SMLoc &Loc) {
8888 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8889 return false;
8890 }
8891 Loc = getLoc();
8892 if (!parseExpr(Op)) {
8893 return false;
8894 }
8895 if (Op < MinVal || Op > MaxVal) {
8896 Error(Loc, ErrMsg);
8897 return false;
8898 }
8899
8900 return true;
8901}
8902
8903bool
8904AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8905 const unsigned MinVal,
8906 const unsigned MaxVal,
8907 const StringRef ErrMsg) {
8908 SMLoc Loc;
8909 for (unsigned i = 0; i < OpNum; ++i) {
8910 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8911 return false;
8912 }
8913
8914 return true;
8915}
8916
8917bool
8918AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8919 using namespace llvm::AMDGPU::Swizzle;
8920
8921 int64_t Lane[LANE_NUM];
8922 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8923 "expected a 2-bit lane id")) {
8925 for (unsigned I = 0; I < LANE_NUM; ++I) {
8926 Imm |= Lane[I] << (LANE_SHIFT * I);
8927 }
8928 return true;
8929 }
8930 return false;
8931}
8932
8933bool
8934AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8935 using namespace llvm::AMDGPU::Swizzle;
8936
8937 SMLoc Loc;
8938 int64_t GroupSize;
8939 int64_t LaneIdx;
8940
8941 if (!parseSwizzleOperand(GroupSize,
8942 2, 32,
8943 "group size must be in the interval [2,32]",
8944 Loc)) {
8945 return false;
8946 }
8947 if (!isPowerOf2_64(GroupSize)) {
8948 Error(Loc, "group size must be a power of two");
8949 return false;
8950 }
8951 if (parseSwizzleOperand(LaneIdx,
8952 0, GroupSize - 1,
8953 "lane id must be in the interval [0,group size - 1]",
8954 Loc)) {
8955 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8956 return true;
8957 }
8958 return false;
8959}
8960
8961bool
8962AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8963 using namespace llvm::AMDGPU::Swizzle;
8964
8965 SMLoc Loc;
8966 int64_t GroupSize;
8967
8968 if (!parseSwizzleOperand(GroupSize,
8969 2, 32,
8970 "group size must be in the interval [2,32]",
8971 Loc)) {
8972 return false;
8973 }
8974 if (!isPowerOf2_64(GroupSize)) {
8975 Error(Loc, "group size must be a power of two");
8976 return false;
8977 }
8978
8979 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8980 return true;
8981}
8982
8983bool
8984AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8985 using namespace llvm::AMDGPU::Swizzle;
8986
8987 SMLoc Loc;
8988 int64_t GroupSize;
8989
8990 if (!parseSwizzleOperand(GroupSize,
8991 1, 16,
8992 "group size must be in the interval [1,16]",
8993 Loc)) {
8994 return false;
8995 }
8996 if (!isPowerOf2_64(GroupSize)) {
8997 Error(Loc, "group size must be a power of two");
8998 return false;
8999 }
9000
9001 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
9002 return true;
9003}
9004
9005bool
9006AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
9007 using namespace llvm::AMDGPU::Swizzle;
9008
9009 if (!skipToken(AsmToken::Comma, "expected a comma")) {
9010 return false;
9011 }
9012
9013 StringRef Ctl;
9014 SMLoc StrLoc = getLoc();
9015 if (!parseString(Ctl)) {
9016 return false;
9017 }
9018 if (Ctl.size() != BITMASK_WIDTH) {
9019 Error(StrLoc, "expected a 5-character mask");
9020 return false;
9021 }
9022
9023 unsigned AndMask = 0;
9024 unsigned OrMask = 0;
9025 unsigned XorMask = 0;
9026
9027 for (size_t i = 0; i < Ctl.size(); ++i) {
9028 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
9029 switch(Ctl[i]) {
9030 default:
9031 Error(StrLoc, "invalid mask");
9032 return false;
9033 case '0':
9034 break;
9035 case '1':
9036 OrMask |= Mask;
9037 break;
9038 case 'p':
9039 AndMask |= Mask;
9040 break;
9041 case 'i':
9042 AndMask |= Mask;
9043 XorMask |= Mask;
9044 break;
9045 }
9046 }
9047
9048 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
9049 return true;
9050}
9051
9052bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
9053 using namespace llvm::AMDGPU::Swizzle;
9054
9055 if (!AMDGPU::isGFX9Plus(getSTI())) {
9056 Error(getLoc(), "FFT mode swizzle not supported on this GPU");
9057 return false;
9058 }
9059
9060 int64_t Swizzle;
9061 SMLoc Loc;
9062 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
9063 "FFT swizzle must be in the interval [0," +
9064 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
9065 Loc))
9066 return false;
9067
9068 Imm = FFT_MODE_ENC | Swizzle;
9069 return true;
9070}
9071
9072bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
9073 using namespace llvm::AMDGPU::Swizzle;
9074
9075 if (!AMDGPU::isGFX9Plus(getSTI())) {
9076 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
9077 return false;
9078 }
9079
9080 SMLoc Loc;
9081 int64_t Direction;
9082
9083 if (!parseSwizzleOperand(Direction, 0, 1,
9084 "direction must be 0 (left) or 1 (right)", Loc))
9085 return false;
9086
9087 int64_t RotateSize;
9088 if (!parseSwizzleOperand(
9089 RotateSize, 0, ROTATE_MAX_SIZE,
9090 "number of threads to rotate must be in the interval [0," +
9091 Twine(ROTATE_MAX_SIZE) + Twine(']'),
9092 Loc))
9093 return false;
9094
9096 (RotateSize << ROTATE_SIZE_SHIFT);
9097 return true;
9098}
9099
9100bool
9101AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
9102
9103 SMLoc OffsetLoc = getLoc();
9104
9105 if (!parseExpr(Imm, "a swizzle macro")) {
9106 return false;
9107 }
9108 if (!isUInt<16>(Imm)) {
9109 Error(OffsetLoc, "expected a 16-bit offset");
9110 return false;
9111 }
9112 return true;
9113}
9114
9115bool
9116AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
9117 using namespace llvm::AMDGPU::Swizzle;
9118
9119 if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
9120
9121 SMLoc ModeLoc = getLoc();
9122 bool Ok = false;
9123
9124 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
9125 Ok = parseSwizzleQuadPerm(Imm);
9126 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
9127 Ok = parseSwizzleBitmaskPerm(Imm);
9128 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
9129 Ok = parseSwizzleBroadcast(Imm);
9130 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
9131 Ok = parseSwizzleSwap(Imm);
9132 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
9133 Ok = parseSwizzleReverse(Imm);
9134 } else if (trySkipId(IdSymbolic[ID_FFT])) {
9135 Ok = parseSwizzleFFT(Imm);
9136 } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
9137 Ok = parseSwizzleRotate(Imm);
9138 } else {
9139 Error(ModeLoc, "expected a swizzle mode");
9140 }
9141
9142 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
9143 }
9144
9145 return false;
9146}
9147
9148ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
9149 SMLoc S = getLoc();
9150 int64_t Imm = 0;
9151
9152 if (trySkipId("offset")) {
9153
9154 bool Ok = false;
9155 if (skipToken(AsmToken::Colon, "expected a colon")) {
9156 if (trySkipId("swizzle")) {
9157 Ok = parseSwizzleMacro(Imm);
9158 } else {
9159 Ok = parseSwizzleOffset(Imm);
9160 }
9161 }
9162
9163 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
9164
9166 }
9167 return ParseStatus::NoMatch;
9168}
9169
9170bool
9171AMDGPUOperand::isSwizzle() const {
9172 return isImmTy(ImmTySwizzle);
9173}
9174
9175//===----------------------------------------------------------------------===//
9176// VGPR Index Mode
9177//===----------------------------------------------------------------------===//
9178
9179int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
9180
9181 using namespace llvm::AMDGPU::VGPRIndexMode;
9182
9183 if (trySkipToken(AsmToken::RParen)) {
9184 return OFF;
9185 }
9186
9187 int64_t Imm = 0;
9188
9189 while (true) {
9190 unsigned Mode = 0;
9191 SMLoc S = getLoc();
9192
9193 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
9194 if (trySkipId(IdSymbolic[ModeId])) {
9195 Mode = 1 << ModeId;
9196 break;
9197 }
9198 }
9199
9200 if (Mode == 0) {
9201 Error(S, (Imm == 0)?
9202 "expected a VGPR index mode or a closing parenthesis" :
9203 "expected a VGPR index mode");
9204 return UNDEF;
9205 }
9206
9207 if (Imm & Mode) {
9208 Error(S, "duplicate VGPR index mode");
9209 return UNDEF;
9210 }
9211 Imm |= Mode;
9212
9213 if (trySkipToken(AsmToken::RParen))
9214 break;
9215 if (!skipToken(AsmToken::Comma,
9216 "expected a comma or a closing parenthesis"))
9217 return UNDEF;
9218 }
9219
9220 return Imm;
9221}
9222
9223ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
9224
9225 using namespace llvm::AMDGPU::VGPRIndexMode;
9226
9227 int64_t Imm = 0;
9228 SMLoc S = getLoc();
9229
9230 if (trySkipId("gpr_idx", AsmToken::LParen)) {
9231 Imm = parseGPRIdxMacro();
9232 if (Imm == UNDEF)
9233 return ParseStatus::Failure;
9234 } else {
9235 if (getParser().parseAbsoluteExpression(Imm))
9236 return ParseStatus::Failure;
9237 if (Imm < 0 || !isUInt<4>(Imm))
9238 return Error(S, "invalid immediate: only 4-bit values are legal");
9239 }
9240
9241 Operands.push_back(
9242 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
9243 return ParseStatus::Success;
9244}
9245
9246bool AMDGPUOperand::isGPRIdxMode() const {
9247 return isImmTy(ImmTyGprIdxMode);
9248}
9249
9250//===----------------------------------------------------------------------===//
9251// sopp branch targets
9252//===----------------------------------------------------------------------===//
9253
9254ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
9255
9256 // Make sure we are not parsing something
9257 // that looks like a label or an expression but is not.
9258 // This will improve error messages.
9259 if (isRegister() || isModifier())
9260 return ParseStatus::NoMatch;
9261
9262 if (!parseExpr(Operands))
9263 return ParseStatus::Failure;
9264
9265 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
9266 assert(Opr.isImm() || Opr.isExpr());
9267 SMLoc Loc = Opr.getStartLoc();
9268
9269 // Currently we do not support arbitrary expressions as branch targets.
9270 // Only labels and absolute expressions are accepted.
9271 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
9272 Error(Loc, "expected an absolute expression or a label");
9273 } else if (Opr.isImm() && !Opr.isS16Imm()) {
9274 Error(Loc, "expected a 16-bit signed jump offset");
9275 }
9276
9277 return ParseStatus::Success;
9278}
9279
9280//===----------------------------------------------------------------------===//
9281// Boolean holding registers
9282//===----------------------------------------------------------------------===//
9283
9284ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
9285 return parseReg(Operands);
9286}
9287
9288//===----------------------------------------------------------------------===//
9289// mubuf
9290//===----------------------------------------------------------------------===//
9291
9292void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9293 const OperandVector &Operands,
9294 bool IsAtomic) {
9295 OptionalImmIndexMap OptionalIdx;
9296 unsigned FirstOperandIdx = 1;
9297 bool IsAtomicReturn = false;
9298
9299 if (IsAtomic) {
9300 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
9302 }
9303
9304 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
9305 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9306
9307 // Add the register arguments
9308 if (Op.isReg()) {
9309 Op.addRegOperands(Inst, 1);
9310 // Insert a tied src for atomic return dst.
9311 // This cannot be postponed as subsequent calls to
9312 // addImmOperands rely on correct number of MC operands.
9313 if (IsAtomicReturn && i == FirstOperandIdx)
9314 Op.addRegOperands(Inst, 1);
9315 continue;
9316 }
9317
9318 // Handle the case where soffset is an immediate
9319 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9320 Op.addImmOperands(Inst, 1);
9321 continue;
9322 }
9323
9324 // Handle tokens like 'offen' which are sometimes hard-coded into the
9325 // asm string. There are no MCInst operands for these.
9326 if (Op.isToken()) {
9327 continue;
9328 }
9329 assert(Op.isImm());
9330
9331 // Handle optional arguments
9332 OptionalIdx[Op.getImmTy()] = i;
9333 }
9334
9335 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
9336 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
9337 // Parse a dummy operand as a placeholder for the SWZ operand. This enforces
9338 // agreement between MCInstrDesc.getNumOperands and MCInst.getNumOperands.
9340}
9341
9342//===----------------------------------------------------------------------===//
9343// smrd
9344//===----------------------------------------------------------------------===//
9345
9346bool AMDGPUOperand::isSMRDOffset8() const {
9347 return isImmLiteral() && isUInt<8>(getImm());
9348}
9349
9350bool AMDGPUOperand::isSMEMOffset() const {
9351 // Offset range is checked later by validator.
9352 return isImmLiteral();
9353}
9354
9355bool AMDGPUOperand::isSMRDLiteralOffset() const {
9356 // 32-bit literals are only supported on CI and we only want to use them
9357 // when the offset is > 8-bits.
9358 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
9359}
9360
9361//===----------------------------------------------------------------------===//
9362// vop3
9363//===----------------------------------------------------------------------===//
9364
/// Convert the value of a `mul:N` output modifier in place.
///
/// \param [in,out] Mul on entry the multiplier (1, 2 or 4 are accepted); on
///                     success it is replaced by Mul >> 1, i.e. 0, 1 or 2.
///                     Left untouched on failure.
/// \returns true if the multiplier was accepted, false otherwise.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    Mul >>= 1;
    return true;
  default:
    return false;
  }
}
9372
/// Convert the value of a `div:N` output modifier in place.
///
/// \param [in,out] Div on entry the divisor; 1 is replaced by 0 and 2 is
///                     replaced by 3. Left untouched on failure.
/// \returns true if the divisor was 1 or 2, false otherwise.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
9386
9387// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9388// This is intentional and ensures compatibility with sp3.
9389// See bug 35397 for details.
9390bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9391 if (BoundCtrl == 0 || BoundCtrl == 1) {
9392 if (!isGFX11Plus())
9393 BoundCtrl = 1;
9394 return true;
9395 }
9396 return false;
9397}
9398
9399void AMDGPUAsmParser::onBeginOfFile() {
9400 if (!getParser().getStreamer().getTargetStreamer() ||
9401 getSTI().getTargetTriple().getArch() == Triple::r600)
9402 return;
9403
9404 if (!getTargetStreamer().getTargetID())
9405 getTargetStreamer().initializeTargetID(getSTI(),
9406 getSTI().getFeatureString());
9407
9408 if (isHsaAbi(getSTI()))
9409 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9410}
9411
9412/// Parse AMDGPU specific expressions.
9413///
9414/// expr ::= or(expr, ...) |
9415/// max(expr, ...) |
9416/// min(expr, ...)
9417///
9418bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9419 using AGVK = AMDGPUMCExpr::VariantKind;
9420
9421 if (isToken(AsmToken::Identifier)) {
9422 StringRef TokenId = getTokenStr();
9423 AGVK VK = StringSwitch<AGVK>(TokenId)
9424 .Case("max", AGVK::AGVK_Max)
9425 .Case("min", AGVK::AGVK_Min)
9426 .Case("or", AGVK::AGVK_Or)
9427 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
9428 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9429 .Case("alignto", AGVK::AGVK_AlignTo)
9430 .Case("occupancy", AGVK::AGVK_Occupancy)
9431 .Case("instprefsize", AGVK::AGVK_InstPrefSize)
9432 .Default(AGVK::AGVK_None);
9433
9434 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
9436 uint64_t CommaCount = 0;
9437 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
9438 lex(); // Eat '('
9439 while (true) {
9440 if (trySkipToken(AsmToken::RParen)) {
9441 if (Exprs.empty()) {
9442 Error(getToken().getLoc(),
9443 "empty " + Twine(TokenId) + " expression");
9444 return true;
9445 }
9446 if (CommaCount + 1 != Exprs.size()) {
9447 Error(getToken().getLoc(),
9448 "mismatch of commas in " + Twine(TokenId) + " expression");
9449 return true;
9450 }
9451 if (unsigned Expected = AMDGPUMCExpr::getNumExpectedArgs(VK);
9452 Expected && Exprs.size() != Expected) {
9453 Error(getToken().getLoc(), Twine(TokenId) + " expression expects " +
9454 Twine(Expected) + " operands");
9455 return true;
9456 }
9457 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
9458 return false;
9459 }
9460 const MCExpr *Expr;
9461 if (getParser().parseExpression(Expr, EndLoc))
9462 return true;
9463 Exprs.push_back(Expr);
9464 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
9465 if (LastTokenWasComma)
9466 CommaCount++;
9467 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
9468 Error(getToken().getLoc(),
9469 "unexpected token in " + Twine(TokenId) + " expression");
9470 return true;
9471 }
9472 }
9473 }
9474 }
9475 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
9476}
9477
9478ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9479 StringRef Name = getTokenStr();
9480 if (Name == "mul") {
9481 return parseIntWithPrefix("mul", Operands,
9482 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
9483 }
9484
9485 if (Name == "div") {
9486 return parseIntWithPrefix("div", Operands,
9487 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
9488 }
9489
9490 return ParseStatus::NoMatch;
9491}
9492
9493// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
9494// the number of src operands present, then copies that bit into src0_modifiers.
9495static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
9496 int Opc = Inst.getOpcode();
9497 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9498 if (OpSelIdx == -1)
9499 return;
9500
9501 int SrcNum;
9502 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9503 AMDGPU::OpName::src2};
9504 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
9505 ++SrcNum)
9506 ;
9507 assert(SrcNum > 0);
9508
9509 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9510
9511 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
9512 if (DstIdx == -1)
9513 return;
9514
9515 const MCOperand &DstOp = Inst.getOperand(DstIdx);
9516 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
9517 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9518 if (DstOp.isReg() &&
9519 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
9520 if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
9521 ModVal |= SISrcMods::DST_OP_SEL;
9522 } else {
9523 if ((OpSel & (1 << SrcNum)) != 0)
9524 ModVal |= SISrcMods::DST_OP_SEL;
9525 }
9526 Inst.getOperand(ModIdx).setImm(ModVal);
9527}
9528
9529void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9530 const OperandVector &Operands) {
9531 cvtVOP3P(Inst, Operands);
9532 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9533}
9534
9535void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
9536 OptionalImmIndexMap &OptionalIdx) {
9537 cvtVOP3P(Inst, Operands, OptionalIdx);
9538 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9539}
9540
9541static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9542 return
9543 // 1. This operand is input modifiers
9544 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9545 // 2. This is not last operand
9546 && Desc.NumOperands > (OpNum + 1)
9547 // 3. Next operand is register class
9548 && Desc.operands()[OpNum + 1].RegClass != -1
9549 // 4. Next register is not tied to any other operand
9550 && Desc.getOperandConstraint(OpNum + 1,
9552}
9553
9554void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
9555 unsigned Opc = Inst.getOpcode();
9556 constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9557 AMDGPU::OpName::src2};
9558 constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9559 AMDGPU::OpName::src1_modifiers,
9560 AMDGPU::OpName::src2_modifiers};
9561 for (int J = 0; J < 3; ++J) {
9562 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9563 if (OpIdx == -1)
9564 // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but
9565 // no src1. So continue instead of break.
9566 continue;
9567
9568 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9569 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9570
9571 if ((OpSel & (1 << J)) != 0)
9572 ModVal |= SISrcMods::OP_SEL_0;
9573 // op_sel[3] is encoded in src0_modifiers.
9574 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
9575 ModVal |= SISrcMods::DST_OP_SEL;
9576
9577 Inst.getOperand(ModIdx).setImm(ModVal);
9578 }
9579}
9580
9581void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9582{
9583 OptionalImmIndexMap OptionalIdx;
9584 unsigned Opc = Inst.getOpcode();
9585
9586 unsigned I = 1;
9587 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9588 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9589 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9590 }
9591
9592 for (unsigned E = Operands.size(); I != E; ++I) {
9593 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9595 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9596 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
9597 Op.isInterpAttrChan()) {
9598 Inst.addOperand(MCOperand::createImm(Op.getImm()));
9599 } else if (Op.isImmModifier()) {
9600 OptionalIdx[Op.getImmTy()] = I;
9601 } else {
9602 llvm_unreachable("unhandled operand type");
9603 }
9604 }
9605
9606 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
9607 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9608 AMDGPUOperand::ImmTyHigh);
9609
9610 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9611 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9612 AMDGPUOperand::ImmTyClamp);
9613
9614 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9615 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9616 AMDGPUOperand::ImmTyOModSI);
9617
9618 // Some v_interp instructions use op_sel[3] for dst.
9619 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9620 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9621 AMDGPUOperand::ImmTyOpSel);
9622 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9623 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9624
9625 cvtOpSelHelper(Inst, OpSel);
9626 }
9627}
9628
9629void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9630{
9631 OptionalImmIndexMap OptionalIdx;
9632 unsigned Opc = Inst.getOpcode();
9633
9634 unsigned I = 1;
9635 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9636 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9637 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9638 }
9639
9640 for (unsigned E = Operands.size(); I != E; ++I) {
9641 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9643 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9644 } else if (Op.isImmModifier()) {
9645 OptionalIdx[Op.getImmTy()] = I;
9646 } else {
9647 llvm_unreachable("unhandled operand type");
9648 }
9649 }
9650
9651 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
9652
9653 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9654 if (OpSelIdx != -1)
9655 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9656
9657 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
9658
9659 if (OpSelIdx == -1)
9660 return;
9661
9662 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9663 cvtOpSelHelper(Inst, OpSel);
9664}
9665
9666void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9667 const OperandVector &Operands) {
9668 OptionalImmIndexMap OptionalIdx;
9669 unsigned Opc = Inst.getOpcode();
9670 unsigned I = 1;
9671 int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
9672
9673 const MCInstrDesc &Desc = MII.get(Opc);
9674
9675 for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
9676 static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
9677
9678 for (unsigned E = Operands.size(); I != E; ++I) {
9679 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
9680 int NumOperands = Inst.getNumOperands();
9681 // The order of operands in MCInst and parsed operands are different.
9682 // Adding dummy cbsz and blgp operands at corresponding MCInst operand
9683 // indices for parsing scale values correctly.
9684 if (NumOperands == CbszOpIdx) {
9687 }
9688 if (isRegOrImmWithInputMods(Desc, NumOperands)) {
9689 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9690 } else if (Op.isImmModifier()) {
9691 OptionalIdx[Op.getImmTy()] = I;
9692 } else {
9693 Op.addRegOrImmOperands(Inst, 1);
9694 }
9695 }
9696
9697 // Insert CBSZ and BLGP operands for F8F6F4 variants
9698 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9699 if (CbszIdx != OptionalIdx.end()) {
9700 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
9701 Inst.getOperand(CbszOpIdx).setImm(CbszVal);
9702 }
9703
9704 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
9705 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9706 if (BlgpIdx != OptionalIdx.end()) {
9707 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
9708 Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
9709 }
9710
9711 // Add dummy src_modifiers
9714
9715 // Handle op_sel fields
9716
9717 unsigned OpSel = 0;
9718 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9719 if (OpselIdx != OptionalIdx.end()) {
9720 OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
9721 .getImm();
9722 }
9723
9724 unsigned OpSelHi = 0;
9725 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9726 if (OpselHiIdx != OptionalIdx.end()) {
9727 OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
9728 .getImm();
9729 }
9730 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9731 AMDGPU::OpName::src1_modifiers};
9732
9733 for (unsigned J = 0; J < 2; ++J) {
9734 unsigned ModVal = 0;
9735 if (OpSel & (1 << J))
9736 ModVal |= SISrcMods::OP_SEL_0;
9737 if (OpSelHi & (1 << J))
9738 ModVal |= SISrcMods::OP_SEL_1;
9739
9740 const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9741 Inst.getOperand(ModIdx).setImm(ModVal);
9742 }
9743}
9744
9745void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9746 OptionalImmIndexMap &OptionalIdx) {
9747 unsigned Opc = Inst.getOpcode();
9748
9749 unsigned I = 1;
9750 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9751 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9752 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9753 }
9754
9755 for (unsigned E = Operands.size(); I != E; ++I) {
9756 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9758 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9759 } else if (Op.isImmModifier()) {
9760 OptionalIdx[Op.getImmTy()] = I;
9761 } else {
9762 Op.addRegOrImmOperands(Inst, 1);
9763 }
9764 }
9765
9766 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
9767 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9768 AMDGPUOperand::ImmTyScaleSel);
9769
9770 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9771 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9772 AMDGPUOperand::ImmTyClamp);
9773
9774 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
9775 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
9776 Inst.addOperand(Inst.getOperand(0));
9777 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9778 AMDGPUOperand::ImmTyByteSel);
9779 }
9780
9781 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9782 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9783 AMDGPUOperand::ImmTyOModSI);
9784
9785 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9786 // it has src2 register operand that is tied to dst operand
9787 // we don't allow modifiers for this operand in assembler so src2_modifiers
9788 // should be 0.
9789 if (isMAC(Opc)) {
9790 auto *it = Inst.begin();
9791 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
9792 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
9793 ++it;
9794 // Copy the operand to ensure it's not invalidated when Inst grows.
9795 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
9796 }
9797}
9798
9799void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9800 OptionalImmIndexMap OptionalIdx;
9801 cvtVOP3(Inst, Operands, OptionalIdx);
9802}
9803
9804void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
9805 OptionalImmIndexMap &OptIdx) {
9806 const int Opc = Inst.getOpcode();
9807 const MCInstrDesc &Desc = MII.get(Opc);
9808
9809 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
9810
9811 if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9812 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9813 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9814 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9815 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx11 ||
9816 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx11 ||
9817 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9818 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12 ||
9819 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx13 ||
9820 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx13) {
9821 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
9822 Inst.addOperand(Inst.getOperand(0));
9823 }
9824
9825 // Append vdst_in only if a previous converter (cvtVOP3DPP for DPP variants,
9826 // cvtVOP3 for byte_sel variants) hasn't already placed it. Use the position
9827 // of the named operand to detect that, the same way cvtVOP3DPP does
9828 // internally.
9829 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
9830 if (VdstInIdx != -1 && VdstInIdx == static_cast<int>(Inst.getNumOperands()))
9831 Inst.addOperand(Inst.getOperand(0));
9832
9833 int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
9834 if (BitOp3Idx != -1) {
9835 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9836 }
9837
9838 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
9839 // instruction, and then figure out where to actually put the modifiers
9840
9841 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9842 if (OpSelIdx != -1) {
9843 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
9844 }
9845
9846 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
9847 if (OpSelHiIdx != -1) {
9848 int DefaultVal = IsPacked ? -1 : 0;
9849 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
9850 DefaultVal);
9851 }
9852
9853 int MatrixAFMTIdx =
9854 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
9855 if (MatrixAFMTIdx != -1) {
9856 addOptionalImmOperand(Inst, Operands, OptIdx,
9857 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9858 }
9859
9860 int MatrixBFMTIdx =
9861 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
9862 if (MatrixBFMTIdx != -1) {
9863 addOptionalImmOperand(Inst, Operands, OptIdx,
9864 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9865 }
9866
9867 int MatrixAScaleIdx =
9868 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
9869 if (MatrixAScaleIdx != -1) {
9870 addOptionalImmOperand(Inst, Operands, OptIdx,
9871 AMDGPUOperand::ImmTyMatrixAScale, 0);
9872 }
9873
9874 int MatrixBScaleIdx =
9875 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
9876 if (MatrixBScaleIdx != -1) {
9877 addOptionalImmOperand(Inst, Operands, OptIdx,
9878 AMDGPUOperand::ImmTyMatrixBScale, 0);
9879 }
9880
9881 int MatrixAScaleFmtIdx =
9882 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9883 if (MatrixAScaleFmtIdx != -1) {
9884 addOptionalImmOperand(Inst, Operands, OptIdx,
9885 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9886 }
9887
9888 int MatrixBScaleFmtIdx =
9889 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9890 if (MatrixBScaleFmtIdx != -1) {
9891 addOptionalImmOperand(Inst, Operands, OptIdx,
9892 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9893 }
9894
9895 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
9896 addOptionalImmOperand(Inst, Operands, OptIdx,
9897 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9898
9899 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
9900 addOptionalImmOperand(Inst, Operands, OptIdx,
9901 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9902
9903 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
9904 if (NegLoIdx != -1)
9905 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
9906
9907 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
9908 if (NegHiIdx != -1)
9909 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
9910
9911 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9912 AMDGPU::OpName::src2};
9913 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9914 AMDGPU::OpName::src1_modifiers,
9915 AMDGPU::OpName::src2_modifiers};
9916
9917 unsigned OpSel = 0;
9918 unsigned OpSelHi = 0;
9919 unsigned NegLo = 0;
9920 unsigned NegHi = 0;
9921
9922 if (OpSelIdx != -1)
9923 OpSel = Inst.getOperand(OpSelIdx).getImm();
9924
9925 if (OpSelHiIdx != -1)
9926 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
9927
9928 if (NegLoIdx != -1)
9929 NegLo = Inst.getOperand(NegLoIdx).getImm();
9930
9931 if (NegHiIdx != -1)
9932 NegHi = Inst.getOperand(NegHiIdx).getImm();
9933
9934 for (int J = 0; J < 3; ++J) {
9935 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9936 if (OpIdx == -1)
9937 break;
9938
9939 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9940
9941 if (ModIdx == -1)
9942 continue;
9943
9944 uint32_t ModVal = 0;
9945
9946 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
9947 if (SrcOp.isReg() && getMRI()
9948 ->getRegClass(AMDGPU::VGPR_16RegClassID)
9949 .contains(SrcOp.getReg())) {
9950 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
9951 if (VGPRSuffixIsHi)
9952 ModVal |= SISrcMods::OP_SEL_0;
9953 } else {
9954 if ((OpSel & (1 << J)) != 0)
9955 ModVal |= SISrcMods::OP_SEL_0;
9956 }
9957
9958 if ((OpSelHi & (1 << J)) != 0)
9959 ModVal |= SISrcMods::OP_SEL_1;
9960
9961 if ((NegLo & (1 << J)) != 0)
9962 ModVal |= SISrcMods::NEG;
9963
9964 if ((NegHi & (1 << J)) != 0)
9965 ModVal |= SISrcMods::NEG_HI;
9966
9967 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
9968 }
9969}
9970
9971void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9972 OptionalImmIndexMap OptIdx;
9973 cvtVOP3(Inst, Operands, OptIdx);
9974 cvtVOP3P(Inst, Operands, OptIdx);
9975}
9976
9978 unsigned i, unsigned Opc,
9979 AMDGPU::OpName OpName) {
9980 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
9981 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9982 else
9983 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
9984}
9985
9986void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
9987 unsigned Opc = Inst.getOpcode();
9988
9989 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9990 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
9991 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
9992 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
9993 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
9994
9995 OptionalImmIndexMap OptIdx;
9996 for (unsigned i = 5; i < Operands.size(); ++i) {
9997 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9998 OptIdx[Op.getImmTy()] = i;
9999 }
10000
10001 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
10002 addOptionalImmOperand(Inst, Operands, OptIdx,
10003 AMDGPUOperand::ImmTyIndexKey8bit);
10004
10005 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
10006 addOptionalImmOperand(Inst, Operands, OptIdx,
10007 AMDGPUOperand::ImmTyIndexKey16bit);
10008
10009 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
10010 addOptionalImmOperand(Inst, Operands, OptIdx,
10011 AMDGPUOperand::ImmTyIndexKey32bit);
10012
10013 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10014 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
10015
10016 cvtVOP3P(Inst, Operands, OptIdx);
10017}
10018
10019//===----------------------------------------------------------------------===//
10020// VOPD
10021//===----------------------------------------------------------------------===//
10022
10023ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
10024 if (!hasVOPD(getSTI()))
10025 return ParseStatus::NoMatch;
10026
10027 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
10028 SMLoc S = getLoc();
10029 lex();
10030 lex();
10031 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
10032 SMLoc OpYLoc = getLoc();
10033 StringRef OpYName;
10034 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
10035 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
10036 return ParseStatus::Success;
10037 }
10038 return Error(OpYLoc, "expected a VOPDY instruction after ::");
10039 }
10040 return ParseStatus::NoMatch;
10041}
10042
10043// Create VOPD MCInst operands using parsed assembler operands.
10044void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
10045 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10046
10047 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
10048 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
10050 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10051 return;
10052 }
10053 if (Op.isReg()) {
10054 Op.addRegOperands(Inst, 1);
10055 return;
10056 }
10057 if (Op.isImm()) {
10058 Op.addImmOperands(Inst, 1);
10059 return;
10060 }
10061 llvm_unreachable("Unhandled operand type in cvtVOPD");
10062 };
10063
10064 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
10065
10066 // MCInst operands are ordered as follows:
10067 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
10068
10069 for (auto CompIdx : VOPD::COMPONENTS) {
10070 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
10071 }
10072
10073 for (auto CompIdx : VOPD::COMPONENTS) {
10074 const auto &CInfo = InstInfo[CompIdx];
10075 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
10076 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
10077 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
10078 if (CInfo.hasSrc2Acc())
10079 addOp(CInfo.getIndexOfDstInParsedOperands());
10080 }
10081
10082 int BitOp3Idx =
10083 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
10084 if (BitOp3Idx != -1) {
10085 OptionalImmIndexMap OptIdx;
10086 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
10087 if (Op.isImm())
10088 OptIdx[Op.getImmTy()] = Operands.size() - 1;
10089
10090 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
10091 }
10092}
10093
10094//===----------------------------------------------------------------------===//
10095// dpp
10096//===----------------------------------------------------------------------===//
10097
10098bool AMDGPUOperand::isDPP8() const {
10099 return isImmTy(ImmTyDPP8);
10100}
10101
10102bool AMDGPUOperand::isDPPCtrl() const {
10103 using namespace AMDGPU::DPP;
10104
10105 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
10106 if (result) {
10107 int64_t Imm = getImm();
10108 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
10109 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
10110 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
10111 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
10112 (Imm == DppCtrl::WAVE_SHL1) ||
10113 (Imm == DppCtrl::WAVE_ROL1) ||
10114 (Imm == DppCtrl::WAVE_SHR1) ||
10115 (Imm == DppCtrl::WAVE_ROR1) ||
10116 (Imm == DppCtrl::ROW_MIRROR) ||
10117 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
10118 (Imm == DppCtrl::BCAST15) ||
10119 (Imm == DppCtrl::BCAST31) ||
10120 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
10121 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
10122 }
10123 return false;
10124}
10125
10126//===----------------------------------------------------------------------===//
10127// mAI
10128//===----------------------------------------------------------------------===//
10129
10130bool AMDGPUOperand::isBLGP() const {
10131 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
10132}
10133
10134bool AMDGPUOperand::isS16Imm() const {
10135 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
10136}
10137
10138bool AMDGPUOperand::isU16Imm() const {
10139 return isImmLiteral() && isUInt<16>(getImm());
10140}
10141
10142//===----------------------------------------------------------------------===//
10143// dim
10144//===----------------------------------------------------------------------===//
10145
10146bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
10147 // We want to allow "dim:1D" etc.,
10148 // but the initial 1 is tokenized as an integer.
10149 std::string Token;
10150 if (isToken(AsmToken::Integer)) {
10151 SMLoc Loc = getToken().getEndLoc();
10152 Token = std::string(getTokenStr());
10153 lex();
10154 if (getLoc() != Loc)
10155 return false;
10156 }
10157
10158 StringRef Suffix;
10159 if (!parseId(Suffix))
10160 return false;
10161 Token += Suffix;
10162
10163 StringRef DimId = Token;
10164 DimId.consume_front("SQ_RSRC_IMG_");
10165
10166 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
10167 if (!DimInfo)
10168 return false;
10169
10170 Encoding = DimInfo->Encoding;
10171 return true;
10172}
10173
10174ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
10175 if (!isGFX10Plus())
10176 return ParseStatus::NoMatch;
10177
10178 SMLoc S = getLoc();
10179
10180 if (!trySkipId("dim", AsmToken::Colon))
10181 return ParseStatus::NoMatch;
10182
10183 unsigned Encoding;
10184 SMLoc Loc = getLoc();
10185 if (!parseDimId(Encoding))
10186 return Error(Loc, "invalid dim value");
10187
10188 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
10189 AMDGPUOperand::ImmTyDim));
10190 return ParseStatus::Success;
10191}
10192
10193//===----------------------------------------------------------------------===//
10194// dpp
10195//===----------------------------------------------------------------------===//
10196
10197ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
10198 SMLoc S = getLoc();
10199
10200 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
10201 return ParseStatus::NoMatch;
10202
10203 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
10204
10205 int64_t Sels[8];
10206
10207 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
10208 return ParseStatus::Failure;
10209
10210 for (size_t i = 0; i < 8; ++i) {
10211 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
10212 return ParseStatus::Failure;
10213
10214 SMLoc Loc = getLoc();
10215 if (getParser().parseAbsoluteExpression(Sels[i]))
10216 return ParseStatus::Failure;
10217 if (0 > Sels[i] || 7 < Sels[i])
10218 return Error(Loc, "expected a 3-bit value");
10219 }
10220
10221 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10222 return ParseStatus::Failure;
10223
10224 unsigned DPP8 = 0;
10225 for (size_t i = 0; i < 8; ++i)
10226 DPP8 |= (Sels[i] << (i * 3));
10227
10228 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
10229 return ParseStatus::Success;
10230}
10231
10232bool
10233AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
10234 const OperandVector &Operands) {
10235 if (Ctrl == "row_newbcast")
10236 return isGFX90A();
10237
10238 if (Ctrl == "row_share" ||
10239 Ctrl == "row_xmask")
10240 return isGFX10Plus();
10241
10242 if (Ctrl == "wave_shl" ||
10243 Ctrl == "wave_shr" ||
10244 Ctrl == "wave_rol" ||
10245 Ctrl == "wave_ror" ||
10246 Ctrl == "row_bcast")
10247 return isVI() || isGFX9();
10248
10249 return Ctrl == "row_mirror" ||
10250 Ctrl == "row_half_mirror" ||
10251 Ctrl == "quad_perm" ||
10252 Ctrl == "row_shl" ||
10253 Ctrl == "row_shr" ||
10254 Ctrl == "row_ror";
10255}
10256
10257int64_t
10258AMDGPUAsmParser::parseDPPCtrlPerm() {
10259 // quad_perm:[%d,%d,%d,%d]
10260
10261 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
10262 return -1;
10263
10264 int64_t Val = 0;
10265 for (int i = 0; i < 4; ++i) {
10266 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
10267 return -1;
10268
10269 int64_t Temp;
10270 SMLoc Loc = getLoc();
10271 if (getParser().parseAbsoluteExpression(Temp))
10272 return -1;
10273 if (Temp < 0 || Temp > 3) {
10274 Error(Loc, "expected a 2-bit value");
10275 return -1;
10276 }
10277
10278 Val += (Temp << i * 2);
10279 }
10280
10281 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10282 return -1;
10283
10284 return Val;
10285}
10286
10287int64_t
10288AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10289 using namespace AMDGPU::DPP;
10290
10291 // sel:%d
10292
10293 int64_t Val;
10294 SMLoc Loc = getLoc();
10295
10296 if (getParser().parseAbsoluteExpression(Val))
10297 return -1;
10298
10299 struct DppCtrlCheck {
10300 int64_t Ctrl;
10301 int Lo;
10302 int Hi;
10303 };
10304
10305 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
10306 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10307 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10308 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10309 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10310 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10311 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10312 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10313 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10314 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10315 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10316 .Default({-1, 0, 0});
10317
10318 bool Valid;
10319 if (Check.Ctrl == -1) {
10320 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
10321 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
10322 } else {
10323 Valid = Check.Lo <= Val && Val <= Check.Hi;
10324 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
10325 }
10326
10327 if (!Valid) {
10328 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
10329 return -1;
10330 }
10331
10332 return Val;
10333}
10334
10335ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10336 using namespace AMDGPU::DPP;
10337
10338 if (!isToken(AsmToken::Identifier) ||
10339 !isSupportedDPPCtrl(getTokenStr(), Operands))
10340 return ParseStatus::NoMatch;
10341
10342 SMLoc S = getLoc();
10343 int64_t Val = -1;
10344 StringRef Ctrl;
10345
10346 parseId(Ctrl);
10347
10348 if (Ctrl == "row_mirror") {
10349 Val = DppCtrl::ROW_MIRROR;
10350 } else if (Ctrl == "row_half_mirror") {
10351 Val = DppCtrl::ROW_HALF_MIRROR;
10352 } else {
10353 if (skipToken(AsmToken::Colon, "expected a colon")) {
10354 if (Ctrl == "quad_perm") {
10355 Val = parseDPPCtrlPerm();
10356 } else {
10357 Val = parseDPPCtrlSel(Ctrl);
10358 }
10359 }
10360 }
10361
10362 if (Val == -1)
10363 return ParseStatus::Failure;
10364
10365 Operands.push_back(
10366 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10367 return ParseStatus::Success;
10368}
10369
10370void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
10371 bool IsDPP8) {
10372 OptionalImmIndexMap OptionalIdx;
10373 unsigned Opc = Inst.getOpcode();
10374 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10375
10376 // MAC instructions are special because they have 'old'
10377 // operand which is not tied to dst (but assumed to be).
10378 // They also have dummy unused src2_modifiers.
10379 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
10380 int Src2ModIdx =
10381 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
10382 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10383 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
10384
10385 unsigned I = 1;
10386 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10387 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10388 }
10389
10390 int Fi = 0;
10391 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
10392 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10393 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx13 ||
10394 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10395 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx13 ||
10396 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10397 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx13 ||
10398 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
10399 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx13;
10400
10401 for (unsigned E = Operands.size(); I != E; ++I) {
10402
10403 if (IsMAC) {
10404 int NumOperands = Inst.getNumOperands();
10405 if (OldIdx == NumOperands) {
10406 // Handle old operand
10407 constexpr int DST_IDX = 0;
10408 Inst.addOperand(Inst.getOperand(DST_IDX));
10409 } else if (Src2ModIdx == NumOperands) {
10410 // Add unused dummy src2_modifiers
10412 }
10413 }
10414
10415 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
10416 Inst.addOperand(Inst.getOperand(0));
10417 }
10418
10419 if (IsVOP3CvtSrDpp) {
10420 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
10422 Inst.addOperand(MCOperand::createReg(MCRegister()));
10423 }
10424 }
10425
10426 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10428 if (TiedTo != -1) {
10429 assert((unsigned)TiedTo < Inst.getNumOperands());
10430 // handle tied old or src2 for MAC instructions
10431 Inst.addOperand(Inst.getOperand(TiedTo));
10432 }
10433 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10434 // Add the register arguments
10435 if (IsDPP8 && Op.isDppFI()) {
10436 Fi = Op.getImm();
10437 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10438 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10439 } else if (Op.isReg()) {
10440 Op.addRegOperands(Inst, 1);
10441 } else if (Op.isImm() &&
10442 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
10443 Op.addImmOperands(Inst, 1);
10444 } else if (Op.isImm()) {
10445 OptionalIdx[Op.getImmTy()] = I;
10446 } else {
10447 llvm_unreachable("unhandled operand type");
10448 }
10449 }
10450
10451 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
10452 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10453 AMDGPUOperand::ImmTyClamp);
10454
10455 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
10456 if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
10457 Inst.addOperand(Inst.getOperand(0));
10458 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10459 AMDGPUOperand::ImmTyByteSel);
10460 }
10461
10462 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10463 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
10464
10465 if (Desc.TSFlags & SIInstrFlags::VOP3P)
10466 cvtVOP3P(Inst, Operands, OptionalIdx);
10467 else if (Desc.TSFlags & SIInstrFlags::VOP3)
10468 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10469 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
10470 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
10471 }
10472
10473 if (IsDPP8) {
10474 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
10475 using namespace llvm::AMDGPU::DPP;
10476 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10477 } else {
10478 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
10479 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10480 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10481 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10482
10483 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
10484 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10485 AMDGPUOperand::ImmTyDppFI);
10486 }
10487}
10488
10489void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
10490 OptionalImmIndexMap OptionalIdx;
10491
10492 unsigned I = 1;
10493 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10494 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10495 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10496 }
10497
10498 int Fi = 0;
10499 for (unsigned E = Operands.size(); I != E; ++I) {
10500 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10502 if (TiedTo != -1) {
10503 assert((unsigned)TiedTo < Inst.getNumOperands());
10504 // handle tied old or src2 for MAC instructions
10505 Inst.addOperand(Inst.getOperand(TiedTo));
10506 }
10507 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10508 // Add the register arguments
10509 if (Op.isReg() && validateVccOperand(Op.getReg())) {
10510 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
10511 // Skip it.
10512 continue;
10513 }
10514
10515 if (IsDPP8) {
10516 if (Op.isDPP8()) {
10517 Op.addImmOperands(Inst, 1);
10518 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10519 Op.addRegWithFPInputModsOperands(Inst, 2);
10520 } else if (Op.isDppFI()) {
10521 Fi = Op.getImm();
10522 } else if (Op.isReg()) {
10523 Op.addRegOperands(Inst, 1);
10524 } else {
10525 llvm_unreachable("Invalid operand type");
10526 }
10527 } else {
10529 Op.addRegWithFPInputModsOperands(Inst, 2);
10530 } else if (Op.isReg()) {
10531 Op.addRegOperands(Inst, 1);
10532 } else if (Op.isDPPCtrl()) {
10533 Op.addImmOperands(Inst, 1);
10534 } else if (Op.isImm()) {
10535 // Handle optional arguments
10536 OptionalIdx[Op.getImmTy()] = I;
10537 } else {
10538 llvm_unreachable("Invalid operand type");
10539 }
10540 }
10541 }
10542
10543 if (IsDPP8) {
10544 using namespace llvm::AMDGPU::DPP;
10545 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10546 } else {
10547 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10548 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10549 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10550 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
10551 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10552 AMDGPUOperand::ImmTyDppFI);
10553 }
10554 }
10555}
10556
10557//===----------------------------------------------------------------------===//
10558// sdwa
10559//===----------------------------------------------------------------------===//
10560
10561ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
10562 StringRef Prefix,
10563 AMDGPUOperand::ImmTy Type) {
10564 return parseStringOrIntWithPrefix(
10565 Operands, Prefix,
10566 {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
10567 Type);
10568}
10569
10570ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
10571 return parseStringOrIntWithPrefix(
10572 Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
10573 AMDGPUOperand::ImmTySDWADstUnused);
10574}
10575
10576void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
10577 cvtSDWA(Inst, Operands, SDWAInstType::VOP1);
10578}
10579
10580void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
10581 cvtSDWA(Inst, Operands, SDWAInstType::VOP2);
10582}
10583
10584void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
10585 cvtSDWA(Inst, Operands, SDWAInstType::VOP2, true, true);
10586}
10587
10588void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
10589 cvtSDWA(Inst, Operands, SDWAInstType::VOP2, false, true);
10590}
10591
10592void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
10593 cvtSDWA(Inst, Operands, SDWAInstType::VOPC, isVI());
10594}
10595
10596void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
10597 SDWAInstType BasicInstType, bool SkipDstVcc,
10598 bool SkipSrcVcc) {
10599 using namespace llvm::AMDGPU::SDWA;
10600
10601 OptionalImmIndexMap OptionalIdx;
10602 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10603 bool SkippedVcc = false;
10604
10605 unsigned I = 1;
10606 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10607 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10608 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10609 }
10610
10611 for (unsigned E = Operands.size(); I != E; ++I) {
10612 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10613 if (SkipVcc && !SkippedVcc && Op.isReg() &&
10614 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
10615 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
10616 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
10617 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
10618 // Skip VCC only if we didn't skip it on previous iteration.
10619 // Note that src0 and src1 occupy 2 slots each because of modifiers.
10620 if (BasicInstType == SDWAInstType::VOP2 &&
10621 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
10622 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
10623 SkippedVcc = true;
10624 continue;
10625 }
10626 if (BasicInstType == SDWAInstType::VOPC && Inst.getNumOperands() == 0) {
10627 SkippedVcc = true;
10628 continue;
10629 }
10630 }
10632 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10633 } else if (Op.isImm()) {
10634 // Handle optional arguments
10635 OptionalIdx[Op.getImmTy()] = I;
10636 } else {
10637 llvm_unreachable("Invalid operand type");
10638 }
10639 SkippedVcc = false;
10640 }
10641
10642 const unsigned Opc = Inst.getOpcode();
10643 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10644 Opc != AMDGPU::V_NOP_sdwa_vi) {
10645 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
10646 switch (BasicInstType) {
10647 case SDWAInstType::VOP1:
10648 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10649 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10650 AMDGPUOperand::ImmTyClamp, 0);
10651
10652 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10653 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10654 AMDGPUOperand::ImmTyOModSI, 0);
10655
10656 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
10657 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10658 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10659
10660 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
10661 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10662 AMDGPUOperand::ImmTySDWADstUnused,
10663 DstUnused::UNUSED_PRESERVE);
10664
10665 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10666 break;
10667
10668 case SDWAInstType::VOP2:
10669 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10670 AMDGPUOperand::ImmTyClamp, 0);
10671
10672 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
10673 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
10674
10675 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10676 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10677 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10678 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10679 break;
10680
10681 case SDWAInstType::VOPC:
10682 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
10683 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10684 AMDGPUOperand::ImmTyClamp, 0);
10685 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10686 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10687 break;
10688 }
10689 }
10690
10691 // special case v_mac_{f16, f32}:
10692 // it has src2 register operand that is tied to dst operand
10693 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10694 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10695 auto *it = Inst.begin();
10696 std::advance(
10697 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
10698 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
10699 }
10700}
10701
10702/// Force static initialization.
10703extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
10708
10709#define GET_MATCHER_IMPLEMENTATION
10710#define GET_MNEMONIC_SPELL_CHECKER
10711#define GET_MNEMONIC_CHECKER
10712#include "AMDGPUGenAsmMatcher.inc"
10713
10714ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10715 unsigned MCK) {
10716 switch (MCK) {
10717 case MCK_addr64:
10718 return parseTokenOp("addr64", Operands);
10719 case MCK_done:
10720 return parseNamedBit("done", Operands, AMDGPUOperand::ImmTyDone, true);
10721 case MCK_idxen:
10722 return parseTokenOp("idxen", Operands);
10723 case MCK_lds:
10724 return parseNamedBit("lds", Operands, AMDGPUOperand::ImmTyLDS,
10725 /*IgnoreNegative=*/true);
10726 case MCK_offen:
10727 return parseTokenOp("offen", Operands);
10728 case MCK_off:
10729 return parseTokenOp("off", Operands);
10730 case MCK_row_95_en:
10731 return parseNamedBit("row_en", Operands, AMDGPUOperand::ImmTyRowEn, true);
10732 case MCK_gds:
10733 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10734 case MCK_tfe:
10735 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10736 }
10737 return tryCustomParseOperand(Operands, MCK);
10738}
10739
10740// This function should be defined after auto-generated include so that we have
10741// MatchClassKind enum defined
10742unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10743 unsigned Kind) {
10744 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10745 // But MatchInstructionImpl() expects to meet token and fails to validate
10746 // operand. This method checks if we are given immediate operand but expect to
10747 // get corresponding token.
10748 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10749 switch (Kind) {
10750 case MCK_addr64:
10751 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10752 case MCK_gds:
10753 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10754 case MCK_lds:
10755 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10756 case MCK_idxen:
10757 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10758 case MCK_offen:
10759 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10760 case MCK_tfe:
10761 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10762 case MCK_done:
10763 return Operand.isDone() ? Match_Success : Match_InvalidOperand;
10764 case MCK_row_95_en:
10765 return Operand.isRowEn() ? Match_Success : Match_InvalidOperand;
10766 case MCK_SSrc_b32:
10767 // When operands have expression values, they will return true for isToken,
10768 // because it is not possible to distinguish between a token and an
10769 // expression at parse time. MatchInstructionImpl() will always try to
10770 // match an operand as a token, when isToken returns true, and when the
10771 // name of the expression is not a valid token, the match will fail,
10772 // so we need to handle it here.
10773 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10774 case MCK_SSrc_f32:
10775 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10776 case MCK_SOPPBrTarget:
10777 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10778 case MCK_VReg32OrOff:
10779 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10780 case MCK_InterpSlot:
10781 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10782 case MCK_InterpAttr:
10783 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10784 case MCK_InterpAttrChan:
10785 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10786 case MCK_SReg_64:
10787 case MCK_SReg_64_XEXEC:
10788 // Null is defined as a 32-bit register but
10789 // it should also be enabled with 64-bit operands or larger.
10790 // The following code enables it for SReg_64 and larger operands
10791 // used as source and destination. Remaining source
10792 // operands are handled in isInlinableImm.
10793 case MCK_SReg_96:
10794 case MCK_SReg_128:
10795 case MCK_SReg_256:
10796 case MCK_SReg_512:
10797 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10798 default:
10799 return Match_InvalidOperand;
10800 }
10801}
10802
10803//===----------------------------------------------------------------------===//
10804// endpgm
10805//===----------------------------------------------------------------------===//
10806
10807ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10808 SMLoc S = getLoc();
10809 int64_t Imm = 0;
10810
10811 if (!parseExpr(Imm)) {
10812 // The operand is optional, if not present default to 0
10813 Imm = 0;
10814 }
10815
10816 if (!isUInt<16>(Imm))
10817 return Error(S, "expected a 16-bit value");
10818
10819 Operands.push_back(
10820 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10821 return ParseStatus::Success;
10822}
10823
10824bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10825
10826//===----------------------------------------------------------------------===//
10827// Split Barrier
10828//===----------------------------------------------------------------------===//
10829
10830bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
#define Success
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
SmallVector< int16_t, MAX_SRC_OPERANDS_NUM > OperandIndices
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, AMDGPU::OpName OpName)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name, const MCRegisterInfo *MRI)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static bool IsRevOpcode(const unsigned Opcode)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static MCRegister getSpecialRegForName(StringRef RegName)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0, std::optional< unsigned > InsertAt=std::nullopt)
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
AMDGPU metadata definitions and in-memory representations.
Enums shared between the AMDGPU backend (LLVM) and the ELF linker (LLD) for the .amdgpu....
AMDHSA kernel descriptor definitions.
static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value, raw_ostream &Err)
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_READNONE
Definition Compiler.h:317
#define LLVM_ABI
Definition Compiler.h:215
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
@ Default
#define Check(C,...)
static llvm::Expected< InlineInfo > decode(GsymDataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
Loop::LoopBounds::Direction Direction
Definition LoopInfo.cpp:253
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(PassOpts->AAPipeline)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:484
This file implements the SmallBitVector class.
StringSet - A set-like wrapper for the StringMap.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
BinaryOperator * Mul
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static unsigned getNumExpectedArgs(VariantKind Kind)
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
static const fltSemantics & IEEEsingle()
Definition APFloat.h:297
static const fltSemantics & BFloat()
Definition APFloat.h:296
static const fltSemantics & IEEEdouble()
Definition APFloat.h:298
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:345
static const fltSemantics & IEEEhalf()
Definition APFloat.h:295
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:361
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5901
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:130
size_t size() const
Get the array size.
Definition ArrayRef.h:141
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition MCAsmMacro.h:103
bool is(TokenKind K) const
Definition MCAsmMacro.h:75
Register getReg() const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:342
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:352
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:427
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Look up the symbol with the specified Name.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
SMLoc getLoc() const
Definition MCInst.h:208
void setLoc(SMLoc loc)
Definition MCInst.h:207
unsigned getOpcode() const
Definition MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition MCInst.h:232
void addOperand(const MCOperand Op)
Definition MCInst.h:215
iterator begin()
Definition MCInst.h:227
size_t size() const
Definition MCInst.h:226
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Definition MCInstrInfo.h:80
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
void setImm(int64_t Val)
Definition MCInst.h:89
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
bool isImm() const
Definition MCInst.h:66
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
const MCExpr * getExpr() const
Definition MCInst.h:118
bool isExpr() const
Definition MCInst.h:69
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
bool regsOverlap(MCRegister RegA, MCRegister RegB) const
Returns true if the two registers are equal or alias each other.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Idx" for physical register Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr bool isValid() const
Definition MCRegister.h:84
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
void setRedefinable(bool Value)
Mark this symbol as redefinable.
Definition MCSymbol.h:210
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCTargetAsmParser - Generic interface to target specific assembly parsers.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Definition Register.h:100
Represents a location in source code.
Definition SMLoc.h:22
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:35
constexpr const char * getPointer() const
Definition SMLoc.h:33
constexpr bool isValid() const
Definition SMLoc.h:28
SMLoc Start
Definition SMLoc.h:49
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition StringRef.h:685
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:138
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:270
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:655
bool contains(StringRef key) const
Check if the set contains the given key.
Definition StringSet.h:60
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:212
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
static constexpr CustomOperand Operands[]
unsigned getAddressableNumSGPRs(const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI)
unsigned getLocalMemorySize(const MCSubtargetInfo &STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
constexpr const char *const ModMatrixFmt[]
constexpr const char *const ModMatrixScaleFmt[]
constexpr const char *const ModMatrixScale[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlineValue(MCRegister Reg)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg a scalar register?
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
FuncInfoFlags
Per-function flags packed into INFO_FLAGS entries.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY bool isLitExpr(const MCExpr *Expr)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
LLVM_READONLY int64_t getLitValue(const MCExpr *Expr)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
bool isValidWMMAScaleFmtCombination(unsigned AFmt, unsigned AScale, unsigned BFmt, unsigned BScale)
@ OPERAND_REG_IMM_V2FP64
Definition SIDefines.h:430
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:448
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:416
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:423
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:439
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:436
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:441
@ OPERAND_REG_IMM_V2INT64
Definition SIDefines.h:426
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:425
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:420
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:415
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:422
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:421
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:424
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:435
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:433
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:427
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:419
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:442
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:453
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:454
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:428
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:418
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:438
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:434
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:440
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:429
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:455
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:437
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:417
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:445
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
bool isGFX1250Plus(const MCSubtargetInfo &STI)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1433
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ OPERAND_IMMEDIATE
Definition MCInstrDesc.h:61
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
constexpr bool hasIntClamp(const T &...O)
Definition SIDefines.h:318
@ Valid
The data is already valid.
Context & getContext() const
Definition BasicBlock.h:99
bool isNull(StringRef S)
Definition YAMLTraits.h:571
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
Definition Error.h:1129
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
StringMapEntry< Value * > ValueName
Definition Value.h:56
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition Alignment.h:206
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
LLVM_ABI void PrintError(const Twine &Msg)
Definition Error.cpp:104
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
Definition InstrProf.h:299
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:362
Op::Description Desc
Target & getTheR600Target()
The target for R600 GPUs.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:31
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
#define N
RegisterKind Kind
StringLiteral Name
void initDefault(const MCSubtargetInfo &STI, MCContext &Ctx, bool InitMCExpr=true)
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
SmallVector< std::pair< MCSymbol *, std::string >, 4 > IndirectCalls
SmallVector< std::pair< MCSymbol *, MCSymbol * >, 8 > Calls
SmallVector< FuncInfo, 8 > Funcs
SmallVector< std::pair< MCSymbol *, std::string >, 4 > TypeIds
SmallVector< std::pair< MCSymbol *, MCSymbol * >, 4 > Uses
Instruction set architecture version.
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...