AMDGPUAsmParser.cpp (LLVM 15.0.0git)
1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
19 #include "llvm/ADT/APFloat.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/TargetRegistry.h"
36 #include "llvm/Support/Casting.h"
40 
41 using namespace llvm;
42 using namespace llvm::AMDGPU;
43 using namespace llvm::amdhsa;
44 
45 namespace {
46 
47 class AMDGPUAsmParser;
48 
49 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
50 
51 //===----------------------------------------------------------------------===//
52 // Operand
53 //===----------------------------------------------------------------------===//
54 
55 class AMDGPUOperand : public MCParsedAsmOperand {
56  enum KindTy {
57  Token,
58  Immediate,
59  Register,
60  Expression
61  } Kind;
62 
63  SMLoc StartLoc, EndLoc;
64  const AMDGPUAsmParser *AsmParser;
65 
66 public:
67  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
68  : Kind(Kind_), AsmParser(AsmParser_) {}
69 
70  using Ptr = std::unique_ptr<AMDGPUOperand>;
71 
72  struct Modifiers {
73  bool Abs = false;
74  bool Neg = false;
75  bool Sext = false;
76 
77  bool hasFPModifiers() const { return Abs || Neg; }
78  bool hasIntModifiers() const { return Sext; }
79  bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
80 
81  int64_t getFPModifiersOperand() const {
82  int64_t Operand = 0;
83  Operand |= Abs ? SISrcMods::ABS : 0u;
84  Operand |= Neg ? SISrcMods::NEG : 0u;
85  return Operand;
86  }
87 
88  int64_t getIntModifiersOperand() const {
89  int64_t Operand = 0;
90  Operand |= Sext ? SISrcMods::SEXT : 0u;
91  return Operand;
92  }
93 
94  int64_t getModifiersOperand() const {
95  assert(!(hasFPModifiers() && hasIntModifiers())
96  && "fp and int modifiers should not be used simultaneously");
97  if (hasFPModifiers()) {
98  return getFPModifiersOperand();
99  } else if (hasIntModifiers()) {
100  return getIntModifiersOperand();
101  } else {
102  return 0;
103  }
104  }
105 
106  friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
107  };
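// Illustrative note (added, not part of the original source): these flags model
// the source-operand decorations of AMDGPU assembly. A sketch of the mapping,
// assuming the usual VOP3/SDWA operand syntax:
//
//   v_add_f32 v0, -v1, |v2|    ; src0 carries Neg, src1 carries Abs
//   v_add_f32 v0, -|v1|, v2    ; src0 carries both, so getFPModifiersOperand()
//                              ; returns SISrcMods::NEG | SISrcMods::ABS
//   ... sext(v1) ...           ; integer sign-extension modifier used by SDWA
//                              ; operands, encoded via getIntModifiersOperand()
//
// The assert in getModifiersOperand() reflects that a single operand never
// carries FP and integer modifiers at the same time.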
108 
109  enum ImmTy {
110  ImmTyNone,
111  ImmTyGDS,
112  ImmTyLDS,
113  ImmTyOffen,
114  ImmTyIdxen,
115  ImmTyAddr64,
116  ImmTyOffset,
117  ImmTyInstOffset,
118  ImmTyOffset0,
119  ImmTyOffset1,
120  ImmTyCPol,
121  ImmTySWZ,
122  ImmTyTFE,
123  ImmTyD16,
124  ImmTyClampSI,
125  ImmTyOModSI,
126  ImmTyDPP8,
127  ImmTyDppCtrl,
128  ImmTyDppRowMask,
129  ImmTyDppBankMask,
130  ImmTyDppBoundCtrl,
131  ImmTyDppFi,
132  ImmTySdwaDstSel,
133  ImmTySdwaSrc0Sel,
134  ImmTySdwaSrc1Sel,
135  ImmTySdwaDstUnused,
136  ImmTyDMask,
137  ImmTyDim,
138  ImmTyUNorm,
139  ImmTyDA,
140  ImmTyR128A16,
141  ImmTyA16,
142  ImmTyLWE,
143  ImmTyExpTgt,
144  ImmTyExpCompr,
145  ImmTyExpVM,
146  ImmTyFORMAT,
147  ImmTyHwreg,
148  ImmTyOff,
149  ImmTySendMsg,
150  ImmTyInterpSlot,
151  ImmTyInterpAttr,
152  ImmTyAttrChan,
153  ImmTyOpSel,
154  ImmTyOpSelHi,
155  ImmTyNegLo,
156  ImmTyNegHi,
157  ImmTySwizzle,
158  ImmTyGprIdxMode,
159  ImmTyHigh,
160  ImmTyBLGP,
161  ImmTyCBSZ,
162  ImmTyABID,
163  ImmTyEndpgm,
164  ImmTyWaitVDST,
165  ImmTyWaitEXP,
166  };
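// Illustrative note (added): each ImmTy tags an immediate operand with the
// assembly modifier it came from, e.g. "offset:16" -> ImmTyOffset,
// "gds" -> ImmTyGDS, "row_mask:0xf" -> ImmTyDppRowMask, "clamp" -> ImmTyClampSI.
// This is only a sketch of the common operand syntax; the authoritative
// association is made by the parse*/cvt* routines later in this file.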
167 
168  enum ImmKindTy {
169  ImmKindTyNone,
170  ImmKindTyLiteral,
171  ImmKindTyConst,
172  };
173 
174 private:
175  struct TokOp {
176  const char *Data;
177  unsigned Length;
178  };
179 
180  struct ImmOp {
181  int64_t Val;
182  ImmTy Type;
183  bool IsFPImm;
184  mutable ImmKindTy Kind;
185  Modifiers Mods;
186  };
187 
188  struct RegOp {
189  unsigned RegNo;
190  Modifiers Mods;
191  };
192 
193  union {
194  TokOp Tok;
195  ImmOp Imm;
196  RegOp Reg;
197  const MCExpr *Expr;
198  };
199 
200 public:
201  bool isToken() const override {
202  if (Kind == Token)
203  return true;
204 
205  // When parsing operands, we can't always tell if something was meant to be
206  // a token, like 'gds', or an expression that references a global variable.
207  // In this case, we assume the string is an expression, and if we need to
208  // interpret it as a token, then we treat the symbol name as the token.
209  return isSymbolRefExpr();
210  }
211 
212  bool isSymbolRefExpr() const {
213  return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
214  }
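// Illustrative note (added): for an input such as "ds_add_u32 v0, v1 gds", the
// trailing "gds" may first be parsed as an expression referencing a symbol
// named "gds"; isToken()/getToken() then fall back to the symbol name so the
// operand can still match as a token. This is just a sketch of the intent
// described in the comment above, not additional behavior.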
215 
216  bool isImm() const override {
217  return Kind == Immediate;
218  }
219 
220  void setImmKindNone() const {
221  assert(isImm());
222  Imm.Kind = ImmKindTyNone;
223  }
224 
225  void setImmKindLiteral() const {
226  assert(isImm());
227  Imm.Kind = ImmKindTyLiteral;
228  }
229 
230  void setImmKindConst() const {
231  assert(isImm());
232  Imm.Kind = ImmKindTyConst;
233  }
234 
235  bool IsImmKindLiteral() const {
236  return isImm() && Imm.Kind == ImmKindTyLiteral;
237  }
238 
239  bool isImmKindConst() const {
240  return isImm() && Imm.Kind == ImmKindTyConst;
241  }
242 
243  bool isInlinableImm(MVT type) const;
244  bool isLiteralImm(MVT type) const;
245 
246  bool isRegKind() const {
247  return Kind == Register;
248  }
249 
250  bool isReg() const override {
251  return isRegKind() && !hasModifiers();
252  }
253 
254  bool isRegOrInline(unsigned RCID, MVT type) const {
255  return isRegClass(RCID) || isInlinableImm(type);
256  }
257 
258  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
259  return isRegOrInline(RCID, type) || isLiteralImm(type);
260  }
261 
262  bool isRegOrImmWithInt16InputMods() const {
263  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
264  }
265 
266  bool isRegOrImmWithInt32InputMods() const {
267  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
268  }
269 
270  bool isRegOrImmWithInt64InputMods() const {
271  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
272  }
273 
274  bool isRegOrImmWithFP16InputMods() const {
275  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
276  }
277 
278  bool isRegOrImmWithFP32InputMods() const {
279  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
280  }
281 
282  bool isRegOrImmWithFP64InputMods() const {
283  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
284  }
285 
286  bool isVReg() const {
287  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
288  isRegClass(AMDGPU::VReg_64RegClassID) ||
289  isRegClass(AMDGPU::VReg_96RegClassID) ||
290  isRegClass(AMDGPU::VReg_128RegClassID) ||
291  isRegClass(AMDGPU::VReg_160RegClassID) ||
292  isRegClass(AMDGPU::VReg_192RegClassID) ||
293  isRegClass(AMDGPU::VReg_256RegClassID) ||
294  isRegClass(AMDGPU::VReg_512RegClassID) ||
295  isRegClass(AMDGPU::VReg_1024RegClassID);
296  }
297 
298  bool isVReg32() const {
299  return isRegClass(AMDGPU::VGPR_32RegClassID);
300  }
301 
302  bool isVReg32OrOff() const {
303  return isOff() || isVReg32();
304  }
305 
306  bool isNull() const {
307  return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
308  }
309 
310  bool isVRegWithInputMods() const;
311 
312  bool isSDWAOperand(MVT type) const;
313  bool isSDWAFP16Operand() const;
314  bool isSDWAFP32Operand() const;
315  bool isSDWAInt16Operand() const;
316  bool isSDWAInt32Operand() const;
317 
318  bool isImmTy(ImmTy ImmT) const {
319  return isImm() && Imm.Type == ImmT;
320  }
321 
322  bool isImmModifier() const {
323  return isImm() && Imm.Type != ImmTyNone;
324  }
325 
326  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
327  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
328  bool isDMask() const { return isImmTy(ImmTyDMask); }
329  bool isDim() const { return isImmTy(ImmTyDim); }
330  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
331  bool isDA() const { return isImmTy(ImmTyDA); }
332  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
333  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
334  bool isLWE() const { return isImmTy(ImmTyLWE); }
335  bool isOff() const { return isImmTy(ImmTyOff); }
336  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
337  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
338  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
339  bool isOffen() const { return isImmTy(ImmTyOffen); }
340  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
341  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
342  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
343  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
344  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
345 
346  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
347  bool isGDS() const { return isImmTy(ImmTyGDS); }
348  bool isLDS() const { return isImmTy(ImmTyLDS); }
349  bool isCPol() const { return isImmTy(ImmTyCPol); }
350  bool isSWZ() const { return isImmTy(ImmTySWZ); }
351  bool isTFE() const { return isImmTy(ImmTyTFE); }
352  bool isD16() const { return isImmTy(ImmTyD16); }
353  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
354  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
355  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
356  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
357  bool isFI() const { return isImmTy(ImmTyDppFi); }
358  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
359  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
360  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
361  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
362  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
363  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
364  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
365  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
366  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
367  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
368  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
369  bool isHigh() const { return isImmTy(ImmTyHigh); }
370 
371  bool isMod() const {
372  return isClampSI() || isOModSI();
373  }
374 
375  bool isRegOrImm() const {
376  return isReg() || isImm();
377  }
378 
379  bool isRegClass(unsigned RCID) const;
380 
381  bool isInlineValue() const;
382 
383  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
384  return isRegOrInline(RCID, type) && !hasModifiers();
385  }
386 
387  bool isSCSrcB16() const {
388  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
389  }
390 
391  bool isSCSrcV2B16() const {
392  return isSCSrcB16();
393  }
394 
395  bool isSCSrcB32() const {
396  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
397  }
398 
399  bool isSCSrcB64() const {
400  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
401  }
402 
403  bool isBoolReg() const;
404 
405  bool isSCSrcF16() const {
406  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
407  }
408 
409  bool isSCSrcV2F16() const {
410  return isSCSrcF16();
411  }
412 
413  bool isSCSrcF32() const {
414  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
415  }
416 
417  bool isSCSrcF64() const {
418  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
419  }
420 
421  bool isSSrcB32() const {
422  return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
423  }
424 
425  bool isSSrcB16() const {
426  return isSCSrcB16() || isLiteralImm(MVT::i16);
427  }
428 
429  bool isSSrcV2B16() const {
430  llvm_unreachable("cannot happen");
431  return isSSrcB16();
432  }
433 
434  bool isSSrcB64() const {
435  // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
436  // See isVSrc64().
437  return isSCSrcB64() || isLiteralImm(MVT::i64);
438  }
439 
440  bool isSSrcF32() const {
441  return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
442  }
443 
444  bool isSSrcF64() const {
445  return isSCSrcB64() || isLiteralImm(MVT::f64);
446  }
447 
448  bool isSSrcF16() const {
449  return isSCSrcB16() || isLiteralImm(MVT::f16);
450  }
451 
452  bool isSSrcV2F16() const {
453  llvm_unreachable("cannot happen");
454  return isSSrcF16();
455  }
456 
457  bool isSSrcV2FP32() const {
458  llvm_unreachable("cannot happen");
459  return isSSrcF32();
460  }
461 
462  bool isSCSrcV2FP32() const {
463  llvm_unreachable("cannot happen");
464  return isSCSrcF32();
465  }
466 
467  bool isSSrcV2INT32() const {
468  llvm_unreachable("cannot happen");
469  return isSSrcB32();
470  }
471 
472  bool isSCSrcV2INT32() const {
473  llvm_unreachable("cannot happen");
474  return isSCSrcB32();
475  }
476 
477  bool isSSrcOrLdsB32() const {
478  return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
479  isLiteralImm(MVT::i32) || isExpr();
480  }
481 
482  bool isVCSrcB32() const {
483  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
484  }
485 
486  bool isVCSrcB64() const {
487  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
488  }
489 
490  bool isVCSrcB16() const {
491  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
492  }
493 
494  bool isVCSrcV2B16() const {
495  return isVCSrcB16();
496  }
497 
498  bool isVCSrcF32() const {
499  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
500  }
501 
502  bool isVCSrcF64() const {
503  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
504  }
505 
506  bool isVCSrcF16() const {
507  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
508  }
509 
510  bool isVCSrcV2F16() const {
511  return isVCSrcF16();
512  }
513 
514  bool isVSrcB32() const {
515  return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
516  }
517 
518  bool isVSrcB64() const {
519  return isVCSrcF64() || isLiteralImm(MVT::i64);
520  }
521 
522  bool isVSrcB16() const {
523  return isVCSrcB16() || isLiteralImm(MVT::i16);
524  }
525 
526  bool isVSrcV2B16() const {
527  return isVSrcB16() || isLiteralImm(MVT::v2i16);
528  }
529 
530  bool isVCSrcV2FP32() const {
531  return isVCSrcF64();
532  }
533 
534  bool isVSrcV2FP32() const {
535  return isVSrcF64() || isLiteralImm(MVT::v2f32);
536  }
537 
538  bool isVCSrcV2INT32() const {
539  return isVCSrcB64();
540  }
541 
542  bool isVSrcV2INT32() const {
543  return isVSrcB64() || isLiteralImm(MVT::v2i32);
544  }
545 
546  bool isVSrcF32() const {
547  return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
548  }
549 
550  bool isVSrcF64() const {
551  return isVCSrcF64() || isLiteralImm(MVT::f64);
552  }
553 
554  bool isVSrcF16() const {
555  return isVCSrcF16() || isLiteralImm(MVT::f16);
556  }
557 
558  bool isVSrcV2F16() const {
559  return isVSrcF16() || isLiteralImm(MVT::v2f16);
560  }
561 
562  bool isVISrcB32() const {
563  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
564  }
565 
566  bool isVISrcB16() const {
567  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
568  }
569 
570  bool isVISrcV2B16() const {
571  return isVISrcB16();
572  }
573 
574  bool isVISrcF32() const {
575  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
576  }
577 
578  bool isVISrcF16() const {
579  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
580  }
581 
582  bool isVISrcV2F16() const {
583  return isVISrcF16() || isVISrcB32();
584  }
585 
586  bool isVISrc_64B64() const {
587  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
588  }
589 
590  bool isVISrc_64F64() const {
591  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
592  }
593 
594  bool isVISrc_64V2FP32() const {
595  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
596  }
597 
598  bool isVISrc_64V2INT32() const {
599  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
600  }
601 
602  bool isVISrc_256B64() const {
603  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
604  }
605 
606  bool isVISrc_256F64() const {
607  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
608  }
609 
610  bool isVISrc_128B16() const {
611  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
612  }
613 
614  bool isVISrc_128V2B16() const {
615  return isVISrc_128B16();
616  }
617 
618  bool isVISrc_128B32() const {
619  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
620  }
621 
622  bool isVISrc_128F32() const {
623  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
624  }
625 
626  bool isVISrc_256V2FP32() const {
627  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
628  }
629 
630  bool isVISrc_256V2INT32() const {
631  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
632  }
633 
634  bool isVISrc_512B32() const {
635  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
636  }
637 
638  bool isVISrc_512B16() const {
639  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
640  }
641 
642  bool isVISrc_512V2B16() const {
643  return isVISrc_512B16();
644  }
645 
646  bool isVISrc_512F32() const {
647  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
648  }
649 
650  bool isVISrc_512F16() const {
651  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
652  }
653 
654  bool isVISrc_512V2F16() const {
655  return isVISrc_512F16() || isVISrc_512B32();
656  }
657 
658  bool isVISrc_1024B32() const {
659  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
660  }
661 
662  bool isVISrc_1024B16() const {
663  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
664  }
665 
666  bool isVISrc_1024V2B16() const {
667  return isVISrc_1024B16();
668  }
669 
670  bool isVISrc_1024F32() const {
671  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
672  }
673 
674  bool isVISrc_1024F16() const {
675  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
676  }
677 
678  bool isVISrc_1024V2F16() const {
679  return isVISrc_1024F16() || isVISrc_1024B32();
680  }
681 
682  bool isAISrcB32() const {
683  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
684  }
685 
686  bool isAISrcB16() const {
687  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
688  }
689 
690  bool isAISrcV2B16() const {
691  return isAISrcB16();
692  }
693 
694  bool isAISrcF32() const {
695  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
696  }
697 
698  bool isAISrcF16() const {
699  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
700  }
701 
702  bool isAISrcV2F16() const {
703  return isAISrcF16() || isAISrcB32();
704  }
705 
706  bool isAISrc_64B64() const {
707  return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
708  }
709 
710  bool isAISrc_64F64() const {
711  return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
712  }
713 
714  bool isAISrc_128B32() const {
715  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
716  }
717 
718  bool isAISrc_128B16() const {
719  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
720  }
721 
722  bool isAISrc_128V2B16() const {
723  return isAISrc_128B16();
724  }
725 
726  bool isAISrc_128F32() const {
727  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
728  }
729 
730  bool isAISrc_128F16() const {
731  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
732  }
733 
734  bool isAISrc_128V2F16() const {
735  return isAISrc_128F16() || isAISrc_128B32();
736  }
737 
738  bool isVISrc_128F16() const {
739  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
740  }
741 
742  bool isVISrc_128V2F16() const {
743  return isVISrc_128F16() || isVISrc_128B32();
744  }
745 
746  bool isAISrc_256B64() const {
747  return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
748  }
749 
750  bool isAISrc_256F64() const {
751  return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
752  }
753 
754  bool isAISrc_512B32() const {
755  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
756  }
757 
758  bool isAISrc_512B16() const {
759  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
760  }
761 
762  bool isAISrc_512V2B16() const {
763  return isAISrc_512B16();
764  }
765 
766  bool isAISrc_512F32() const {
767  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
768  }
769 
770  bool isAISrc_512F16() const {
771  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
772  }
773 
774  bool isAISrc_512V2F16() const {
775  return isAISrc_512F16() || isAISrc_512B32();
776  }
777 
778  bool isAISrc_1024B32() const {
779  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
780  }
781 
782  bool isAISrc_1024B16() const {
783  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
784  }
785 
786  bool isAISrc_1024V2B16() const {
787  return isAISrc_1024B16();
788  }
789 
790  bool isAISrc_1024F32() const {
791  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
792  }
793 
794  bool isAISrc_1024F16() const {
795  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
796  }
797 
798  bool isAISrc_1024V2F16() const {
799  return isAISrc_1024F16() || isAISrc_1024B32();
800  }
801 
802  bool isKImmFP32() const {
803  return isLiteralImm(MVT::f32);
804  }
805 
806  bool isKImmFP16() const {
807  return isLiteralImm(MVT::f16);
808  }
809 
810  bool isMem() const override {
811  return false;
812  }
813 
814  bool isExpr() const {
815  return Kind == Expression;
816  }
817 
818  bool isSoppBrTarget() const {
819  return isExpr() || isImm();
820  }
821 
822  bool isSWaitCnt() const;
823  bool isDepCtr() const;
824  bool isSDelayAlu() const;
825  bool isHwreg() const;
826  bool isSendMsg() const;
827  bool isSwizzle() const;
828  bool isSMRDOffset8() const;
829  bool isSMEMOffset() const;
830  bool isSMRDLiteralOffset() const;
831  bool isDPP8() const;
832  bool isDPPCtrl() const;
833  bool isBLGP() const;
834  bool isCBSZ() const;
835  bool isABID() const;
836  bool isGPRIdxMode() const;
837  bool isS16Imm() const;
838  bool isU16Imm() const;
839  bool isEndpgm() const;
840  bool isWaitVDST() const;
841  bool isWaitEXP() const;
842 
843  StringRef getExpressionAsToken() const {
844  assert(isExpr());
845  const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
846  return S->getSymbol().getName();
847  }
848 
849  StringRef getToken() const {
850  assert(isToken());
851 
852  if (Kind == Expression)
853  return getExpressionAsToken();
854 
855  return StringRef(Tok.Data, Tok.Length);
856  }
857 
858  int64_t getImm() const {
859  assert(isImm());
860  return Imm.Val;
861  }
862 
863  void setImm(int64_t Val) {
864  assert(isImm());
865  Imm.Val = Val;
866  }
867 
868  ImmTy getImmTy() const {
869  assert(isImm());
870  return Imm.Type;
871  }
872 
873  unsigned getReg() const override {
874  assert(isRegKind());
875  return Reg.RegNo;
876  }
877 
878  SMLoc getStartLoc() const override {
879  return StartLoc;
880  }
881 
882  SMLoc getEndLoc() const override {
883  return EndLoc;
884  }
885 
886  SMRange getLocRange() const {
887  return SMRange(StartLoc, EndLoc);
888  }
889 
890  Modifiers getModifiers() const {
891  assert(isRegKind() || isImmTy(ImmTyNone));
892  return isRegKind() ? Reg.Mods : Imm.Mods;
893  }
894 
895  void setModifiers(Modifiers Mods) {
896  assert(isRegKind() || isImmTy(ImmTyNone));
897  if (isRegKind())
898  Reg.Mods = Mods;
899  else
900  Imm.Mods = Mods;
901  }
902 
903  bool hasModifiers() const {
904  return getModifiers().hasModifiers();
905  }
906 
907  bool hasFPModifiers() const {
908  return getModifiers().hasFPModifiers();
909  }
910 
911  bool hasIntModifiers() const {
912  return getModifiers().hasIntModifiers();
913  }
914 
915  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
916 
917  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
918 
919  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
920 
921  template <unsigned Bitwidth>
922  void addKImmFPOperands(MCInst &Inst, unsigned N) const;
923 
924  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
925  addKImmFPOperands<16>(Inst, N);
926  }
927 
928  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
929  addKImmFPOperands<32>(Inst, N);
930  }
931 
932  void addRegOperands(MCInst &Inst, unsigned N) const;
933 
934  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
935  addRegOperands(Inst, N);
936  }
937 
938  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
939  if (isRegKind())
940  addRegOperands(Inst, N);
941  else if (isExpr())
942  Inst.addOperand(MCOperand::createExpr(Expr));
943  else
944  addImmOperands(Inst, N);
945  }
946 
947  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
948  Modifiers Mods = getModifiers();
949  Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
950  if (isRegKind()) {
951  addRegOperands(Inst, N);
952  } else {
953  addImmOperands(Inst, N, false);
954  }
955  }
956 
957  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
958  assert(!hasIntModifiers());
959  addRegOrImmWithInputModsOperands(Inst, N);
960  }
961 
962  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
963  assert(!hasFPModifiers());
964  addRegOrImmWithInputModsOperands(Inst, N);
965  }
966 
967  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
968  Modifiers Mods = getModifiers();
969  Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
970  assert(isRegKind());
971  addRegOperands(Inst, N);
972  }
973 
974  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
975  assert(!hasIntModifiers());
976  addRegWithInputModsOperands(Inst, N);
977  }
978 
979  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
980  assert(!hasFPModifiers());
981  addRegWithInputModsOperands(Inst, N);
982  }
983 
984  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
985  if (isImm())
986  addImmOperands(Inst, N);
987  else {
988  assert(isExpr());
989  Inst.addOperand(MCOperand::createExpr(Expr));
990  }
991  }
992 
993  static void printImmTy(raw_ostream& OS, ImmTy Type) {
994  switch (Type) {
995  case ImmTyNone: OS << "None"; break;
996  case ImmTyGDS: OS << "GDS"; break;
997  case ImmTyLDS: OS << "LDS"; break;
998  case ImmTyOffen: OS << "Offen"; break;
999  case ImmTyIdxen: OS << "Idxen"; break;
1000  case ImmTyAddr64: OS << "Addr64"; break;
1001  case ImmTyOffset: OS << "Offset"; break;
1002  case ImmTyInstOffset: OS << "InstOffset"; break;
1003  case ImmTyOffset0: OS << "Offset0"; break;
1004  case ImmTyOffset1: OS << "Offset1"; break;
1005  case ImmTyCPol: OS << "CPol"; break;
1006  case ImmTySWZ: OS << "SWZ"; break;
1007  case ImmTyTFE: OS << "TFE"; break;
1008  case ImmTyD16: OS << "D16"; break;
1009  case ImmTyFORMAT: OS << "FORMAT"; break;
1010  case ImmTyClampSI: OS << "ClampSI"; break;
1011  case ImmTyOModSI: OS << "OModSI"; break;
1012  case ImmTyDPP8: OS << "DPP8"; break;
1013  case ImmTyDppCtrl: OS << "DppCtrl"; break;
1014  case ImmTyDppRowMask: OS << "DppRowMask"; break;
1015  case ImmTyDppBankMask: OS << "DppBankMask"; break;
1016  case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1017  case ImmTyDppFi: OS << "FI"; break;
1018  case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1019  case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1020  case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1021  case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1022  case ImmTyDMask: OS << "DMask"; break;
1023  case ImmTyDim: OS << "Dim"; break;
1024  case ImmTyUNorm: OS << "UNorm"; break;
1025  case ImmTyDA: OS << "DA"; break;
1026  case ImmTyR128A16: OS << "R128A16"; break;
1027  case ImmTyA16: OS << "A16"; break;
1028  case ImmTyLWE: OS << "LWE"; break;
1029  case ImmTyOff: OS << "Off"; break;
1030  case ImmTyExpTgt: OS << "ExpTgt"; break;
1031  case ImmTyExpCompr: OS << "ExpCompr"; break;
1032  case ImmTyExpVM: OS << "ExpVM"; break;
1033  case ImmTyHwreg: OS << "Hwreg"; break;
1034  case ImmTySendMsg: OS << "SendMsg"; break;
1035  case ImmTyInterpSlot: OS << "InterpSlot"; break;
1036  case ImmTyInterpAttr: OS << "InterpAttr"; break;
1037  case ImmTyAttrChan: OS << "AttrChan"; break;
1038  case ImmTyOpSel: OS << "OpSel"; break;
1039  case ImmTyOpSelHi: OS << "OpSelHi"; break;
1040  case ImmTyNegLo: OS << "NegLo"; break;
1041  case ImmTyNegHi: OS << "NegHi"; break;
1042  case ImmTySwizzle: OS << "Swizzle"; break;
1043  case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1044  case ImmTyHigh: OS << "High"; break;
1045  case ImmTyBLGP: OS << "BLGP"; break;
1046  case ImmTyCBSZ: OS << "CBSZ"; break;
1047  case ImmTyABID: OS << "ABID"; break;
1048  case ImmTyEndpgm: OS << "Endpgm"; break;
1049  case ImmTyWaitVDST: OS << "WaitVDST"; break;
1050  case ImmTyWaitEXP: OS << "WaitEXP"; break;
1051  }
1052  }
1053 
1054  void print(raw_ostream &OS) const override {
1055  switch (Kind) {
1056  case Register:
1057  OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1058  break;
1059  case Immediate:
1060  OS << '<' << getImm();
1061  if (getImmTy() != ImmTyNone) {
1062  OS << " type: "; printImmTy(OS, getImmTy());
1063  }
1064  OS << " mods: " << Imm.Mods << '>';
1065  break;
1066  case Token:
1067  OS << '\'' << getToken() << '\'';
1068  break;
1069  case Expression:
1070  OS << "<expr " << *Expr << '>';
1071  break;
1072  }
1073  }
1074 
1075  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1076  int64_t Val, SMLoc Loc,
1077  ImmTy Type = ImmTyNone,
1078  bool IsFPImm = false) {
1079  auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1080  Op->Imm.Val = Val;
1081  Op->Imm.IsFPImm = IsFPImm;
1082  Op->Imm.Kind = ImmKindTyNone;
1083  Op->Imm.Type = Type;
1084  Op->Imm.Mods = Modifiers();
1085  Op->StartLoc = Loc;
1086  Op->EndLoc = Loc;
1087  return Op;
1088  }
1089 
1090  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1091  StringRef Str, SMLoc Loc,
1092  bool HasExplicitEncodingSize = true) {
1093  auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1094  Res->Tok.Data = Str.data();
1095  Res->Tok.Length = Str.size();
1096  Res->StartLoc = Loc;
1097  Res->EndLoc = Loc;
1098  return Res;
1099  }
1100 
1101  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1102  unsigned RegNo, SMLoc S,
1103  SMLoc E) {
1104  auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1105  Op->Reg.RegNo = RegNo;
1106  Op->Reg.Mods = Modifiers();
1107  Op->StartLoc = S;
1108  Op->EndLoc = E;
1109  return Op;
1110  }
1111 
1112  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1113  const class MCExpr *Expr, SMLoc S) {
1114  auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1115  Op->Expr = Expr;
1116  Op->StartLoc = S;
1117  Op->EndLoc = S;
1118  return Op;
1119  }
1120 };
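// Illustrative note (added): operands are typically built with the factory
// methods above while parsing. A minimal sketch, with hypothetical names
// (Operands, NameLoc, RegNo, StartLoc, EndLoc, Loc) standing in for the real
// parser state:
//
//   Operands.push_back(AMDGPUOperand::CreateToken(this, "v_add_f32", NameLoc));
//   Operands.push_back(AMDGPUOperand::CreateReg(this, RegNo, StartLoc, EndLoc));
//   Operands.push_back(AMDGPUOperand::CreateImm(this, 16, Loc,
//                                               AMDGPUOperand::ImmTyOffset));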
1121 
1122 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1123  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1124  return OS;
1125 }
1126 
1127 //===----------------------------------------------------------------------===//
1128 // AsmParser
1129 //===----------------------------------------------------------------------===//
1130 
1131 // Holds info related to the current kernel, e.g. the count of SGPRs used.
1132 // A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
1133 // next .amdgpu_hsa_kernel directive or at EOF.
1134 class KernelScopeInfo {
1135  int SgprIndexUnusedMin = -1;
1136  int VgprIndexUnusedMin = -1;
1137  int AgprIndexUnusedMin = -1;
1138  MCContext *Ctx = nullptr;
1139  MCSubtargetInfo const *MSTI = nullptr;
1140 
1141  void usesSgprAt(int i) {
1142  if (i >= SgprIndexUnusedMin) {
1143  SgprIndexUnusedMin = ++i;
1144  if (Ctx) {
1145  MCSymbol* const Sym =
1146  Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1147  Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1148  }
1149  }
1150  }
1151 
1152  void usesVgprAt(int i) {
1153  if (i >= VgprIndexUnusedMin) {
1154  VgprIndexUnusedMin = ++i;
1155  if (Ctx) {
1156  MCSymbol* const Sym =
1157  Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1158  int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1159  VgprIndexUnusedMin);
1160  Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1161  }
1162  }
1163  }
1164 
1165  void usesAgprAt(int i) {
1166  // On targets without MAI instructions the instruction will be rejected in
// AMDGPUAsmParser::MatchAndEmitInstruction, so do not track AGPR usage here.
1167  if (!hasMAIInsts(*MSTI))
1168  return;
1169 
1170  if (i >= AgprIndexUnusedMin) {
1171  AgprIndexUnusedMin = ++i;
1172  if (Ctx) {
1173  MCSymbol* const Sym =
1174  Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1175  Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1176 
1177  // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1178  MCSymbol* const vSym =
1179  Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1180  int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1181  VgprIndexUnusedMin);
1182  vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1183  }
1184  }
1185  }
1186 
1187 public:
1188  KernelScopeInfo() = default;
1189 
1190  void initialize(MCContext &Context) {
1191  Ctx = &Context;
1192  MSTI = Ctx->getSubtargetInfo();
1193 
1194  usesSgprAt(SgprIndexUnusedMin = -1);
1195  usesVgprAt(VgprIndexUnusedMin = -1);
1196  if (hasMAIInsts(*MSTI)) {
1197  usesAgprAt(AgprIndexUnusedMin = -1);
1198  }
1199  }
1200 
1201  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1202  unsigned RegWidth) {
1203  switch (RegKind) {
1204  case IS_SGPR:
1205  usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1206  break;
1207  case IS_AGPR:
1208  usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1209  break;
1210  case IS_VGPR:
1211  usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1212  break;
1213  default:
1214  break;
1215  }
1216  }
1217 };
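// Illustrative note (added): a sketch of how the scope tracking above behaves.
// For a kernel that references s[10:11] and v7, usesRegister() is called with
// (IS_SGPR, 10, 64) and (IS_VGPR, 7, 32), which leads to usesSgprAt(11) and
// usesVgprAt(7); the .kernel.sgpr_count and .kernel.vgpr_count symbols are then
// set to 12 and 8 respectively (assuming no AGPRs are used and a non-gfx90a
// target, where the VGPR total is simply the larger of the VGPR/AGPR counts).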
1218 
1219 class AMDGPUAsmParser : public MCTargetAsmParser {
1220  MCAsmParser &Parser;
1221 
1222  // Number of extra operands parsed after the first optional operand.
1223  // This may be necessary to skip hardcoded mandatory operands.
1224  static const unsigned MAX_OPR_LOOKAHEAD = 8;
1225 
1226  unsigned ForcedEncodingSize = 0;
1227  bool ForcedDPP = false;
1228  bool ForcedSDWA = false;
1229  KernelScopeInfo KernelScope;
1230  unsigned CPolSeen;
1231 
1232  /// @name Auto-generated Match Functions
1233  /// {
1234 
1235 #define GET_ASSEMBLER_HEADER
1236 #include "AMDGPUGenAsmMatcher.inc"
1237 
1238  /// }
1239 
1240 private:
1241  bool ParseAsAbsoluteExpression(uint32_t &Ret);
1242  bool OutOfRangeError(SMRange Range);
1243  /// Calculate the VGPR/SGPR blocks required for the given target, reserved
1244  /// registers, and user-specified NextFreeXGPR values.
1245  ///
1246  /// \param Features [in] Target features, used for bug corrections.
1247  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1248  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1249  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1250  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1251  /// descriptor field, if valid.
1252  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1253  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1254  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1255  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1256  /// \param VGPRBlocks [out] Result VGPR block count.
1257  /// \param SGPRBlocks [out] Result SGPR block count.
1258  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1259  bool FlatScrUsed, bool XNACKUsed,
1260  Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1261  SMRange VGPRRange, unsigned NextFreeSGPR,
1262  SMRange SGPRRange, unsigned &VGPRBlocks,
1263  unsigned &SGPRBlocks);
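  // Illustrative note (added): the "blocks" are the granule-rounded register
  // counts that end up in the kernel descriptor. As a rough sketch, on a target
  // that allocates VGPRs in granules of 4, a NextFreeVGPR of 10 occupies 12
  // VGPRs before being converted to the encoded block count; the exact granule
  // sizes depend on the subtarget (e.g. wavefront size) and are resolved here.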
1264  bool ParseDirectiveAMDGCNTarget();
1265  bool ParseDirectiveAMDHSAKernel();
1266  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1267  bool ParseDirectiveHSACodeObjectVersion();
1268  bool ParseDirectiveHSACodeObjectISA();
1269  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1270  bool ParseDirectiveAMDKernelCodeT();
1271  // TODO: Possibly make subtargetHasRegister const.
1272  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1273  bool ParseDirectiveAMDGPUHsaKernel();
1274 
1275  bool ParseDirectiveISAVersion();
1276  bool ParseDirectiveHSAMetadata();
1277  bool ParseDirectivePALMetadataBegin();
1278  bool ParseDirectivePALMetadata();
1279  bool ParseDirectiveAMDGPULDS();
1280 
1281  /// Common code to parse out a block of text (typically YAML) between start and
1282  /// end directives.
1283  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1284  const char *AssemblerDirectiveEnd,
1285  std::string &CollectString);
1286 
1287  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1288  RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1289  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1290  unsigned &RegNum, unsigned &RegWidth,
1291  bool RestoreOnFailure = false);
1292  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1293  unsigned &RegNum, unsigned &RegWidth,
1294  SmallVectorImpl<AsmToken> &Tokens);
1295  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1296  unsigned &RegWidth,
1297  SmallVectorImpl<AsmToken> &Tokens);
1298  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1299  unsigned &RegWidth,
1300  SmallVectorImpl<AsmToken> &Tokens);
1301  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1302  unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1303  bool ParseRegRange(unsigned& Num, unsigned& Width);
1304  unsigned getRegularReg(RegisterKind RegKind,
1305  unsigned RegNum,
1306  unsigned RegWidth,
1307  SMLoc Loc);
1308 
1309  bool isRegister();
1310  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1311  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1312  void initializeGprCountSymbol(RegisterKind RegKind);
1313  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1314  unsigned RegWidth);
1315  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1316  bool IsAtomic, bool IsLds = false);
1317  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1318  bool IsGdsHardcoded);
1319 
1320 public:
1321  enum AMDGPUMatchResultTy {
1322  Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1323  };
1324  enum OperandMode {
1325  OperandMode_Default,
1326  OperandMode_NSA,
1327  };
1328 
1329  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1330 
1331  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1332  const MCInstrInfo &MII,
1333  const MCTargetOptions &Options)
1334  : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1335  MCAsmParserExtension::Initialize(Parser);
1336 
1337  if (getFeatureBits().none()) {
1338  // Set default features.
1339  copySTI().ToggleFeature("southern-islands");
1340  }
1341 
1342  setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1343 
1344  {
1345  // TODO: make these pre-defined variables read-only.
1346  // Currently there is no suitable machinery in core llvm-mc for this.
1347  // MCSymbol::isRedefinable is intended for another purpose, and
1348  // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1349  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1350  MCContext &Ctx = getContext();
1351  if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1352  MCSymbol *Sym =
1353  Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1354  Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1355  Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1356  Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1357  Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1358  Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1359  } else {
1360  MCSymbol *Sym =
1361  Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1362  Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1363  Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1364  Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1365  Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1366  Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1367  }
1368  if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1369  initializeGprCountSymbol(IS_VGPR);
1370  initializeGprCountSymbol(IS_SGPR);
1371  } else
1372  KernelScope.initialize(getContext());
1373  }
1374  }
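// Illustrative note (added): the pre-defined symbols set up above let assembly
// sources query the target version. For example, for gfx906 (ISA version 9.0.6)
// the parser defines .amdgcn.gfx_generation_number = 9,
// .amdgcn.gfx_generation_minor = 0 and .amdgcn.gfx_generation_stepping = 6, or
// the corresponding .option.machine_version_* symbols on older code-object
// ABIs, with the values coming from AMDGPU::getIsaVersion() for the given CPU.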
1375 
1376  bool hasMIMG_R128() const {
1377  return AMDGPU::hasMIMG_R128(getSTI());
1378  }
1379 
1380  bool hasPackedD16() const {
1381  return AMDGPU::hasPackedD16(getSTI());
1382  }
1383 
1384  bool hasGFX10A16() const {
1385  return AMDGPU::hasGFX10A16(getSTI());
1386  }
1387 
1388  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1389 
1390  bool isSI() const {
1391  return AMDGPU::isSI(getSTI());
1392  }
1393 
1394  bool isCI() const {
1395  return AMDGPU::isCI(getSTI());
1396  }
1397 
1398  bool isVI() const {
1399  return AMDGPU::isVI(getSTI());
1400  }
1401 
1402  bool isGFX9() const {
1403  return AMDGPU::isGFX9(getSTI());
1404  }
1405 
1406  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1407  bool isGFX90A() const {
1408  return AMDGPU::isGFX90A(getSTI());
1409  }
1410 
1411  bool isGFX940() const {
1412  return AMDGPU::isGFX940(getSTI());
1413  }
1414 
1415  bool isGFX9Plus() const {
1416  return AMDGPU::isGFX9Plus(getSTI());
1417  }
1418 
1419  bool isGFX10() const {
1420  return AMDGPU::isGFX10(getSTI());
1421  }
1422 
1423  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1424 
1425  bool isGFX11() const {
1426  return AMDGPU::isGFX11(getSTI());
1427  }
1428 
1429  bool isGFX11Plus() const {
1430  return AMDGPU::isGFX11Plus(getSTI());
1431  }
1432 
1433  bool isGFX10_BEncoding() const {
1434  return AMDGPU::isGFX10_BEncoding(getSTI());
1435  }
1436 
1437  bool hasInv2PiInlineImm() const {
1438  return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1439  }
1440 
1441  bool hasFlatOffsets() const {
1442  return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1443  }
1444 
1445  bool hasArchitectedFlatScratch() const {
1446  return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1447  }
1448 
1449  bool hasSGPR102_SGPR103() const {
1450  return !isVI() && !isGFX9();
1451  }
1452 
1453  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1454 
1455  bool hasIntClamp() const {
1456  return getFeatureBits()[AMDGPU::FeatureIntClamp];
1457  }
1458 
1459  AMDGPUTargetStreamer &getTargetStreamer() {
1460  MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1461  return static_cast<AMDGPUTargetStreamer &>(TS);
1462  }
1463 
1464  const MCRegisterInfo *getMRI() const {
1465  // We need this const_cast because for some reason getContext() is not const
1466  // in MCAsmParser.
1467  return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1468  }
1469 
1470  const MCInstrInfo *getMII() const {
1471  return &MII;
1472  }
1473 
1474  const FeatureBitset &getFeatureBits() const {
1475  return getSTI().getFeatureBits();
1476  }
1477 
1478  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1479  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1480  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1481 
1482  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1483  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1484  bool isForcedDPP() const { return ForcedDPP; }
1485  bool isForcedSDWA() const { return ForcedSDWA; }
1486  ArrayRef<unsigned> getMatchedVariants() const;
1487  StringRef getMatchedVariantName() const;
1488 
1489  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1490  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1491  bool RestoreOnFailure);
1492  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1493  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1494  SMLoc &EndLoc) override;
1495  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1496  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1497  unsigned Kind) override;
1498  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1499  OperandVector &Operands, MCStreamer &Out,
1500  uint64_t &ErrorInfo,
1501  bool MatchingInlineAsm) override;
1502  bool ParseDirective(AsmToken DirectiveID) override;
1503  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1504  OperandMode Mode = OperandMode_Default);
1505  StringRef parseMnemonicSuffix(StringRef Name);
1506  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1507  SMLoc NameLoc, OperandVector &Operands) override;
1508  //bool ProcessInstruction(MCInst &Inst);
1509 
1510  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1511 
1512  OperandMatchResultTy
1513  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1514  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1515  bool (*ConvertResult)(int64_t &) = nullptr);
1516 
1517  OperandMatchResultTy
1518  parseOperandArrayWithPrefix(const char *Prefix,
1519  OperandVector &Operands,
1520  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1521  bool (*ConvertResult)(int64_t&) = nullptr);
1522 
1523  OperandMatchResultTy
1524  parseNamedBit(StringRef Name, OperandVector &Operands,
1525  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1526  OperandMatchResultTy parseCPol(OperandVector &Operands);
1527  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1528  StringRef &Value,
1529  SMLoc &StringLoc);
1530 
1531  bool isModifier();
1532  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1533  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1534  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1535  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1536  bool parseSP3NegModifier();
1537  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1538  OperandMatchResultTy parseReg(OperandVector &Operands);
1539  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1540  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1541  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1542  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1543  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1544  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1545  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1546  OperandMatchResultTy parseUfmt(int64_t &Format);
1547  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1548  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1549  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1550  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1551  OperandMatchResultTy parseNumericFormat(int64_t &Format);
1552  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1553  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1554 
1555  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1556  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1557  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1558  void cvtExp(MCInst &Inst, const OperandVector &Operands);
1559 
1560  bool parseCnt(int64_t &IntVal);
1561  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1562 
1563  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1564  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1565  OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
1566 
1567  bool parseDelay(int64_t &Delay);
1568  OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);
1569 
1570  OperandMatchResultTy parseHwreg(OperandVector &Operands);
1571 
1572 private:
1573  struct OperandInfoTy {
1574  SMLoc Loc;
1575  int64_t Id;
1576  bool IsSymbolic = false;
1577  bool IsDefined = false;
1578 
1579  OperandInfoTy(int64_t Id_) : Id(Id_) {}
1580  };
1581 
1582  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1583  bool validateSendMsg(const OperandInfoTy &Msg,
1584  const OperandInfoTy &Op,
1585  const OperandInfoTy &Stream);
1586 
1587  bool parseHwregBody(OperandInfoTy &HwReg,
1588  OperandInfoTy &Offset,
1589  OperandInfoTy &Width);
1590  bool validateHwreg(const OperandInfoTy &HwReg,
1591  const OperandInfoTy &Offset,
1592  const OperandInfoTy &Width);
1593 
1594  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1595  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1596  SMLoc getBLGPLoc(const OperandVector &Operands) const;
1597 
1598  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1599  const OperandVector &Operands) const;
1600  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1601  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1602  SMLoc getLitLoc(const OperandVector &Operands) const;
1603  SMLoc getConstLoc(const OperandVector &Operands) const;
1604 
1605  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1606  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1607  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1608  bool validateSOPLiteral(const MCInst &Inst) const;
1609  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1610  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1611  bool validateIntClampSupported(const MCInst &Inst);
1612  bool validateMIMGAtomicDMask(const MCInst &Inst);
1613  bool validateMIMGGatherDMask(const MCInst &Inst);
1614  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1615  Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
1616  bool validateMIMGAddrSize(const MCInst &Inst);
1617  bool validateMIMGD16(const MCInst &Inst);
1618  bool validateMIMGDim(const MCInst &Inst);
1619  bool validateMIMGMSAA(const MCInst &Inst);
1620  bool validateOpSel(const MCInst &Inst);
1621  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1622  bool validateVccOperand(unsigned Reg) const;
1623  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1624  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1625  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1626  bool validateAGPRLdSt(const MCInst &Inst) const;
1627  bool validateVGPRAlign(const MCInst &Inst) const;
1628  bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1629  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1630  bool validateDivScale(const MCInst &Inst);
1631  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1632  const SMLoc &IDLoc);
1633  bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands,
1634  const SMLoc &IDLoc);
1635  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1636  unsigned getConstantBusLimit(unsigned Opcode) const;
1637  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1638  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1639  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1640 
1641  bool isSupportedMnemo(StringRef Mnemo,
1642  const FeatureBitset &FBS);
1643  bool isSupportedMnemo(StringRef Mnemo,
1644  const FeatureBitset &FBS,
1645  ArrayRef<unsigned> Variants);
1646  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1647 
1648  bool isId(const StringRef Id) const;
1649  bool isId(const AsmToken &Token, const StringRef Id) const;
1650  bool isToken(const AsmToken::TokenKind Kind) const;
1651  bool trySkipId(const StringRef Id);
1652  bool trySkipId(const StringRef Pref, const StringRef Id);
1653  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1654  bool trySkipToken(const AsmToken::TokenKind Kind);
1655  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1656  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1657  bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1658 
1659  void peekTokens(MutableArrayRef<AsmToken> Tokens);
1660  AsmToken::TokenKind getTokenKind() const;
1661  bool parseExpr(int64_t &Imm, StringRef Expected = "");
1662  bool parseExpr(OperandVector &Operands);
1663  StringRef getTokenStr() const;
1664  AsmToken peekToken();
1665  AsmToken getToken() const;
1666  SMLoc getLoc() const;
1667  void lex();
1668 
1669 public:
1670  void onBeginOfFile() override;
1671 
1672  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1673  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1674 
1675  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1676  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1677  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1678  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1679  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1680  OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1681 
1682  bool parseSwizzleOperand(int64_t &Op,
1683  const unsigned MinVal,
1684  const unsigned MaxVal,
1685  const StringRef ErrMsg,
1686  SMLoc &Loc);
1687  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1688  const unsigned MinVal,
1689  const unsigned MaxVal,
1690  const StringRef ErrMsg);
1691  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1692  bool parseSwizzleOffset(int64_t &Imm);
1693  bool parseSwizzleMacro(int64_t &Imm);
1694  bool parseSwizzleQuadPerm(int64_t &Imm);
1695  bool parseSwizzleBitmaskPerm(int64_t &Imm);
1696  bool parseSwizzleBroadcast(int64_t &Imm);
1697  bool parseSwizzleSwap(int64_t &Imm);
1698  bool parseSwizzleReverse(int64_t &Imm);
1699 
1700  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1701  int64_t parseGPRIdxMacro();
1702 
1703  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1704  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1705  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1706  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1707 
1708  AMDGPUOperand::Ptr defaultCPol() const;
1709 
1710  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1711  AMDGPUOperand::Ptr defaultSMEMOffset() const;
1712  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1713  AMDGPUOperand::Ptr defaultFlatOffset() const;
1714 
1715  OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1716 
1717  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1718  OptionalImmIndexMap &OptionalIdx);
1719  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1720  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1721  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1722  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1723  OptionalImmIndexMap &OptionalIdx);
1724 
1725  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1726  void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1727 
1728  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1729  bool IsAtomic = false);
1730  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1731  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1732 
1733  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1734 
1735  bool parseDimId(unsigned &Encoding);
1736  OperandMatchResultTy parseDim(OperandVector &Operands);
1737  OperandMatchResultTy parseDPP8(OperandVector &Operands);
1738  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1739  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1740  int64_t parseDPPCtrlSel(StringRef Ctrl);
1741  int64_t parseDPPCtrlPerm();
1742  AMDGPUOperand::Ptr defaultRowMask() const;
1743  AMDGPUOperand::Ptr defaultBankMask() const;
1744  AMDGPUOperand::Ptr defaultBoundCtrl() const;
1745  AMDGPUOperand::Ptr defaultFI() const;
1746  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1747  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1748 
1749  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1750  AMDGPUOperand::ImmTy Type);
1751  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1752  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1753  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1754  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1755  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1756  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1757  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1758  uint64_t BasicInstType,
1759  bool SkipDstVcc = false,
1760  bool SkipSrcVcc = false);
1761 
1762  AMDGPUOperand::Ptr defaultBLGP() const;
1763  AMDGPUOperand::Ptr defaultCBSZ() const;
1764  AMDGPUOperand::Ptr defaultABID() const;
1765 
1766  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1767  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1768 
1769  AMDGPUOperand::Ptr defaultWaitVDST() const;
1770  AMDGPUOperand::Ptr defaultWaitEXP() const;
1771 };
1772 
1773 struct OptionalOperand {
1774  const char *Name;
1775  AMDGPUOperand::ImmTy Type;
1776  bool IsBit;
1777  bool (*ConvertResult)(int64_t&);
1778 };
1779 
1780 } // end anonymous namespace
1781 
1782 // May be called with integer type with equivalent bitwidth.
1783 static const fltSemantics *getFltSemantics(unsigned Size) {
1784  switch (Size) {
1785  case 4:
1786  return &APFloat::IEEEsingle();
1787  case 8:
1788  return &APFloat::IEEEdouble();
1789  case 2:
1790  return &APFloat::IEEEhalf();
1791  default:
1792  llvm_unreachable("unsupported fp type");
1793  }
1794 }
1795 
1796 static const fltSemantics *getFltSemantics(MVT VT) {
1797  return getFltSemantics(VT.getSizeInBits() / 8);
1798 }
1799 
1800 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1801  switch (OperandType) {
1802  case AMDGPU::OPERAND_REG_IMM_INT32:
1803  case AMDGPU::OPERAND_REG_IMM_FP32:
1804  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1805  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1806  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1807  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1808  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1809  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1810  case AMDGPU::OPERAND_REG_IMM_V2FP32:
1811  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1812  case AMDGPU::OPERAND_REG_IMM_V2INT32:
1813  case AMDGPU::OPERAND_KIMM32:
1814  return &APFloat::IEEEsingle();
1815  case AMDGPU::OPERAND_REG_IMM_INT64:
1816  case AMDGPU::OPERAND_REG_IMM_FP64:
1817  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1818  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1819  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1820  return &APFloat::IEEEdouble();
1821  case AMDGPU::OPERAND_REG_IMM_INT16:
1822  case AMDGPU::OPERAND_REG_IMM_FP16:
1823  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1824  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1825  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1826  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1827  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1828  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1829  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1830  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1831  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1832  case AMDGPU::OPERAND_REG_IMM_V2INT16:
1833  case AMDGPU::OPERAND_REG_IMM_V2FP16:
1834  case AMDGPU::OPERAND_KIMM16:
1835  return &APFloat::IEEEhalf();
1836  default:
1837  llvm_unreachable("unsupported fp type");
1838  }
1839 }
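// For example, a literal supplied to an OPERAND_REG_IMM_FP16 source is
// interpreted with IEEEhalf semantics, the same literal supplied to an
// OPERAND_REG_IMM_FP32 source uses IEEEsingle, and 64-bit operand types
// map to IEEEdouble.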
1840 
1841 //===----------------------------------------------------------------------===//
1842 // Operand
1843 //===----------------------------------------------------------------------===//
1844 
1845 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1846  bool Lost;
1847 
1848  // Convert literal to the floating-point type implied by VT
1849  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1850  APFloat::rmNearestTiesToEven,
1851  &Lost);
1852  // We allow precision lost but not overflow or underflow
1853  if (Status != APFloat::opOK &&
1854  Lost &&
1855  ((Status & APFloat::opOverflow) != 0 ||
1856  (Status & APFloat::opUnderflow) != 0)) {
1857  return false;
1858  }
1859 
1860  return true;
1861 }
1862 
1863 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1864  return isUIntN(Size, Val) || isIntN(Size, Val);
1865 }
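// For example, isSafeTruncation(-1, 16) and isSafeTruncation(0xFFFF, 16) both
// hold (the value fits as either a signed or an unsigned 16-bit integer),
// whereas isSafeTruncation(0x12345, 16) does not.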
1866 
1867 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1868  if (VT.getScalarType() == MVT::i16) {
1869  // FP immediate values are broken.
1870  return isInlinableIntLiteral(Val);
1871  }
1872 
1873  // f16/v2f16 operands work correctly for all values.
1874  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1875 }
1876 
1877 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1878 
1879  // This is a hack to enable named inline values like
1880  // shared_base with both 32-bit and 64-bit operands.
1881  // Note that these values are defined as
1882  // 32-bit operands only.
1883  if (isInlineValue()) {
1884  return true;
1885  }
1886 
1887  if (!isImmTy(ImmTyNone)) {
1888  // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1889  return false;
1890  }
1891  // TODO: We should avoid using host float here. It would be better to
1892  // check the float bit values which is what a few other places do.
1893  // We've had bot failures before due to weird NaN support on mips hosts.
1894 
1895  APInt Literal(64, Imm.Val);
1896 
1897  if (Imm.IsFPImm) { // We got fp literal token
1898  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1899  return AMDGPU::isInlinableLiteral64(Imm.Val,
1900  AsmParser->hasInv2PiInlineImm());
1901  }
1902 
1903  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1904  if (!canLosslesslyConvertToFPType(FPLiteral, type))
1905  return false;
1906 
1907  if (type.getScalarSizeInBits() == 16) {
1908  return isInlineableLiteralOp16(
1909  static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1910  type, AsmParser->hasInv2PiInlineImm());
1911  }
1912 
1913  // Check if single precision literal is inlinable
1914  return AMDGPU::isInlinableLiteral32(
1915  static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1916  AsmParser->hasInv2PiInlineImm());
1917  }
1918 
1919  // We got int literal token.
1920  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1921  return AMDGPU::isInlinableLiteral64(Imm.Val,
1922  AsmParser->hasInv2PiInlineImm());
1923  }
1924 
1925  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1926  return false;
1927  }
1928 
1929  if (type.getScalarSizeInBits() == 16) {
1930  return isInlineableLiteralOp16(
1931  static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1932  type, AsmParser->hasInv2PiInlineImm());
1933  }
1934 
1935  return AMDGPU::isInlinableLiteral32(
1936  static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1937  AsmParser->hasInv2PiInlineImm());
1938 }
1939 
1940 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1941  // Check that this immediate can be added as literal
1942  if (!isImmTy(ImmTyNone)) {
1943  return false;
1944  }
1945 
1946  if (!Imm.IsFPImm) {
1947  // We got int literal token.
1948 
1949  if (type == MVT::f64 && hasFPModifiers()) {
1950  // Cannot apply fp modifiers to int literals preserving the same semantics
1951  // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1952  // disable these cases.
1953  return false;
1954  }
1955 
1956  unsigned Size = type.getSizeInBits();
1957  if (Size == 64)
1958  Size = 32;
1959 
1960  // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1961  // types.
1962  return isSafeTruncation(Imm.Val, Size);
1963  }
1964 
1965  // We got fp literal token
1966  if (type == MVT::f64) { // Expected 64-bit fp operand
1967  // The low 32 bits of such a literal are zeroed during encoding, but we accept it
1968  return true;
1969  }
1970 
1971  if (type == MVT::i64) { // Expected 64-bit int operand
1972  // We don't allow fp literals in 64-bit integer instructions. It is
1973  // unclear how we should encode them.
1974  return false;
1975  }
1976 
1977  // We allow fp literals with f16x2 operands assuming that the specified
1978  // literal goes into the lower half and the upper half is zero. We also
1979  // require that the literal may be losslessly converted to f16.
1980  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1981  (type == MVT::v2i16)? MVT::i16 :
1982  (type == MVT::v2f32)? MVT::f32 : type;
1983 
1984  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1985  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1986 }
1987 
1988 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1989  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1990 }
1991 
1992 bool AMDGPUOperand::isVRegWithInputMods() const {
1993  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1994  // GFX90A allows DPP on 64-bit operands.
1995  (isRegClass(AMDGPU::VReg_64RegClassID) &&
1996  AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1997 }
1998 
1999 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2000  if (AsmParser->isVI())
2001  return isVReg32();
2002  else if (AsmParser->isGFX9Plus())
2003  return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2004  else
2005  return false;
2006 }
2007 
2008 bool AMDGPUOperand::isSDWAFP16Operand() const {
2009  return isSDWAOperand(MVT::f16);
2010 }
2011 
2012 bool AMDGPUOperand::isSDWAFP32Operand() const {
2013  return isSDWAOperand(MVT::f32);
2014 }
2015 
2016 bool AMDGPUOperand::isSDWAInt16Operand() const {
2017  return isSDWAOperand(MVT::i16);
2018 }
2019 
2020 bool AMDGPUOperand::isSDWAInt32Operand() const {
2021  return isSDWAOperand(MVT::i32);
2022 }
2023 
2024 bool AMDGPUOperand::isBoolReg() const {
2025  auto FB = AsmParser->getFeatureBits();
2026  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2027  (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2028 }
2029 
2030 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2031 {
2032  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2033  assert(Size == 2 || Size == 4 || Size == 8);
2034 
2035  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2036 
2037  if (Imm.Mods.Abs) {
2038  Val &= ~FpSignMask;
2039  }
2040  if (Imm.Mods.Neg) {
2041  Val ^= FpSignMask;
2042  }
2043 
2044  return Val;
2045 }
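// For a 32-bit operand FpSignMask is 0x80000000, so an 'abs' modifier clears
// the sign bit (e.g. 0xBF800000 -> 0x3F800000, i.e. -1.0f -> 1.0f) and a
// 'neg' modifier flips it.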
2046 
2047 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2048  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2049  Inst.getNumOperands())) {
2050  addLiteralImmOperand(Inst, Imm.Val,
2051  ApplyModifiers &
2052  isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2053  } else {
2054  assert(!isImmTy(ImmTyNone) || !hasModifiers());
2055  Inst.addOperand(MCOperand::createImm(Imm.Val));
2056  setImmKindNone();
2057  }
2058 }
2059 
2060 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2061  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2062  auto OpNum = Inst.getNumOperands();
2063  // Check that this operand accepts literals
2064  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2065 
2066  if (ApplyModifiers) {
2067  assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2068  const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2069  Val = applyInputFPModifiers(Val, Size);
2070  }
2071 
2072  APInt Literal(64, Val);
2073  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2074 
2075  if (Imm.IsFPImm) { // We got fp literal token
2076  switch (OpTy) {
2077  case AMDGPU::OPERAND_REG_IMM_INT64:
2078  case AMDGPU::OPERAND_REG_IMM_FP64:
2079  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2080  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2081  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2082  if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2083  AsmParser->hasInv2PiInlineImm())) {
2084  Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2085  setImmKindConst();
2086  return;
2087  }
2088 
2089  // Non-inlineable
2090  if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2091  // For fp operands we check if low 32 bits are zeros
2092  if (Literal.getLoBits(32) != 0) {
2093  const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2094  "Can't encode literal as exact 64-bit floating-point operand. "
2095  "Low 32-bits will be set to zero");
2096  }
2097 
2098  Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2099  setImmKindLiteral();
2100  return;
2101  }
2102 
2103  // We don't allow fp literals in 64-bit integer instructions. It is
2104  // unclear how we should encode them. This case should be checked earlier
2105  // in predicate methods (isLiteralImm())
2106  llvm_unreachable("fp literal in 64-bit integer instruction.");
2107 
2108  case AMDGPU::OPERAND_REG_IMM_INT32:
2109  case AMDGPU::OPERAND_REG_IMM_FP32:
2110  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2111  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2112  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2113  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2114  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2115  case AMDGPU::OPERAND_REG_IMM_INT16:
2116  case AMDGPU::OPERAND_REG_IMM_FP16:
2117  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2118  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2119  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2120  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2121  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2122  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2123  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2124  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2125  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2126  case AMDGPU::OPERAND_REG_IMM_V2INT16:
2127  case AMDGPU::OPERAND_REG_IMM_V2FP16:
2128  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2129  case AMDGPU::OPERAND_REG_IMM_V2FP32:
2130  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2131  case AMDGPU::OPERAND_REG_IMM_V2INT32:
2132  case AMDGPU::OPERAND_KIMM32:
2133  case AMDGPU::OPERAND_KIMM16: {
2134  bool lost;
2135  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2136  // Convert literal to the operand's floating-point type
2137  FPLiteral.convert(*getOpFltSemantics(OpTy),
2138  APFloat::rmNearestTiesToEven, &lost);
2139  // We allow precision lost but not overflow or underflow. This should be
2140  // checked earlier in isLiteralImm()
2141 
2142  uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2143  Inst.addOperand(MCOperand::createImm(ImmVal));
2144  setImmKindLiteral();
2145  return;
2146  }
2147  default:
2148  llvm_unreachable("invalid operand size");
2149  }
2150 
2151  return;
2152  }
2153 
2154  // We got int literal token.
2155  // Only sign extend inline immediates.
2156  switch (OpTy) {
2157  case AMDGPU::OPERAND_REG_IMM_INT32:
2158  case AMDGPU::OPERAND_REG_IMM_FP32:
2159  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2160  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2161  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2162  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2163  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2164  case AMDGPU::OPERAND_REG_IMM_V2INT16:
2165  case AMDGPU::OPERAND_REG_IMM_V2FP16:
2166  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2167  case AMDGPU::OPERAND_REG_IMM_V2INT32:
2168  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2169  case AMDGPU::OPERAND_REG_IMM_V2FP32:
2170  if (isSafeTruncation(Val, 32) &&
2171  AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2172  AsmParser->hasInv2PiInlineImm())) {
2173  Inst.addOperand(MCOperand::createImm(Val));
2174  setImmKindConst();
2175  return;
2176  }
2177 
2178  Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2179  setImmKindLiteral();
2180  return;
2181 
2182  case AMDGPU::OPERAND_REG_IMM_INT64:
2183  case AMDGPU::OPERAND_REG_IMM_FP64:
2184  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2185  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2186  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2187  if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2188  Inst.addOperand(MCOperand::createImm(Val));
2189  setImmKindConst();
2190  return;
2191  }
2192 
2193  Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2194  setImmKindLiteral();
2195  return;
2196 
2197  case AMDGPU::OPERAND_REG_IMM_INT16:
2198  case AMDGPU::OPERAND_REG_IMM_FP16:
2199  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2200  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2201  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2202  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2203  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2204  if (isSafeTruncation(Val, 16) &&
2205  AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2206  AsmParser->hasInv2PiInlineImm())) {
2207  Inst.addOperand(MCOperand::createImm(Val));
2208  setImmKindConst();
2209  return;
2210  }
2211 
2212  Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2213  setImmKindLiteral();
2214  return;
2215 
2216  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2217  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2218  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2219  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2220  assert(isSafeTruncation(Val, 16));
2221  assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2222  AsmParser->hasInv2PiInlineImm()));
2223 
2224  Inst.addOperand(MCOperand::createImm(Val));
2225  return;
2226  }
2227  case AMDGPU::OPERAND_KIMM32:
2228  Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2229  setImmKindNone();
2230  return;
2231  case AMDGPU::OPERAND_KIMM16:
2232  Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2233  setImmKindNone();
2234  return;
2235  default:
2236  llvm_unreachable("invalid operand size");
2237  }
2238 }
2239 
2240 template <unsigned Bitwidth>
2241 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2242  APInt Literal(64, Imm.Val);
2243  setImmKindNone();
2244 
2245  if (!Imm.IsFPImm) {
2246  // We got int literal token.
2247  Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2248  return;
2249  }
2250 
2251  bool Lost;
2252  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2253  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2254  APFloat::rmNearestTiesToEven, &Lost);
2255  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2256 }
2257 
2258 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2259  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2260 }
2261 
2262 static bool isInlineValue(unsigned Reg) {
2263  switch (Reg) {
2264  case AMDGPU::SRC_SHARED_BASE:
2265  case AMDGPU::SRC_SHARED_LIMIT:
2266  case AMDGPU::SRC_PRIVATE_BASE:
2267  case AMDGPU::SRC_PRIVATE_LIMIT:
2268  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2269  return true;
2270  case AMDGPU::SRC_VCCZ:
2271  case AMDGPU::SRC_EXECZ:
2272  case AMDGPU::SRC_SCC:
2273  return true;
2274  case AMDGPU::SGPR_NULL:
2275  return true;
2276  default:
2277  return false;
2278  }
2279 }
2280 
2281 bool AMDGPUOperand::isInlineValue() const {
2282  return isRegKind() && ::isInlineValue(getReg());
2283 }
2284 
2285 //===----------------------------------------------------------------------===//
2286 // AsmParser
2287 //===----------------------------------------------------------------------===//
2288 
2289 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2290  if (Is == IS_VGPR) {
2291  switch (RegWidth) {
2292  default: return -1;
2293  case 32:
2294  return AMDGPU::VGPR_32RegClassID;
2295  case 64:
2296  return AMDGPU::VReg_64RegClassID;
2297  case 96:
2298  return AMDGPU::VReg_96RegClassID;
2299  case 128:
2300  return AMDGPU::VReg_128RegClassID;
2301  case 160:
2302  return AMDGPU::VReg_160RegClassID;
2303  case 192:
2304  return AMDGPU::VReg_192RegClassID;
2305  case 224:
2306  return AMDGPU::VReg_224RegClassID;
2307  case 256:
2308  return AMDGPU::VReg_256RegClassID;
2309  case 512:
2310  return AMDGPU::VReg_512RegClassID;
2311  case 1024:
2312  return AMDGPU::VReg_1024RegClassID;
2313  }
2314  } else if (Is == IS_TTMP) {
2315  switch (RegWidth) {
2316  default: return -1;
2317  case 32:
2318  return AMDGPU::TTMP_32RegClassID;
2319  case 64:
2320  return AMDGPU::TTMP_64RegClassID;
2321  case 128:
2322  return AMDGPU::TTMP_128RegClassID;
2323  case 256:
2324  return AMDGPU::TTMP_256RegClassID;
2325  case 512:
2326  return AMDGPU::TTMP_512RegClassID;
2327  }
2328  } else if (Is == IS_SGPR) {
2329  switch (RegWidth) {
2330  default: return -1;
2331  case 32:
2332  return AMDGPU::SGPR_32RegClassID;
2333  case 64:
2334  return AMDGPU::SGPR_64RegClassID;
2335  case 96:
2336  return AMDGPU::SGPR_96RegClassID;
2337  case 128:
2338  return AMDGPU::SGPR_128RegClassID;
2339  case 160:
2340  return AMDGPU::SGPR_160RegClassID;
2341  case 192:
2342  return AMDGPU::SGPR_192RegClassID;
2343  case 224:
2344  return AMDGPU::SGPR_224RegClassID;
2345  case 256:
2346  return AMDGPU::SGPR_256RegClassID;
2347  case 512:
2348  return AMDGPU::SGPR_512RegClassID;
2349  }
2350  } else if (Is == IS_AGPR) {
2351  switch (RegWidth) {
2352  default: return -1;
2353  case 32:
2354  return AMDGPU::AGPR_32RegClassID;
2355  case 64:
2356  return AMDGPU::AReg_64RegClassID;
2357  case 96:
2358  return AMDGPU::AReg_96RegClassID;
2359  case 128:
2360  return AMDGPU::AReg_128RegClassID;
2361  case 160:
2362  return AMDGPU::AReg_160RegClassID;
2363  case 192:
2364  return AMDGPU::AReg_192RegClassID;
2365  case 224:
2366  return AMDGPU::AReg_224RegClassID;
2367  case 256:
2368  return AMDGPU::AReg_256RegClassID;
2369  case 512:
2370  return AMDGPU::AReg_512RegClassID;
2371  case 1024:
2372  return AMDGPU::AReg_1024RegClassID;
2373  }
2374  }
2375  return -1;
2376 }
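// For example, (IS_SGPR, 64) maps to SGPR_64RegClassID and (IS_VGPR, 96) to
// VReg_96RegClassID; widths without a matching class (e.g. a 96-bit TTMP
// range) yield -1.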
2377 
2378 static unsigned getSpecialRegForName(StringRef RegName) {
2379  return StringSwitch<unsigned>(RegName)
2380  .Case("exec", AMDGPU::EXEC)
2381  .Case("vcc", AMDGPU::VCC)
2382  .Case("flat_scratch", AMDGPU::FLAT_SCR)
2383  .Case("xnack_mask", AMDGPU::XNACK_MASK)
2384  .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2385  .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2386  .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2387  .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2388  .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2389  .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2390  .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2391  .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2392  .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2393  .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2394  .Case("lds_direct", AMDGPU::LDS_DIRECT)
2395  .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2396  .Case("m0", AMDGPU::M0)
2397  .Case("vccz", AMDGPU::SRC_VCCZ)
2398  .Case("src_vccz", AMDGPU::SRC_VCCZ)
2399  .Case("execz", AMDGPU::SRC_EXECZ)
2400  .Case("src_execz", AMDGPU::SRC_EXECZ)
2401  .Case("scc", AMDGPU::SRC_SCC)
2402  .Case("src_scc", AMDGPU::SRC_SCC)
2403  .Case("tba", AMDGPU::TBA)
2404  .Case("tma", AMDGPU::TMA)
2405  .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2406  .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2407  .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2408  .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2409  .Case("vcc_lo", AMDGPU::VCC_LO)
2410  .Case("vcc_hi", AMDGPU::VCC_HI)
2411  .Case("exec_lo", AMDGPU::EXEC_LO)
2412  .Case("exec_hi", AMDGPU::EXEC_HI)
2413  .Case("tma_lo", AMDGPU::TMA_LO)
2414  .Case("tma_hi", AMDGPU::TMA_HI)
2415  .Case("tba_lo", AMDGPU::TBA_LO)
2416  .Case("tba_hi", AMDGPU::TBA_HI)
2417  .Case("pc", AMDGPU::PC_REG)
2418  .Case("null", AMDGPU::SGPR_NULL)
2419  .Default(AMDGPU::NoRegister);
2420 }
2421 
2422 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2423  SMLoc &EndLoc, bool RestoreOnFailure) {
2424  auto R = parseRegister();
2425  if (!R) return true;
2426  assert(R->isReg());
2427  RegNo = R->getReg();
2428  StartLoc = R->getStartLoc();
2429  EndLoc = R->getEndLoc();
2430  return false;
2431 }
2432 
2433 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2434  SMLoc &EndLoc) {
2435  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2436 }
2437 
2438 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2439  SMLoc &StartLoc,
2440  SMLoc &EndLoc) {
2441  bool Result =
2442  ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2443  bool PendingErrors = getParser().hasPendingError();
2444  getParser().clearPendingErrors();
2445  if (PendingErrors)
2446  return MatchOperand_ParseFail;
2447  if (Result)
2448  return MatchOperand_NoMatch;
2449  return MatchOperand_Success;
2450 }
2451 
2452 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2453  RegisterKind RegKind, unsigned Reg1,
2454  SMLoc Loc) {
2455  switch (RegKind) {
2456  case IS_SPECIAL:
2457  if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2458  Reg = AMDGPU::EXEC;
2459  RegWidth = 64;
2460  return true;
2461  }
2462  if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2463  Reg = AMDGPU::FLAT_SCR;
2464  RegWidth = 64;
2465  return true;
2466  }
2467  if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2468  Reg = AMDGPU::XNACK_MASK;
2469  RegWidth = 64;
2470  return true;
2471  }
2472  if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2473  Reg = AMDGPU::VCC;
2474  RegWidth = 64;
2475  return true;
2476  }
2477  if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2478  Reg = AMDGPU::TBA;
2479  RegWidth = 64;
2480  return true;
2481  }
2482  if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2483  Reg = AMDGPU::TMA;
2484  RegWidth = 64;
2485  return true;
2486  }
2487  Error(Loc, "register does not fit in the list");
2488  return false;
2489  case IS_VGPR:
2490  case IS_SGPR:
2491  case IS_AGPR:
2492  case IS_TTMP:
2493  if (Reg1 != Reg + RegWidth / 32) {
2494  Error(Loc, "registers in a list must have consecutive indices");
2495  return false;
2496  }
2497  RegWidth += 32;
2498  return true;
2499  default:
2500  llvm_unreachable("unexpected register kind");
2501  }
2502 }
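// For regular registers the list must be strictly consecutive, so [s0,s1,s2,s3]
// grows RegWidth from 32 to 128 one register at a time, while special pairs
// such as [exec_lo,exec_hi] or [vcc_lo,vcc_hi] collapse into the 64-bit
// aliases exec and vcc.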
2503 
2504 struct RegInfo {
2505  StringLiteral Name;
2506  RegisterKind Kind;
2507 };
2508 
2509 static constexpr RegInfo RegularRegisters[] = {
2510  {{"v"}, IS_VGPR},
2511  {{"s"}, IS_SGPR},
2512  {{"ttmp"}, IS_TTMP},
2513  {{"acc"}, IS_AGPR},
2514  {{"a"}, IS_AGPR},
2515 };
2516 
2517 static bool isRegularReg(RegisterKind Kind) {
2518  return Kind == IS_VGPR ||
2519  Kind == IS_SGPR ||
2520  Kind == IS_TTMP ||
2521  Kind == IS_AGPR;
2522 }
2523 
2524 static const RegInfo* getRegularRegInfo(StringRef Str) {
2525  for (const RegInfo &Reg : RegularRegisters)
2526  if (Str.startswith(Reg.Name))
2527  return &Reg;
2528  return nullptr;
2529 }
2530 
2531 static bool getRegNum(StringRef Str, unsigned& Num) {
2532  return !Str.getAsInteger(10, Num);
2533 }
2534 
2535 bool
2536 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2537  const AsmToken &NextToken) const {
2538 
2539  // A list of consecutive registers: [s0,s1,s2,s3]
2540  if (Token.is(AsmToken::LBrac))
2541  return true;
2542 
2543  if (!Token.is(AsmToken::Identifier))
2544  return false;
2545 
2546  // A single register like s0 or a range of registers like s[0:1]
2547 
2548  StringRef Str = Token.getString();
2549  const RegInfo *Reg = getRegularRegInfo(Str);
2550  if (Reg) {
2551  StringRef RegName = Reg->Name;
2552  StringRef RegSuffix = Str.substr(RegName.size());
2553  if (!RegSuffix.empty()) {
2554  unsigned Num;
2555  // A single register with an index: rXX
2556  if (getRegNum(RegSuffix, Num))
2557  return true;
2558  } else {
2559  // A range of registers: r[XX:YY].
2560  if (NextToken.is(AsmToken::LBrac))
2561  return true;
2562  }
2563  }
2564 
2565  return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2566 }
2567 
2568 bool
2569 AMDGPUAsmParser::isRegister()
2570 {
2571  return isRegister(getToken(), peekToken());
2572 }
2573 
2574 unsigned
2575 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2576  unsigned RegNum,
2577  unsigned RegWidth,
2578  SMLoc Loc) {
2579 
2580  assert(isRegularReg(RegKind));
2581 
2582  unsigned AlignSize = 1;
2583  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2584  // SGPR and TTMP registers must be aligned.
2585  // Max required alignment is 4 dwords.
2586  AlignSize = std::min(RegWidth / 32, 4u);
2587  }
2588 
2589  if (RegNum % AlignSize != 0) {
2590  Error(Loc, "invalid register alignment");
2591  return AMDGPU::NoRegister;
2592  }
2593 
2594  unsigned RegIdx = RegNum / AlignSize;
2595  int RCID = getRegClass(RegKind, RegWidth);
2596  if (RCID == -1) {
2597  Error(Loc, "invalid or unsupported register size");
2598  return AMDGPU::NoRegister;
2599  }
2600 
2601  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2602  const MCRegisterClass RC = TRI->getRegClass(RCID);
2603  if (RegIdx >= RC.getNumRegs()) {
2604  Error(Loc, "register index is out of range");
2605  return AMDGPU::NoRegister;
2606  }
2607 
2608  return RC.getRegister(RegIdx);
2609 }
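// For example, s[4:7] arrives here as (IS_SGPR, RegNum = 4, RegWidth = 128):
// AlignSize is 4, the index is aligned, RegIdx becomes 1, and the register
// returned is the second element of the SGPR_128 class. A misaligned range
// such as s[2:5] is rejected with "invalid register alignment".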
2610 
2611 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2612  int64_t RegLo, RegHi;
2613  if (!skipToken(AsmToken::LBrac, "missing register index"))
2614  return false;
2615 
2616  SMLoc FirstIdxLoc = getLoc();
2617  SMLoc SecondIdxLoc;
2618 
2619  if (!parseExpr(RegLo))
2620  return false;
2621 
2622  if (trySkipToken(AsmToken::Colon)) {
2623  SecondIdxLoc = getLoc();
2624  if (!parseExpr(RegHi))
2625  return false;
2626  } else {
2627  RegHi = RegLo;
2628  }
2629 
2630  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2631  return false;
2632 
2633  if (!isUInt<32>(RegLo)) {
2634  Error(FirstIdxLoc, "invalid register index");
2635  return false;
2636  }
2637 
2638  if (!isUInt<32>(RegHi)) {
2639  Error(SecondIdxLoc, "invalid register index");
2640  return false;
2641  }
2642 
2643  if (RegLo > RegHi) {
2644  Error(FirstIdxLoc, "first register index should not exceed second index");
2645  return false;
2646  }
2647 
2648  Num = static_cast<unsigned>(RegLo);
2649  RegWidth = 32 * ((RegHi - RegLo) + 1);
2650  return true;
2651 }
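// For example, "[0:3]" yields Num = 0 and RegWidth = 128, while a single
// bracketed index such as "[5]" (no colon) yields Num = 5 and RegWidth = 32.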
2652 
2653 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2654  unsigned &RegNum, unsigned &RegWidth,
2655  SmallVectorImpl<AsmToken> &Tokens) {
2656  assert(isToken(AsmToken::Identifier));
2657  unsigned Reg = getSpecialRegForName(getTokenStr());
2658  if (Reg) {
2659  RegNum = 0;
2660  RegWidth = 32;
2661  RegKind = IS_SPECIAL;
2662  Tokens.push_back(getToken());
2663  lex(); // skip register name
2664  }
2665  return Reg;
2666 }
2667 
2668 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2669  unsigned &RegNum, unsigned &RegWidth,
2670  SmallVectorImpl<AsmToken> &Tokens) {
2671  assert(isToken(AsmToken::Identifier));
2672  StringRef RegName = getTokenStr();
2673  auto Loc = getLoc();
2674 
2675  const RegInfo *RI = getRegularRegInfo(RegName);
2676  if (!RI) {
2677  Error(Loc, "invalid register name");
2678  return AMDGPU::NoRegister;
2679  }
2680 
2681  Tokens.push_back(getToken());
2682  lex(); // skip register name
2683 
2684  RegKind = RI->Kind;
2685  StringRef RegSuffix = RegName.substr(RI->Name.size());
2686  if (!RegSuffix.empty()) {
2687  // Single 32-bit register: vXX.
2688  if (!getRegNum(RegSuffix, RegNum)) {
2689  Error(Loc, "invalid register index");
2690  return AMDGPU::NoRegister;
2691  }
2692  RegWidth = 32;
2693  } else {
2694  // Range of registers: v[XX:YY]. ":YY" is optional.
2695  if (!ParseRegRange(RegNum, RegWidth))
2696  return AMDGPU::NoRegister;
2697  }
2698 
2699  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2700 }
2701 
2702 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2703  unsigned &RegWidth,
2704  SmallVectorImpl<AsmToken> &Tokens) {
2705  unsigned Reg = AMDGPU::NoRegister;
2706  auto ListLoc = getLoc();
2707 
2708  if (!skipToken(AsmToken::LBrac,
2709  "expected a register or a list of registers")) {
2710  return AMDGPU::NoRegister;
2711  }
2712 
2713  // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2714 
2715  auto Loc = getLoc();
2716  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2717  return AMDGPU::NoRegister;
2718  if (RegWidth != 32) {
2719  Error(Loc, "expected a single 32-bit register");
2720  return AMDGPU::NoRegister;
2721  }
2722 
2723  for (; trySkipToken(AsmToken::Comma); ) {
2724  RegisterKind NextRegKind;
2725  unsigned NextReg, NextRegNum, NextRegWidth;
2726  Loc = getLoc();
2727 
2728  if (!ParseAMDGPURegister(NextRegKind, NextReg,
2729  NextRegNum, NextRegWidth,
2730  Tokens)) {
2731  return AMDGPU::NoRegister;
2732  }
2733  if (NextRegWidth != 32) {
2734  Error(Loc, "expected a single 32-bit register");
2735  return AMDGPU::NoRegister;
2736  }
2737  if (NextRegKind != RegKind) {
2738  Error(Loc, "registers in a list must be of the same kind");
2739  return AMDGPU::NoRegister;
2740  }
2741  if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2742  return AMDGPU::NoRegister;
2743  }
2744 
2745  if (!skipToken(AsmToken::RBrac,
2746  "expected a comma or a closing square bracket")) {
2747  return AMDGPU::NoRegister;
2748  }
2749 
2750  if (isRegularReg(RegKind))
2751  Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2752 
2753  return Reg;
2754 }
2755 
2756 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2757  unsigned &RegNum, unsigned &RegWidth,
2758  SmallVectorImpl<AsmToken> &Tokens) {
2759  auto Loc = getLoc();
2760  Reg = AMDGPU::NoRegister;
2761 
2762  if (isToken(AsmToken::Identifier)) {
2763  Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2764  if (Reg == AMDGPU::NoRegister)
2765  Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2766  } else {
2767  Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2768  }
2769 
2770  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2771  if (Reg == AMDGPU::NoRegister) {
2772  assert(Parser.hasPendingError());
2773  return false;
2774  }
2775 
2776  if (!subtargetHasRegister(*TRI, Reg)) {
2777  if (Reg == AMDGPU::SGPR_NULL) {
2778  Error(Loc, "'null' operand is not supported on this GPU");
2779  } else {
2780  Error(Loc, "register not available on this GPU");
2781  }
2782  return false;
2783  }
2784 
2785  return true;
2786 }
2787 
2788 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2789  unsigned &RegNum, unsigned &RegWidth,
2790  bool RestoreOnFailure /*=false*/) {
2791  Reg = AMDGPU::NoRegister;
2792 
2793  SmallVector<AsmToken, 1> Tokens;
2794  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2795  if (RestoreOnFailure) {
2796  while (!Tokens.empty()) {
2797  getLexer().UnLex(Tokens.pop_back_val());
2798  }
2799  }
2800  return true;
2801  }
2802  return false;
2803 }
2804 
2805 Optional<StringRef>
2806 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2807  switch (RegKind) {
2808  case IS_VGPR:
2809  return StringRef(".amdgcn.next_free_vgpr");
2810  case IS_SGPR:
2811  return StringRef(".amdgcn.next_free_sgpr");
2812  default:
2813  return None;
2814  }
2815 }
2816 
2817 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2818  auto SymbolName = getGprCountSymbolName(RegKind);
2819  assert(SymbolName && "initializing invalid register kind");
2820  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2821  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2822 }
2823 
2824 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2825  unsigned DwordRegIndex,
2826  unsigned RegWidth) {
2827  // Symbols are only defined for GCN targets
2828  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2829  return true;
2830 
2831  auto SymbolName = getGprCountSymbolName(RegKind);
2832  if (!SymbolName)
2833  return true;
2834  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2835 
2836  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2837  int64_t OldCount;
2838 
2839  if (!Sym->isVariable())
2840  return !Error(getLoc(),
2841  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2842  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2843  return !Error(
2844  getLoc(),
2845  ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2846 
2847  if (OldCount <= NewMax)
2848  Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2849 
2850  return true;
2851 }
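// For example, after parsing v[4:7] (DwordRegIndex = 4, RegWidth = 128),
// NewMax is 7, so .amdgcn.next_free_vgpr is raised to 8 unless it already
// exceeds that value.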
2852 
2853 std::unique_ptr<AMDGPUOperand>
2854 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2855  const auto &Tok = getToken();
2856  SMLoc StartLoc = Tok.getLoc();
2857  SMLoc EndLoc = Tok.getEndLoc();
2858  RegisterKind RegKind;
2859  unsigned Reg, RegNum, RegWidth;
2860 
2861  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2862  return nullptr;
2863  }
2864  if (isHsaAbiVersion3AndAbove(&getSTI())) {
2865  if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2866  return nullptr;
2867  } else
2868  KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2869  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2870 }
2871 
2872 OperandMatchResultTy
2873 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2874  // TODO: add syntactic sugar for 1/(2*PI)
2875 
2876  assert(!isRegister());
2877  assert(!isModifier());
2878 
2879  const auto& Tok = getToken();
2880  const auto& NextTok = peekToken();
2881  bool IsReal = Tok.is(AsmToken::Real);
2882  SMLoc S = getLoc();
2883  bool Negate = false;
2884 
2885  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2886  lex();
2887  IsReal = true;
2888  Negate = true;
2889  }
2890 
2891  if (IsReal) {
2892  // Floating-point expressions are not supported.
2893  // Can only allow floating-point literals with an
2894  // optional sign.
2895 
2896  StringRef Num = getTokenStr();
2897  lex();
2898 
2899  APFloat RealVal(APFloat::IEEEdouble());
2900  auto roundMode = APFloat::rmNearestTiesToEven;
2901  if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2902  return MatchOperand_ParseFail;
2903  }
2904  if (Negate)
2905  RealVal.changeSign();
2906 
2907  Operands.push_back(
2908  AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2909  AMDGPUOperand::ImmTyNone, true));
2910 
2911  return MatchOperand_Success;
2912 
2913  } else {
2914  int64_t IntVal;
2915  const MCExpr *Expr;
2916  SMLoc S = getLoc();
2917 
2918  if (HasSP3AbsModifier) {
2919  // This is a workaround for handling expressions
2920  // as arguments of SP3 'abs' modifier, for example:
2921  // |1.0|
2922  // |-1|
2923  // |1+x|
2924  // This syntax is not compatible with syntax of standard
2925  // MC expressions (due to the trailing '|').
2926  SMLoc EndLoc;
2927  if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2928  return MatchOperand_ParseFail;
2929  } else {
2930  if (Parser.parseExpression(Expr))
2931  return MatchOperand_ParseFail;
2932  }
2933 
2934  if (Expr->evaluateAsAbsolute(IntVal)) {
2935  Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2936  } else {
2937  Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2938  }
2939 
2940  return MatchOperand_Success;
2941  }
2942 
2943  return MatchOperand_NoMatch;
2944 }
2945 
2946 OperandMatchResultTy
2947 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2948  if (!isRegister())
2949  return MatchOperand_NoMatch;
2950 
2951  if (auto R = parseRegister()) {
2952  assert(R->isReg());
2953  Operands.push_back(std::move(R));
2954  return MatchOperand_Success;
2955  }
2956  return MatchOperand_ParseFail;
2957 }
2958 
2959 OperandMatchResultTy
2960 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2961  auto res = parseReg(Operands);
2962  if (res != MatchOperand_NoMatch) {
2963  return res;
2964  } else if (isModifier()) {
2965  return MatchOperand_NoMatch;
2966  } else {
2967  return parseImm(Operands, HasSP3AbsMod);
2968  }
2969 }
2970 
2971 bool
2972 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2973  if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2974  const auto &str = Token.getString();
2975  return str == "abs" || str == "neg" || str == "sext";
2976  }
2977  return false;
2978 }
2979 
2980 bool
2981 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2982  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2983 }
2984 
2985 bool
2986 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2987  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2988 }
2989 
2990 bool
2991 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2992  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2993 }
2994 
2995 // Check if this is an operand modifier or an opcode modifier
2996 // which may look like an expression but it is not. We should
2997 // avoid parsing these modifiers as expressions. Currently
2998 // recognized sequences are:
2999 // |...|
3000 // abs(...)
3001 // neg(...)
3002 // sext(...)
3003 // -reg
3004 // -|...|
3005 // -abs(...)
3006 // name:...
3007 // Note that simple opcode modifiers like 'gds' may be parsed as
3008 // expressions; this is a special case. See getExpressionAsToken.
3009 //
3010 bool
3011 AMDGPUAsmParser::isModifier() {
3012 
3013  AsmToken Tok = getToken();
3014  AsmToken NextToken[2];
3015  peekTokens(NextToken);
3016 
3017  return isOperandModifier(Tok, NextToken[0]) ||
3018  (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3019  isOpcodeModifierWithVal(Tok, NextToken[0]);
3020 }
3021 
3022 // Check if the current token is an SP3 'neg' modifier.
3023 // Currently this modifier is allowed in the following context:
3024 //
3025 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3026 // 2. Before an 'abs' modifier: -abs(...)
3027 // 3. Before an SP3 'abs' modifier: -|...|
3028 //
3029 // In all other cases "-" is handled as a part
3030 // of an expression that follows the sign.
3031 //
3032 // Note: When "-" is followed by an integer literal,
3033 // this is interpreted as integer negation rather
3034 // than a floating-point NEG modifier applied to N.
3035  // Besides being counter-intuitive, such use of a floating-point
3036  // NEG modifier would have resulted in different meanings
3037 // of integer literals used with VOP1/2/C and VOP3,
3038 // for example:
3039 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3040 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3041 // Negative fp literals with preceding "-" are
3042 // handled likewise for uniformity
3043 //
3044 bool
3045 AMDGPUAsmParser::parseSP3NegModifier() {
3046 
3047  AsmToken NextToken[2];
3048  peekTokens(NextToken);
3049 
3050  if (isToken(AsmToken::Minus) &&
3051  (isRegister(NextToken[0], NextToken[1]) ||
3052  NextToken[0].is(AsmToken::Pipe) ||
3053  isId(NextToken[0], "abs"))) {
3054  lex();
3055  return true;
3056  }
3057 
3058  return false;
3059 }
3060 
3061 OperandMatchResultTy
3062 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3063  bool AllowImm) {
3064  bool Neg, SP3Neg;
3065  bool Abs, SP3Abs;
3066  SMLoc Loc;
3067 
3068  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3069  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3070  Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3071  return MatchOperand_ParseFail;
3072  }
3073 
3074  SP3Neg = parseSP3NegModifier();
3075 
3076  Loc = getLoc();
3077  Neg = trySkipId("neg");
3078  if (Neg && SP3Neg) {
3079  Error(Loc, "expected register or immediate");
3080  return MatchOperand_ParseFail;
3081  }
3082  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3083  return MatchOperand_ParseFail;
3084 
3085  Abs = trySkipId("abs");
3086  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3087  return MatchOperand_ParseFail;
3088 
3089  Loc = getLoc();
3090  SP3Abs = trySkipToken(AsmToken::Pipe);
3091  if (Abs && SP3Abs) {
3092  Error(Loc, "expected register or immediate");
3093  return MatchOperand_ParseFail;
3094  }
3095 
3097  if (AllowImm) {
3098  Res = parseRegOrImm(Operands, SP3Abs);
3099  } else {
3100  Res = parseReg(Operands);
3101  }
3102  if (Res != MatchOperand_Success) {
3103  return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3104  }
3105 
3106  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3107  return MatchOperand_ParseFail;
3108  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3109  return MatchOperand_ParseFail;
3110  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3111  return MatchOperand_ParseFail;
3112 
3113  AMDGPUOperand::Modifiers Mods;
3114  Mods.Abs = Abs || SP3Abs;
3115  Mods.Neg = Neg || SP3Neg;
3116 
3117  if (Mods.hasFPModifiers()) {
3118  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3119  if (Op.isExpr()) {
3120  Error(Op.getStartLoc(), "expected an absolute expression");
3121  return MatchOperand_ParseFail;
3122  }
3123  Op.setModifiers(Mods);
3124  }
3125  return MatchOperand_Success;
3126 }
3127 
3128 OperandMatchResultTy
3129 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3130  bool AllowImm) {
3131  bool Sext = trySkipId("sext");
3132  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3133  return MatchOperand_ParseFail;
3134 
3135  OperandMatchResultTy Res;
3136  if (AllowImm) {
3137  Res = parseRegOrImm(Operands);
3138  } else {
3139  Res = parseReg(Operands);
3140  }
3141  if (Res != MatchOperand_Success) {
3142  return Sext? MatchOperand_ParseFail : Res;
3143  }
3144 
3145  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3146  return MatchOperand_ParseFail;
3147 
3148  AMDGPUOperand::Modifiers Mods;
3149  Mods.Sext = Sext;
3150 
3151  if (Mods.hasIntModifiers()) {
3152  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3153  if (Op.isExpr()) {
3154  Error(Op.getStartLoc(), "expected an absolute expression");
3155  return MatchOperand_ParseFail;
3156  }
3157  Op.setModifiers(Mods);
3158  }
3159 
3160  return MatchOperand_Success;
3161 }
3162 
3163 OperandMatchResultTy
3164 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3165  return parseRegOrImmWithFPInputMods(Operands, false);
3166 }
3167 
3168 OperandMatchResultTy
3169 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3170  return parseRegOrImmWithIntInputMods(Operands, false);
3171 }
3172 
3173 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3174  auto Loc = getLoc();
3175  if (trySkipId("off")) {
3176  Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3177  AMDGPUOperand::ImmTyOff, false));
3178  return MatchOperand_Success;
3179  }
3180 
3181  if (!isRegister())
3182  return MatchOperand_NoMatch;
3183 
3184  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3185  if (Reg) {
3186  Operands.push_back(std::move(Reg));
3187  return MatchOperand_Success;
3188  }
3189 
3190  return MatchOperand_ParseFail;
3191 
3192 }
3193 
3194 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3195  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3196 
3197  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3198  (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3199  (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3200  (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3201  return Match_InvalidOperand;
3202 
3203  if ((TSFlags & SIInstrFlags::VOP3) &&
3204  (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3205  getForcedEncodingSize() != 64)
3206  return Match_PreferE32;
3207 
3208  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3209  Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3210  // v_mac_f32/16 allow only dst_sel == DWORD;
3211  auto OpNum =
3212  AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3213  const auto &Op = Inst.getOperand(OpNum);
3214  if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3215  return Match_InvalidOperand;
3216  }
3217  }
3218 
3219  return Match_Success;
3220 }
3221 
3222 static ArrayRef<unsigned> getAllVariants() {
3223  static const unsigned Variants[] = {
3224  AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3225  AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3226  };
3227 
3228  return makeArrayRef(Variants);
3229 }
3230 
3231 // What asm variants we should check
3232 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3233  if (getForcedEncodingSize() == 32) {
3234  static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3235  return makeArrayRef(Variants);
3236  }
3237 
3238  if (isForcedVOP3()) {
3239  static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3240  return makeArrayRef(Variants);
3241  }
3242 
3243  if (isForcedSDWA()) {
3244  static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3245  AMDGPUAsmVariants::SDWA9};
3246  return makeArrayRef(Variants);
3247  }
3248 
3249  if (isForcedDPP()) {
3250  static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3251  return makeArrayRef(Variants);
3252  }
3253 
3254  return getAllVariants();
3255 }
3256 
3257 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3258  if (getForcedEncodingSize() == 32)
3259  return "e32";
3260 
3261  if (isForcedVOP3())
3262  return "e64";
3263 
3264  if (isForcedSDWA())
3265  return "sdwa";
3266 
3267  if (isForcedDPP())
3268  return "dpp";
3269 
3270  return "";
3271 }
3272 
3273 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3274  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3275  const unsigned Num = Desc.getNumImplicitUses();
3276  for (unsigned i = 0; i < Num; ++i) {
3277  unsigned Reg = Desc.ImplicitUses[i];
3278  switch (Reg) {
3279  case AMDGPU::FLAT_SCR:
3280  case AMDGPU::VCC:
3281  case AMDGPU::VCC_LO:
3282  case AMDGPU::VCC_HI:
3283  case AMDGPU::M0:
3284  return Reg;
3285  default:
3286  break;
3287  }
3288  }
3289  return AMDGPU::NoRegister;
3290 }
3291 
3292 // NB: This code is correct only when used to check constant
3293 // bus limitations because GFX7 supports no f16 inline constants.
3294 // Note that there are no cases when a GFX7 opcode violates
3295 // constant bus limitations due to the use of an f16 constant.
3296 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3297  unsigned OpIdx) const {
3298  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3299 
3300  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3301  return false;
3302  }
3303 
3304  const MCOperand &MO = Inst.getOperand(OpIdx);
3305 
3306  int64_t Val = MO.getImm();
3307  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3308 
3309  switch (OpSize) { // expected operand size
3310  case 8:
3311  return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3312  case 4:
3313  return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3314  case 2: {
3315  const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3316  if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3317  OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3318  OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3319  return AMDGPU::isInlinableIntLiteral(Val);
3320 
3321  if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3322  OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3323  OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3324  return AMDGPU::isInlinableIntLiteralV216(Val);
3325 
3326  if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3327  OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3328  OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3329  return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3330 
3331  return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3332  }
3333  default:
3334  llvm_unreachable("invalid operand size");
3335  }
3336 }
3337 
3338 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3339  if (!isGFX10Plus())
3340  return 1;
3341 
3342  switch (Opcode) {
3343  // 64-bit shift instructions can use only one scalar value input
3344  case AMDGPU::V_LSHLREV_B64_e64:
3345  case AMDGPU::V_LSHLREV_B64_gfx10:
3346  case AMDGPU::V_LSHRREV_B64_e64:
3347  case AMDGPU::V_LSHRREV_B64_gfx10:
3348  case AMDGPU::V_ASHRREV_I64_e64:
3349  case AMDGPU::V_ASHRREV_I64_gfx10:
3350  case AMDGPU::V_LSHL_B64_e64:
3351  case AMDGPU::V_LSHR_B64_e64:
3352  case AMDGPU::V_ASHR_I64_e64:
3353  return 1;
3354  default:
3355  return 2;
3356  }
3357 }
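// In other words, pre-GFX10 targets may read at most one scalar value over the
// constant bus per instruction; GFX10+ allows two, except for the 64-bit shift
// opcodes listed above, which remain limited to one.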
3358 
3359 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3360  const MCOperand &MO = Inst.getOperand(OpIdx);
3361  if (MO.isImm()) {
3362  return !isInlineConstant(Inst, OpIdx);
3363  } else if (MO.isReg()) {
3364  auto Reg = MO.getReg();
3365  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3366  auto PReg = mc2PseudoReg(Reg);
3367  return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3368  } else {
3369  return true;
3370  }
3371 }
3372 
3373 bool
3374 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3375  const OperandVector &Operands) {
3376  const unsigned Opcode = Inst.getOpcode();
3377  const MCInstrDesc &Desc = MII.get(Opcode);
3378  unsigned LastSGPR = AMDGPU::NoRegister;
3379  unsigned ConstantBusUseCount = 0;
3380  unsigned NumLiterals = 0;
3381  unsigned LiteralSize;
3382 
3383  if (Desc.TSFlags &
3384  (SIInstrFlags::VOPC |
3385  SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3386  SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3387  SIInstrFlags::SDWA)) {
3388  // Check special imm operands (used by madmk, etc)
3389  if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3390  ++NumLiterals;
3391  LiteralSize = 4;
3392  }
3393 
3394  SmallDenseSet<unsigned> SGPRsUsed;
3395  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3396  if (SGPRUsed != AMDGPU::NoRegister) {
3397  SGPRsUsed.insert(SGPRUsed);
3398  ++ConstantBusUseCount;
3399  }
3400 
3401  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3402  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3403  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3404 
3405  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3406 
3407  for (int OpIdx : OpIndices) {
3408  if (OpIdx == -1) break;
3409 
3410  const MCOperand &MO = Inst.getOperand(OpIdx);
3411  if (usesConstantBus(Inst, OpIdx)) {
3412  if (MO.isReg()) {
3413  LastSGPR = mc2PseudoReg(MO.getReg());
3414  // Pairs of registers with a partial intersection like these
3415  // s0, s[0:1]
3416  // flat_scratch_lo, flat_scratch
3417  // flat_scratch_lo, flat_scratch_hi
3418  // are theoretically valid but they are disabled anyway.
3419  // Note that this code mimics SIInstrInfo::verifyInstruction
3420  if (!SGPRsUsed.count(LastSGPR)) {
3421  SGPRsUsed.insert(LastSGPR);
3422  ++ConstantBusUseCount;
3423  }
3424  } else { // Expression or a literal
3425 
3426  if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3427  continue; // special operand like VINTERP attr_chan
3428 
3429  // An instruction may use only one literal.
3430  // This has been validated on the previous step.
3431  // See validateVOPLiteral.
3432  // This literal may be used as more than one operand.
3433  // If all these operands are of the same size,
3434  // this literal counts as one scalar value.
3435  // Otherwise it counts as 2 scalar values.
3436  // See "GFX10 Shader Programming", section 3.6.2.3.
3437 
3438  unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3439  if (Size < 4) Size = 4;
3440 
3441  if (NumLiterals == 0) {
3442  NumLiterals = 1;
3443  LiteralSize = Size;
3444  } else if (LiteralSize != Size) {
3445  NumLiterals = 2;
3446  }
3447  }
3448  }
3449  }
3450  }
3451  ConstantBusUseCount += NumLiterals;
3452 
3453  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3454  return true;
3455 
3456  SMLoc LitLoc = getLitLoc(Operands);
3457  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3458  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3459  Error(Loc, "invalid operand (violates constant bus restrictions)");
3460  return false;
3461 }
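// For example, with a limit of 1, an instruction whose src0 and src1 name two
// different SGPRs is rejected here, while reusing the same SGPR for both
// operands counts only once (tracked via SGPRsUsed).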
3462 
3463 bool
3464 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3465  const OperandVector &Operands) {
3466  const unsigned Opcode = Inst.getOpcode();
3467  const MCInstrDesc &Desc = MII.get(Opcode);
3468 
3469  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3470  if (DstIdx == -1 ||
3471  Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3472  return true;
3473  }
3474 
3475  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3476 
3477  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3478  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3479  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3480 
3481  assert(DstIdx != -1);
3482  const MCOperand &Dst = Inst.getOperand(DstIdx);
3483  assert(Dst.isReg());
3484 
3485  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3486 
3487  for (int SrcIdx : SrcIndices) {
3488  if (SrcIdx == -1) break;
3489  const MCOperand &Src = Inst.getOperand(SrcIdx);
3490  if (Src.isReg()) {
3491  if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3492  const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3493  Error(getRegLoc(SrcReg, Operands),
3494  "destination must be different than all sources");
3495  return false;
3496  }
3497  }
3498  }
3499 
3500  return true;
3501 }
3502 
3503 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3504 
3505  const unsigned Opc = Inst.getOpcode();
3506  const MCInstrDesc &Desc = MII.get(Opc);
3507 
3508  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3509  int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3510  assert(ClampIdx != -1);
3511  return Inst.getOperand(ClampIdx).getImm() == 0;
3512  }
3513 
3514  return true;
3515 }
3516 
3517 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3518 
3519  const unsigned Opc = Inst.getOpcode();
3520  const MCInstrDesc &Desc = MII.get(Opc);
3521 
3522  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3523  return None;
3524 
3525  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3526  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3527  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3528 
3529  assert(VDataIdx != -1);
3530 
3531  if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3532  return None;
3533 
3534  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3535  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3536  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3537  if (DMask == 0)
3538  DMask = 1;
3539 
3540  bool isPackedD16 = false;
3541  unsigned DataSize =
3542  (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3543  if (hasPackedD16()) {
3544  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3545  isPackedD16 = D16Idx >= 0;
3546  if (isPackedD16 && Inst.getOperand(D16Idx).getImm())
3547  DataSize = (DataSize + 1) / 2;
3548  }
3549 
3550  if ((VDataSize / 4) == DataSize + TFESize)
3551  return None;
3552 
3553  return StringRef(isPackedD16
3554  ? "image data size does not match dmask, d16 and tfe"
3555  : "image data size does not match dmask and tfe");
3556 }
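 // Illustrative example for the check above (hypothetical): with dmask=0x7 an
 // image load returns three components, so vdata must be 3 VGPRs wide; tfe
 // adds one extra VGPR, and packed d16 halves the count rounded up,
 // e.g. (3 + 1) / 2 = 2.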
3557 
3558 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3559  const unsigned Opc = Inst.getOpcode();
3560  const MCInstrDesc &Desc = MII.get(Opc);
3561 
3562  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3563  return true;
3564 
3565  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3566 
3567  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3568  AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3569  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3570  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3571  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3572  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3573 
3574  assert(VAddr0Idx != -1);
3575  assert(SrsrcIdx != -1);
3576  assert(SrsrcIdx > VAddr0Idx);
3577 
3578  if (DimIdx == -1)
3579  return true; // intersect_ray
3580 
3581  unsigned Dim = Inst.getOperand(DimIdx).getImm();
3582  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3583  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3584  unsigned ActualAddrSize =
3585  IsNSA ? SrsrcIdx - VAddr0Idx
3586  : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3587  bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3588 
3589  unsigned ExpectedAddrSize =
3590  AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3591 
3592  if (!IsNSA) {
3593  if (ExpectedAddrSize > 8)
3594  ExpectedAddrSize = 16;
3595 
3596  // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3597  // This provides backward compatibility for assembly created
3598  // before 160b/192b/224b types were directly supported.
3599  if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3600  return true;
3601  }
3602 
3603  return ActualAddrSize == ExpectedAddrSize;
3604 }
3605 
3606 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3607 
3608  const unsigned Opc = Inst.getOpcode();
3609  const MCInstrDesc &Desc = MII.get(Opc);
3610 
3611  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3612  return true;
3613  if (!Desc.mayLoad() || !Desc.mayStore())
3614  return true; // Not atomic
3615 
3616  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3617  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3618 
3619  // This is an incomplete check because image_atomic_cmpswap
3620  // may only use 0x3 and 0xf while other atomic operations
3621  // may use 0x1 and 0x3. However these limitations are
3622  // verified when we check that dmask matches dst size.
3623  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3624 }
3625 
3626 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3627 
3628  const unsigned Opc = Inst.getOpcode();
3629  const MCInstrDesc &Desc = MII.get(Opc);
3630 
3631  if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3632  return true;
3633 
3634  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3635  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3636 
3637  // GATHER4 instructions use dmask in a different fashion compared to
3638  // other MIMG instructions. The only useful DMASK values are
3639  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3640  // (red,red,red,red) etc.) The ISA document doesn't mention
3641  // this.
3642  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3643 }
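 // Illustrative example for the check above (hypothetical): image_gather4
 // with dmask:0x2 gathers the green channel and passes this check, while
 // dmask:0x3 selects two channels and is rejected.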
3644 
3645 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3646  const unsigned Opc = Inst.getOpcode();
3647  const MCInstrDesc &Desc = MII.get(Opc);
3648 
3649  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3650  return true;
3651 
3652  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3653  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3654  AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3655 
3656  if (!BaseOpcode->MSAA)
3657  return true;
3658 
3659  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3660  assert(DimIdx != -1);
3661 
3662  unsigned Dim = Inst.getOperand(DimIdx).getImm();
3663  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3664 
3665  return DimInfo->MSAA;
3666 }
3667 
3668 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3669 {
3670  switch (Opcode) {
3671  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3672  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3673  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3674  return true;
3675  default:
3676  return false;
3677  }
3678 }
3679 
 3680 // movrels* opcodes should only allow VGPRs as src0.
3681 // This is specified in .td description for vop1/vop3,
3682 // but sdwa is handled differently. See isSDWAOperand.
3683 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3684  const OperandVector &Operands) {
3685 
3686  const unsigned Opc = Inst.getOpcode();
3687  const MCInstrDesc &Desc = MII.get(Opc);
3688 
3689  if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3690  return true;
3691 
3692  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3693  assert(Src0Idx != -1);
3694 
3695  SMLoc ErrLoc;
3696  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3697  if (Src0.isReg()) {
3698  auto Reg = mc2PseudoReg(Src0.getReg());
3699  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3700  if (!isSGPR(Reg, TRI))
3701  return true;
3702  ErrLoc = getRegLoc(Reg, Operands);
3703  } else {
3704  ErrLoc = getConstLoc(Operands);
3705  }
3706 
3707  Error(ErrLoc, "source operand must be a VGPR");
3708  return false;
3709 }
3710 
3711 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3712  const OperandVector &Operands) {
3713 
3714  const unsigned Opc = Inst.getOpcode();
3715 
3716  if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3717  return true;
3718 
3719  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3720  assert(Src0Idx != -1);
3721 
3722  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3723  if (!Src0.isReg())
3724  return true;
3725 
3726  auto Reg = mc2PseudoReg(Src0.getReg());
3727  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3728  if (!isGFX90A() && isSGPR(Reg, TRI)) {
3729  Error(getRegLoc(Reg, Operands),
3730  "source operand must be either a VGPR or an inline constant");
3731  return false;
3732  }
3733 
3734  return true;
3735 }
3736 
3737 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3738  const OperandVector &Operands) {
3739  const unsigned Opc = Inst.getOpcode();
3740  const MCInstrDesc &Desc = MII.get(Opc);
3741 
3742  if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3743  return true;
3744 
3745  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3746  if (Src2Idx == -1)
3747  return true;
3748 
3749  const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3750  if (!Src2.isReg())
3751  return true;
3752 
3753  MCRegister Src2Reg = Src2.getReg();
3754  MCRegister DstReg = Inst.getOperand(0).getReg();
3755  if (Src2Reg == DstReg)
3756  return true;
3757 
3758  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3759  if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3760  return true;
3761 
3762  if (TRI->regsOverlap(Src2Reg, DstReg)) {
3763  Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3764  "source 2 operand must not partially overlap with dst");
3765  return false;
3766  }
3767 
3768  return true;
3769 }
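 // Illustrative example for the check above (hypothetical): with a 512-bit
 // accumulator dst such as v[0:15], src2 = v[0:15] (an exact match) or a
 // fully disjoint range is accepted, while src2 = v[8:23] partially overlaps
 // the dst and is rejected.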
3770 
3771 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3772  switch (Inst.getOpcode()) {
3773  default:
3774  return true;
3775  case V_DIV_SCALE_F32_gfx6_gfx7:
3776  case V_DIV_SCALE_F32_vi:
3777  case V_DIV_SCALE_F32_gfx10:
3778  case V_DIV_SCALE_F64_gfx6_gfx7:
3779  case V_DIV_SCALE_F64_vi:
3780  case V_DIV_SCALE_F64_gfx10:
3781  break;
3782  }
3783 
3784  // TODO: Check that src0 = src1 or src2.
3785 
3786  for (auto Name : {AMDGPU::OpName::src0_modifiers,
 3787  AMDGPU::OpName::src1_modifiers,
 3788  AMDGPU::OpName::src2_modifiers}) {
 3789  if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
 3790  .getImm() &
3791  SISrcMods::ABS) {
3792  return false;
3793  }
3794  }
3795 
3796  return true;
3797 }
3798 
3799 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3800 
3801  const unsigned Opc = Inst.getOpcode();
3802  const MCInstrDesc &Desc = MII.get(Opc);
3803 
3804  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3805  return true;
3806 
3807  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3808  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3809  if (isCI() || isSI())
3810  return false;
3811  }
3812 
3813  return true;
3814 }
3815 
3816 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3817  const unsigned Opc = Inst.getOpcode();
3818  const MCInstrDesc &Desc = MII.get(Opc);
3819 
3820  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3821  return true;
3822 
3823  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3824  if (DimIdx < 0)
3825  return true;
3826 
3827  long Imm = Inst.getOperand(DimIdx).getImm();
3828  if (Imm < 0 || Imm >= 8)
3829  return false;
3830 
3831  return true;
3832 }
3833 
3834 static bool IsRevOpcode(const unsigned Opcode)
3835 {
3836  switch (Opcode) {
3837  case AMDGPU::V_SUBREV_F32_e32:
3838  case AMDGPU::V_SUBREV_F32_e64:
3839  case AMDGPU::V_SUBREV_F32_e32_gfx10:
3840  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3841  case AMDGPU::V_SUBREV_F32_e32_vi:
3842  case AMDGPU::V_SUBREV_F32_e64_gfx10:
3843  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3844  case AMDGPU::V_SUBREV_F32_e64_vi:
3845 
3846  case AMDGPU::V_SUBREV_CO_U32_e32:
3847  case AMDGPU::V_SUBREV_CO_U32_e64:
3848  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3849  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3850 
3851  case AMDGPU::V_SUBBREV_U32_e32:
3852  case AMDGPU::V_SUBBREV_U32_e64:
3853  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3854  case AMDGPU::V_SUBBREV_U32_e32_vi:
3855  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3856  case AMDGPU::V_SUBBREV_U32_e64_vi:
3857 
3858  case AMDGPU::V_SUBREV_U32_e32:
3859  case AMDGPU::V_SUBREV_U32_e64:
3860  case AMDGPU::V_SUBREV_U32_e32_gfx9:
3861  case AMDGPU::V_SUBREV_U32_e32_vi:
3862  case AMDGPU::V_SUBREV_U32_e64_gfx9:
3863  case AMDGPU::V_SUBREV_U32_e64_vi:
3864 
3865  case AMDGPU::V_SUBREV_F16_e32:
3866  case AMDGPU::V_SUBREV_F16_e64:
3867  case AMDGPU::V_SUBREV_F16_e32_gfx10:
3868  case AMDGPU::V_SUBREV_F16_e32_vi:
3869  case AMDGPU::V_SUBREV_F16_e64_gfx10:
3870  case AMDGPU::V_SUBREV_F16_e64_vi:
3871 
3872  case AMDGPU::V_SUBREV_U16_e32:
3873  case AMDGPU::V_SUBREV_U16_e64:
3874  case AMDGPU::V_SUBREV_U16_e32_vi:
3875  case AMDGPU::V_SUBREV_U16_e64_vi:
3876 
3877  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3878  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3879  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3880 
3881  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3882  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3883 
3884  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3885  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3886 
3887  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3888  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3889 
3890  case AMDGPU::V_LSHRREV_B32_e32:
3891  case AMDGPU::V_LSHRREV_B32_e64:
3892  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3893  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3894  case AMDGPU::V_LSHRREV_B32_e32_vi:
3895  case AMDGPU::V_LSHRREV_B32_e64_vi:
3896  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3897  case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3898 
3899  case AMDGPU::V_ASHRREV_I32_e32:
3900  case AMDGPU::V_ASHRREV_I32_e64:
3901  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3902  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3903  case AMDGPU::V_ASHRREV_I32_e32_vi:
3904  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3905  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3906  case AMDGPU::V_ASHRREV_I32_e64_vi:
3907 
3908  case AMDGPU::V_LSHLREV_B32_e32:
3909  case AMDGPU::V_LSHLREV_B32_e64:
3910  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3911  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3912  case AMDGPU::V_LSHLREV_B32_e32_vi:
3913  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3914  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3915  case AMDGPU::V_LSHLREV_B32_e64_vi:
3916 
3917  case AMDGPU::V_LSHLREV_B16_e32:
3918  case AMDGPU::V_LSHLREV_B16_e64:
3919  case AMDGPU::V_LSHLREV_B16_e32_vi:
3920  case AMDGPU::V_LSHLREV_B16_e64_vi:
3921  case AMDGPU::V_LSHLREV_B16_gfx10:
3922 
3923  case AMDGPU::V_LSHRREV_B16_e32:
3924  case AMDGPU::V_LSHRREV_B16_e64:
3925  case AMDGPU::V_LSHRREV_B16_e32_vi:
3926  case AMDGPU::V_LSHRREV_B16_e64_vi:
3927  case AMDGPU::V_LSHRREV_B16_gfx10:
3928 
3929  case AMDGPU::V_ASHRREV_I16_e32:
3930  case AMDGPU::V_ASHRREV_I16_e64:
3931  case AMDGPU::V_ASHRREV_I16_e32_vi:
3932  case AMDGPU::V_ASHRREV_I16_e64_vi:
3933  case AMDGPU::V_ASHRREV_I16_gfx10:
3934 
3935  case AMDGPU::V_LSHLREV_B64_e64:
3936  case AMDGPU::V_LSHLREV_B64_gfx10:
3937  case AMDGPU::V_LSHLREV_B64_vi:
3938 
3939  case AMDGPU::V_LSHRREV_B64_e64:
3940  case AMDGPU::V_LSHRREV_B64_gfx10:
3941  case AMDGPU::V_LSHRREV_B64_vi:
3942 
3943  case AMDGPU::V_ASHRREV_I64_e64:
3944  case AMDGPU::V_ASHRREV_I64_gfx10:
3945  case AMDGPU::V_ASHRREV_I64_vi:
3946 
3947  case AMDGPU::V_PK_LSHLREV_B16:
3948  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3949  case AMDGPU::V_PK_LSHLREV_B16_vi:
3950 
3951  case AMDGPU::V_PK_LSHRREV_B16:
3952  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3953  case AMDGPU::V_PK_LSHRREV_B16_vi:
3954  case AMDGPU::V_PK_ASHRREV_I16:
3955  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3956  case AMDGPU::V_PK_ASHRREV_I16_vi:
3957  return true;
3958  default:
3959  return false;
3960  }
3961 }
3962 
3963 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3964 
3965  using namespace SIInstrFlags;
3966  const unsigned Opcode = Inst.getOpcode();
3967  const MCInstrDesc &Desc = MII.get(Opcode);
3968 
3969  // lds_direct register is defined so that it can be used
3970  // with 9-bit operands only. Ignore encodings which do not accept these.
3971  const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3972  if ((Desc.TSFlags & Enc) == 0)
3973  return None;
3974 
3975  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3976  auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3977  if (SrcIdx == -1)
3978  break;
3979  const auto &Src = Inst.getOperand(SrcIdx);
3980  if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3981 
3982  if (isGFX90A() || isGFX11Plus())
3983  return StringRef("lds_direct is not supported on this GPU");
3984 
3985  if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3986  return StringRef("lds_direct cannot be used with this instruction");
3987 
3988  if (SrcName != OpName::src0)
3989  return StringRef("lds_direct may be used as src0 only");
3990  }
3991  }
3992 
3993  return None;
3994 }
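 // Illustrative example for the check above (hypothetical): lds_direct is
 // accepted only as src0 of an eligible VALU encoding, e.g.
 //   v_mov_b32 v0, lds_direct
 // Using it as src1, in an SDWA form, or on GFX90A/GFX11+ produces the
 // errors above.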
3995 
3996 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3997  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3998  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3999  if (Op.isFlatOffset())
4000  return Op.getStartLoc();
4001  }
4002  return getLoc();
4003 }
4004 
4005 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4006  const OperandVector &Operands) {
4007  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4008  if ((TSFlags & SIInstrFlags::FLAT) == 0)
4009  return true;
4010 
4011  auto Opcode = Inst.getOpcode();
4012  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4013  assert(OpNum != -1);
4014 
4015  const auto &Op = Inst.getOperand(OpNum);
4016  if (!hasFlatOffsets() && Op.getImm() != 0) {
4017  Error(getFlatOffsetLoc(Operands),
4018  "flat offset modifier is not supported on this GPU");
4019  return false;
4020  }
4021 
4022  // For FLAT segment the offset must be positive;
 4023  // MSB is ignored and forced to zero.
 4024  if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
4025  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
4026  if (!isIntN(OffsetSize, Op.getImm())) {
4027  Error(getFlatOffsetLoc(Operands),
4028  Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4029  return false;
4030  }
4031  } else {
4032  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4033  if (!isUIntN(OffsetSize, Op.getImm())) {
4034  Error(getFlatOffsetLoc(Operands),
4035  Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4036  return false;
4037  }
4038  }
4039 
4040  return true;
4041 }
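 // Illustrative example for the check above (hypothetical): global and
 // scratch forms take a signed offset (e.g.
 //   global_load_dword v0, v[0:1], off offset:-64
 // ), plain flat forms take an unsigned offset, and on GPUs without flat
 // offsets any non-zero offset is rejected.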
4042 
4043 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4044  // Start with second operand because SMEM Offset cannot be dst or src0.
4045  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4046  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4047  if (Op.isSMEMOffset())
4048  return Op.getStartLoc();
4049  }
4050  return getLoc();
4051 }
4052 
4053 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4054  const OperandVector &Operands) {
4055  if (isCI() || isSI())
4056  return true;
4057 
4058  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4059  if ((TSFlags & SIInstrFlags::SMRD) == 0)
4060  return true;
4061 
4062  auto Opcode = Inst.getOpcode();
4063  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4064  if (OpNum == -1)
4065  return true;
4066 
4067  const auto &Op = Inst.getOperand(OpNum);
4068  if (!Op.isImm())
4069  return true;
4070 
4071  uint64_t Offset = Op.getImm();
4072  bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4073  if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4074  AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4075  return true;
4076 
4077  Error(getSMEMOffsetLoc(Operands),
4078  (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4079  "expected a 21-bit signed offset");
4080 
4081  return false;
4082 }
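 // Illustrative example for the check above (hypothetical):
 //   s_load_dword s0, s[2:3], 0x10
 // uses an immediate SMEM offset; values that do not fit are reported as
 // "expected a 20-bit unsigned offset" (VI or buffer forms) or
 // "expected a 21-bit signed offset" (other targets).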
4083 
4084 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4085  unsigned Opcode = Inst.getOpcode();
4086  const MCInstrDesc &Desc = MII.get(Opcode);
4087  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4088  return true;
4089 
4090  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4091  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4092 
4093  const int OpIndices[] = { Src0Idx, Src1Idx };
4094 
4095  unsigned NumExprs = 0;
 4096  unsigned NumLiterals = 0;
 4097  uint32_t LiteralValue;
4098 
4099  for (int OpIdx : OpIndices) {
4100  if (OpIdx == -1) break;
4101 
4102  const MCOperand &MO = Inst.getOperand(OpIdx);
4103  // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4104  if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4105  if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4106  uint32_t Value = static_cast<uint32_t>(MO.getImm());
4107  if (NumLiterals == 0 || LiteralValue != Value) {
4108  LiteralValue = Value;
4109  ++NumLiterals;
4110  }
4111  } else if (MO.isExpr()) {
4112  ++NumExprs;
4113  }
4114  }
4115  }
4116 
4117  return NumLiterals + NumExprs <= 1;
4118 }
4119 
4120 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4121  const unsigned Opc = Inst.getOpcode();
4122  if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4123  Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4124  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4125  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4126 
4127  if (OpSel & ~3)
4128  return false;
4129  }
4130 
4131  if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4132  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4133  if (OpSelIdx != -1) {
4134  if (Inst.getOperand(OpSelIdx).getImm() != 0)
4135  return false;
4136  }
4137  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4138  if (OpSelHiIdx != -1) {
4139  if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4140  return false;
4141  }
4142  }
4143 
4144  return true;
4145 }
4146 
4147 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4148  const OperandVector &Operands) {
4149  const unsigned Opc = Inst.getOpcode();
4150  int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4151  if (DppCtrlIdx < 0)
4152  return true;
 4153  unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
 4154 
 4155  if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
 4156  // DPP64 is supported for row_newbcast only.
4157  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4158  if (Src0Idx >= 0 &&
4159  getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4160  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4161  Error(S, "64 bit dpp only supports row_newbcast");
4162  return false;
4163  }
4164  }
4165 
4166  return true;
4167 }
4168 
4169 // Check if VCC register matches wavefront size
4170 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4171  auto FB = getFeatureBits();
4172  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4173  (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4174 }
4175 
 4176 // One unique literal can be used. VOP3 literal is only allowed on GFX10+
4177 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4178  const OperandVector &Operands) {
4179  unsigned Opcode = Inst.getOpcode();
4180  const MCInstrDesc &Desc = MII.get(Opcode);
4181  const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4182  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4183  ImmIdx == -1)
4184  return true;
4185 
4186  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4187  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4188  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4189 
4190  const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4191 
4192  unsigned NumExprs = 0;
 4193  unsigned NumLiterals = 0;
 4194  uint32_t LiteralValue;
4195 
4196  for (int OpIdx : OpIndices) {
4197  if (OpIdx == -1)
4198  continue;
4199 
4200  const MCOperand &MO = Inst.getOperand(OpIdx);
4201  if (!MO.isImm() && !MO.isExpr())
4202  continue;
4203  if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4204  continue;
4205 
4206  if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4207  getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4208  Error(getConstLoc(Operands),
4209  "inline constants are not allowed for this operand");
4210  return false;
4211  }
4212 
4213  if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4214  uint32_t Value = static_cast<uint32_t>(MO.getImm());
4215  if (NumLiterals == 0 || LiteralValue != Value) {
4216  LiteralValue = Value;
4217  ++NumLiterals;
4218  }
4219  } else if (MO.isExpr()) {
4220  ++NumExprs;
4221  }
4222  }
4223  NumLiterals += NumExprs;
4224 
4225  if (!NumLiterals)
4226  return true;
4227 
4228  if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4229  Error(getLitLoc(Operands), "literal operands are not supported");
4230  return false;
4231  }
4232 
4233  if (NumLiterals > 1) {
4234  Error(getLitLoc(Operands), "only one literal operand is allowed");
4235  return false;
4236  }
4237 
4238  return true;
4239 }
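 // Illustrative example for the check above (hypothetical):
 //   v_add_f32 v0, 0x12345678, v1
 // uses a single 32-bit literal and is accepted; a literal in a VOP3-encoded
 // form requires FeatureVOP3Literal (GFX10+), and two different literals in
 // one instruction are always rejected.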
4240 
4241 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4242 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4243  const MCRegisterInfo *MRI) {
4244  int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4245  if (OpIdx < 0)
4246  return -1;
4247 
4248  const MCOperand &Op = Inst.getOperand(OpIdx);
4249  if (!Op.isReg())
4250  return -1;
4251 
4252  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4253  auto Reg = Sub ? Sub : Op.getReg();
4254  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4255  return AGPR32.contains(Reg) ? 1 : 0;
4256 }
4257 
4258 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4259  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
 4260  if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
 4261  SIInstrFlags::MTBUF |
4262  SIInstrFlags::DS)) == 0)
4263  return true;
4264 
4265  uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4266  : AMDGPU::OpName::vdata;
4267 
4268  const MCRegisterInfo *MRI = getMRI();
4269  int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4270  int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4271 
4272  if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4273  int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4274  if (Data2Areg >= 0 && Data2Areg != DataAreg)
4275  return false;
4276  }
4277 
4278  auto FB = getFeatureBits();
4279  if (FB[AMDGPU::FeatureGFX90AInsts]) {
4280  if (DataAreg < 0 || DstAreg < 0)
4281  return true;
4282  return DstAreg == DataAreg;
4283  }
4284 
4285  return DstAreg < 1 && DataAreg < 1;
4286 }
4287 
4288 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4289  auto FB = getFeatureBits();
4290  if (!FB[AMDGPU::FeatureGFX90AInsts])
4291  return true;
4292 
4293  const MCRegisterInfo *MRI = getMRI();
4294  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4295  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4296  for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4297  const MCOperand &Op = Inst.getOperand(I);
4298  if (!Op.isReg())
4299  continue;
4300 
4301  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4302  if (!Sub)
4303  continue;
4304 
4305  if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4306  return false;
4307  if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4308  return false;
4309  }
4310 
4311  return true;
4312 }
4313 
4314 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4315  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4316  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4317  if (Op.isBLGP())
4318  return Op.getStartLoc();
4319  }
4320  return SMLoc();
4321 }
4322 
4323 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4324  const OperandVector &Operands) {
4325  unsigned Opc = Inst.getOpcode();
4326  int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4327  if (BlgpIdx == -1)
4328  return true;
4329  SMLoc BLGPLoc = getBLGPLoc(Operands);
4330  if (!BLGPLoc.isValid())
4331  return true;
4332  bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4333  auto FB = getFeatureBits();
4334  bool UsesNeg = false;
4335  if (FB[AMDGPU::FeatureGFX940Insts]) {
4336  switch (Opc) {
4337  case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4338  case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4339  case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4340  case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4341  UsesNeg = true;
4342  }
4343  }
4344 
4345  if (IsNeg == UsesNeg)
4346  return true;
4347 
4348  Error(BLGPLoc,
4349  UsesNeg ? "invalid modifier: blgp is not supported"
4350  : "invalid modifier: neg is not supported");
4351 
4352  return false;
4353 }
4354 
4355 // gfx90a has an undocumented limitation:
4356 // DS_GWS opcodes must use even aligned registers.
4357 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4358  const OperandVector &Operands) {
4359  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4360  return true;
4361 
4362  int Opc = Inst.getOpcode();
4363  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4364  Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4365  return true;
4366 
4367  const MCRegisterInfo *MRI = getMRI();
4368  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4369  int Data0Pos =
4370  AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4371  assert(Data0Pos != -1);
4372  auto Reg = Inst.getOperand(Data0Pos).getReg();
4373  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4374  if (RegIdx & 1) {
4375  SMLoc RegLoc = getRegLoc(Reg, Operands);
4376  Error(RegLoc, "vgpr must be even aligned");
4377  return false;
4378  }
4379 
4380  return true;
4381 }
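 // Illustrative example for the check above (hypothetical): on gfx90a
 //   ds_gws_init v1 gds
 // is rejected because data0 uses an odd-numbered VGPR; an even register
 // such as v0 or v2 is accepted.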
4382 
4383 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4384  const OperandVector &Operands,
4385  const SMLoc &IDLoc) {
4386  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4387  AMDGPU::OpName::cpol);
4388  if (CPolPos == -1)
4389  return true;
4390 
4391  unsigned CPol = Inst.getOperand(CPolPos).getImm();
4392 
4393  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4394  if (TSFlags & SIInstrFlags::SMRD) {
4395  if (CPol && (isSI() || isCI())) {
4396  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4397  Error(S, "cache policy is not supported for SMRD instructions");
4398  return false;
 4399  }
 4400  if (CPol & ~(CPol::GLC | CPol::DLC)) {
4401  Error(IDLoc, "invalid cache policy for SMEM instruction");
4402  return false;
4403  }
4404  }
4405 
4406  if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4407  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4408  StringRef CStr(S.getPointer());
4409  S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4410  Error(S, "scc is not supported on this GPU");
4411  return false;
4412  }
4413 
 4414  if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
 4415  return true;
4416 
4417  if (TSFlags & SIInstrFlags::IsAtomicRet) {
4418  if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4419  Error(IDLoc, isGFX940() ? "instruction must use sc0"
4420  : "instruction must use glc");
4421  return false;
4422  }
4423  } else {
4424  if (CPol & CPol::GLC) {
4425  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4426  StringRef CStr(S.getPointer());
 4427  S = SMLoc::getFromPointer(
 4428  &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4429  Error(S, isGFX940() ? "instruction must not use sc0"
4430  : "instruction must not use glc");
4431  return false;
4432  }
4433  }
4434 
4435  return true;
4436 }
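 // Illustrative example for the check above (hypothetical): a returning
 // atomic (e.g. a global_atomic_add that writes back its result) must specify
 // glc (sc0 on gfx940), the non-returning form must omit it, and scc is
 // additionally rejected on gfx90a.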
4437 
4438 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst,
4439  const OperandVector &Operands,
4440  const SMLoc &IDLoc) {
4441  if (isGFX940())
4442  return true;
4443 
4444  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
 4445  if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) !=
 4446  (SIInstrFlags::VALU | SIInstrFlags::FLAT))
4447  return true;
4448  // This is FLAT LDS DMA.
4449 
4450  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands);
4451  StringRef CStr(S.getPointer());
4452  if (!CStr.startswith("lds")) {
 4453  // This is an incorrectly selected LDS DMA version of a FLAT load opcode.
 4454  // The LDS version should have the 'lds' modifier, but it follows optional
 4455  // operands, so its absence is ignored by the matcher.
4456  Error(IDLoc, "invalid operands for instruction");
4457  return false;
4458  }
4459 
4460  return true;
4461 }
4462 
4463 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4464  const SMLoc &IDLoc,
4465  const OperandVector &Operands) {
4466  if (auto ErrMsg = validateLdsDirect(Inst)) {
4467  Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4468  return false;
4469  }
4470  if (!validateSOPLiteral(Inst)) {
4471  Error(getLitLoc(Operands),
4472  "only one literal operand is allowed");
4473  return false;
4474  }
4475  if (!validateVOPLiteral(Inst, Operands)) {
4476  return false;
4477  }
4478  if (!validateConstantBusLimitations(Inst, Operands)) {
4479  return false;
4480  }
4481  if (!validateEarlyClobberLimitations(Inst, Operands)) {
4482  return false;
4483  }
4484  if (!validateIntClampSupported(Inst)) {
4485  Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4486  "integer clamping is not supported on this GPU");
4487  return false;
4488  }
4489  if (!validateOpSel(Inst)) {
4490  Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4491  "invalid op_sel operand");
4492  return false;
4493  }
4494  if (!validateDPP(Inst, Operands)) {
4495  return false;
4496  }
4497  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4498  if (!validateMIMGD16(Inst)) {
4499  Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4500  "d16 modifier is not supported on this GPU");
4501  return false;
4502  }
4503  if (!validateMIMGDim(Inst)) {
4504  Error(IDLoc, "dim modifier is required on this GPU");
4505  return false;
4506  }
4507  if (!validateMIMGMSAA(Inst)) {
4508  Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4509  "invalid dim; must be MSAA type");
4510  return false;
4511  }
4512  if (auto ErrMsg = validateMIMGDataSize(Inst)) {
4513  Error(IDLoc, *ErrMsg);
4514  return false;
4515  }
4516  if (!validateMIMGAddrSize(Inst)) {
4517  Error(IDLoc,
4518  "image address size does not match dim and a16");
4519  return false;
4520  }
4521  if (!validateMIMGAtomicDMask(Inst)) {
4522  Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4523  "invalid atomic image dmask");
4524  return false;
4525  }
4526  if (!validateMIMGGatherDMask(Inst)) {
4527  Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4528  "invalid image_gather dmask: only one bit must be set");
4529  return false;
4530  }
4531  if (!validateMovrels(Inst, Operands)) {
4532  return false;
4533  }
4534  if (!validateFlatOffset(Inst, Operands)) {
4535  return false;
4536  }
4537  if (!validateSMEMOffset(Inst, Operands)) {
4538  return false;
4539  }
4540  if (!validateMAIAccWrite(Inst, Operands)) {
4541  return false;
4542  }
4543  if (!validateMFMA(Inst, Operands)) {
4544  return false;
4545  }
4546  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4547  return false;
4548  }
4549 
4550  if (!validateAGPRLdSt(Inst)) {
4551  Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4552  ? "invalid register class: data and dst should be all VGPR or AGPR"
4553  : "invalid register class: agpr loads and stores not supported on this GPU"
4554  );
4555  return false;
4556  }
4557  if (!validateVGPRAlign(Inst)) {
4558  Error(IDLoc,
4559  "invalid register class: vgpr tuples must be 64 bit aligned");
4560  return false;
4561  }
4562  if (!validateGWS(Inst, Operands)) {
4563  return false;
4564  }
4565 
4566  if (!validateBLGP(Inst, Operands)) {
4567  return false;
4568  }
4569 
4570  if (!validateDivScale(Inst)) {
4571  Error(IDLoc, "ABS not allowed in VOP3B instructions");
4572  return false;
4573  }
4574  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4575  return false;
4576  }
4577 
4578  if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) {
4579  return false;
4580  }
4581 
4582  return true;
4583 }
4584 
4585 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4586  const FeatureBitset &FBS,
4587  unsigned VariantID = 0);
4588 
4589 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4590  const FeatureBitset &AvailableFeatures,
4591  unsigned VariantID);
4592 
4593 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4594  const FeatureBitset &FBS) {
4595  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4596 }
4597 
4598 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4599  const FeatureBitset &FBS,
4600  ArrayRef<unsigned> Variants) {
4601  for (auto Variant : Variants) {
4602  if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4603  return true;
4604  }
4605 
4606  return false;
4607 }
4608 
4609 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4610  const SMLoc &IDLoc) {
4611  FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4612 
4613  // Check if requested instruction variant is supported.
4614  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4615  return false;
4616 
4617  // This instruction is not supported.
4618  // Clear any other pending errors because they are no longer relevant.
4619  getParser().clearPendingErrors();
4620 
4621  // Requested instruction variant is not supported.
4622  // Check if any other variants are supported.
4623  StringRef VariantName = getMatchedVariantName();
4624  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4625  return Error(IDLoc,
4626  Twine(VariantName,
4627  " variant of this instruction is not supported"));
4628  }
4629 
4630  // Finally check if this instruction is supported on any other GPU.
4631  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4632  return Error(IDLoc, "instruction not supported on this GPU");
4633  }
4634 
4635  // Instruction not supported on any GPU. Probably a typo.
4636  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4637  return Error(IDLoc, "invalid instruction" + Suggestion);
4638 }
4639 
 4640 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
 4641  OperandVector &Operands,
 4642  MCStreamer &Out,
 4643  uint64_t &ErrorInfo,
 4644  bool MatchingInlineAsm) {
4645  MCInst Inst;
4646  unsigned Result = Match_Success;
4647  for (auto Variant : getMatchedVariants()) {
4648  uint64_t EI;
4649  auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4650  Variant);
 4651  // We order match statuses from least to most specific and keep the most
 4652  // specific status as the result:
 4653  // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4654  if ((R == Match_Success) ||
4655  (R == Match_PreferE32) ||
4656  (R == Match_MissingFeature && Result != Match_PreferE32) ||
4657  (R == Match_InvalidOperand && Result != Match_MissingFeature
4658  && Result != Match_PreferE32) ||
4659  (R == Match_MnemonicFail && Result != Match_InvalidOperand
4660  && Result != Match_MissingFeature
4661  && Result != Match_PreferE32)) {
4662  Result = R;
4663  ErrorInfo = EI;
4664  }
4665  if (R == Match_Success)
4666  break;
4667  }
4668 
4669  if (Result == Match_Success) {
4670  if (!validateInstruction(Inst, IDLoc, Operands)) {
4671  return true;
4672  }
4673  Inst.setLoc(IDLoc);
4674  Out.emitInstruction(Inst, getSTI());
4675  return false;
4676  }
4677 
4678  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4679  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4680  return true;
4681  }
4682 
4683  switch (Result) {
4684  default: break;
4685  case Match_MissingFeature:
4686  // It has been verified that the specified instruction
4687  // mnemonic is valid. A match was found but it requires
4688  // features which are not supported on this GPU.
4689  return Error(IDLoc, "operands are not valid for this GPU or mode");
4690 
4691  case Match_InvalidOperand: {
4692  SMLoc ErrorLoc = IDLoc;
4693  if (ErrorInfo != ~0ULL) {
4694  if (ErrorInfo >= Operands.size()) {
4695  return Error(IDLoc, "too few operands for instruction");
4696  }
4697  ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4698  if (ErrorLoc == SMLoc())
4699  ErrorLoc = IDLoc;
4700  }
4701  return Error(ErrorLoc, "invalid operand for instruction");
4702  }
4703 
4704  case Match_PreferE32:
4705  return Error(IDLoc, "internal error: instruction without _e64 suffix "
4706  "should be encoded as e32");
4707  case Match_MnemonicFail:
4708  llvm_unreachable("Invalid instructions should have been handled already");
4709  }
4710  llvm_unreachable("Implement any new match types added!");
4711 }
4712 
4713 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4714  int64_t Tmp = -1;
4715  if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4716  return true;
4717  }
4718  if (getParser().parseAbsoluteExpression(Tmp)) {
4719  return true;
4720  }
4721  Ret = static_cast<uint32_t>(Tmp);
4722  return false;
4723 }
4724 
4725 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4726  uint32_t &Minor) {
4727  if (ParseAsAbsoluteExpression(Major))
4728  return TokError("invalid major version");
4729 
4730  if (!trySkipToken(AsmToken::Comma))
4731  return TokError("minor version number required, comma expected");
4732 
4733  if (ParseAsAbsoluteExpression(Minor))
4734  return TokError("invalid minor version");
4735 
4736  return false;
4737 }
4738 
4739 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4740  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4741  return TokError("directive only supported for amdgcn architecture");
4742 
4743  std::string TargetIDDirective;
4744  SMLoc TargetStart = getTok().getLoc();
4745  if (getParser().parseEscapedString(TargetIDDirective))
4746  return true;
4747 
4748  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4749  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4750  return getParser().Error(TargetRange.Start,
4751  (Twine(".amdgcn_target directive's target id ") +
4752  Twine(TargetIDDirective) +
4753  Twine(" does not match the specified target id ") +
4754  Twine(getTargetStreamer().getTargetID()->toString())).str());
4755 
4756  return false;
4757 }
4758 
4759 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4760  return Error(Range.Start, "value out of range", Range);
4761 }
4762 
4763 bool AMDGPUAsmParser::calculateGPRBlocks(
4764  const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4765  bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4766  SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4767  unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4768  // TODO(scott.linder): These calculations are duplicated from
4769  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4770  IsaVersion Version = getIsaVersion(getSTI().getCPU());
4771 
4772  unsigned NumVGPRs = NextFreeVGPR;
4773  unsigned NumSGPRs = NextFreeSGPR;
4774 
4775  if (Version.Major >= 10)
4776  NumSGPRs = 0;
4777  else {
 4778  unsigned MaxAddressableNumSGPRs =
 4779  IsaInfo::getAddressableNumSGPRs(&getSTI());
 4780 
4781  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4782  NumSGPRs > MaxAddressableNumSGPRs)
4783  return OutOfRangeError(SGPRRange);
4784 
4785  NumSGPRs +=
4786  IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4787 
4788  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4789  NumSGPRs > MaxAddressableNumSGPRs)
4790  return OutOfRangeError(SGPRRange);
4791 
 4792  if (Features.test(FeatureSGPRInitBug))
 4793  NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4794  }
4795 
4796  VGPRBlocks =
4797  IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4798  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4799 
4800  return false;
4801 }
4802 
4803 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4804  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4805  return TokError("directive only supported for amdgcn architecture");
4806 
4807  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4808  return TokError("directive only supported for amdhsa OS");
4809 
4810  StringRef KernelName;
4811  if (getParser().parseIdentifier(KernelName))
 4812  return true;
 4813 
 4814  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4815 
4816  StringSet<> Seen;
4817 
4818  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4819 
4820  SMRange VGPRRange;
4821  uint64_t NextFreeVGPR = 0;
4822  uint64_t AccumOffset = 0;
4823  uint64_t SharedVGPRCount = 0;
4824  SMRange SGPRRange;
4825  uint64_t NextFreeSGPR = 0;
4826 
4827  // Count the number of user SGPRs implied from the enabled feature bits.
4828  unsigned ImpliedUserSGPRCount = 0;
4829 
4830  // Track if the asm explicitly contains the directive for the user SGPR
4831  // count.
4832  Optional<unsigned> ExplicitUserSGPRCount;
4833  bool ReserveVCC = true;
4834  bool ReserveFlatScr = true;
4835  Optional<bool> EnableWavefrontSize32;
4836 
4837  while (true) {
4838  while (trySkipToken(AsmToken::EndOfStatement));
4839 
4840  StringRef ID;
4841  SMRange IDRange = getTok().getLocRange();
4842  if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4843  return true;
4844 
4845  if (ID == ".end_amdhsa_kernel")
4846  break;
4847 
4848  if (Seen.find(ID) != Seen.end())
4849  return TokError(".amdhsa_ directives cannot be repeated");
4850  Seen.insert(ID);
4851 
4852  SMLoc ValStart = getLoc();
4853  int64_t IVal;
4854  if (getParser().parseAbsoluteExpression(IVal))
4855  return true;
4856  SMLoc ValEnd = getLoc();
4857  SMRange ValRange = SMRange(ValStart, ValEnd);
4858 
4859  if (IVal < 0)
4860  return OutOfRangeError(ValRange);
4861 
4862  uint64_t Val = IVal;
4863 
4864 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
4865  if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
4866  return OutOfRangeError(RANGE); \
4867  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4868 
4869  if (ID == ".amdhsa_group_segment_fixed_size") {
4870  if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4871  return OutOfRangeError(ValRange);
4872  KD.group_segment_fixed_size = Val;
4873  } else if (ID == ".amdhsa_private_segment_fixed_size") {
4874  if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4875  return OutOfRangeError(ValRange);
4876  KD.private_segment_fixed_size = Val;
4877  } else if (ID == ".amdhsa_kernarg_size") {
4878  if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4879  return OutOfRangeError(ValRange);
4880  KD.kernarg_size = Val;
4881  } else if (ID == ".amdhsa_user_sgpr_count") {
4882  ExplicitUserSGPRCount = Val;
4883  } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4885  return Error(IDRange.Start,
4886  "directive is not supported with architected flat scratch",
4887  IDRange);
4889  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4890  Val, ValRange);
4891  if (Val)
4892  ImpliedUserSGPRCount += 4;
4893  } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4895  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4896  ValRange);
4897  if (Val)
4898  ImpliedUserSGPRCount += 2;
4899  } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4901  KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4902  ValRange);
4903  if (Val)
4904  ImpliedUserSGPRCount += 2;
4905  } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4907  KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4908  Val, ValRange);
4909  if (Val)
4910  ImpliedUserSGPRCount += 2;
4911  } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4913  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4914  ValRange);
4915  if (Val)
4916  ImpliedUserSGPRCount += 2;
4917  } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4919  return Error(IDRange.Start,
4920  "directive is not supported with architected flat scratch",
4921  IDRange);
4923  KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4924  ValRange);
4925  if (Val)
4926  ImpliedUserSGPRCount += 2;
4927  } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4929  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4930  Val, ValRange);
4931  if (Val)
4932  ImpliedUserSGPRCount += 1;
4933  } else if (ID == ".amdhsa_wavefront_size32") {
4934  if (IVersion.Major < 10)
4935  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4936  EnableWavefrontSize32 = Val;
4938  KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4939  Val, ValRange);
4940  } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4942  return Error(IDRange.Start,
4943  "directive is not supported with architected flat scratch",
4944  IDRange);
4946  COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4947  } else if (ID == ".amdhsa_enable_private_segment") {
4949  return Error(
4950  IDRange.Start,
4951  "directive is not supported without architected flat scratch",
4952  IDRange);
4954  COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4955  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4957  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4958  ValRange);
4959  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4961  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4962  ValRange);
4963  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4965  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4966  ValRange);
4967  } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4969  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4970  ValRange);
4971  } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4973  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4974  ValRange);
4975  } else if (ID == ".amdhsa_next_free_vgpr") {
4976  VGPRRange = ValRange;
4977  NextFreeVGPR = Val;
4978  } else if (ID == ".amdhsa_next_free_sgpr") {
4979  SGPRRange = ValRange;
4980  NextFreeSGPR = Val;
4981  } else if (ID == ".amdhsa_accum_offset") {
4982  if (!isGFX90A())
4983  return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4984  AccumOffset = Val;
4985  } else if (ID == ".amdhsa_reserve_vcc") {
4986  if (!isUInt<1>(Val))
4987  return OutOfRangeError(ValRange);
4988  ReserveVCC = Val;
4989  } else if (ID == ".amdhsa_reserve_flat_scratch") {
4990  if (IVersion.Major < 7)
4991  return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4993  return Error(IDRange.Start,
4994  "directive is not supported with architected flat scratch",
4995  IDRange);
4996  if (!isUInt<1>(Val))
4997  return OutOfRangeError(ValRange);
4998  ReserveFlatScr = Val;
4999  } else if (ID == ".amdhsa_reserve_xnack_mask") {
5000  if (IVersion.Major < 8)
5001  return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5002  if (!isUInt<1>(Val))
5003  return OutOfRangeError(ValRange);
5004  if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5005  return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5006  IDRange);
5007  } else if (ID == ".amdhsa_float_round_mode_32") {
5009  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5010  } else if (ID == ".amdhsa_float_round_mode_16_64") {
5012  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5013  } else if (ID == ".amdhsa_float_denorm_mode_32") {
5015  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5016  } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5018  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5019  ValRange);
5020  } else if (ID == ".amdhsa_dx10_clamp") {
5022  COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5023  } else if (ID == ".amdhsa_ieee_mode") {
5024  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5025  Val, ValRange);
5026  } else if (ID == ".amdhsa_fp16_overflow") {
5027  if (IVersion.Major < 9)
5028  return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5029  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5030  ValRange);
5031  } else if (ID == ".amdhsa_tg_split") {
5032  if (!isGFX90A())
5033  return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5034  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5035  ValRange);
5036  } else if (ID == ".amdhsa_workgroup_processor_mode") {
5037  if (IVersion.Major < 10)
5038  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5039  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5040  ValRange);
5041  } else if (ID == ".amdhsa_memory_ordered") {
5042  if (IVersion.Major < 10)
5043  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5044  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5045  ValRange);
5046  } else if (ID == ".amdhsa_forward_progress") {
5047  if (IVersion.Major < 10)
5048  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5049  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5050  ValRange);
5051  } else if (ID == ".amdhsa_shared_vgpr_count") {
5052  if (IVersion.Major < 10)
5053  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5054  SharedVGPRCount = Val;
5056  COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
5057  ValRange);
5058  } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5060  KD.compute_pgm_rsrc2,
5061  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5062  ValRange);
5063  } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5065  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5066  Val, ValRange);
5067  } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5069  KD.compute_pgm_rsrc2,
5070  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5071  ValRange);
5072  } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5074  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5075  Val, ValRange);
5076  } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5078  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5079  Val, ValRange);
5080  } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5082  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5083  Val, ValRange);
5084  } else if (ID == ".amdhsa_exception_int_div_zero") {
5086  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5087  Val, ValRange);
5088  } else {
5089  return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5090  }
5091 
5092 #undef PARSE_BITS_ENTRY
5093  }
5094 
5095  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5096  return TokError(".amdhsa_next_free_vgpr directive is required");
5097 
5098  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5099  return TokError(".amdhsa_next_free_sgpr directive is required");
5100 
5101  unsigned VGPRBlocks;
5102  unsigned SGPRBlocks;
5103  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5104  getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5105  EnableWavefrontSize32, NextFreeVGPR,
5106  VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5107  SGPRBlocks))
5108  return true;
5109 
5110  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5111  VGPRBlocks))
5112  return OutOfRangeError(VGPRRange);
5114  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5115 
5116  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5117  SGPRBlocks))
5118  return OutOfRangeError(SGPRRange);
5120  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5121  SGPRBlocks);
5122 
5123  if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5124  return TokError("amdgpu_user_sgpr_count smaller than than implied by "
5125  "enabled user SGPRs");
5126 
5127  unsigned UserSGPRCount =
5128  ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5129 
5130  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5131  return TokError("too many user SGPRs enabled");
5132  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5133  UserSGPRCount);
5134 
5135  if (isGFX90A()) {
5136  if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5137  return TokError(".amdhsa_accum_offset directive is required");
5138  if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5139  return TokError("accum_offset should be in range [4..256] in "
5140  "increments of 4");
5141  if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5142  return TokError("accum_offset exceeds total VGPR allocation");
5143  AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5144  (AccumOffset / 4 - 1));
5145  }
5146 
5147  if (IVersion.Major == 10) {
5148  // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
5149  if (SharedVGPRCount && EnableWavefrontSize32) {
5150  return TokError("shared_vgpr_count directive not valid on "
5151  "wavefront size 32");
5152  }
5153  if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5154  return TokError("shared_vgpr_count*2 + "
5155  "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5156  "exceed 63\n");
5157  }
5158  }
5159 
5160  getTargetStreamer().EmitAmdhsaKernelDescriptor(
5161  getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5162  ReserveFlatScr);
5163  return false;
5164 }
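 // Illustrative example for the directive above (hypothetical): a minimal
 // block accepted by this parser looks like
 //   .amdhsa_kernel my_kernel
 //     .amdhsa_next_free_vgpr 8
 //     .amdhsa_next_free_sgpr 16
 //   .end_amdhsa_kernel
 // with .amdhsa_accum_offset additionally required on gfx90a.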
5165 
5166 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5167  uint32_t Major;
5168  uint32_t Minor;
5169 
5170  if (ParseDirectiveMajorMinor(Major, Minor))
5171  return true;
5172 
5173  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5174  return false;
5175 }
5176 
5177 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5178  uint32_t Major;
5179  uint32_t Minor;
5180  uint32_t Stepping;
5181  StringRef VendorName;
5182  StringRef ArchName;
5183 
5184  // If this directive has no arguments, then use the ISA version for the
5185  // targeted GPU.
5186  if (isToken(AsmToken::EndOfStatement)) {
5187  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5188  getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5189  ISA.Stepping,
5190  "AMD", "AMDGPU");
5191  return false;
5192  }
5193 
5194  if (ParseDirectiveMajorMinor(Major, Minor))
5195  return true;
5196 
5197  if (!trySkipToken(AsmToken::Comma))
5198  return TokError("stepping version number required, comma expected");
5199 
5200  if (ParseAsAbsoluteExpression(Stepping))
5201  return TokError("invalid stepping version");
5202 
5203  if (!trySkipToken(AsmToken::Comma))
5204  return TokError("vendor name required, comma expected");
5205 
5206  if (!parseString(VendorName, "invalid vendor name"))
5207  return true;
5208 
5209  if (!trySkipToken(AsmToken::Comma))
5210  return TokError("arch name required, comma expected");
5211 
5212  if (!parseString(ArchName, "invalid arch name"))
5213  return true;
5214 
5215  getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5216  VendorName, ArchName);
5217  return false;
5218 }
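// Illustrative usage only (the version numbers below are hypothetical, not
// taken from this file): the directive parsed above accepts either no
// arguments, in which case the ISA of the targeted GPU is used, or an explicit
// major, minor, stepping triple followed by quoted vendor and arch strings:
//   .hsa_code_object_isa
//   .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"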
5219 
5220 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5221  amd_kernel_code_t &Header) {
5222  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5223  // assembly for backwards compatibility.
5224  if (ID == "max_scratch_backing_memory_byte_size") {
5225  Parser.eatToEndOfStatement();
5226  return false;
5227  }
5228 
5229  SmallString<40> ErrStr;
5230  raw_svector_ostream Err(ErrStr);
5231  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5232  return TokError(Err.str());
5233  }
5234  Lex();
5235 
5236  if (ID == "enable_wavefront_size32") {
5238  if (!isGFX10Plus())
5239  return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5240  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5241  return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5242  } else {
5243  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5244  return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5245  }
5246  }
5247 
5248  if (ID == "wavefront_size") {
5249  if (Header.wavefront_size == 5) {
5250  if (!isGFX10Plus())
5251  return TokError("wavefront_size=5 is only allowed on GFX10+");
5252  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5253  return TokError("wavefront_size=5 requires +WavefrontSize32");
5254  } else if (Header.wavefront_size == 6) {
5255  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5256  return TokError("wavefront_size=6 requires +WavefrontSize64");
5257  }
5258  }
5259 
5260  if (ID == "enable_wgp_mode") {
5262  !isGFX10Plus())
5263  return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5264  }
5265 
5266  if (ID == "enable_mem_ordered") {
5268  !isGFX10Plus())
5269  return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5270  }
5271 
5272  if (ID == "enable_fwd_progress") {
5274  !isGFX10Plus())
5275  return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5276  }
5277 
5278  return false;
5279 }
5280 
5281 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5282  amd_kernel_code_t Header;
5283  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5284 
5285  while (true) {
5286  // Lex EndOfStatement. This is in a while loop, because lexing a comment
5287  // will set the current token to EndOfStatement.
5288  while(trySkipToken(AsmToken::EndOfStatement));
5289 
5290  StringRef ID;
5291  if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5292  return true;
5293 
5294  if (ID == ".end_amd_kernel_code_t")
5295  break;
5296 
5297  if (ParseAMDKernelCodeTValue(ID, Header))
5298  return true;
5299  }
5300 
5301  getTargetStreamer().EmitAMDKernelCodeT(Header);
5302 
5303  return false;
5304 }
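// For illustration (field names and values are examples drawn from the checks
// above, not an exhaustive list), the block parsed by this directive looks like:
//   .amd_kernel_code_t
//     enable_wavefront_size32 = 1
//     wavefront_size = 5
//   .end_amd_kernel_code_t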
5305 
5306 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5307  StringRef KernelName;
5308  if (!parseId(KernelName, "expected symbol name"))
5309  return true;
5310 
5311  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5312  ELF::STT_AMDGPU_HSA_KERNEL);
5313 
5314  KernelScope.initialize(getContext());
5315  return false;
5316 }
5317 
5318 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5319  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5320  return Error(getLoc(),
5321  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5322  "architectures");
5323  }
5324 
5325  auto TargetIDDirective = getLexer().getTok().getStringContents();
5326  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5327  return Error(getParser().getTok().getLoc(), "target id must match options");
5328 
5329  getTargetStreamer().EmitISAVersion();
5330  Lex();
5331 
5332  return false;
5333 }
5334 
5335 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5336  const char *AssemblerDirectiveBegin;
5337  const char *AssemblerDirectiveEnd;
5339  isHsaAbiVersion3AndAbove(&getSTI())
5340  ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5342  : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5343  HSAMD::AssemblerDirectiveEnd);
5344 
5345  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5346  return Error(getLoc(),
5347  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5348  "not available on non-amdhsa OSes")).str());
5349  }
5350 
5351  std::string HSAMetadataString;
5352  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5353  HSAMetadataString))
5354  return true;
5355 
5356  if (isHsaAbiVersion3AndAbove(&getSTI())) {
5357  if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5358  return Error(getLoc(), "invalid HSA metadata");
5359  } else {
5360  if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5361  return Error(getLoc(), "invalid HSA metadata");
5362  }
5363 
5364  return false;
5365 }
5366 
5367 /// Common code to parse out a block of text (typically YAML) between start and
5368 /// end directives.
5369 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5370  const char *AssemblerDirectiveEnd,
5371  std::string &CollectString) {
5372 
5373  raw_string_ostream CollectStream(CollectString);
5374 
5375  getLexer().setSkipSpace(false);
5376 
5377  bool FoundEnd = false;
5378  while (!isToken(AsmToken::Eof)) {
5379  while (isToken(AsmToken::Space)) {
5380  CollectStream << getTokenStr();
5381  Lex();
5382  }
5383 
5384  if (trySkipId(AssemblerDirectiveEnd)) {
5385  FoundEnd = true;
5386  break;
5387  }
5388 
5389  CollectStream << Parser.parseStringToEndOfStatement()
5390  << getContext().getAsmInfo()->getSeparatorString();
5391 
5392  Parser.eatToEndOfStatement();
5393  }
5394 
5395  getLexer().setSkipSpace(true);
5396 
5397  if (isToken(AsmToken::Eof) && !FoundEnd) {
5398  return TokError(Twine("expected directive ") +
5399  Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5400  }
5401 
5402  CollectStream.flush();
5403  return false;
5404 }
5405 
5406 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5407 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5408  std::string String;
5409  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5410  AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5411  return true;
5412 
5413  auto PALMetadata = getTargetStreamer().getPALMetadata();
5414  if (!PALMetadata->setFromString(String))
5415  return Error(getLoc(), "invalid PAL metadata");
5416  return false;
5417 }
5418 
5419 /// Parse the assembler directive for old linear-format PAL metadata.
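/// For illustration (the register index and value below are hypothetical): the
/// directive named by PALMD::AssemblerDirective takes an even number of
/// comma-separated values, alternating a register key and the value written to
/// it, e.g.
///   0x2c0a, 0x0, 0x2c0b, 0x42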
5420 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5421  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5422  return Error(getLoc(),
5423  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5424  "not available on non-amdpal OSes")).str());
5425  }
5426 
5427  auto PALMetadata = getTargetStreamer().getPALMetadata();
5428  PALMetadata->setLegacy();
5429  for (;;) {
5430  uint32_t Key, Value;
5431  if (ParseAsAbsoluteExpression(Key)) {
5432  return TokError(Twine("invalid value in ") +
5434  }
5435  if (!trySkipToken(AsmToken::Comma)) {
5436  return TokError(Twine("expected an even number of values in ") +
5438  }
5439  if (ParseAsAbsoluteExpression(Value)) {
5440  return TokError(Twine("invalid value in ") +
5442  }
5443  PALMetadata->setRegister(Key, Value);
5444  if (!trySkipToken(AsmToken::Comma))
5445  break;
5446  }
5447  return false;
5448 }
5449 
5450 /// ParseDirectiveAMDGPULDS
5451 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
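/// For example (symbol name and sizes are illustrative only):
///   .amdgpu_lds lds_buffer, 4096, 16
/// declares 4096 bytes of LDS for lds_buffer with 16-byte alignment.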
5452 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5453  if (getParser().checkForValidSection())
5454  return true;
5455 
5456  StringRef Name;
5457  SMLoc NameLoc = getLoc();
5458  if (getParser().parseIdentifier(Name))
5459  return TokError("expected identifier in directive");
5460 
5461  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5462  if (parseToken(AsmToken::Comma, "expected ','"))
5463  return true;
5464 
5465  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5466 
5467  int64_t Size;
5468  SMLoc SizeLoc = getLoc();
5469  if (getParser().parseAbsoluteExpression(Size))
5470  return true;
5471  if (Size < 0)
5472  return Error(SizeLoc, "size must be non-negative");
5473  if (Size > LocalMemorySize)
5474  return Error(SizeLoc, "size is too large");
5475 
5476  int64_t Alignment = 4;
5477  if (trySkipToken(AsmToken::Comma)) {
5478  SMLoc AlignLoc = getLoc();
5479  if (getParser().parseAbsoluteExpression(Alignment))
5480  return true;
5481  if (Alignment < 0 || !isPowerOf2_64(Alignment))
5482  return Error(AlignLoc, "alignment must be a power of two");
5483 
5484  // Alignment larger than the size of LDS is possible in theory, as long
5485  // as the linker manages to place the symbol at address 0, but we do want
5486  // to make sure the alignment fits nicely into a 32-bit integer.
5487  if (Alignment >= 1u << 31)
5488  return Error(AlignLoc, "alignment is too large");
5489  }
5490 
5491  if (parseToken(AsmToken::EndOfStatement,
5492  "unexpected token in '.amdgpu_lds' directive"))
5493  return true;
5494 
5495  Symbol->redefineIfPossible();
5496  if (!Symbol->isUndefined())
5497  return Error(NameLoc, "invalid symbol redefinition");
5498 
5499  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5500  return false;
5501 }
5502 
5503 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5504  StringRef IDVal = DirectiveID.getString();
5505 
5506  if (isHsaAbiVersion3AndAbove(&getSTI())) {
5507  if (IDVal == ".amdhsa_kernel")
5508  return ParseDirectiveAMDHSAKernel();
5509 
5510  // TODO: Restructure/combine with PAL metadata directive.
5511  if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5512  return ParseDirectiveHSAMetadata();
5513  } else {
5514  if (IDVal == ".hsa_code_object_version")
5515  return ParseDirectiveHSACodeObjectVersion();
5516 
5517  if (IDVal == ".hsa_code_object_isa")
5518  return ParseDirectiveHSACodeObjectISA();
5519 
5520  if (IDVal == ".amd_kernel_code_t")
5521  return ParseDirectiveAMDKernelCodeT();
5522 
5523  if (IDVal == ".amdgpu_hsa_kernel")
5524  return ParseDirectiveAMDGPUHsaKernel();
5525 
5526  if (IDVal == ".amd_amdgpu_isa")
5527  return ParseDirectiveISAVersion();
5528 
5529  if (IDVal == HSAMD::AssemblerDirectiveBegin)
5530  return ParseDirectiveHSAMetadata();
5531  }
5532 
5533  if (IDVal == ".amdgcn_target")
5534  return ParseDirectiveAMDGCNTarget();
5535 
5536  if (IDVal == ".amdgpu_lds")
5537  return ParseDirectiveAMDGPULDS();
5538 
5539  if (IDVal == PALMD::AssemblerDirectiveBegin)
5540  return ParseDirectivePALMetadataBegin();
5541 
5542  if (IDVal == PALMD::AssemblerDirective)
5543  return ParseDirectivePALMetadata();
5544 
5545  return true;
5546 }
5547 
5548 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5549  unsigned RegNo) {
5550 
5551  if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5552  return isGFX9Plus();
5553 
5554  // GFX10 has 2 more SGPRs 104 and 105.
5555  if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5556  return hasSGPR104_SGPR105();
5557 
5558  switch (RegNo) {
5559  case AMDGPU::SRC_SHARED_BASE:
5560  case AMDGPU::SRC_SHARED_LIMIT:
5561  case AMDGPU::SRC_PRIVATE_BASE:
5562  case AMDGPU::SRC_PRIVATE_LIMIT:
5563  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5564  return isGFX9Plus();
5565  case AMDGPU::TBA:
5566  case AMDGPU::TBA_LO:
5567  case AMDGPU::TBA_HI:
5568  case AMDGPU::TMA:
5569  case AMDGPU::TMA_LO:
5570  case AMDGPU::TMA_HI:
5571  return !isGFX9Plus();
5572  case AMDGPU::XNACK_MASK:
5573  case AMDGPU::XNACK_MASK_LO:
5574  case AMDGPU::XNACK_MASK_HI:
5575  return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5576  case AMDGPU::SGPR_NULL:
5577  return isGFX10Plus();
5578  default:
5579  break;
5580  }
5581 
5582  if (isCI())
5583  return true;
5584 
5585  if (isSI() || isGFX10Plus()) {
5586  // No flat_scr on SI.
5587  // On GFX10 flat scratch is not a valid register operand and can only be
5588  // accessed with s_setreg/s_getreg.
5589  switch (RegNo) {
5590  case AMDGPU::FLAT_SCR:
5591  case AMDGPU::FLAT_SCR_LO:
5592  case AMDGPU::FLAT_SCR_HI:
5593  return false;
5594  default:
5595  return true;
5596  }
5597  }
5598 
5599  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5600  // SI/CI have.
5601  if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5602  return hasSGPR102_SGPR103();
5603 
5604  return true;
5605 }
5606 
5607 OperandMatchResultTy
5608 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5609  OperandMode Mode) {
5610  // Try to parse with a custom parser
5611  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5612 
5613  // If we successfully parsed the operand or if there was an error parsing,
5614  // we are done.
5615  //
5616  // If we are parsing after we reach EndOfStatement then this means we
5617  // are appending default values to the Operands list. This is only done
5618  // by custom parser, so we shouldn't continue on to the generic parsing.
5619  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5620  isToken(AsmToken::EndOfStatement))
5621  return ResTy;
5622 
5623  SMLoc RBraceLoc;
5624  SMLoc LBraceLoc = getLoc();
5625  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5626  unsigned Prefix = Operands.size();
5627 
5628  for (;;) {
5629  auto Loc = getLoc();
5630  ResTy = parseReg(Operands);
5631  if (ResTy == MatchOperand_NoMatch)
5632  Error(Loc, "expected a register");
5633  if (ResTy != MatchOperand_Success)
5634  return MatchOperand_ParseFail;
5635 
5636  RBraceLoc = getLoc();
5637  if (trySkipToken(AsmToken::RBrac))
5638  break;
5639 
5640  if (!skipToken(AsmToken::Comma,
5641  "expected a comma or a closing square bracket")) {
5642  return MatchOperand_ParseFail;
5643  }
5644  }
5645 
5646  if (Operands.size() - Prefix > 1) {
5647  Operands.insert(Operands.begin() + Prefix,
5648  AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5649  Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5650  }
5651 
5652  return MatchOperand_Success;
5653  }
5654 
5655  return parseRegOrImm(Operands);
5656 }
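// Note on OperandMode_NSA (illustrative syntax): on GFX10+ MIMG instructions a
// non-sequential address is written as a bracketed register list such as
//   [v0, v2, v5]
// which the loop above collects and then re-wraps with "[" and "]" tokens when
// more than one register was parsed.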
5657 
5658 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5659  // Clear any forced encodings from the previous instruction.
5660  setForcedEncodingSize(0);
5661  setForcedDPP(false);
5662  setForcedSDWA(false);
5663 
5664  if (Name.endswith("_e64")) {
5665  setForcedEncodingSize(64);
5666  return Name.substr(0, Name.size() - 4);
5667  } else if (Name.endswith("_e32")) {
5668  setForcedEncodingSize(32);
5669  return Name.substr(0, Name.size() - 4);
5670  } else if (Name.endswith("_dpp")) {
5671  setForcedDPP(true);
5672  return Name.substr(0, Name.size() - 4);
5673  } else if (Name.endswith("_sdwa")) {
5674  setForcedSDWA(true);
5675  return Name.substr(0, Name.size() - 5);
5676  }
5677  return Name;
5678 }
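// For example (mnemonics are illustrative): "v_add_f32_e64" is returned as
// "v_add_f32" with a forced 64-bit encoding, and "v_mov_b32_sdwa" as
// "v_mov_b32" with SDWA forced.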
5679 
5680 static void applyMnemonicAliases(StringRef &Mnemonic,
5681  const FeatureBitset &Features,
5682  unsigned VariantID);
5683 
5684 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5685  StringRef Name,
5686  SMLoc NameLoc, OperandVector &Operands) {
5687  // Add the instruction mnemonic
5688  Name = parseMnemonicSuffix(Name);
5689 
5690  // If the target architecture uses MnemonicAlias, call it here to parse
5691  // operands correctly.
5692  applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5693 
5694  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5695 
5696  bool IsMIMG = Name.startswith("image_");
5697 
5698  while (!trySkipToken(AsmToken::EndOfStatement)) {
5699  OperandMode Mode = OperandMode_Default;
5700  if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5701  Mode = OperandMode_NSA;
5702  CPolSeen = 0;
5703  OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5704 
5705  if (Res != MatchOperand_Success) {
5706  checkUnsupportedInstruction(Name, NameLoc);
5707  if (!Parser.hasPendingError()) {
5708  // FIXME: use real operand location rather than the current location.
5709  StringRef Msg =
5710  (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5711  "not a valid operand.";
5712  Error(getLoc(), Msg);
5713  }
5714  while (!trySkipToken(AsmToken::EndOfStatement)) {
5715  lex();
5716  }
5717  return true;
5718  }
5719 
5720  // Eat the comma or space if there is one.
5721  trySkipToken(AsmToken::Comma);
5722  }
5723 
5724  return false;
5725 }
5726 
5727 //===----------------------------------------------------------------------===//
5728 // Utility functions
5729 //===----------------------------------------------------------------------===//
5730 
5732 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5733 
5734  if (!trySkipId(Prefix, AsmToken::Colon))
5735  return MatchOperand_NoMatch;
5736 
5737  return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5738 }
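// For example (prefix and value are illustrative): with Prefix == "offset",
// this matches text of the form "offset:<expr>", e.g. "offset:4095", and
// returns the evaluated integer in IntVal.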
5739 
5741 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5742  AMDGPUOperand::ImmTy ImmTy,
5743  bool (*ConvertResult)(int64_t&)) {
5744  SMLoc S = getLoc();
5745  int64_t Value = 0;
5746 
5747  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5748  if (Res != MatchOperand_Success)
5749  return Res;
5750 
5751  if (ConvertResult && !ConvertResult(Value)) {
5752  Error(S, "invalid " + StringRef(Prefix) + " value.");
5753  }
5754 
5755  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5756  return MatchOperand_Success;
5757 }
5758 
5759 OperandMatchResultTy
5760 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5761  OperandVector &Operands,
5762  AMDGPUOperand::ImmTy ImmTy,
5763  bool (*ConvertResult)(int64_t&)) {
5764  SMLoc S = getLoc();
5765  if (!trySkipId(Prefix, AsmToken::Colon))
5766  return MatchOperand_NoMatch;
5767 
5768  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5769  return MatchOperand_ParseFail;
5770 
5771  unsigned Val = 0;
5772  const unsigned MaxSize = 4;
5773 
5774  // FIXME: How to verify the number of elements matches the number of src
5775  // operands?
5776  for (int I = 0; ; ++I) {
5777  int64_t Op;
5778  SMLoc Loc = getLoc();
5779  if (!parseExpr(Op))
5780  return MatchOperand_ParseFail;
5781 
5782  if (Op != 0 && Op != 1) {
5783  Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5784  return MatchOperand_ParseFail;
5785  }
5786 
5787  Val |= (Op << I);
5788 
5789  if (trySkipToken(AsmToken::RBrac))
5790  break;
5791 
5792  if (I + 1 == MaxSize) {
5793  Error(getLoc(), "expected a closing square bracket");
5794  return MatchOperand_ParseFail;
5795  }
5796 
5797  if (!skipToken(AsmToken::Comma, "expected a comma"))
5798  return MatchOperand_ParseFail;
5799  }
5800 
5801  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5802  return MatchOperand_Success;
5803 }
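// For example (the prefix name is illustrative): with Prefix == "op_sel",
// text such as "op_sel:[0,1,0]" is accepted; each element must be 0 or 1 and
// is packed into the bit at its index, so this example yields Val == 0b010.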
5804 
5806 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5807  AMDGPUOperand::ImmTy ImmTy) {
5808  int64_t Bit;
5809  SMLoc S = getLoc();
5810 
5811  if (trySkipId(Name)) {
5812  Bit = 1;
5813  } else if (trySkipId("no", Name)) {
5814  Bit = 0;
5815  } else {
5816  return MatchOperand_NoMatch;
5817  }
5818 
5819  if (Name == "r128" && !hasMIMG_R128()) {
5820  Error(S, "r128 modifier is not supported on this GPU");
5821  return MatchOperand_ParseFail;
5822  }
5823  if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5824  Error(S, "a16 modifier is not supported on this GPU");
5825  return MatchOperand_ParseFail;
5826  }
5827 
5828  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5829  ImmTy = AMDGPUOperand::ImmTyR128A16;
5830 
5831  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5832  return MatchOperand_Success;
5833 }
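// For example (the modifier name is illustrative): with Name == "gds", the
// token "gds" produces an immediate of 1 and "nogds" produces 0; if neither
// form is present the parser reports MatchOperand_NoMatch.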
5834 
5836 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5837  unsigned CPolOn = 0;
5838  unsigned CPolOff = 0;
5839  SMLoc S = getLoc();
5840 
5841  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5842  if (isGFX940() && !Mnemo.startswith("s_")) {
5843  if (trySkipId("sc0"))
5844  CPolOn = AMDGPU::CPol::SC0;
5845  else if (trySkipId("nosc0"))
5846  CPolOff = AMDGPU::CPol::SC0;
5847  else if (trySkipId("nt"))
5848  CPolOn = AMDGPU::CPol::NT;
5849  else if (trySkipId("nont"))
5850  CPolOff = AMDGPU::CPol::NT;
5851  else if (trySkipId("sc1"))
5852  CPolOn = AMDGPU::CPol::SC1;
5853  else if (trySkipId("nosc1"))
5854  CPolOff = AMDGPU::CPol::SC1;
5855  else
5856  return MatchOperand_NoMatch;
5857  }
5858  else if (trySkipId("glc"))
5859  CPolOn = AMDGPU::CPol::GLC;
5860  else if (trySkipId("noglc"))
5861  CPolOff = AMDGPU::CPol::GLC;
5862  else if (trySkipId("slc"))
5863  CPolOn = AMDGPU::CPol::SLC;
5864  else if (trySkipId("noslc"))
5865  CPolOff = AMDGPU::CPol::SLC;
5866  else if (trySkipId("dlc"</