AMDGPUAsmParser.cpp
1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
19 #include "llvm/ADT/APFloat.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/MC/MCAsmInfo.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCExpr.h"
26 #include "llvm/MC/MCInst.h"
30 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/Support/Casting.h"
37 
38 using namespace llvm;
39 using namespace llvm::AMDGPU;
40 using namespace llvm::amdhsa;
41 
42 namespace {
43 
44 class AMDGPUAsmParser;
45 
46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
47 
48 //===----------------------------------------------------------------------===//
49 // Operand
50 //===----------------------------------------------------------------------===//
51 
52 class AMDGPUOperand : public MCParsedAsmOperand {
53  enum KindTy {
54  Token,
55  Immediate,
56  Register,
57  Expression,
58  } Kind;
59 
60  SMLoc StartLoc, EndLoc;
61  const AMDGPUAsmParser *AsmParser;
62 
63 public:
64  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
65  : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
66 
67  using Ptr = std::unique_ptr<AMDGPUOperand>;
68 
69  struct Modifiers {
70  bool Abs = false;
71  bool Neg = false;
72  bool Sext = false;
73 
74  bool hasFPModifiers() const { return Abs || Neg; }
75  bool hasIntModifiers() const { return Sext; }
76  bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
77 
78  int64_t getFPModifiersOperand() const {
79  int64_t Operand = 0;
80  Operand |= Abs ? SISrcMods::ABS : 0u;
81  Operand |= Neg ? SISrcMods::NEG : 0u;
82  return Operand;
83  }
84 
85  int64_t getIntModifiersOperand() const {
86  int64_t Operand = 0;
87  Operand |= Sext ? SISrcMods::SEXT : 0u;
88  return Operand;
89  }
90 
91  int64_t getModifiersOperand() const {
92  assert(!(hasFPModifiers() && hasIntModifiers())
93  && "fp and int modifiers should not be used simultaneously");
94  if (hasFPModifiers()) {
95  return getFPModifiersOperand();
96  } else if (hasIntModifiers()) {
97  return getIntModifiersOperand();
98  } else {
99  return 0;
100  }
101  }
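 // Illustrative example (not in the original source): an operand written as
 // -|v0| sets both Abs and Neg, so getModifiersOperand() returns
 // SISrcMods::NEG | SISrcMods::ABS.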
102 
103  friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
104  };
105 
106  enum ImmTy {
107  ImmTyNone,
108  ImmTyGDS,
109  ImmTyLDS,
110  ImmTyOffen,
111  ImmTyIdxen,
112  ImmTyAddr64,
113  ImmTyOffset,
114  ImmTyInstOffset,
115  ImmTyOffset0,
116  ImmTyOffset1,
117  ImmTyCPol,
118  ImmTySWZ,
119  ImmTyTFE,
120  ImmTyD16,
121  ImmTyClampSI,
122  ImmTyOModSI,
123  ImmTyDPP8,
124  ImmTyDppCtrl,
125  ImmTyDppRowMask,
126  ImmTyDppBankMask,
127  ImmTyDppBoundCtrl,
128  ImmTyDppFi,
129  ImmTySdwaDstSel,
130  ImmTySdwaSrc0Sel,
131  ImmTySdwaSrc1Sel,
132  ImmTySdwaDstUnused,
133  ImmTyDMask,
134  ImmTyDim,
135  ImmTyUNorm,
136  ImmTyDA,
137  ImmTyR128A16,
138  ImmTyA16,
139  ImmTyLWE,
140  ImmTyExpTgt,
141  ImmTyExpCompr,
142  ImmTyExpVM,
143  ImmTyFORMAT,
144  ImmTyHwreg,
145  ImmTyOff,
146  ImmTySendMsg,
147  ImmTyInterpSlot,
148  ImmTyInterpAttr,
149  ImmTyAttrChan,
150  ImmTyOpSel,
151  ImmTyOpSelHi,
152  ImmTyNegLo,
153  ImmTyNegHi,
154  ImmTySwizzle,
155  ImmTyGprIdxMode,
156  ImmTyHigh,
157  ImmTyBLGP,
158  ImmTyCBSZ,
159  ImmTyABID,
160  ImmTyEndpgm,
161  };
162 
163  enum ImmKindTy {
164  ImmKindTyNone,
165  ImmKindTyLiteral,
166  ImmKindTyConst,
167  };
168 
169 private:
170  struct TokOp {
171  const char *Data;
172  unsigned Length;
173  };
174 
175  struct ImmOp {
176  int64_t Val;
177  ImmTy Type;
178  bool IsFPImm;
179  mutable ImmKindTy Kind;
180  Modifiers Mods;
181  };
182 
183  struct RegOp {
184  unsigned RegNo;
185  Modifiers Mods;
186  };
187 
188  union {
189  TokOp Tok;
190  ImmOp Imm;
191  RegOp Reg;
192  const MCExpr *Expr;
193  };
194 
195 public:
196  bool isToken() const override {
197  if (Kind == Token)
198  return true;
199 
200  // When parsing operands, we can't always tell if something was meant to be
201  // a token, like 'gds', or an expression that references a global variable.
202  // In this case, we assume the string is an expression, and if we need to
203  // interpret it as a token, then we treat the symbol name as the token.
204  return isSymbolRefExpr();
205  }
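 // Illustrative note (assumption, not in the original source): an operand such
 // as 'gds' may initially be parsed as a reference to a symbol named "gds"; in
 // that case getToken() returns the referenced symbol's name, so the operand
 // can still be matched as the 'gds' token.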
206 
207  bool isSymbolRefExpr() const {
208  return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
209  }
210 
211  bool isImm() const override {
212  return Kind == Immediate;
213  }
214 
215  void setImmKindNone() const {
216  assert(isImm());
217  Imm.Kind = ImmKindTyNone;
218  }
219 
220  void setImmKindLiteral() const {
221  assert(isImm());
222  Imm.Kind = ImmKindTyLiteral;
223  }
224 
225  void setImmKindConst() const {
226  assert(isImm());
227  Imm.Kind = ImmKindTyConst;
228  }
229 
230  bool IsImmKindLiteral() const {
231  return isImm() && Imm.Kind == ImmKindTyLiteral;
232  }
233 
234  bool isImmKindConst() const {
235  return isImm() && Imm.Kind == ImmKindTyConst;
236  }
237 
238  bool isInlinableImm(MVT type) const;
239  bool isLiteralImm(MVT type) const;
240 
241  bool isRegKind() const {
242  return Kind == Register;
243  }
244 
245  bool isReg() const override {
246  return isRegKind() && !hasModifiers();
247  }
248 
249  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
250  return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
251  }
252 
253  bool isRegOrImmWithInt16InputMods() const {
254  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
255  }
256 
257  bool isRegOrImmWithInt32InputMods() const {
258  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
259  }
260 
261  bool isRegOrImmWithInt64InputMods() const {
262  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
263  }
264 
265  bool isRegOrImmWithFP16InputMods() const {
266  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
267  }
268 
269  bool isRegOrImmWithFP32InputMods() const {
270  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
271  }
272 
273  bool isRegOrImmWithFP64InputMods() const {
274  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
275  }
276 
277  bool isVReg() const {
278  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
279  isRegClass(AMDGPU::VReg_64RegClassID) ||
280  isRegClass(AMDGPU::VReg_96RegClassID) ||
281  isRegClass(AMDGPU::VReg_128RegClassID) ||
282  isRegClass(AMDGPU::VReg_160RegClassID) ||
283  isRegClass(AMDGPU::VReg_192RegClassID) ||
284  isRegClass(AMDGPU::VReg_256RegClassID) ||
285  isRegClass(AMDGPU::VReg_512RegClassID) ||
286  isRegClass(AMDGPU::VReg_1024RegClassID);
287  }
288 
289  bool isVReg32() const {
290  return isRegClass(AMDGPU::VGPR_32RegClassID);
291  }
292 
293  bool isVReg32OrOff() const {
294  return isOff() || isVReg32();
295  }
296 
297  bool isNull() const {
298  return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
299  }
300 
301  bool isVRegWithInputMods() const;
302 
303  bool isSDWAOperand(MVT type) const;
304  bool isSDWAFP16Operand() const;
305  bool isSDWAFP32Operand() const;
306  bool isSDWAInt16Operand() const;
307  bool isSDWAInt32Operand() const;
308 
309  bool isImmTy(ImmTy ImmT) const {
310  return isImm() && Imm.Type == ImmT;
311  }
312 
313  bool isImmModifier() const {
314  return isImm() && Imm.Type != ImmTyNone;
315  }
316 
317  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
318  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
319  bool isDMask() const { return isImmTy(ImmTyDMask); }
320  bool isDim() const { return isImmTy(ImmTyDim); }
321  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
322  bool isDA() const { return isImmTy(ImmTyDA); }
323  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
324  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
325  bool isLWE() const { return isImmTy(ImmTyLWE); }
326  bool isOff() const { return isImmTy(ImmTyOff); }
327  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
328  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
329  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
330  bool isOffen() const { return isImmTy(ImmTyOffen); }
331  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
332  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
333  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
334  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
335  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
336 
337  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
338  bool isGDS() const { return isImmTy(ImmTyGDS); }
339  bool isLDS() const { return isImmTy(ImmTyLDS); }
340  bool isCPol() const { return isImmTy(ImmTyCPol); }
341  bool isSWZ() const { return isImmTy(ImmTySWZ); }
342  bool isTFE() const { return isImmTy(ImmTyTFE); }
343  bool isD16() const { return isImmTy(ImmTyD16); }
344  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
345  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
346  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
347  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
348  bool isFI() const { return isImmTy(ImmTyDppFi); }
349  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
350  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
351  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
352  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
353  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
354  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
355  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
356  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
357  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
358  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
359  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
360  bool isHigh() const { return isImmTy(ImmTyHigh); }
361 
362  bool isMod() const {
363  return isClampSI() || isOModSI();
364  }
365 
366  bool isRegOrImm() const {
367  return isReg() || isImm();
368  }
369 
370  bool isRegClass(unsigned RCID) const;
371 
372  bool isInlineValue() const;
373 
374  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
375  return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
376  }
377 
378  bool isSCSrcB16() const {
379  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
380  }
381 
382  bool isSCSrcV2B16() const {
383  return isSCSrcB16();
384  }
385 
386  bool isSCSrcB32() const {
387  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
388  }
389 
390  bool isSCSrcB64() const {
391  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
392  }
393 
394  bool isBoolReg() const;
395 
396  bool isSCSrcF16() const {
397  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
398  }
399 
400  bool isSCSrcV2F16() const {
401  return isSCSrcF16();
402  }
403 
404  bool isSCSrcF32() const {
405  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
406  }
407 
408  bool isSCSrcF64() const {
409  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
410  }
411 
412  bool isSSrcB32() const {
413  return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
414  }
415 
416  bool isSSrcB16() const {
417  return isSCSrcB16() || isLiteralImm(MVT::i16);
418  }
419 
420  bool isSSrcV2B16() const {
421  llvm_unreachable("cannot happen");
422  return isSSrcB16();
423  }
424 
425  bool isSSrcB64() const {
426  // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
427  // See isVSrc64().
428  return isSCSrcB64() || isLiteralImm(MVT::i64);
429  }
430 
431  bool isSSrcF32() const {
432  return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
433  }
434 
435  bool isSSrcF64() const {
436  return isSCSrcB64() || isLiteralImm(MVT::f64);
437  }
438 
439  bool isSSrcF16() const {
440  return isSCSrcB16() || isLiteralImm(MVT::f16);
441  }
442 
443  bool isSSrcV2F16() const {
444  llvm_unreachable("cannot happen");
445  return isSSrcF16();
446  }
447 
448  bool isSSrcV2FP32() const {
449  llvm_unreachable("cannot happen");
450  return isSSrcF32();
451  }
452 
453  bool isSCSrcV2FP32() const {
454  llvm_unreachable("cannot happen");
455  return isSCSrcF32();
456  }
457 
458  bool isSSrcV2INT32() const {
459  llvm_unreachable("cannot happen");
460  return isSSrcB32();
461  }
462 
463  bool isSCSrcV2INT32() const {
464  llvm_unreachable("cannot happen");
465  return isSCSrcB32();
466  }
467 
468  bool isSSrcOrLdsB32() const {
469  return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
470  isLiteralImm(MVT::i32) || isExpr();
471  }
472 
473  bool isVCSrcB32() const {
474  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
475  }
476 
477  bool isVCSrcB64() const {
478  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
479  }
480 
481  bool isVCSrcB16() const {
482  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
483  }
484 
485  bool isVCSrcV2B16() const {
486  return isVCSrcB16();
487  }
488 
489  bool isVCSrcF32() const {
490  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
491  }
492 
493  bool isVCSrcF64() const {
494  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
495  }
496 
497  bool isVCSrcF16() const {
498  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
499  }
500 
501  bool isVCSrcV2F16() const {
502  return isVCSrcF16();
503  }
504 
505  bool isVSrcB32() const {
506  return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
507  }
508 
509  bool isVSrcB64() const {
510  return isVCSrcF64() || isLiteralImm(MVT::i64);
511  }
512 
513  bool isVSrcB16() const {
514  return isVCSrcB16() || isLiteralImm(MVT::i16);
515  }
516 
517  bool isVSrcV2B16() const {
518  return isVSrcB16() || isLiteralImm(MVT::v2i16);
519  }
520 
521  bool isVCSrcV2FP32() const {
522  return isVCSrcF64();
523  }
524 
525  bool isVSrcV2FP32() const {
526  return isVSrcF64() || isLiteralImm(MVT::v2f32);
527  }
528 
529  bool isVCSrcV2INT32() const {
530  return isVCSrcB64();
531  }
532 
533  bool isVSrcV2INT32() const {
534  return isVSrcB64() || isLiteralImm(MVT::v2i32);
535  }
536 
537  bool isVSrcF32() const {
538  return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
539  }
540 
541  bool isVSrcF64() const {
542  return isVCSrcF64() || isLiteralImm(MVT::f64);
543  }
544 
545  bool isVSrcF16() const {
546  return isVCSrcF16() || isLiteralImm(MVT::f16);
547  }
548 
549  bool isVSrcV2F16() const {
550  return isVSrcF16() || isLiteralImm(MVT::v2f16);
551  }
552 
553  bool isVISrcB32() const {
554  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
555  }
556 
557  bool isVISrcB16() const {
558  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
559  }
560 
561  bool isVISrcV2B16() const {
562  return isVISrcB16();
563  }
564 
565  bool isVISrcF32() const {
566  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
567  }
568 
569  bool isVISrcF16() const {
570  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
571  }
572 
573  bool isVISrcV2F16() const {
574  return isVISrcF16() || isVISrcB32();
575  }
576 
577  bool isVISrc_64B64() const {
578  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
579  }
580 
581  bool isVISrc_64F64() const {
582  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
583  }
584 
585  bool isVISrc_64V2FP32() const {
586  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
587  }
588 
589  bool isVISrc_64V2INT32() const {
590  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
591  }
592 
593  bool isVISrc_256B64() const {
594  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
595  }
596 
597  bool isVISrc_256F64() const {
598  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
599  }
600 
601  bool isVISrc_128B16() const {
602  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
603  }
604 
605  bool isVISrc_128V2B16() const {
606  return isVISrc_128B16();
607  }
608 
609  bool isVISrc_128B32() const {
610  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
611  }
612 
613  bool isVISrc_128F32() const {
614  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
615  }
616 
617  bool isVISrc_256V2FP32() const {
618  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
619  }
620 
621  bool isVISrc_256V2INT32() const {
622  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
623  }
624 
625  bool isVISrc_512B32() const {
626  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
627  }
628 
629  bool isVISrc_512B16() const {
630  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
631  }
632 
633  bool isVISrc_512V2B16() const {
634  return isVISrc_512B16();
635  }
636 
637  bool isVISrc_512F32() const {
638  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
639  }
640 
641  bool isVISrc_512F16() const {
642  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
643  }
644 
645  bool isVISrc_512V2F16() const {
646  return isVISrc_512F16() || isVISrc_512B32();
647  }
648 
649  bool isVISrc_1024B32() const {
650  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
651  }
652 
653  bool isVISrc_1024B16() const {
654  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
655  }
656 
657  bool isVISrc_1024V2B16() const {
658  return isVISrc_1024B16();
659  }
660 
661  bool isVISrc_1024F32() const {
662  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
663  }
664 
665  bool isVISrc_1024F16() const {
666  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
667  }
668 
669  bool isVISrc_1024V2F16() const {
670  return isVISrc_1024F16() || isVISrc_1024B32();
671  }
672 
673  bool isAISrcB32() const {
674  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
675  }
676 
677  bool isAISrcB16() const {
678  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
679  }
680 
681  bool isAISrcV2B16() const {
682  return isAISrcB16();
683  }
684 
685  bool isAISrcF32() const {
686  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
687  }
688 
689  bool isAISrcF16() const {
690  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
691  }
692 
693  bool isAISrcV2F16() const {
694  return isAISrcF16() || isAISrcB32();
695  }
696 
697  bool isAISrc_64B64() const {
698  return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
699  }
700 
701  bool isAISrc_64F64() const {
702  return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
703  }
704 
705  bool isAISrc_128B32() const {
706  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
707  }
708 
709  bool isAISrc_128B16() const {
710  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
711  }
712 
713  bool isAISrc_128V2B16() const {
714  return isAISrc_128B16();
715  }
716 
717  bool isAISrc_128F32() const {
718  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
719  }
720 
721  bool isAISrc_128F16() const {
722  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
723  }
724 
725  bool isAISrc_128V2F16() const {
726  return isAISrc_128F16() || isAISrc_128B32();
727  }
728 
729  bool isVISrc_128F16() const {
730  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
731  }
732 
733  bool isVISrc_128V2F16() const {
734  return isVISrc_128F16() || isVISrc_128B32();
735  }
736 
737  bool isAISrc_256B64() const {
738  return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
739  }
740 
741  bool isAISrc_256F64() const {
742  return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
743  }
744 
745  bool isAISrc_512B32() const {
746  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
747  }
748 
749  bool isAISrc_512B16() const {
750  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
751  }
752 
753  bool isAISrc_512V2B16() const {
754  return isAISrc_512B16();
755  }
756 
757  bool isAISrc_512F32() const {
758  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
759  }
760 
761  bool isAISrc_512F16() const {
762  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
763  }
764 
765  bool isAISrc_512V2F16() const {
766  return isAISrc_512F16() || isAISrc_512B32();
767  }
768 
769  bool isAISrc_1024B32() const {
770  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
771  }
772 
773  bool isAISrc_1024B16() const {
774  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
775  }
776 
777  bool isAISrc_1024V2B16() const {
778  return isAISrc_1024B16();
779  }
780 
781  bool isAISrc_1024F32() const {
782  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
783  }
784 
785  bool isAISrc_1024F16() const {
786  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
787  }
788 
789  bool isAISrc_1024V2F16() const {
790  return isAISrc_1024F16() || isAISrc_1024B32();
791  }
792 
793  bool isKImmFP32() const {
794  return isLiteralImm(MVT::f32);
795  }
796 
797  bool isKImmFP16() const {
798  return isLiteralImm(MVT::f16);
799  }
800 
801  bool isMem() const override {
802  return false;
803  }
804 
805  bool isExpr() const {
806  return Kind == Expression;
807  }
808 
809  bool isSoppBrTarget() const {
810  return isExpr() || isImm();
811  }
812 
813  bool isSWaitCnt() const;
814  bool isHwreg() const;
815  bool isSendMsg() const;
816  bool isSwizzle() const;
817  bool isSMRDOffset8() const;
818  bool isSMEMOffset() const;
819  bool isSMRDLiteralOffset() const;
820  bool isDPP8() const;
821  bool isDPPCtrl() const;
822  bool isBLGP() const;
823  bool isCBSZ() const;
824  bool isABID() const;
825  bool isGPRIdxMode() const;
826  bool isS16Imm() const;
827  bool isU16Imm() const;
828  bool isEndpgm() const;
829 
830  StringRef getExpressionAsToken() const {
831  assert(isExpr());
832  const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
833  return S->getSymbol().getName();
834  }
835 
836  StringRef getToken() const {
837  assert(isToken());
838 
839  if (Kind == Expression)
840  return getExpressionAsToken();
841 
842  return StringRef(Tok.Data, Tok.Length);
843  }
844 
845  int64_t getImm() const {
846  assert(isImm());
847  return Imm.Val;
848  }
849 
850  void setImm(int64_t Val) {
851  assert(isImm());
852  Imm.Val = Val;
853  }
854 
855  ImmTy getImmTy() const {
856  assert(isImm());
857  return Imm.Type;
858  }
859 
860  unsigned getReg() const override {
861  assert(isRegKind());
862  return Reg.RegNo;
863  }
864 
865  SMLoc getStartLoc() const override {
866  return StartLoc;
867  }
868 
869  SMLoc getEndLoc() const override {
870  return EndLoc;
871  }
872 
873  SMRange getLocRange() const {
874  return SMRange(StartLoc, EndLoc);
875  }
876 
877  Modifiers getModifiers() const {
878  assert(isRegKind() || isImmTy(ImmTyNone));
879  return isRegKind() ? Reg.Mods : Imm.Mods;
880  }
881 
882  void setModifiers(Modifiers Mods) {
883  assert(isRegKind() || isImmTy(ImmTyNone));
884  if (isRegKind())
885  Reg.Mods = Mods;
886  else
887  Imm.Mods = Mods;
888  }
889 
890  bool hasModifiers() const {
891  return getModifiers().hasModifiers();
892  }
893 
894  bool hasFPModifiers() const {
895  return getModifiers().hasFPModifiers();
896  }
897 
898  bool hasIntModifiers() const {
899  return getModifiers().hasIntModifiers();
900  }
901 
902  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
903 
904  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
905 
906  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
907 
908  template <unsigned Bitwidth>
909  void addKImmFPOperands(MCInst &Inst, unsigned N) const;
910 
911  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
912  addKImmFPOperands<16>(Inst, N);
913  }
914 
915  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
916  addKImmFPOperands<32>(Inst, N);
917  }
918 
919  void addRegOperands(MCInst &Inst, unsigned N) const;
920 
921  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
922  addRegOperands(Inst, N);
923  }
924 
925  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
926  if (isRegKind())
927  addRegOperands(Inst, N);
928  else if (isExpr())
929  Inst.addOperand(MCOperand::createExpr(Expr));
930  else
931  addImmOperands(Inst, N);
932  }
933 
934  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
935  Modifiers Mods = getModifiers();
936  Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
937  if (isRegKind()) {
938  addRegOperands(Inst, N);
939  } else {
940  addImmOperands(Inst, N, false);
941  }
942  }
943 
944  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
945  assert(!hasIntModifiers());
946  addRegOrImmWithInputModsOperands(Inst, N);
947  }
948 
949  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
950  assert(!hasFPModifiers());
951  addRegOrImmWithInputModsOperands(Inst, N);
952  }
953 
954  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
955  Modifiers Mods = getModifiers();
956  Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
957  assert(isRegKind());
958  addRegOperands(Inst, N);
959  }
960 
961  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
962  assert(!hasIntModifiers());
963  addRegWithInputModsOperands(Inst, N);
964  }
965 
966  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
967  assert(!hasFPModifiers());
968  addRegWithInputModsOperands(Inst, N);
969  }
970 
971  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
972  if (isImm())
973  addImmOperands(Inst, N);
974  else {
975  assert(isExpr());
976  Inst.addOperand(MCOperand::createExpr(Expr));
977  }
978  }
979 
980  static void printImmTy(raw_ostream& OS, ImmTy Type) {
981  switch (Type) {
982  case ImmTyNone: OS << "None"; break;
983  case ImmTyGDS: OS << "GDS"; break;
984  case ImmTyLDS: OS << "LDS"; break;
985  case ImmTyOffen: OS << "Offen"; break;
986  case ImmTyIdxen: OS << "Idxen"; break;
987  case ImmTyAddr64: OS << "Addr64"; break;
988  case ImmTyOffset: OS << "Offset"; break;
989  case ImmTyInstOffset: OS << "InstOffset"; break;
990  case ImmTyOffset0: OS << "Offset0"; break;
991  case ImmTyOffset1: OS << "Offset1"; break;
992  case ImmTyCPol: OS << "CPol"; break;
993  case ImmTySWZ: OS << "SWZ"; break;
994  case ImmTyTFE: OS << "TFE"; break;
995  case ImmTyD16: OS << "D16"; break;
996  case ImmTyFORMAT: OS << "FORMAT"; break;
997  case ImmTyClampSI: OS << "ClampSI"; break;
998  case ImmTyOModSI: OS << "OModSI"; break;
999  case ImmTyDPP8: OS << "DPP8"; break;
1000  case ImmTyDppCtrl: OS << "DppCtrl"; break;
1001  case ImmTyDppRowMask: OS << "DppRowMask"; break;
1002  case ImmTyDppBankMask: OS << "DppBankMask"; break;
1003  case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1004  case ImmTyDppFi: OS << "FI"; break;
1005  case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1006  case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1007  case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1008  case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1009  case ImmTyDMask: OS << "DMask"; break;
1010  case ImmTyDim: OS << "Dim"; break;
1011  case ImmTyUNorm: OS << "UNorm"; break;
1012  case ImmTyDA: OS << "DA"; break;
1013  case ImmTyR128A16: OS << "R128A16"; break;
1014  case ImmTyA16: OS << "A16"; break;
1015  case ImmTyLWE: OS << "LWE"; break;
1016  case ImmTyOff: OS << "Off"; break;
1017  case ImmTyExpTgt: OS << "ExpTgt"; break;
1018  case ImmTyExpCompr: OS << "ExpCompr"; break;
1019  case ImmTyExpVM: OS << "ExpVM"; break;
1020  case ImmTyHwreg: OS << "Hwreg"; break;
1021  case ImmTySendMsg: OS << "SendMsg"; break;
1022  case ImmTyInterpSlot: OS << "InterpSlot"; break;
1023  case ImmTyInterpAttr: OS << "InterpAttr"; break;
1024  case ImmTyAttrChan: OS << "AttrChan"; break;
1025  case ImmTyOpSel: OS << "OpSel"; break;
1026  case ImmTyOpSelHi: OS << "OpSelHi"; break;
1027  case ImmTyNegLo: OS << "NegLo"; break;
1028  case ImmTyNegHi: OS << "NegHi"; break;
1029  case ImmTySwizzle: OS << "Swizzle"; break;
1030  case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1031  case ImmTyHigh: OS << "High"; break;
1032  case ImmTyBLGP: OS << "BLGP"; break;
1033  case ImmTyCBSZ: OS << "CBSZ"; break;
1034  case ImmTyABID: OS << "ABID"; break;
1035  case ImmTyEndpgm: OS << "Endpgm"; break;
1036  }
1037  }
1038 
1039  void print(raw_ostream &OS) const override {
1040  switch (Kind) {
1041  case Register:
1042  OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1043  break;
1044  case Immediate:
1045  OS << '<' << getImm();
1046  if (getImmTy() != ImmTyNone) {
1047  OS << " type: "; printImmTy(OS, getImmTy());
1048  }
1049  OS << " mods: " << Imm.Mods << '>';
1050  break;
1051  case Token:
1052  OS << '\'' << getToken() << '\'';
1053  break;
1054  case Expression:
1055  OS << "<expr " << *Expr << '>';
1056  break;
1057  }
1058  }
1059 
1060  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1061  int64_t Val, SMLoc Loc,
1062  ImmTy Type = ImmTyNone,
1063  bool IsFPImm = false) {
1064  auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1065  Op->Imm.Val = Val;
1066  Op->Imm.IsFPImm = IsFPImm;
1067  Op->Imm.Kind = ImmKindTyNone;
1068  Op->Imm.Type = Type;
1069  Op->Imm.Mods = Modifiers();
1070  Op->StartLoc = Loc;
1071  Op->EndLoc = Loc;
1072  return Op;
1073  }
1074 
1075  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1076  StringRef Str, SMLoc Loc,
1077  bool HasExplicitEncodingSize = true) {
1078  auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1079  Res->Tok.Data = Str.data();
1080  Res->Tok.Length = Str.size();
1081  Res->StartLoc = Loc;
1082  Res->EndLoc = Loc;
1083  return Res;
1084  }
1085 
1086  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1087  unsigned RegNo, SMLoc S,
1088  SMLoc E) {
1089  auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1090  Op->Reg.RegNo = RegNo;
1091  Op->Reg.Mods = Modifiers();
1092  Op->StartLoc = S;
1093  Op->EndLoc = E;
1094  return Op;
1095  }
1096 
1097  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1098  const class MCExpr *Expr, SMLoc S) {
1099  auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1100  Op->Expr = Expr;
1101  Op->StartLoc = S;
1102  Op->EndLoc = S;
1103  return Op;
1104  }
1105 };
1106 
1107 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1108  OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
1109  return OS;
1110 }
1111 
1112 //===----------------------------------------------------------------------===//
1113 // AsmParser
1114 //===----------------------------------------------------------------------===//
1115 
1116 // Holds info related to the current kernel, e.g. count of SGPRs used.
1117 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1118 // .amdgpu_hsa_kernel or at EOF.
1119 class KernelScopeInfo {
1120  int SgprIndexUnusedMin = -1;
1121  int VgprIndexUnusedMin = -1;
1122  MCContext *Ctx = nullptr;
1123 
1124  void usesSgprAt(int i) {
1125  if (i >= SgprIndexUnusedMin) {
1126  SgprIndexUnusedMin = ++i;
1127  if (Ctx) {
1128  MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1129  Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1130  }
1131  }
1132  }
1133 
1134  void usesVgprAt(int i) {
1135  if (i >= VgprIndexUnusedMin) {
1136  VgprIndexUnusedMin = ++i;
1137  if (Ctx) {
1138  MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1139  Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1140  }
1141  }
1142  }
1143 
1144 public:
1145  KernelScopeInfo() = default;
1146 
1147  void initialize(MCContext &Context) {
1148  Ctx = &Context;
1149  usesSgprAt(SgprIndexUnusedMin = -1);
1150  usesVgprAt(VgprIndexUnusedMin = -1);
1151  }
1152 
1153  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1154  switch (RegKind) {
1155  case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1156  case IS_AGPR: // fall through
1157  case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1158  default: break;
1159  }
1160  }
1161 };
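// Illustrative sketch (assumption, not from the original source): within a
// kernel scope such as
//   .amdgpu_hsa_kernel my_kernel
//   my_kernel:
//     v_mov_b32 v3, s5
// the highest register indices referenced leave .kernel.vgpr_count = 4 and
// .kernel.sgpr_count = 6 (index of the last register used, plus one).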
1162 
1163 class AMDGPUAsmParser : public MCTargetAsmParser {
1164  MCAsmParser &Parser;
1165 
1166  // Number of extra operands parsed after the first optional operand.
1167  // This may be necessary to skip hardcoded mandatory operands.
1168  static const unsigned MAX_OPR_LOOKAHEAD = 8;
1169 
1170  unsigned ForcedEncodingSize = 0;
1171  bool ForcedDPP = false;
1172  bool ForcedSDWA = false;
1173  KernelScopeInfo KernelScope;
1174  unsigned CPolSeen;
1175 
1176  /// @name Auto-generated Match Functions
1177  /// {
1178 
1179 #define GET_ASSEMBLER_HEADER
1180 #include "AMDGPUGenAsmMatcher.inc"
1181 
1182  /// }
1183 
1184 private:
1185  bool ParseAsAbsoluteExpression(uint32_t &Ret);
1186  bool OutOfRangeError(SMRange Range);
1187  /// Calculate VGPR/SGPR blocks required for the given target, reserved
1188  /// registers, and user-specified NextFreeXGPR values.
1189  ///
1190  /// \param Features [in] Target features, used for bug corrections.
1191  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1192  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1193  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1194  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1195  /// descriptor field, if valid.
1196  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1197  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1198  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1199  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1200  /// \param VGPRBlocks [out] Result VGPR block count.
1201  /// \param SGPRBlocks [out] Result SGPR block count.
1202  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1203  bool FlatScrUsed, bool XNACKUsed,
1204  Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1205  SMRange VGPRRange, unsigned NextFreeSGPR,
1206  SMRange SGPRRange, unsigned &VGPRBlocks,
1207  unsigned &SGPRBlocks);
1208  bool ParseDirectiveAMDGCNTarget();
1209  bool ParseDirectiveAMDHSAKernel();
1210  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1211  bool ParseDirectiveHSACodeObjectVersion();
1212  bool ParseDirectiveHSACodeObjectISA();
1213  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1214  bool ParseDirectiveAMDKernelCodeT();
1215  // TODO: Possibly make subtargetHasRegister const.
1216  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1217  bool ParseDirectiveAMDGPUHsaKernel();
1218 
1219  bool ParseDirectiveISAVersion();
1220  bool ParseDirectiveHSAMetadata();
1221  bool ParseDirectivePALMetadataBegin();
1222  bool ParseDirectivePALMetadata();
1223  bool ParseDirectiveAMDGPULDS();
1224 
1225  /// Common code to parse out a block of text (typically YAML) between start and
1226  /// end directives.
1227  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1228  const char *AssemblerDirectiveEnd,
1229  std::string &CollectString);
1230 
1231  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1232  RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1233  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1234  unsigned &RegNum, unsigned &RegWidth,
1235  bool RestoreOnFailure = false);
1236  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1237  unsigned &RegNum, unsigned &RegWidth,
1238  SmallVectorImpl<AsmToken> &Tokens);
1239  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1240  unsigned &RegWidth,
1241  SmallVectorImpl<AsmToken> &Tokens);
1242  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1243  unsigned &RegWidth,
1244  SmallVectorImpl<AsmToken> &Tokens);
1245  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1246  unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1247  bool ParseRegRange(unsigned& Num, unsigned& Width);
1248  unsigned getRegularReg(RegisterKind RegKind,
1249  unsigned RegNum,
1250  unsigned RegWidth,
1251  SMLoc Loc);
1252 
1253  bool isRegister();
1254  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1255  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1256  void initializeGprCountSymbol(RegisterKind RegKind);
1257  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1258  unsigned RegWidth);
1259  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1260  bool IsAtomic, bool IsLds = false);
1261  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1262  bool IsGdsHardcoded);
1263 
1264 public:
1265  enum AMDGPUMatchResultTy {
1266  Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1267  };
1268  enum OperandMode {
1269  OperandMode_Default,
1270  OperandMode_NSA,
1271  };
1272 
1273  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1274 
1275  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1276  const MCInstrInfo &MII,
1277  const MCTargetOptions &Options)
1278  : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1279  MCAsmParserExtension::Initialize(Parser);
1280 
1281  if (getFeatureBits().none()) {
1282  // Set default features.
1283  copySTI().ToggleFeature("southern-islands");
1284  }
1285 
1286  setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1287 
1288  {
1289  // TODO: make these pre-defined variables read-only.
1290  // Currently there is no suitable machinery in core llvm-mc for this.
1291  // MCSymbol::isRedefinable is intended for another purpose, and
1292  // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1293  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1294  MCContext &Ctx = getContext();
1295  if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1296  MCSymbol *Sym =
1297  Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1298  Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1299  Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1300  Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1301  Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1302  Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1303  } else {
1304  MCSymbol *Sym =
1305  Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1306  Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1307  Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1308  Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1309  Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1310  Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1311  }
1312  if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1313  initializeGprCountSymbol(IS_VGPR);
1314  initializeGprCountSymbol(IS_SGPR);
1315  } else
1316  KernelScope.initialize(getContext());
1317  }
1318  }
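 // Illustrative sketch (assumption, not part of the original source): the
 // version symbols defined above let assembly select code per target, e.g.
 //   .if .amdgcn.gfx_generation_number >= 9   // or .option.machine_version_major
 //     s_endpgm
 //   .endif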
1319 
1320  bool hasMIMG_R128() const {
1321  return AMDGPU::hasMIMG_R128(getSTI());
1322  }
1323 
1324  bool hasPackedD16() const {
1325  return AMDGPU::hasPackedD16(getSTI());
1326  }
1327 
1328  bool hasGFX10A16() const {
1329  return AMDGPU::hasGFX10A16(getSTI());
1330  }
1331 
1332  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1333 
1334  bool isSI() const {
1335  return AMDGPU::isSI(getSTI());
1336  }
1337 
1338  bool isCI() const {
1339  return AMDGPU::isCI(getSTI());
1340  }
1341 
1342  bool isVI() const {
1343  return AMDGPU::isVI(getSTI());
1344  }
1345 
1346  bool isGFX9() const {
1347  return AMDGPU::isGFX9(getSTI());
1348  }
1349 
1350  bool isGFX90A() const {
1351  return AMDGPU::isGFX90A(getSTI());
1352  }
1353 
1354  bool isGFX9Plus() const {
1355  return AMDGPU::isGFX9Plus(getSTI());
1356  }
1357 
1358  bool isGFX10() const {
1359  return AMDGPU::isGFX10(getSTI());
1360  }
1361 
1362  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1363 
1364  bool isGFX10_BEncoding() const {
1365  return AMDGPU::isGFX10_BEncoding(getSTI());
1366  }
1367 
1368  bool hasInv2PiInlineImm() const {
1369  return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1370  }
1371 
1372  bool hasFlatOffsets() const {
1373  return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1374  }
1375 
1376  bool hasArchitectedFlatScratch() const {
1377  return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1378  }
1379 
1380  bool hasSGPR102_SGPR103() const {
1381  return !isVI() && !isGFX9();
1382  }
1383 
1384  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1385 
1386  bool hasIntClamp() const {
1387  return getFeatureBits()[AMDGPU::FeatureIntClamp];
1388  }
1389 
1390  AMDGPUTargetStreamer &getTargetStreamer() {
1391  MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1392  return static_cast<AMDGPUTargetStreamer &>(TS);
1393  }
1394 
1395  const MCRegisterInfo *getMRI() const {
1396  // We need this const_cast because for some reason getContext() is not const
1397  // in MCAsmParser.
1398  return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1399  }
1400 
1401  const MCInstrInfo *getMII() const {
1402  return &MII;
1403  }
1404 
1405  const FeatureBitset &getFeatureBits() const {
1406  return getSTI().getFeatureBits();
1407  }
1408 
1409  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1410  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1411  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1412 
1413  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1414  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1415  bool isForcedDPP() const { return ForcedDPP; }
1416  bool isForcedSDWA() const { return ForcedSDWA; }
1417  ArrayRef<unsigned> getMatchedVariants() const;
1418  StringRef getMatchedVariantName() const;
1419 
1420  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1421  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1422  bool RestoreOnFailure);
1423  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1424  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1425  SMLoc &EndLoc) override;
1426  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1427  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1428  unsigned Kind) override;
1429  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1430  OperandVector &Operands, MCStreamer &Out,
1431  uint64_t &ErrorInfo,
1432  bool MatchingInlineAsm) override;
1433  bool ParseDirective(AsmToken DirectiveID) override;
1434  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1435  OperandMode Mode = OperandMode_Default);
1436  StringRef parseMnemonicSuffix(StringRef Name);
1437  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1438  SMLoc NameLoc, OperandVector &Operands) override;
1439  //bool ProcessInstruction(MCInst &Inst);
1440 
1441  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1442 
1443  OperandMatchResultTy
1444  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1445  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1446  bool (*ConvertResult)(int64_t &) = nullptr);
1447 
1448  OperandMatchResultTy
1449  parseOperandArrayWithPrefix(const char *Prefix,
1450  OperandVector &Operands,
1451  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1452  bool (*ConvertResult)(int64_t&) = nullptr);
1453 
1454  OperandMatchResultTy
1455  parseNamedBit(StringRef Name, OperandVector &Operands,
1456  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1457  OperandMatchResultTy parseCPol(OperandVector &Operands);
1458  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1459  StringRef &Value,
1460  SMLoc &StringLoc);
1461 
1462  bool isModifier();
1463  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1464  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1465  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1466  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1467  bool parseSP3NegModifier();
1468  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1469  OperandMatchResultTy parseReg(OperandVector &Operands);
1470  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1471  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1472  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1473  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1474  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1475  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1476  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1477  OperandMatchResultTy parseUfmt(int64_t &Format);
1478  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1479  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1480  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1481  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1482  OperandMatchResultTy parseNumericFormat(int64_t &Format);
1483  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1484  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1485 
1486  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1487  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1488  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1489  void cvtExp(MCInst &Inst, const OperandVector &Operands);
1490 
1491  bool parseCnt(int64_t &IntVal);
1492  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1493  OperandMatchResultTy parseHwreg(OperandVector &Operands);
1494 
1495 private:
1496  struct OperandInfoTy {
1497  SMLoc Loc;
1498  int64_t Id;
1499  bool IsSymbolic = false;
1500  bool IsDefined = false;
1501 
1502  OperandInfoTy(int64_t Id_) : Id(Id_) {}
1503  };
1504 
1505  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1506  bool validateSendMsg(const OperandInfoTy &Msg,
1507  const OperandInfoTy &Op,
1508  const OperandInfoTy &Stream);
1509 
1510  bool parseHwregBody(OperandInfoTy &HwReg,
1511  OperandInfoTy &Offset,
1512  OperandInfoTy &Width);
1513  bool validateHwreg(const OperandInfoTy &HwReg,
1514  const OperandInfoTy &Offset,
1515  const OperandInfoTy &Width);
1516 
1517  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1518  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1519 
1520  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1521  const OperandVector &Operands) const;
1522  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1523  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1524  SMLoc getLitLoc(const OperandVector &Operands) const;
1525  SMLoc getConstLoc(const OperandVector &Operands) const;
1526 
1527  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1528  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1529  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1530  bool validateSOPLiteral(const MCInst &Inst) const;
1531  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1532  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1533  bool validateIntClampSupported(const MCInst &Inst);
1534  bool validateMIMGAtomicDMask(const MCInst &Inst);
1535  bool validateMIMGGatherDMask(const MCInst &Inst);
1536  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1537  bool validateMIMGDataSize(const MCInst &Inst);
1538  bool validateMIMGAddrSize(const MCInst &Inst);
1539  bool validateMIMGD16(const MCInst &Inst);
1540  bool validateMIMGDim(const MCInst &Inst);
1541  bool validateMIMGMSAA(const MCInst &Inst);
1542  bool validateOpSel(const MCInst &Inst);
1543  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1544  bool validateVccOperand(unsigned Reg) const;
1545  bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
1546  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1547  bool validateAGPRLdSt(const MCInst &Inst) const;
1548  bool validateVGPRAlign(const MCInst &Inst) const;
1549  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1550  bool validateDivScale(const MCInst &Inst);
1551  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1552  const SMLoc &IDLoc);
1553  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1554  unsigned getConstantBusLimit(unsigned Opcode) const;
1555  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1556  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1557  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1558 
1559  bool isSupportedMnemo(StringRef Mnemo,
1560  const FeatureBitset &FBS);
1561  bool isSupportedMnemo(StringRef Mnemo,
1562  const FeatureBitset &FBS,
1563  ArrayRef<unsigned> Variants);
1564  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1565 
1566  bool isId(const StringRef Id) const;
1567  bool isId(const AsmToken &Token, const StringRef Id) const;
1568  bool isToken(const AsmToken::TokenKind Kind) const;
1569  bool trySkipId(const StringRef Id);
1570  bool trySkipId(const StringRef Pref, const StringRef Id);
1571  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1572  bool trySkipToken(const AsmToken::TokenKind Kind);
1573  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1574  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1575  bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1576 
1577  void peekTokens(MutableArrayRef<AsmToken> Tokens);
1578  AsmToken::TokenKind getTokenKind() const;
1579  bool parseExpr(int64_t &Imm, StringRef Expected = "");
1580  bool parseExpr(OperandVector &Operands);
1581  StringRef getTokenStr() const;
1582  AsmToken peekToken();
1583  AsmToken getToken() const;
1584  SMLoc getLoc() const;
1585  void lex();
1586 
1587 public:
1588  void onBeginOfFile() override;
1589 
1590  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1591  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1592 
1593  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1594  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1595  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1596  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1597  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1598  OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1599 
1600  bool parseSwizzleOperand(int64_t &Op,
1601  const unsigned MinVal,
1602  const unsigned MaxVal,
1603  const StringRef ErrMsg,
1604  SMLoc &Loc);
1605  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1606  const unsigned MinVal,
1607  const unsigned MaxVal,
1608  const StringRef ErrMsg);
1609  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1610  bool parseSwizzleOffset(int64_t &Imm);
1611  bool parseSwizzleMacro(int64_t &Imm);
1612  bool parseSwizzleQuadPerm(int64_t &Imm);
1613  bool parseSwizzleBitmaskPerm(int64_t &Imm);
1614  bool parseSwizzleBroadcast(int64_t &Imm);
1615  bool parseSwizzleSwap(int64_t &Imm);
1616  bool parseSwizzleReverse(int64_t &Imm);
1617 
1618  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1619  int64_t parseGPRIdxMacro();
1620 
1621  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1622  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1623  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1624  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1625 
1626  AMDGPUOperand::Ptr defaultCPol() const;
1627 
1628  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1629  AMDGPUOperand::Ptr defaultSMEMOffset() const;
1630  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1631  AMDGPUOperand::Ptr defaultFlatOffset() const;
1632 
1633  OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1634 
1635  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1636  OptionalImmIndexMap &OptionalIdx);
1637  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1638  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1639  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1640  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1641  OptionalImmIndexMap &OptionalIdx);
1642 
1643  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1644 
1645  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1646  bool IsAtomic = false);
1647  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1648  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1649 
1650  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1651 
1652  bool parseDimId(unsigned &Encoding);
1653  OperandMatchResultTy parseDim(OperandVector &Operands);
1654  OperandMatchResultTy parseDPP8(OperandVector &Operands);
1655  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1656  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1657  int64_t parseDPPCtrlSel(StringRef Ctrl);
1658  int64_t parseDPPCtrlPerm();
1659  AMDGPUOperand::Ptr defaultRowMask() const;
1660  AMDGPUOperand::Ptr defaultBankMask() const;
1661  AMDGPUOperand::Ptr defaultBoundCtrl() const;
1662  AMDGPUOperand::Ptr defaultFI() const;
1663  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1664  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1665 
1666  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1667  AMDGPUOperand::ImmTy Type);
1668  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1669  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1670  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1671  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1672  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1673  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1674  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1675  uint64_t BasicInstType,
1676  bool SkipDstVcc = false,
1677  bool SkipSrcVcc = false);
1678 
1679  AMDGPUOperand::Ptr defaultBLGP() const;
1680  AMDGPUOperand::Ptr defaultCBSZ() const;
1681  AMDGPUOperand::Ptr defaultABID() const;
1682 
1683  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1684  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1685 };
1686 
1687 struct OptionalOperand {
1688  const char *Name;
1689  AMDGPUOperand::ImmTy Type;
1690  bool IsBit;
1691  bool (*ConvertResult)(int64_t&);
1692 };
1693 
1694 } // end anonymous namespace
1695 
1696 // May be called with an integer type of equivalent bit width.
1697 static const fltSemantics *getFltSemantics(unsigned Size) {
1698  switch (Size) {
1699  case 4:
1700  return &APFloat::IEEEsingle();
1701  case 8:
1702  return &APFloat::IEEEdouble();
1703  case 2:
1704  return &APFloat::IEEEhalf();
1705  default:
1706  llvm_unreachable("unsupported fp type");
1707  }
1708 }
1709 
1710 static const fltSemantics *getFltSemantics(MVT VT) {
1711  return getFltSemantics(VT.getSizeInBits() / 8);
1712 }
1713 
1714 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1715  switch (OperandType) {
1716  case AMDGPU::OPERAND_REG_IMM_INT32:
1717  case AMDGPU::OPERAND_REG_IMM_FP32:
1718  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1719  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1720  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1721  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1722  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1723  case AMDGPU::OPERAND_REG_IMM_V2FP32:
1724  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1725  case AMDGPU::OPERAND_REG_IMM_V2INT32:
1726  return &APFloat::IEEEsingle();
1727  case AMDGPU::OPERAND_REG_IMM_INT64:
1728  case AMDGPU::OPERAND_REG_IMM_FP64:
1729  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1730  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1731  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1732  return &APFloat::IEEEdouble();
1733  case AMDGPU::OPERAND_REG_IMM_INT16:
1734  case AMDGPU::OPERAND_REG_IMM_FP16:
1735  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1736  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1737  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1738  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1739  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1740  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1741  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1742  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1743  case AMDGPU::OPERAND_REG_IMM_V2INT16:
1744  case AMDGPU::OPERAND_REG_IMM_V2FP16:
1745  return &APFloat::IEEEhalf();
1746  default:
1747  llvm_unreachable("unsupported fp type");
1748  }
1749 }
1750 
1751 //===----------------------------------------------------------------------===//
1752 // Operand
1753 //===----------------------------------------------------------------------===//
1754 
1755 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1756  bool Lost;
1757 
1758  // Convert the literal to the floating-point type given by VT
1759  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1760  APFloat::rmNearestTiesToEven,
1761  &Lost);
1762  // We allow precision loss but not overflow or underflow
1763  if (Status != APFloat::opOK &&
1764  Lost &&
1765  ((Status & APFloat::opOverflow) != 0 ||
1766  (Status & APFloat::opUnderflow) != 0)) {
1767  return false;
1768  }
1769 
1770  return true;
1771 }
1772 
1773 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1774  return isUIntN(Size, Val) || isIntN(Size, Val);
1775 }
1776 
1777 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1778  if (VT.getScalarType() == MVT::i16) {
1779  // FP immediate values are broken.
1780  return isInlinableIntLiteral(Val);
1781  }
1782 
1783  // f16/v2f16 operands work correctly for all values.
1784  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1785 }
1786 
1787 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1788 
1789  // This is a hack to enable named inline values like
1790  // shared_base with both 32-bit and 64-bit operands.
1791  // Note that these values are defined as
1792  // 32-bit operands only.
1793  if (isInlineValue()) {
1794  return true;
1795  }
1796 
1797  if (!isImmTy(ImmTyNone)) {
1798  // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1799  return false;
1800  }
1801  // TODO: We should avoid using host float here. It would be better to
1802  // check the float bit values which is what a few other places do.
1803  // We've had bot failures before due to weird NaN support on mips hosts.
1804 
1805  APInt Literal(64, Imm.Val);
1806 
1807  if (Imm.IsFPImm) { // We got fp literal token
1808  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1809  return AMDGPU::isInlinableLiteral64(Imm.Val,
1810  AsmParser->hasInv2PiInlineImm());
1811  }
1812 
1813  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1814  if (!canLosslesslyConvertToFPType(FPLiteral, type))
1815  return false;
1816 
1817  if (type.getScalarSizeInBits() == 16) {
1818  return isInlineableLiteralOp16(
1819  static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1820  type, AsmParser->hasInv2PiInlineImm());
1821  }
1822 
1823  // Check if single precision literal is inlinable
1824  return AMDGPU::isInlinableLiteral32(
1825  static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1826  AsmParser->hasInv2PiInlineImm());
1827  }
1828 
1829  // We got int literal token.
1830  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1831  return AMDGPU::isInlinableLiteral64(Imm.Val,
1832  AsmParser->hasInv2PiInlineImm());
1833  }
1834 
1835  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1836  return false;
1837  }
1838 
1839  if (type.getScalarSizeInBits() == 16) {
1840  return isInlineableLiteralOp16(
1841  static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1842  type, AsmParser->hasInv2PiInlineImm());
1843  }
1844 
1845  return AMDGPU::isInlinableLiteral32(
1846  static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1847  AsmParser->hasInv2PiInlineImm());
1848 }
1849 
1850 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1851  // Check that this immediate can be added as literal
1852  if (!isImmTy(ImmTyNone)) {
1853  return false;
1854  }
1855 
1856  if (!Imm.IsFPImm) {
1857  // We got int literal token.
1858 
1859  if (type == MVT::f64 && hasFPModifiers()) {
1860  // Cannot apply fp modifiers to int literals preserving the same semantics
1861  // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1862  // disable these cases.
1863  return false;
1864  }
1865 
1866  unsigned Size = type.getSizeInBits();
1867  if (Size == 64)
1868  Size = 32;
1869 
1870  // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1871  // types.
1872  return isSafeTruncation(Imm.Val, Size);
1873  }
1874 
1875  // We got fp literal token
1876  if (type == MVT::f64) { // Expected 64-bit fp operand
1877  // The low 32 bits of such a literal are set to zeroes, but we accept it anyway
1878  return true;
1879  }
1880 
1881  if (type == MVT::i64) { // Expected 64-bit int operand
1882  // We don't allow fp literals in 64-bit integer instructions. It is
1883  // unclear how we should encode them.
1884  return false;
1885  }
1886 
1887  // We allow fp literals with f16x2 operands assuming that the specified
1888  // literal goes into the lower half and the upper half is zero. We also
1889  // require that the literal can be losslessly converted to f16.
1890  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1891  (type == MVT::v2i16)? MVT::i16 :
1892  (type == MVT::v2f32)? MVT::f32 : type;
1893 
1894  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1895  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1896 }
1897 
1898 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1899  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1900 }
1901 
1902 bool AMDGPUOperand::isVRegWithInputMods() const {
1903  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1904  // GFX90A allows DPP on 64-bit operands.
1905  (isRegClass(AMDGPU::VReg_64RegClassID) &&
1906  AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1907 }
1908 
1909 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1910  if (AsmParser->isVI())
1911  return isVReg32();
1912  else if (AsmParser->isGFX9Plus())
1913  return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1914  else
1915  return false;
1916 }
1917 
1918 bool AMDGPUOperand::isSDWAFP16Operand() const {
1919  return isSDWAOperand(MVT::f16);
1920 }
1921 
1922 bool AMDGPUOperand::isSDWAFP32Operand() const {
1923  return isSDWAOperand(MVT::f32);
1924 }
1925 
1926 bool AMDGPUOperand::isSDWAInt16Operand() const {
1927  return isSDWAOperand(MVT::i16);
1928 }
1929 
1930 bool AMDGPUOperand::isSDWAInt32Operand() const {
1931  return isSDWAOperand(MVT::i32);
1932 }
1933 
1934 bool AMDGPUOperand::isBoolReg() const {
1935  auto FB = AsmParser->getFeatureBits();
1936  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1937  (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1938 }
1939 
1940 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1941 {
1942  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1943  assert(Size == 2 || Size == 4 || Size == 8);
1944 
1945  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1946 
1947  if (Imm.Mods.Abs) {
1948  Val &= ~FpSignMask;
1949  }
1950  if (Imm.Mods.Neg) {
1951  Val ^= FpSignMask;
1952  }
1953 
1954  return Val;
1955 }
1956 
1957 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1958  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1959  Inst.getNumOperands())) {
1960  addLiteralImmOperand(Inst, Imm.Val,
1961  ApplyModifiers &
1962  isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1963  } else {
1964  assert(!isImmTy(ImmTyNone) || !hasModifiers());
1965  Inst.addOperand(MCOperand::createImm(Imm.Val));
1966  setImmKindNone();
1967  }
1968 }
1969 
1970 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1971  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1972  auto OpNum = Inst.getNumOperands();
1973  // Check that this operand accepts literals
1974  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1975 
1976  if (ApplyModifiers) {
1977  assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1978  const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1979  Val = applyInputFPModifiers(Val, Size);
1980  }
1981 
1982  APInt Literal(64, Val);
1983  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1984 
1985  if (Imm.IsFPImm) { // We got fp literal token
1986  switch (OpTy) {
1987  case AMDGPU::OPERAND_REG_IMM_INT64:
1988  case AMDGPU::OPERAND_REG_IMM_FP64:
1989  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1990  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1991  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1992  if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1993  AsmParser->hasInv2PiInlineImm())) {
1994  Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1995  setImmKindConst();
1996  return;
1997  }
1998 
1999  // Non-inlineable
2000  if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2001  // For fp operands we check if low 32 bits are zeros
2002  if (Literal.getLoBits(32) != 0) {
2003  const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2004  "Can't encode literal as exact 64-bit floating-point operand. "
2005  "Low 32-bits will be set to zero");
2006  }
2007 
2008  Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2009  setImmKindLiteral();
2010  return;
2011  }
2012 
2013  // We don't allow fp literals in 64-bit integer instructions. It is
2014  // unclear how we should encode them. This case should be checked earlier
2015  // in predicate methods (isLiteralImm())
2016  llvm_unreachable("fp literal in 64-bit integer instruction.");
2017 
2018  case AMDGPU::OPERAND_REG_IMM_INT32:
2019  case AMDGPU::OPERAND_REG_IMM_FP32:
2020  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2021  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2022  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2023  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2024  case AMDGPU::OPERAND_REG_IMM_INT16:
2025  case AMDGPU::OPERAND_REG_IMM_FP16:
2026  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2027  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2028  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2029  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2030  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2031  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2032  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2033  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2034  case AMDGPU::OPERAND_REG_IMM_V2INT16:
2035  case AMDGPU::OPERAND_REG_IMM_V2FP16:
2036  case AMDGPU::OPERAND_REG_IMM_V2FP32:
2037  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2038  case AMDGPU::OPERAND_REG_IMM_V2INT32:
2039  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: {
2040  bool lost;
2041  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2042  // Convert the literal to the operand's floating-point type
2043  FPLiteral.convert(*getOpFltSemantics(OpTy),
2044  APFloat::rmNearestTiesToEven, &lost);
2045  // We allow precision loss but not overflow or underflow. This should be
2046  // checked earlier in isLiteralImm()
2047 
2048  uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2049  Inst.addOperand(MCOperand::createImm(ImmVal));
2050  setImmKindLiteral();
2051  return;
2052  }
2053  default:
2054  llvm_unreachable("invalid operand size");
2055  }
2056 
2057  return;
2058  }
2059 
2060  // We got int literal token.
2061  // Only sign extend inline immediates.
2062  switch (OpTy) {
2063  case AMDGPU::OPERAND_REG_IMM_INT32:
2064  case AMDGPU::OPERAND_REG_IMM_FP32:
2065  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2066  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2067  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2068  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2069  case AMDGPU::OPERAND_REG_IMM_V2INT16:
2070  case AMDGPU::OPERAND_REG_IMM_V2FP16:
2071  case AMDGPU::OPERAND_REG_IMM_V2FP32:
2072  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2073  case AMDGPU::OPERAND_REG_IMM_V2INT32:
2074  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2075  if (isSafeTruncation(Val, 32) &&
2076  AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2077  AsmParser->hasInv2PiInlineImm())) {
2078  Inst.addOperand(MCOperand::createImm(Val));
2079  setImmKindConst();
2080  return;
2081  }
2082 
2083  Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2084  setImmKindLiteral();
2085  return;
2086 
2087  case AMDGPU::OPERAND_REG_IMM_INT64:
2088  case AMDGPU::OPERAND_REG_IMM_FP64:
2089  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2090  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2091  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2092  if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2093  Inst.addOperand(MCOperand::createImm(Val));
2094  setImmKindConst();
2095  return;
2096  }
2097 
2098  Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2099  setImmKindLiteral();
2100  return;
2101 
2102  case AMDGPU::OPERAND_REG_IMM_INT16:
2103  case AMDGPU::OPERAND_REG_IMM_FP16:
2104  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2105  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2106  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2107  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2108  if (isSafeTruncation(Val, 16) &&
2109  AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2110  AsmParser->hasInv2PiInlineImm())) {
2111  Inst.addOperand(MCOperand::createImm(Val));
2112  setImmKindConst();
2113  return;
2114  }
2115 
2116  Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2117  setImmKindLiteral();
2118  return;
2119 
2120  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2121  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2122  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2123  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2124  assert(isSafeTruncation(Val, 16));
2125  assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2126  AsmParser->hasInv2PiInlineImm()));
2127 
2128  Inst.addOperand(MCOperand::createImm(Val));
2129  return;
2130  }
2131  default:
2132  llvm_unreachable("invalid operand size");
2133  }
2134 }
2135 
2136 template <unsigned Bitwidth>
2137 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2138  APInt Literal(64, Imm.Val);
2139  setImmKindNone();
2140 
2141  if (!Imm.IsFPImm) {
2142  // We got int literal token.
2143  Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2144  return;
2145  }
2146 
2147  bool Lost;
2148  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2149  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2150  APFloat::rmNearestTiesToEven, &Lost);
2151  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2152 }
2153 
2154 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2155  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2156 }
2157 
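// Special registers that are accepted as named inline constant operands
// (see AMDGPUOperand::isInlineValue below).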
2158 static bool isInlineValue(unsigned Reg) {
2159  switch (Reg) {
2160  case AMDGPU::SRC_SHARED_BASE:
2161  case AMDGPU::SRC_SHARED_LIMIT:
2162  case AMDGPU::SRC_PRIVATE_BASE:
2163  case AMDGPU::SRC_PRIVATE_LIMIT:
2164  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2165  return true;
2166  case AMDGPU::SRC_VCCZ:
2167  case AMDGPU::SRC_EXECZ:
2168  case AMDGPU::SRC_SCC:
2169  return true;
2170  case AMDGPU::SGPR_NULL:
2171  return true;
2172  default:
2173  return false;
2174  }
2175 }
2176 
2177 bool AMDGPUOperand::isInlineValue() const {
2178  return isRegKind() && ::isInlineValue(getReg());
2179 }
2180 
2181 //===----------------------------------------------------------------------===//
2182 // AsmParser
2183 //===----------------------------------------------------------------------===//
2184 
2185 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2186  if (Is == IS_VGPR) {
2187  switch (RegWidth) {
2188  default: return -1;
2189  case 1: return AMDGPU::VGPR_32RegClassID;
2190  case 2: return AMDGPU::VReg_64RegClassID;
2191  case 3: return AMDGPU::VReg_96RegClassID;
2192  case 4: return AMDGPU::VReg_128RegClassID;
2193  case 5: return AMDGPU::VReg_160RegClassID;
2194  case 6: return AMDGPU::VReg_192RegClassID;
2195  case 8: return AMDGPU::VReg_256RegClassID;
2196  case 16: return AMDGPU::VReg_512RegClassID;
2197  case 32: return AMDGPU::VReg_1024RegClassID;
2198  }
2199  } else if (Is == IS_TTMP) {
2200  switch (RegWidth) {
2201  default: return -1;
2202  case 1: return AMDGPU::TTMP_32RegClassID;
2203  case 2: return AMDGPU::TTMP_64RegClassID;
2204  case 4: return AMDGPU::TTMP_128RegClassID;
2205  case 8: return AMDGPU::TTMP_256RegClassID;
2206  case 16: return AMDGPU::TTMP_512RegClassID;
2207  }
2208  } else if (Is == IS_SGPR) {
2209  switch (RegWidth) {
2210  default: return -1;
2211  case 1: return AMDGPU::SGPR_32RegClassID;
2212  case 2: return AMDGPU::SGPR_64RegClassID;
2213  case 3: return AMDGPU::SGPR_96RegClassID;
2214  case 4: return AMDGPU::SGPR_128RegClassID;
2215  case 5: return AMDGPU::SGPR_160RegClassID;
2216  case 6: return AMDGPU::SGPR_192RegClassID;
2217  case 8: return AMDGPU::SGPR_256RegClassID;
2218  case 16: return AMDGPU::SGPR_512RegClassID;
2219  }
2220  } else if (Is == IS_AGPR) {
2221  switch (RegWidth) {
2222  default: return -1;
2223  case 1: return AMDGPU::AGPR_32RegClassID;
2224  case 2: return AMDGPU::AReg_64RegClassID;
2225  case 3: return AMDGPU::AReg_96RegClassID;
2226  case 4: return AMDGPU::AReg_128RegClassID;
2227  case 5: return AMDGPU::AReg_160RegClassID;
2228  case 6: return AMDGPU::AReg_192RegClassID;
2229  case 8: return AMDGPU::AReg_256RegClassID;
2230  case 16: return AMDGPU::AReg_512RegClassID;
2231  case 32: return AMDGPU::AReg_1024RegClassID;
2232  }
2233  }
2234  return -1;
2235 }
2236 
2237 static unsigned getSpecialRegForName(StringRef RegName) {
2238  return StringSwitch<unsigned>(RegName)
2239  .Case("exec", AMDGPU::EXEC)
2240  .Case("vcc", AMDGPU::VCC)
2241  .Case("flat_scratch", AMDGPU::FLAT_SCR)
2242  .Case("xnack_mask", AMDGPU::XNACK_MASK)
2243  .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2244  .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2245  .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2246  .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2247  .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2248  .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2249  .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2250  .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2251  .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2252  .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2253  .Case("lds_direct", AMDGPU::LDS_DIRECT)
2254  .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2255  .Case("m0", AMDGPU::M0)
2256  .Case("vccz", AMDGPU::SRC_VCCZ)
2257  .Case("src_vccz", AMDGPU::SRC_VCCZ)
2258  .Case("execz", AMDGPU::SRC_EXECZ)
2259  .Case("src_execz", AMDGPU::SRC_EXECZ)
2260  .Case("scc", AMDGPU::SRC_SCC)
2261  .Case("src_scc", AMDGPU::SRC_SCC)
2262  .Case("tba", AMDGPU::TBA)
2263  .Case("tma", AMDGPU::TMA)
2264  .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2265  .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2266  .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2267  .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2268  .Case("vcc_lo", AMDGPU::VCC_LO)
2269  .Case("vcc_hi", AMDGPU::VCC_HI)
2270  .Case("exec_lo", AMDGPU::EXEC_LO)
2271  .Case("exec_hi", AMDGPU::EXEC_HI)
2272  .Case("tma_lo", AMDGPU::TMA_LO)
2273  .Case("tma_hi", AMDGPU::TMA_HI)
2274  .Case("tba_lo", AMDGPU::TBA_LO)
2275  .Case("tba_hi", AMDGPU::TBA_HI)
2276  .Case("pc", AMDGPU::PC_REG)
2277  .Case("null", AMDGPU::SGPR_NULL)
2278  .Default(AMDGPU::NoRegister);
2279 }
2280 
2281 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2282  SMLoc &EndLoc, bool RestoreOnFailure) {
2283  auto R = parseRegister();
2284  if (!R) return true;
2285  assert(R->isReg());
2286  RegNo = R->getReg();
2287  StartLoc = R->getStartLoc();
2288  EndLoc = R->getEndLoc();
2289  return false;
2290 }
2291 
2292 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2293  SMLoc &EndLoc) {
2294  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2295 }
2296 
2297 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2298  SMLoc &StartLoc,
2299  SMLoc &EndLoc) {
2300  bool Result =
2301  ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2302  bool PendingErrors = getParser().hasPendingError();
2303  getParser().clearPendingErrors();
2304  if (PendingErrors)
2305  return MatchOperand_ParseFail;
2306  if (Result)
2307  return MatchOperand_NoMatch;
2308  return MatchOperand_Success;
2309 }
2310 
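// Fold the next parsed register Reg1 into the list accumulated in Reg/RegWidth:
// lo/hi halves of special registers are merged into the full register, while
// regular registers must have consecutive indices and simply widen the range.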
2311 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2312  RegisterKind RegKind, unsigned Reg1,
2313  SMLoc Loc) {
2314  switch (RegKind) {
2315  case IS_SPECIAL:
2316  if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2317  Reg = AMDGPU::EXEC;
2318  RegWidth = 2;
2319  return true;
2320  }
2321  if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2322  Reg = AMDGPU::FLAT_SCR;
2323  RegWidth = 2;
2324  return true;
2325  }
2326  if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2327  Reg = AMDGPU::XNACK_MASK;
2328  RegWidth = 2;
2329  return true;
2330  }
2331  if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2332  Reg = AMDGPU::VCC;
2333  RegWidth = 2;
2334  return true;
2335  }
2336  if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2337  Reg = AMDGPU::TBA;
2338  RegWidth = 2;
2339  return true;
2340  }
2341  if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2342  Reg = AMDGPU::TMA;
2343  RegWidth = 2;
2344  return true;
2345  }
2346  Error(Loc, "register does not fit in the list");
2347  return false;
2348  case IS_VGPR:
2349  case IS_SGPR:
2350  case IS_AGPR:
2351  case IS_TTMP:
2352  if (Reg1 != Reg + RegWidth) {
2353  Error(Loc, "registers in a list must have consecutive indices");
2354  return false;
2355  }
2356  RegWidth++;
2357  return true;
2358  default:
2359  llvm_unreachable("unexpected register kind");
2360  }
2361 }
2362 
2363 struct RegInfo {
2364  StringLiteral Name;
2365  RegisterKind Kind;
2366 };
2367 
2368 static constexpr RegInfo RegularRegisters[] = {
2369  {{"v"}, IS_VGPR},
2370  {{"s"}, IS_SGPR},
2371  {{"ttmp"}, IS_TTMP},
2372  {{"acc"}, IS_AGPR},
2373  {{"a"}, IS_AGPR},
2374 };
2375 
2376 static bool isRegularReg(RegisterKind Kind) {
2377  return Kind == IS_VGPR ||
2378  Kind == IS_SGPR ||
2379  Kind == IS_TTMP ||
2380  Kind == IS_AGPR;
2381 }
2382 
2383 static const RegInfo* getRegularRegInfo(StringRef Str) {
2384  for (const RegInfo &Reg : RegularRegisters)
2385  if (Str.startswith(Reg.Name))
2386  return &Reg;
2387  return nullptr;
2388 }
2389 
2390 static bool getRegNum(StringRef Str, unsigned& Num) {
2391  return !Str.getAsInteger(10, Num);
2392 }
2393 
2394 bool
2395 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2396  const AsmToken &NextToken) const {
2397 
2398  // A list of consecutive registers: [s0,s1,s2,s3]
2399  if (Token.is(AsmToken::LBrac))
2400  return true;
2401 
2402  if (!Token.is(AsmToken::Identifier))
2403  return false;
2404 
2405  // A single register like s0 or a range of registers like s[0:1]
2406 
2407  StringRef Str = Token.getString();
2408  const RegInfo *Reg = getRegularRegInfo(Str);
2409  if (Reg) {
2410  StringRef RegName = Reg->Name;
2411  StringRef RegSuffix = Str.substr(RegName.size());
2412  if (!RegSuffix.empty()) {
2413  unsigned Num;
2414  // A single register with an index: rXX
2415  if (getRegNum(RegSuffix, Num))
2416  return true;
2417  } else {
2418  // A range of registers: r[XX:YY].
2419  if (NextToken.is(AsmToken::LBrac))
2420  return true;
2421  }
2422  }
2423 
2424  return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2425 }
2426 
2427 bool
2428 AMDGPUAsmParser::isRegister()
2429 {
2430  return isRegister(getToken(), peekToken());
2431 }
2432 
2433 unsigned
2434 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2435  unsigned RegNum,
2436  unsigned RegWidth,
2437  SMLoc Loc) {
2438 
2439  assert(isRegularReg(RegKind));
2440 
2441  unsigned AlignSize = 1;
2442  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2443  // SGPR and TTMP registers must be aligned.
2444  // Max required alignment is 4 dwords.
2445  AlignSize = std::min(RegWidth, 4u);
2446  }
2447 
2448  if (RegNum % AlignSize != 0) {
2449  Error(Loc, "invalid register alignment");
2450  return AMDGPU::NoRegister;
2451  }
2452 
2453  unsigned RegIdx = RegNum / AlignSize;
2454  int RCID = getRegClass(RegKind, RegWidth);
2455  if (RCID == -1) {
2456  Error(Loc, "invalid or unsupported register size");
2457  return AMDGPU::NoRegister;
2458  }
2459 
2460  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2461  const MCRegisterClass RC = TRI->getRegClass(RCID);
2462  if (RegIdx >= RC.getNumRegs()) {
2463  Error(Loc, "register index is out of range");
2464  return AMDGPU::NoRegister;
2465  }
2466 
2467  return RC.getRegister(RegIdx);
2468 }
2469 
2470 bool
2471 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2472  int64_t RegLo, RegHi;
2473  if (!skipToken(AsmToken::LBrac, "missing register index"))
2474  return false;
2475 
2476  SMLoc FirstIdxLoc = getLoc();
2477  SMLoc SecondIdxLoc;
2478 
2479  if (!parseExpr(RegLo))
2480  return false;
2481 
2482  if (trySkipToken(AsmToken::Colon)) {
2483  SecondIdxLoc = getLoc();
2484  if (!parseExpr(RegHi))
2485  return false;
2486  } else {
2487  RegHi = RegLo;
2488  }
2489 
2490  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2491  return false;
2492 
2493  if (!isUInt<32>(RegLo)) {
2494  Error(FirstIdxLoc, "invalid register index");
2495  return false;
2496  }
2497 
2498  if (!isUInt<32>(RegHi)) {
2499  Error(SecondIdxLoc, "invalid register index");
2500  return false;
2501  }
2502 
2503  if (RegLo > RegHi) {
2504  Error(FirstIdxLoc, "first register index should not exceed second index");
2505  return false;
2506  }
2507 
2508  Num = static_cast<unsigned>(RegLo);
2509  Width = (RegHi - RegLo) + 1;
2510  return true;
2511 }
2512 
2513 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2514  unsigned &RegNum, unsigned &RegWidth,
2515  SmallVectorImpl<AsmToken> &Tokens) {
2516  assert(isToken(AsmToken::Identifier));
2517  unsigned Reg = getSpecialRegForName(getTokenStr());
2518  if (Reg) {
2519  RegNum = 0;
2520  RegWidth = 1;
2521  RegKind = IS_SPECIAL;
2522  Tokens.push_back(getToken());
2523  lex(); // skip register name
2524  }
2525  return Reg;
2526 }
2527 
2528 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2529  unsigned &RegNum, unsigned &RegWidth,
2530  SmallVectorImpl<AsmToken> &Tokens) {
2531  assert(isToken(AsmToken::Identifier));
2532  StringRef RegName = getTokenStr();
2533  auto Loc = getLoc();
2534 
2535  const RegInfo *RI = getRegularRegInfo(RegName);
2536  if (!RI) {
2537  Error(Loc, "invalid register name");
2538  return AMDGPU::NoRegister;
2539  }
2540 
2541  Tokens.push_back(getToken());
2542  lex(); // skip register name
2543 
2544  RegKind = RI->Kind;
2545  StringRef RegSuffix = RegName.substr(RI->Name.size());
2546  if (!RegSuffix.empty()) {
2547  // Single 32-bit register: vXX.
2548  if (!getRegNum(RegSuffix, RegNum)) {
2549  Error(Loc, "invalid register index");
2550  return AMDGPU::NoRegister;
2551  }
2552  RegWidth = 1;
2553  } else {
2554  // Range of registers: v[XX:YY]. ":YY" is optional.
2555  if (!ParseRegRange(RegNum, RegWidth))
2556  return AMDGPU::NoRegister;
2557  }
2558 
2559  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2560 }
2561 
2562 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2563  unsigned &RegWidth,
2564  SmallVectorImpl<AsmToken> &Tokens) {
2565  unsigned Reg = AMDGPU::NoRegister;
2566  auto ListLoc = getLoc();
2567 
2568  if (!skipToken(AsmToken::LBrac,
2569  "expected a register or a list of registers")) {
2570  return AMDGPU::NoRegister;
2571  }
2572 
2573  // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2574 
2575  auto Loc = getLoc();
2576  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2577  return AMDGPU::NoRegister;
2578  if (RegWidth != 1) {
2579  Error(Loc, "expected a single 32-bit register");
2580  return AMDGPU::NoRegister;
2581  }
2582 
2583  for (; trySkipToken(AsmToken::Comma); ) {
2584  RegisterKind NextRegKind;
2585  unsigned NextReg, NextRegNum, NextRegWidth;
2586  Loc = getLoc();
2587 
2588  if (!ParseAMDGPURegister(NextRegKind, NextReg,
2589  NextRegNum, NextRegWidth,
2590  Tokens)) {
2591  return AMDGPU::NoRegister;
2592  }
2593  if (NextRegWidth != 1) {
2594  Error(Loc, "expected a single 32-bit register");
2595  return AMDGPU::NoRegister;
2596  }
2597  if (NextRegKind != RegKind) {
2598  Error(Loc, "registers in a list must be of the same kind");
2599  return AMDGPU::NoRegister;
2600  }
2601  if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2602  return AMDGPU::NoRegister;
2603  }
2604 
2605  if (!skipToken(AsmToken::RBrac,
2606  "expected a comma or a closing square bracket")) {
2607  return AMDGPU::NoRegister;
2608  }
2609 
2610  if (isRegularReg(RegKind))
2611  Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2612 
2613  return Reg;
2614 }
2615 
2616 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2617  unsigned &RegNum, unsigned &RegWidth,
2618  SmallVectorImpl<AsmToken> &Tokens) {
2619  auto Loc = getLoc();
2620  Reg = AMDGPU::NoRegister;
2621 
2622  if (isToken(AsmToken::Identifier)) {
2623  Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2624  if (Reg == AMDGPU::NoRegister)
2625  Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2626  } else {
2627  Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2628  }
2629 
2630  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2631  if (Reg == AMDGPU::NoRegister) {
2632  assert(Parser.hasPendingError());
2633  return false;
2634  }
2635 
2636  if (!subtargetHasRegister(*TRI, Reg)) {
2637  if (Reg == AMDGPU::SGPR_NULL) {
2638  Error(Loc, "'null' operand is not supported on this GPU");
2639  } else {
2640  Error(Loc, "register not available on this GPU");
2641  }
2642  return false;
2643  }
2644 
2645  return true;
2646 }
2647 
2648 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2649  unsigned &RegNum, unsigned &RegWidth,
2650  bool RestoreOnFailure /*=false*/) {
2651  Reg = AMDGPU::NoRegister;
2652 
2653  SmallVector<AsmToken, 1> Tokens;
2654  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2655  if (RestoreOnFailure) {
2656  while (!Tokens.empty()) {
2657  getLexer().UnLex(Tokens.pop_back_val());
2658  }
2659  }
2660  return true;
2661  }
2662  return false;
2663 }
2664 
2665 Optional<StringRef>
2666 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2667  switch (RegKind) {
2668  case IS_VGPR:
2669  return StringRef(".amdgcn.next_free_vgpr");
2670  case IS_SGPR:
2671  return StringRef(".amdgcn.next_free_sgpr");
2672  default:
2673  return None;
2674  }
2675 }
2676 
2677 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2678  auto SymbolName = getGprCountSymbolName(RegKind);
2679  assert(SymbolName && "initializing invalid register kind");
2680  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2681  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2682 }
2683 
2684 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2685  unsigned DwordRegIndex,
2686  unsigned RegWidth) {
2687  // Symbols are only defined for GCN targets
2688  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2689  return true;
2690 
2691  auto SymbolName = getGprCountSymbolName(RegKind);
2692  if (!SymbolName)
2693  return true;
2694  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2695 
2696  int64_t NewMax = DwordRegIndex + RegWidth - 1;
2697  int64_t OldCount;
2698 
2699  if (!Sym->isVariable())
2700  return !Error(getLoc(),
2701  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2702  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2703  return !Error(
2704  getLoc(),
2705  ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2706 
2707  if (OldCount <= NewMax)
2708  Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2709 
2710  return true;
2711 }
2712 
2713 std::unique_ptr<AMDGPUOperand>
2714 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2715  const auto &Tok = getToken();
2716  SMLoc StartLoc = Tok.getLoc();
2717  SMLoc EndLoc = Tok.getEndLoc();
2718  RegisterKind RegKind;
2719  unsigned Reg, RegNum, RegWidth;
2720 
2721  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2722  return nullptr;
2723  }
2724  if (isHsaAbiVersion3Or4(&getSTI())) {
2725  if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2726  return nullptr;
2727  } else
2728  KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2729  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2730 }
2731 
2732 OperandMatchResultTy
2733 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2734  // TODO: add syntactic sugar for 1/(2*PI)
2735 
2736  assert(!isRegister());
2737  assert(!isModifier());
2738 
2739  const auto& Tok = getToken();
2740  const auto& NextTok = peekToken();
2741  bool IsReal = Tok.is(AsmToken::Real);
2742  SMLoc S = getLoc();
2743  bool Negate = false;
2744 
2745  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2746  lex();
2747  IsReal = true;
2748  Negate = true;
2749  }
2750 
2751  if (IsReal) {
2752  // Floating-point expressions are not supported.
2753  // Can only allow floating-point literals with an
2754  // optional sign.
2755 
2756  StringRef Num = getTokenStr();
2757  lex();
2758 
2759  APFloat RealVal(APFloat::IEEEdouble());
2760  auto roundMode = APFloat::rmNearestTiesToEven;
2761  if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2762  return MatchOperand_ParseFail;
2763  }
2764  if (Negate)
2765  RealVal.changeSign();
2766 
2767  Operands.push_back(
2768  AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2769  AMDGPUOperand::ImmTyNone, true));
2770 
2771  return MatchOperand_Success;
2772 
2773  } else {
2774  int64_t IntVal;
2775  const MCExpr *Expr;
2776  SMLoc S = getLoc();
2777 
2778  if (HasSP3AbsModifier) {
2779  // This is a workaround for handling expressions
2780  // as arguments of SP3 'abs' modifier, for example:
2781  // |1.0|
2782  // |-1|
2783  // |1+x|
2784  // This syntax is not compatible with syntax of standard
2785  // MC expressions (due to the trailing '|').
2786  SMLoc EndLoc;
2787  if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2788  return MatchOperand_ParseFail;
2789  } else {
2790  if (Parser.parseExpression(Expr))
2791  return MatchOperand_ParseFail;
2792  }
2793 
2794  if (Expr->evaluateAsAbsolute(IntVal)) {
2795  Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2796  } else {
2797  Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2798  }
2799 
2800  return MatchOperand_Success;
2801  }
2802 
2803  return MatchOperand_NoMatch;
2804 }
2805 
2806 OperandMatchResultTy
2807 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2808  if (!isRegister())
2809  return MatchOperand_NoMatch;
2810 
2811  if (auto R = parseRegister()) {
2812  assert(R->isReg());
2813  Operands.push_back(std::move(R));
2814  return MatchOperand_Success;
2815  }
2816  return MatchOperand_ParseFail;
2817 }
2818 
2819 OperandMatchResultTy
2820 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2821  auto res = parseReg(Operands);
2822  if (res != MatchOperand_NoMatch) {
2823  return res;
2824  } else if (isModifier()) {
2825  return MatchOperand_NoMatch;
2826  } else {
2827  return parseImm(Operands, HasSP3AbsMod);
2828  }
2829 }
2830 
2831 bool
2832 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2833  if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2834  const auto &str = Token.getString();
2835  return str == "abs" || str == "neg" || str == "sext";
2836  }
2837  return false;
2838 }
2839 
2840 bool
2841 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2842  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2843 }
2844 
2845 bool
2846 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2847  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2848 }
2849 
2850 bool
2851 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2852  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2853 }
2854 
2855 // Check if this is an operand modifier or an opcode modifier
2856 // which may look like an expression but it is not. We should
2857 // avoid parsing these modifiers as expressions. Currently
2858 // recognized sequences are:
2859 // |...|
2860 // abs(...)
2861 // neg(...)
2862 // sext(...)
2863 // -reg
2864 // -|...|
2865 // -abs(...)
2866 // name:...
2867 // Note that simple opcode modifiers like 'gds' may be parsed as
2868 // expressions; this is a special case. See getExpressionAsToken.
2869 //
2870 bool
2871 AMDGPUAsmParser::isModifier() {
2872 
2873  AsmToken Tok = getToken();
2874  AsmToken NextToken[2];
2875  peekTokens(NextToken);
2876 
2877  return isOperandModifier(Tok, NextToken[0]) ||
2878  (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2879  isOpcodeModifierWithVal(Tok, NextToken[0]);
2880 }
2881 
2882 // Check if the current token is an SP3 'neg' modifier.
2883 // Currently this modifier is allowed in the following context:
2884 //
2885 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2886 // 2. Before an 'abs' modifier: -abs(...)
2887 // 3. Before an SP3 'abs' modifier: -|...|
2888 //
2889 // In all other cases "-" is handled as a part
2890 // of an expression that follows the sign.
2891 //
2892 // Note: When "-" is followed by an integer literal,
2893 // this is interpreted as integer negation rather
2894 // than a floating-point NEG modifier applied to N.
2895 // Besides being counter-intuitive, such use of the floating-point
2896 // NEG modifier would have resulted in a different meaning
2897 // of integer literals used with VOP1/2/C and VOP3,
2898 // for example:
2899 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2900 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2901 // Negative fp literals with a preceding "-" are
2902 // handled likewise for uniformity.
2903 //
2904 bool
2905 AMDGPUAsmParser::parseSP3NegModifier() {
2906 
2907  AsmToken NextToken[2];
2908  peekTokens(NextToken);
2909 
2910  if (isToken(AsmToken::Minus) &&
2911  (isRegister(NextToken[0], NextToken[1]) ||
2912  NextToken[0].is(AsmToken::Pipe) ||
2913  isId(NextToken[0], "abs"))) {
2914  lex();
2915  return true;
2916  }
2917 
2918  return false;
2919 }
2920 
2921 OperandMatchResultTy
2922 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2923  bool AllowImm) {
2924  bool Neg, SP3Neg;
2925  bool Abs, SP3Abs;
2926  SMLoc Loc;
2927 
2928  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2929  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2930  Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2931  return MatchOperand_ParseFail;
2932  }
2933 
2934  SP3Neg = parseSP3NegModifier();
2935 
2936  Loc = getLoc();
2937  Neg = trySkipId("neg");
2938  if (Neg && SP3Neg) {
2939  Error(Loc, "expected register or immediate");
2940  return MatchOperand_ParseFail;
2941  }
2942  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2943  return MatchOperand_ParseFail;
2944 
2945  Abs = trySkipId("abs");
2946  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2947  return MatchOperand_ParseFail;
2948 
2949  Loc = getLoc();
2950  SP3Abs = trySkipToken(AsmToken::Pipe);
2951  if (Abs && SP3Abs) {
2952  Error(Loc, "expected register or immediate");
2953  return MatchOperand_ParseFail;
2954  }
2955 
2956  OperandMatchResultTy Res;
2957  if (AllowImm) {
2958  Res = parseRegOrImm(Operands, SP3Abs);
2959  } else {
2960  Res = parseReg(Operands);
2961  }
2962  if (Res != MatchOperand_Success) {
2963  return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2964  }
2965 
2966  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2967  return MatchOperand_ParseFail;
2968  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2969  return MatchOperand_ParseFail;
2970  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2971  return MatchOperand_ParseFail;
2972 
2973  AMDGPUOperand::Modifiers Mods;
2974  Mods.Abs = Abs || SP3Abs;
2975  Mods.Neg = Neg || SP3Neg;
2976 
2977  if (Mods.hasFPModifiers()) {
2978  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2979  if (Op.isExpr()) {
2980  Error(Op.getStartLoc(), "expected an absolute expression");
2981  return MatchOperand_ParseFail;
2982  }
2983  Op.setModifiers(Mods);
2984  }
2985  return MatchOperand_Success;
2986 }
2987 
2988 OperandMatchResultTy
2989 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2990  bool AllowImm) {
2991  bool Sext = trySkipId("sext");
2992  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2993  return MatchOperand_ParseFail;
2994 
2995  OperandMatchResultTy Res;
2996  if (AllowImm) {
2997  Res = parseRegOrImm(Operands);
2998  } else {
2999  Res = parseReg(Operands);
3000  }
3001  if (Res != MatchOperand_Success) {
3002  return Sext? MatchOperand_ParseFail : Res;
3003  }
3004 
3005  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3006  return MatchOperand_ParseFail;
3007 
3008  AMDGPUOperand::Modifiers Mods;
3009  Mods.Sext = Sext;
3010 
3011  if (Mods.hasIntModifiers()) {
3012  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3013  if (Op.isExpr()) {
3014  Error(Op.getStartLoc(), "expected an absolute expression");
3015  return MatchOperand_ParseFail;
3016  }
3017  Op.setModifiers(Mods);
3018  }
3019 
3020  return MatchOperand_Success;
3021 }
3022 
3023 OperandMatchResultTy
3024 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3025  return parseRegOrImmWithFPInputMods(Operands, false);
3026 }
3027 
3028 OperandMatchResultTy
3029 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3030  return parseRegOrImmWithIntInputMods(Operands, false);
3031 }
3032 
3033 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3034  auto Loc = getLoc();
3035  if (trySkipId("off")) {
3036  Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3037  AMDGPUOperand::ImmTyOff, false));
3038  return MatchOperand_Success;
3039  }
3040 
3041  if (!isRegister())
3042  return MatchOperand_NoMatch;
3043 
3044  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3045  if (Reg) {
3046  Operands.push_back(std::move(Reg));
3047  return MatchOperand_Success;
3048  }
3049 
3050  return MatchOperand_ParseFail;
3051 
3052 }
3053 
3054 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3055  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3056 
3057  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3058  (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3059  (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3060  (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3061  return Match_InvalidOperand;
3062 
3063  if ((TSFlags & SIInstrFlags::VOP3) &&
3064  (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3065  getForcedEncodingSize() != 64)
3066  return Match_PreferE32;
3067 
3068  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3069  Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3070  // v_mac_f32/16 allow only dst_sel == DWORD;
3071  auto OpNum =
3072  AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3073  const auto &Op = Inst.getOperand(OpNum);
3074  if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3075  return Match_InvalidOperand;
3076  }
3077  }
3078 
3079  return Match_Success;
3080 }
3081 
3082 static ArrayRef<unsigned> getAllVariants() {
3083  static const unsigned Variants[] = {
3084  AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3085  AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3086  };
3087 
3088  return makeArrayRef(Variants);
3089 }
3090 
3091 // What asm variants we should check
3092 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3093  if (getForcedEncodingSize() == 32) {
3094  static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3095  return makeArrayRef(Variants);
3096  }
3097 
3098  if (isForcedVOP3()) {
3099  static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3100  return makeArrayRef(Variants);
3101  }
3102 
3103  if (isForcedSDWA()) {
3104  static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3105  AMDGPUAsmVariants::SDWA9};
3106  return makeArrayRef(Variants);
3107  }
3108 
3109  if (isForcedDPP()) {
3110  static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3111  return makeArrayRef(Variants);
3112  }
3113 
3114  return getAllVariants();
3115 }
3116 
3117 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3118  if (getForcedEncodingSize() == 32)
3119  return "e32";
3120 
3121  if (isForcedVOP3())
3122  return "e64";
3123 
3124  if (isForcedSDWA())
3125  return "sdwa";
3126 
3127  if (isForcedDPP())
3128  return "dpp";
3129 
3130  return "";
3131 }
3132 
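// Return the implicit SGPR read (FLAT_SCR, VCC, VCC_LO/HI or M0) of a VOP
// instruction, or NoRegister if there is none; used for constant bus accounting.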
3133 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3134  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3135  const unsigned Num = Desc.getNumImplicitUses();
3136  for (unsigned i = 0; i < Num; ++i) {
3137  unsigned Reg = Desc.ImplicitUses[i];
3138  switch (Reg) {
3139  case AMDGPU::FLAT_SCR:
3140  case AMDGPU::VCC:
3141  case AMDGPU::VCC_LO:
3142  case AMDGPU::VCC_HI:
3143  case AMDGPU::M0:
3144  return Reg;
3145  default:
3146  break;
3147  }
3148  }
3149  return AMDGPU::NoRegister;
3150 }
3151 
3152 // NB: This code is correct only when used to check constant
3153 // bus limitations because GFX7 supports no f16 inline constants.
3154 // Note that there are no cases when a GFX7 opcode violates
3155 // constant bus limitations due to the use of an f16 constant.
3156 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3157  unsigned OpIdx) const {
3158  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3159 
3160  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3161  return false;
3162  }
3163 
3164  const MCOperand &MO = Inst.getOperand(OpIdx);
3165 
3166  int64_t Val = MO.getImm();
3167  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3168 
3169  switch (OpSize) { // expected operand size
3170  case 8:
3171  return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3172  case 4:
3173  return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3174  case 2: {
3175  const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3176  if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3177  OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3178  OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3179  return AMDGPU::isInlinableIntLiteral(Val);
3180 
3181  if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3182  OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3183  OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3184  return AMDGPU::isInlinableIntLiteralV216(Val);
3185 
3186  if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3187  OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3188  OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3189  return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3190 
3191  return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3192  }
3193  default:
3194  llvm_unreachable("invalid operand size");
3195  }
3196 }
3197 
3198 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3199  if (!isGFX10Plus())
3200  return 1;
3201 
3202  switch (Opcode) {
3203  // 64-bit shift instructions can use only one scalar value input
3204  case AMDGPU::V_LSHLREV_B64_e64:
3205  case AMDGPU::V_LSHLREV_B64_gfx10:
3206  case AMDGPU::V_LSHRREV_B64_e64:
3207  case AMDGPU::V_LSHRREV_B64_gfx10:
3208  case AMDGPU::V_ASHRREV_I64_e64:
3209  case AMDGPU::V_ASHRREV_I64_gfx10:
3210  case AMDGPU::V_LSHL_B64_e64:
3211  case AMDGPU::V_LSHR_B64_e64:
3212  case AMDGPU::V_ASHR_I64_e64:
3213  return 1;
3214  default:
3215  return 2;
3216  }
3217 }
3218 
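// An operand occupies the constant bus if it is an SGPR other than the null
// register, a literal that is not an inline constant, or an unresolved expression.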
3219 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3220  const MCOperand &MO = Inst.getOperand(OpIdx);
3221  if (MO.isImm()) {
3222  return !isInlineConstant(Inst, OpIdx);
3223  } else if (MO.isReg()) {
3224  auto Reg = MO.getReg();
3225  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3226  auto PReg = mc2PseudoReg(Reg);
3227  return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3228  } else {
3229  return true;
3230  }
3231 }
3232 
3233 bool
3234 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3235  const OperandVector &Operands) {
3236  const unsigned Opcode = Inst.getOpcode();
3237  const MCInstrDesc &Desc = MII.get(Opcode);
3238  unsigned LastSGPR = AMDGPU::NoRegister;
3239  unsigned ConstantBusUseCount = 0;
3240  unsigned NumLiterals = 0;
3241  unsigned LiteralSize;
3242 
3243  if (Desc.TSFlags &
3244  (SIInstrFlags::VOPC |
3245  SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3246  SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3247  SIInstrFlags::SDWA)) {
3248  // Check special imm operands (used by madmk, etc)
3249  if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3250  ++ConstantBusUseCount;
3251  }
3252 
3253  SmallDenseSet<unsigned> SGPRsUsed;
3254  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3255  if (SGPRUsed != AMDGPU::NoRegister) {
3256  SGPRsUsed.insert(SGPRUsed);
3257  ++ConstantBusUseCount;
3258  }
3259 
3260  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3261  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3262  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3263 
3264  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3265 
3266  for (int OpIdx : OpIndices) {
3267  if (OpIdx == -1) break;
3268 
3269  const MCOperand &MO = Inst.getOperand(OpIdx);
3270  if (usesConstantBus(Inst, OpIdx)) {
3271  if (MO.isReg()) {
3272  LastSGPR = mc2PseudoReg(MO.getReg());
3273  // Pairs of registers with a partial intersection like these
3274  // s0, s[0:1]
3275  // flat_scratch_lo, flat_scratch
3276  // flat_scratch_lo, flat_scratch_hi
3277  // are theoretically valid but they are disabled anyway.
3278  // Note that this code mimics SIInstrInfo::verifyInstruction
3279  if (!SGPRsUsed.count(LastSGPR)) {
3280  SGPRsUsed.insert(LastSGPR);
3281  ++ConstantBusUseCount;
3282  }
3283  } else { // Expression or a literal
3284 
3285  if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3286  continue; // special operand like VINTERP attr_chan
3287 
3288  // An instruction may use only one literal.
3289  // This has been validated on the previous step.
3290  // See validateVOP3Literal.
3291  // This literal may be used as more than one operand.
3292  // If all these operands are of the same size,
3293  // this literal counts as one scalar value.
3294  // Otherwise it counts as 2 scalar values.
3295  // See "GFX10 Shader Programming", section 3.6.2.3.
3296 
3297  unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3298  if (Size < 4) Size = 4;
3299 
3300  if (NumLiterals == 0) {
3301  NumLiterals = 1;
3302  LiteralSize = Size;
3303  } else if (LiteralSize != Size) {
3304  NumLiterals = 2;
3305  }
3306  }
3307  }
3308  }
3309  }
3310  ConstantBusUseCount += NumLiterals;
3311 
3312  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3313  return true;
3314 
3315  SMLoc LitLoc = getLitLoc(Operands);
3316  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3317  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3318  Error(Loc, "invalid operand (violates constant bus restrictions)");
3319  return false;
3320 }
3321 
3322 bool
3323 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3324  const OperandVector &Operands) {
3325  const unsigned Opcode = Inst.getOpcode();
3326  const MCInstrDesc &Desc = MII.get(Opcode);
3327 
3328  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3329  if (DstIdx == -1 ||
3330  Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3331  return true;
3332  }
3333 
3334  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3335 
3336  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3337  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3338  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3339 
3340  assert(DstIdx != -1);
3341  const MCOperand &Dst = Inst.getOperand(DstIdx);
3342  assert(Dst.isReg());
3343  const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3344 
3345  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3346 
3347  for (int SrcIdx : SrcIndices) {
3348  if (SrcIdx == -1) break;
3349  const MCOperand &Src = Inst.getOperand(SrcIdx);
3350  if (Src.isReg()) {
3351  const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3352  if (isRegIntersect(DstReg, SrcReg, TRI)) {
3353  Error(getRegLoc(SrcReg, Operands),
3354  "destination must be different than all sources");
3355  return false;
3356  }
3357  }
3358  }
3359 
3360  return true;
3361 }
3362 
3363 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3364 
3365  const unsigned Opc = Inst.getOpcode();
3366  const MCInstrDesc &Desc = MII.get(Opc);
3367 
3368  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3369  int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3370  assert(ClampIdx != -1);
3371  return Inst.getOperand(ClampIdx).getImm() == 0;
3372  }
3373 
3374  return true;
3375 }
3376 
3377 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3378 
3379  const unsigned Opc = Inst.getOpcode();
3380  const MCInstrDesc &Desc = MII.get(Opc);
3381 
3382  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3383  return true;
3384 
3385  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3386  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3387  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3388 
3389  assert(VDataIdx != -1);
3390 
3391  if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3392  return true;
3393 
3394  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3395  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3396  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3397  if (DMask == 0)
3398  DMask = 1;
3399 
3400  unsigned DataSize =
3401  (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3402  if (hasPackedD16()) {
3403  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3404  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3405  DataSize = (DataSize + 1) / 2;
3406  }
3407 
3408  return (VDataSize / 4) == DataSize + TFESize;
3409 }
3410 
3411 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3412  const unsigned Opc = Inst.getOpcode();
3413  const MCInstrDesc &Desc = MII.get(Opc);
3414 
3415  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3416  return true;
3417 
3418  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3419 
3420  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3421  AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3422  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3423  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3424  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3425  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3426 
3427  assert(VAddr0Idx != -1);
3428  assert(SrsrcIdx != -1);
3429  assert(SrsrcIdx > VAddr0Idx);
3430 
3431  if (DimIdx == -1)
3432  return true; // intersect_ray
3433 
3434  unsigned Dim = Inst.getOperand(DimIdx).getImm();
3435  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3436  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3437  unsigned ActualAddrSize =
3438  IsNSA ? SrsrcIdx - VAddr0Idx
3439  : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3440  bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3441 
3442  unsigned ExpectedAddrSize =
3443  AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3444 
3445  if (!IsNSA) {
3446  if (ExpectedAddrSize > 8)
3447  ExpectedAddrSize = 16;
3448  else if (ExpectedAddrSize > 5)
3449  ExpectedAddrSize = 8;
3450 
3451  // Allow an oversized 8-VGPR vaddr when only 5 VGPRs are required.
3452  // This provides backward compatibility for assembly created
3453  // before 160b types were directly supported.
3454  if (ExpectedAddrSize == 5 && ActualAddrSize == 8)
3455  return true;
3456  }
3457 
3458  return ActualAddrSize == ExpectedAddrSize;
3459 }
3460 
3461 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3462 
3463  const unsigned Opc = Inst.getOpcode();
3464  const MCInstrDesc &Desc = MII.get(Opc);
3465 
3466  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3467  return true;
3468  if (!Desc.mayLoad() || !Desc.mayStore())
3469  return true; // Not atomic
3470 
3471  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3472  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3473 
3474  // This is an incomplete check because image_atomic_cmpswap
3475  // may only use 0x3 and 0xf while other atomic operations
3476  // may use 0x1 and 0x3. However these limitations are
3477  // verified when we check that dmask matches dst size.
3478  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3479 }
3480 
3481 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3482 
3483  const unsigned Opc = Inst.getOpcode();
3484  const MCInstrDesc &Desc = MII.get(Opc);
3485 
3486  if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3487  return true;
3488 
3489  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3490  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3491 
3492  // GATHER4 instructions use dmask in a different fashion compared to
3493  // other MIMG instructions. The only useful DMASK values are
3494  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3495  // (red,red,red,red) etc.) The ISA document doesn't mention
3496  // this.
3497  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3498 }
3499 
3500 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3501  const unsigned Opc = Inst.getOpcode();
3502  const MCInstrDesc &Desc = MII.get(Opc);
3503 
3504  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3505  return true;
3506 
3507  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3508  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3509  AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3510 
3511  if (!BaseOpcode->MSAA)
3512  return true;
3513 
3514  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3515  assert(DimIdx != -1);
3516 
3517  unsigned Dim = Inst.getOperand(DimIdx).getImm();
3518  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3519 
3520  return DimInfo->MSAA;
3521 }
3522 
3523 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3524 {
3525  switch (Opcode) {
3526  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3527  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3528  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3529  return true;
3530  default:
3531  return false;
3532  }
3533 }
3534 
3535 // movrels* opcodes should only allow VGPRS as src0.
3536 // This is specified in .td description for vop1/vop3,
3537 // but sdwa is handled differently. See isSDWAOperand.
3538 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3539  const OperandVector &Operands) {
3540 
3541  const unsigned Opc = Inst.getOpcode();
3542  const MCInstrDesc &Desc = MII.get(Opc);
3543 
3544  if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3545  return true;
3546 
3547  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3548  assert(Src0Idx != -1);
3549 
3550  SMLoc ErrLoc;
3551  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3552  if (Src0.isReg()) {
3553  auto Reg = mc2PseudoReg(Src0.getReg());
3554  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3555  if (!isSGPR(Reg, TRI))
3556  return true;
3557  ErrLoc = getRegLoc(Reg, Operands);
3558  } else {
3559  ErrLoc = getConstLoc(Operands);
3560  }
3561 
3562  Error(ErrLoc, "source operand must be a VGPR");
3563  return false;
3564 }
3565 
3566 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3567  const OperandVector &Operands) {
3568 
3569  const unsigned Opc = Inst.getOpcode();
3570 
3571  if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3572  return true;
3573 
3574  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3575  assert(Src0Idx != -1);
3576 
3577  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3578  if (!Src0.isReg())
3579  return true;
3580 
3581  auto Reg = mc2PseudoReg(Src0.getReg());
3582  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3583  if (isSGPR(Reg, TRI)) {
3584  Error(getRegLoc(Reg, Operands),
3585  "source operand must be either a VGPR or an inline constant");
3586  return false;
3587  }
3588 
3589  return true;
3590 }
3591 
3592 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3593  switch (Inst.getOpcode()) {
3594  default:
3595  return true;
3596  case V_DIV_SCALE_F32_gfx6_gfx7:
3597  case V_DIV_SCALE_F32_vi:
3598  case V_DIV_SCALE_F32_gfx10:
3599  case V_DIV_SCALE_F64_gfx6_gfx7:
3600  case V_DIV_SCALE_F64_vi:
3601  case V_DIV_SCALE_F64_gfx10:
3602  break;
3603  }
3604 
3605  // TODO: Check that src0 = src1 or src2.
3606 
3607  for (auto Name : {AMDGPU::OpName::src0_modifiers,
3608  AMDGPU::OpName::src1_modifiers,
3609  AMDGPU::OpName::src2_modifiers}) {
3610  if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3611  .getImm() &
3612  SISrcMods::ABS) {
3613  return false;
3614  }
3615  }
3616 
3617  return true;
3618 }
3619 
3620 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3621 
3622  const unsigned Opc = Inst.getOpcode();
3623  const MCInstrDesc &Desc = MII.get(Opc);
3624 
3625  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3626  return true;
3627 
3628  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3629  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3630  if (isCI() || isSI())
3631  return false;
3632  }
3633 
3634  return true;
3635 }
3636 
3637 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3638  const unsigned Opc = Inst.getOpcode();
3639  const MCInstrDesc &Desc = MII.get(Opc);
3640 
3641  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3642  return true;
3643 
3644  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3645  if (DimIdx < 0)
3646  return true;
3647 
3648  long Imm = Inst.getOperand(DimIdx).getImm();
3649  if (Imm < 0 || Imm >= 8)
3650  return false;
3651 
3652  return true;
3653 }
3654 
3655 static bool IsRevOpcode(const unsigned Opcode)
3656 {
3657  switch (Opcode) {
3658  case AMDGPU::V_SUBREV_F32_e32:
3659  case AMDGPU::V_SUBREV_F32_e64:
3660  case AMDGPU::V_SUBREV_F32_e32_gfx10:
3661  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3662  case AMDGPU::V_SUBREV_F32_e32_vi:
3663  case AMDGPU::V_SUBREV_F32_e64_gfx10:
3664  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3665  case AMDGPU::V_SUBREV_F32_e64_vi:
3666 
3667  case AMDGPU::V_SUBREV_CO_U32_e32:
3668  case AMDGPU::V_SUBREV_CO_U32_e64:
3669  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3670  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3671 
3672  case AMDGPU::V_SUBBREV_U32_e32:
3673  case AMDGPU::V_SUBBREV_U32_e64:
3674  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3675  case AMDGPU::V_SUBBREV_U32_e32_vi:
3676  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3677  case AMDGPU::V_SUBBREV_U32_e64_vi:
3678 
3679  case AMDGPU::V_SUBREV_U32_e32:
3680  case AMDGPU::V_SUBREV_U32_e64:
3681  case AMDGPU::V_SUBREV_U32_e32_gfx9:
3682  case AMDGPU::V_SUBREV_U32_e32_vi:
3683  case AMDGPU::V_SUBREV_U32_e64_gfx9:
3684  case AMDGPU::V_SUBREV_U32_e64_vi:
3685 
3686  case AMDGPU::V_SUBREV_F16_e32:
3687  case AMDGPU::V_SUBREV_F16_e64:
3688  case AMDGPU::V_SUBREV_F16_e32_gfx10:
3689  case AMDGPU::V_SUBREV_F16_e32_vi:
3690  case AMDGPU::V_SUBREV_F16_e64_gfx10:
3691  case AMDGPU::V_SUBREV_F16_e64_vi:
3692 
3693  case AMDGPU::V_SUBREV_U16_e32:
3694  case AMDGPU::V_SUBREV_U16_e64:
3695  case AMDGPU::V_SUBREV_U16_e32_vi:
3696  case AMDGPU::V_SUBREV_U16_e64_vi:
3697 
3698  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3699  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3700  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3701 
3702  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3703  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3704 
3705  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3706  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3707 
3708  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3709  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3710 
3711  case AMDGPU::V_LSHRREV_B32_e32:
3712  case AMDGPU::V_LSHRREV_B32_e64:
3713  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3714  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3715  case AMDGPU::V_LSHRREV_B32_e32_vi:
3716  case AMDGPU::V_LSHRREV_B32_e64_vi:
3717  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3718  case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3719 
3720  case AMDGPU::V_ASHRREV_I32_e32:
3721  case AMDGPU::V_ASHRREV_I32_e64:
3722  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3723  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3724  case AMDGPU::V_ASHRREV_I32_e32_vi:
3725  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3726  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3727  case AMDGPU::V_ASHRREV_I32_e64_vi:
3728 
3729  case AMDGPU::V_LSHLREV_B32_e32:
3730  case AMDGPU::V_LSHLREV_B32_e64:
3731  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3732  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3733  case AMDGPU::V_LSHLREV_B32_e32_vi:
3734  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3735  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3736  case AMDGPU::V_LSHLREV_B32_e64_vi:
3737 
3738  case AMDGPU::V_LSHLREV_B16_e32:
3739  case AMDGPU::V_LSHLREV_B16_e64:
3740  case AMDGPU::V_LSHLREV_B16_e32_vi:
3741  case AMDGPU::V_LSHLREV_B16_e64_vi:
3742  case AMDGPU::V_LSHLREV_B16_gfx10:
3743 
3744  case AMDGPU::V_LSHRREV_B16_e32:
3745  case AMDGPU::V_LSHRREV_B16_e64:
3746  case AMDGPU::V_LSHRREV_B16_e32_vi:
3747  case AMDGPU::V_LSHRREV_B16_e64_vi:
3748  case AMDGPU::V_LSHRREV_B16_gfx10:
3749 
3750  case AMDGPU::V_ASHRREV_I16_e32:
3751  case AMDGPU::V_ASHRREV_I16_e64:
3752  case AMDGPU::V_ASHRREV_I16_e32_vi:
3753  case AMDGPU::V_ASHRREV_I16_e64_vi:
3754  case AMDGPU::V_ASHRREV_I16_gfx10:
3755 
3756  case AMDGPU::V_LSHLREV_B64_e64:
3757  case AMDGPU::V_LSHLREV_B64_gfx10:
3758  case AMDGPU::V_LSHLREV_B64_vi:
3759 
3760  case AMDGPU::V_LSHRREV_B64_e64:
3761  case AMDGPU::V_LSHRREV_B64_gfx10:
3762  case AMDGPU::V_LSHRREV_B64_vi:
3763 
3764  case AMDGPU::V_ASHRREV_I64_e64:
3765  case AMDGPU::V_ASHRREV_I64_gfx10:
3766  case AMDGPU::V_ASHRREV_I64_vi:
3767 
3768  case AMDGPU::V_PK_LSHLREV_B16:
3769  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3770  case AMDGPU::V_PK_LSHLREV_B16_vi:
3771 
3772  case AMDGPU::V_PK_LSHRREV_B16:
3773  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3774  case AMDGPU::V_PK_LSHRREV_B16_vi:
3775  case AMDGPU::V_PK_ASHRREV_I16:
3776  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3777  case AMDGPU::V_PK_ASHRREV_I16_vi:
3778  return true;
3779  default:
3780  return false;
3781  }
3782 }
3783 
3784 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3785 
3786  using namespace SIInstrFlags;
3787  const unsigned Opcode = Inst.getOpcode();
3788  const MCInstrDesc &Desc = MII.get(Opcode);
3789 
3790  // lds_direct register is defined so that it can be used
3791  // with 9-bit operands only. Ignore encodings which do not accept these.
3792  const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3793  if ((Desc.TSFlags & Enc) == 0)
3794  return None;
3795 
3796  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3797  auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3798  if (SrcIdx == -1)
3799  break;
3800  const auto &Src = Inst.getOperand(SrcIdx);
3801  if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3802 
3803  if (isGFX90A())
3804  return StringRef("lds_direct is not supported on this GPU");
3805 
3806  if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3807  return StringRef("lds_direct cannot be used with this instruction");
3808 
3809  if (SrcName != OpName::src0)
3810  return StringRef("lds_direct may be used as src0 only");
3811  }
3812  }
3813 
3814  return None;
3815 }
3816 
3817 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3818  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3819  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3820  if (Op.isFlatOffset())
3821  return Op.getStartLoc();
3822  }
3823  return getLoc();
3824 }
3825 
3826 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3827  const OperandVector &Operands) {
3828  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3829  if ((TSFlags & SIInstrFlags::FLAT) == 0)
3830  return true;
3831 
3832  auto Opcode = Inst.getOpcode();
3833  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3834  assert(OpNum != -1);
3835 
3836  const auto &Op = Inst.getOperand(OpNum);
3837  if (!hasFlatOffsets() && Op.getImm() != 0) {
3838  Error(getFlatOffsetLoc(Operands),
3839  "flat offset modifier is not supported on this GPU");
3840  return false;
3841  }
3842 
3843  // For FLAT segment the offset must be positive;
3844  // MSB is ignored and forced to zero.
3845  if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) {
3846  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3847  if (!isIntN(OffsetSize, Op.getImm())) {
3848  Error(getFlatOffsetLoc(Operands),
3849  Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3850  return false;
3851  }
3852  } else {
3853  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3854  if (!isUIntN(OffsetSize, Op.getImm())) {
3855  Error(getFlatOffsetLoc(Operands),
3856  Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3857  return false;
3858  }
3859  }
3860 
3861  return true;
3862 }
3863 
3864 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3865  // Start with second operand because SMEM Offset cannot be dst or src0.
3866  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3867  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3868  if (Op.isSMEMOffset())
3869  return Op.getStartLoc();
3870  }
3871  return getLoc();
3872 }
3873 
3874 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3875  const OperandVector &Operands) {
3876  if (isCI() || isSI())
3877  return true;
3878 
3879  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3880  if ((TSFlags & SIInstrFlags::SMRD) == 0)
3881  return true;
3882 
3883  auto Opcode = Inst.getOpcode();
3884  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3885  if (OpNum == -1)
3886  return true;
3887 
3888  const auto &Op = Inst.getOperand(OpNum);
3889  if (!Op.isImm())
3890  return true;
3891 
3892  uint64_t Offset = Op.getImm();
3893  bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3894  if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3895  AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3896  return true;
3897 
3898  Error(getSMEMOffsetLoc(Operands),
3899  (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3900  "expected a 21-bit signed offset");
3901 
3902  return false;
3903 }
3904 
3905 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3906  unsigned Opcode = Inst.getOpcode();
3907  const MCInstrDesc &Desc = MII.get(Opcode);
3908  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3909  return true;
3910 
3911  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3912  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3913 
3914  const int OpIndices[] = { Src0Idx, Src1Idx };
3915 
3916  unsigned NumExprs = 0;
3917  unsigned NumLiterals = 0;
3918  uint32_t LiteralValue;
3919 
3920  for (int OpIdx : OpIndices) {
3921  if (OpIdx == -1) break;
3922 
3923  const MCOperand &MO = Inst.getOperand(OpIdx);
3924  // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3925  if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3926  if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3927  uint32_t Value = static_cast<uint32_t>(MO.getImm());
3928  if (NumLiterals == 0 || LiteralValue != Value) {
3929  LiteralValue = Value;
3930  ++NumLiterals;
3931  }
3932  } else if (MO.isExpr()) {
3933  ++NumExprs;
3934  }
3935  }
3936  }
3937 
3938  return NumLiterals + NumExprs <= 1;
3939 }
3940 
3941 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3942  const unsigned Opc = Inst.getOpcode();
3943  if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3944  Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3945  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3946  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3947 
3948  if (OpSel & ~3)
3949  return false;
3950  }
3951  return true;
3952 }
3953 
3954 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
3955  const OperandVector &Operands) {
3956  const unsigned Opc = Inst.getOpcode();
3957  int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
3958  if (DppCtrlIdx < 0)
3959  return true;
3960  unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
3961 
3962  if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
3963  // DPP64 is supported for row_newbcast only.
3964  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3965  if (Src0Idx >= 0 &&
3966  getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
3967  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
3968  Error(S, "64 bit dpp only supports row_newbcast");
3969  return false;
3970  }
3971  }
3972 
3973  return true;
3974 }
3975 
3976 // Check if VCC register matches wavefront size
3977 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3978  auto FB = getFeatureBits();
3979  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3980  (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3981 }
3982 
3983 // VOP3 literal is only allowed in GFX10+ and only one can be used
3984 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3985  const OperandVector &Operands) {
3986  unsigned Opcode = Inst.getOpcode();
3987  const MCInstrDesc &Desc = MII.get(Opcode);
3988  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3989  return true;
3990 
3991  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3992  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3993  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3994 
3995  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3996 
3997  unsigned NumExprs = 0;
3998  unsigned NumLiterals = 0;
3999  uint32_t LiteralValue;
4000 
4001  for (int OpIdx : OpIndices) {
4002  if (OpIdx == -1) break;
4003 
4004  const MCOperand &MO = Inst.getOperand(OpIdx);
4005  if (!MO.isImm() && !MO.isExpr())
4006  continue;
4007  if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4008  continue;
4009 
4010  if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4011  getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4012  Error(getConstLoc(Operands),
4013  "inline constants are not allowed for this operand");
4014  return false;
4015  }
4016 
4017  if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4018  uint32_t Value = static_cast<uint32_t>(MO.getImm());
4019  if (NumLiterals == 0 || LiteralValue != Value) {
4020  LiteralValue = Value;
4021  ++NumLiterals;
4022  }
4023  } else if (MO.isExpr()) {
4024  ++NumExprs;
4025  }
4026  }
4027  NumLiterals += NumExprs;
4028 
4029  if (!NumLiterals)
4030  return true;
4031 
4032  if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4033  Error(getLitLoc(Operands), "literal operands are not supported");
4034  return false;
4035  }
4036 
4037  if (NumLiterals > 1) {
4038  Error(getLitLoc(Operands), "only one literal operand is allowed");
4039  return false;
4040  }
4041 
4042  return true;
4043 }
4044 
4045 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4046 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4047  const MCRegisterInfo *MRI) {
4048  int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4049  if (OpIdx < 0)
4050  return -1;
4051 
4052  const MCOperand &Op = Inst.getOperand(OpIdx);
4053  if (!Op.isReg())
4054  return -1;
4055 
4056  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4057  auto Reg = Sub ? Sub : Op.getReg();
4058  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4059  return AGPR32.contains(Reg) ? 1 : 0;
4060 }
4061 
4062 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4063  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4064  if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4065  SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4066  SIInstrFlags::DS)) == 0)
4067  return true;
4068 
4069  uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4070  : AMDGPU::OpName::vdata;
4071 
4072  const MCRegisterInfo *MRI = getMRI();
4073  int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4074  int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4075 
4076  if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4077  int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4078  if (Data2Areg >= 0 && Data2Areg != DataAreg)
4079  return false;
4080  }
4081 
4082  auto FB = getFeatureBits();
4083  if (FB[AMDGPU::FeatureGFX90AInsts]) {
4084  if (DataAreg < 0 || DstAreg < 0)
4085  return true;
4086  return DstAreg == DataAreg;
4087  }
4088 
4089  return DstAreg < 1 && DataAreg < 1;
4090 }
4091 
4092 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4093  auto FB = getFeatureBits();
4094  if (!FB[AMDGPU::FeatureGFX90AInsts])
4095  return true;
4096 
4097  const MCRegisterInfo *MRI = getMRI();
4098  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4099  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4100  for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4101  const MCOperand &Op = Inst.getOperand(I);
4102  if (!Op.isReg())
4103  continue;
4104 
4105  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4106  if (!Sub)
4107  continue;
4108 
4109  if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4110  return false;
4111  if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4112  return false;
4113  }
4114 
4115  return true;
4116 }
4117 
4118 // gfx90a has an undocumented limitation:
4119 // DS_GWS opcodes must use even aligned registers.
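// For example, ds_gws_init with its data operand in v1 (an odd VGPR) is
// rejected by this check, while v0 or v2 is accepted.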
4120 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4121  const OperandVector &Operands) {
4122  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4123  return true;
4124 
4125  int Opc = Inst.getOpcode();
4126  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4127  Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4128  return true;
4129 
4130  const MCRegisterInfo *MRI = getMRI();
4131  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4132  int Data0Pos =
4133  AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4134  assert(Data0Pos != -1);
4135  auto Reg = Inst.getOperand(Data0Pos).getReg();
4136  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4137  if (RegIdx & 1) {
4138  SMLoc RegLoc = getRegLoc(Reg, Operands);
4139  Error(RegLoc, "vgpr must be even aligned");
4140  return false;
4141  }
4142 
4143  return true;
4144 }
4145 
4146 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4147  const OperandVector &Operands,
4148  const SMLoc &IDLoc) {
4149  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4150  AMDGPU::OpName::cpol);
4151  if (CPolPos == -1)
4152  return true;
4153 
4154  unsigned CPol = Inst.getOperand(CPolPos).getImm();
4155 
4156  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4157  if ((TSFlags & (SIInstrFlags::SMRD)) &&
4158  (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4159  Error(IDLoc, "invalid cache policy for SMRD instruction");
4160  return false;
4161  }
4162 
4163  if (isGFX90A() && (CPol & CPol::SCC)) {
4164  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4165  StringRef CStr(S.getPointer());
4166  S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4167  Error(S, "scc is not supported on this GPU");
4168  return false;
4169  }
4170 
4170 
4171  if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
4172  return true;
4173 
4174  if (TSFlags & SIInstrFlags::IsAtomicRet) {
4175  if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4176  Error(IDLoc, "instruction must use glc");
4177  return false;
4178  }
4179  } else {
4180  if (CPol & CPol::GLC) {
4181  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4182  StringRef CStr(S.getPointer());
4183  S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4184  Error(S, "instruction must not use glc");
4185  return false;
4186  }
4187  }
4188 
4189  return true;
4190 }
4191 
4192 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4193  const SMLoc &IDLoc,
4194  const OperandVector &Operands) {
4195  if (auto ErrMsg = validateLdsDirect(Inst)) {
4196  Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4197  return false;
4198  }
4199  if (!validateSOPLiteral(Inst)) {
4200  Error(getLitLoc(Operands),
4201  "only one literal operand is allowed");
4202  return false;
4203  }
4204  if (!validateVOP3Literal(Inst, Operands)) {
4205  return false;
4206  }
4207  if (!validateConstantBusLimitations(Inst, Operands)) {
4208  return false;
4209  }
4210  if (!validateEarlyClobberLimitations(Inst, Operands)) {
4211  return false;
4212  }
4213  if (!validateIntClampSupported(Inst)) {
4214  Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4215  "integer clamping is not supported on this GPU");
4216  return false;
4217  }
4218  if (!validateOpSel(Inst)) {
4219  Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4220  "invalid op_sel operand");
4221  return false;
4222  }
4223  if (!validateDPP(Inst, Operands)) {
4224  return false;
4225  }
4226  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4227  if (!validateMIMGD16(Inst)) {
4228  Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4229  "d16 modifier is not supported on this GPU");
4230  return false;
4231  }
4232  if (!validateMIMGDim(Inst)) {
4233  Error(IDLoc, "dim modifier is required on this GPU");
4234  return false;
4235  }
4236  if (!validateMIMGMSAA(Inst)) {
4237  Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4238  "invalid dim; must be MSAA type");
4239  return false;
4240  }
4241  if (!validateMIMGDataSize(Inst)) {
4242  Error(IDLoc,
4243  "image data size does not match dmask and tfe");
4244  return false;
4245  }
4246  if (!validateMIMGAddrSize(Inst)) {
4247  Error(IDLoc,
4248  "image address size does not match dim and a16");
4249  return false;
4250  }
4251  if (!validateMIMGAtomicDMask(Inst)) {
4252  Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4253  "invalid atomic image dmask");
4254  return false;
4255  }
4256  if (!validateMIMGGatherDMask(Inst)) {
4257  Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4258  "invalid image_gather dmask: only one bit must be set");
4259  return false;
4260  }
4261  if (!validateMovrels(Inst, Operands)) {
4262  return false;
4263  }
4264  if (!validateFlatOffset(Inst, Operands)) {
4265  return false;
4266  }
4267  if (!validateSMEMOffset(Inst, Operands)) {
4268  return false;
4269  }
4270  if (!validateMAIAccWrite(Inst, Operands)) {
4271  return false;
4272  }
4273  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4274  return false;
4275  }
4276 
4277  if (!validateAGPRLdSt(Inst)) {
4278  Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4279  ? "invalid register class: data and dst should be all VGPR or AGPR"
4280  : "invalid register class: agpr loads and stores not supported on this GPU"
4281  );
4282  return false;
4283  }
4284  if (!validateVGPRAlign(Inst)) {
4285  Error(IDLoc,
4286  "invalid register class: vgpr tuples must be 64 bit aligned");
4287  return false;
4288  }
4289  if (!validateGWS(Inst, Operands)) {
4290  return false;
4291  }
4292 
4293  if (!validateDivScale(Inst)) {
4294  Error(IDLoc, "ABS not allowed in VOP3B instructions");
4295  return false;
4296  }
4297  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4298  return false;
4299  }
4300 
4301  return true;
4302 }
4303 
4304 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4305  const FeatureBitset &FBS,
4306  unsigned VariantID = 0);
4307 
4308 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4309  const FeatureBitset &AvailableFeatures,
4310  unsigned VariantID);
4311 
4312 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4313  const FeatureBitset &FBS) {
4314  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4315 }
4316 
4317 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4318  const FeatureBitset &FBS,
4319  ArrayRef<unsigned> Variants) {
4320  for (auto Variant : Variants) {
4321  if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4322  return true;
4323  }
4324 
4325  return false;
4326 }
4327 
4328 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4329  const SMLoc &IDLoc) {
4330  FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4331 
4332  // Check if requested instruction variant is supported.
4333  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4334  return false;
4335 
4336  // This instruction is not supported.
4337  // Clear any other pending errors because they are no longer relevant.
4338  getParser().clearPendingErrors();
4339 
4340  // Requested instruction variant is not supported.
4341  // Check if any other variants are supported.
4342  StringRef VariantName = getMatchedVariantName();
4343  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4344  return Error(IDLoc,
4345  Twine(VariantName,
4346  " variant of this instruction is not supported"));
4347  }
4348 
4349  // Finally check if this instruction is supported on any other GPU.
4350  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4351  return Error(IDLoc, "instruction not supported on this GPU");
4352  }
4353 
4354  // Instruction not supported on any GPU. Probably a typo.
4355  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4356  return Error(IDLoc, "invalid instruction" + Suggestion);
4357 }
4358 
4359 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4360  OperandVector &Operands,
4361  MCStreamer &Out,
4362  uint64_t &ErrorInfo,
4363  bool MatchingInlineAsm) {
4364  MCInst Inst;
4365  unsigned Result = Match_Success;
4366  for (auto Variant : getMatchedVariants()) {
4367  uint64_t EI;
4368  auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4369  Variant);
4370  // We order match statuses from least to most specific. We use the most
4371  // specific status as the result:
4372  // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4373  if ((R == Match_Success) ||
4374  (R == Match_PreferE32) ||
4375  (R == Match_MissingFeature && Result != Match_PreferE32) ||
4376  (R == Match_InvalidOperand && Result != Match_MissingFeature
4377  && Result != Match_PreferE32) ||
4378  (R == Match_MnemonicFail && Result != Match_InvalidOperand
4379  && Result != Match_MissingFeature
4380  && Result != Match_PreferE32)) {
4381  Result = R;
4382  ErrorInfo = EI;
4383  }
4384  if (R == Match_Success)
4385  break;
4386  }
4387 
4388  if (Result == Match_Success) {
4389  if (!validateInstruction(Inst, IDLoc, Operands)) {
4390  return true;
4391  }
4392  Inst.setLoc(IDLoc);
4393  Out.emitInstruction(Inst, getSTI());
4394  return false;
4395  }
4396 
4397  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4398  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4399  return true;
4400  }
4401 
4402  switch (Result) {
4403  default: break;
4404  case Match_MissingFeature:
4405  // It has been verified that the specified instruction
4406  // mnemonic is valid. A match was found but it requires
4407  // features which are not supported on this GPU.
4408  return Error(IDLoc, "operands are not valid for this GPU or mode");
4409 
4410  case Match_InvalidOperand: {
4411  SMLoc ErrorLoc = IDLoc;
4412  if (ErrorInfo != ~0ULL) {
4413  if (ErrorInfo >= Operands.size()) {
4414  return Error(IDLoc, "too few operands for instruction");
4415  }
4416  ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4417  if (ErrorLoc == SMLoc())
4418  ErrorLoc = IDLoc;
4419  }
4420  return Error(ErrorLoc, "invalid operand for instruction");
4421  }
4422 
4423  case Match_PreferE32:
4424  return Error(IDLoc, "internal error: instruction without _e64 suffix "
4425  "should be encoded as e32");
4426  case Match_MnemonicFail:
4427  llvm_unreachable("Invalid instructions should have been handled already");
4428  }
4429  llvm_unreachable("Implement any new match types added!");
4430 }
4431 
4432 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4433  int64_t Tmp = -1;
4434  if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4435  return true;
4436  }
4437  if (getParser().parseAbsoluteExpression(Tmp)) {
4438  return true;
4439  }
4440  Ret = static_cast<uint32_t>(Tmp);
4441  return false;
4442 }
4443 
4444 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4445  uint32_t &Minor) {
4446  if (ParseAsAbsoluteExpression(Major))
4447  return TokError("invalid major version");
4448 
4449  if (!trySkipToken(AsmToken::Comma))
4450  return TokError("minor version number required, comma expected");
4451 
4452  if (ParseAsAbsoluteExpression(Minor))
4453  return TokError("invalid minor version");
4454 
4455  return false;
4456 }
4457 
4458 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4459  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4460  return TokError("directive only supported for amdgcn architecture");
4461 
4462  std::string TargetIDDirective;
4463  SMLoc TargetStart = getTok().getLoc();
4464  if (getParser().parseEscapedString(TargetIDDirective))
4465  return true;
4466 
4467  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4468  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4469  return getParser().Error(TargetRange.Start,
4470  (Twine(".amdgcn_target directive's target id ") +
4471  Twine(TargetIDDirective) +
4472  Twine(" does not match the specified target id ") +
4473  Twine(getTargetStreamer().getTargetID()->toString())).str());
4474 
4475  return false;
4476 }
4477 
4478 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4479  return Error(Range.Start, "value out of range", Range);
4480 }
4481 
4482 bool AMDGPUAsmParser::calculateGPRBlocks(
4483  const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4484  bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4485  SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4486  unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4487  // TODO(scott.linder): These calculations are duplicated from
4488  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4489  IsaVersion Version = getIsaVersion(getSTI().getCPU());
4490 
4491  unsigned NumVGPRs = NextFreeVGPR;
4492  unsigned NumSGPRs = NextFreeSGPR;
4493 
4494  if (Version.Major >= 10)
4495  NumSGPRs = 0;
4496  else {
4497  unsigned MaxAddressableNumSGPRs =
4498  IsaInfo::getAddressableNumSGPRs(&getSTI());
4499 
4500  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4501  NumSGPRs > MaxAddressableNumSGPRs)
4502  return OutOfRangeError(SGPRRange);
4503 
4504  NumSGPRs +=
4505  IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4506 
4507  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4508  NumSGPRs > MaxAddressableNumSGPRs)
4509  return OutOfRangeError(SGPRRange);
4510 
4511  if (Features.test(FeatureSGPRInitBug))
4512  NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4513  }
4514 
4515  VGPRBlocks =
4516  IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4517  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4518 
4519  return false;
4520 }
4521 
4522 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4523  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4524  return TokError("directive only supported for amdgcn architecture");
4525 
4526  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4527  return TokError("directive only supported for amdhsa OS");
4528 
4529  StringRef KernelName;
4530  if (getParser().parseIdentifier(KernelName))
4531  return true;
4532 
4533  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4534 
4535  StringSet<> Seen;
4536 
4537  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4538 
4539  SMRange VGPRRange;
4540  uint64_t NextFreeVGPR = 0;
4541  uint64_t AccumOffset = 0;
4542  SMRange SGPRRange;
4543  uint64_t NextFreeSGPR = 0;
4544  unsigned UserSGPRCount = 0;
4545  bool ReserveVCC = true;
4546  bool ReserveFlatScr = true;
4547  Optional<bool> EnableWavefrontSize32;
4548 
4549  while (true) {
4550  while (trySkipToken(AsmToken::EndOfStatement));
4551 
4552  StringRef ID;
4553  SMRange IDRange = getTok().getLocRange();
4554  if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4555  return true;
4556 
4557  if (ID == ".end_amdhsa_kernel")
4558  break;
4559 
4560  if (Seen.find(ID) != Seen.end())
4561  return TokError(".amdhsa_ directives cannot be repeated");
4562  Seen.insert(ID);
4563 
4564  SMLoc ValStart = getLoc();
4565  int64_t IVal;
4566  if (getParser().parseAbsoluteExpression(IVal))
4567  return true;
4568  SMLoc ValEnd = getLoc();
4569  SMRange ValRange = SMRange(ValStart, ValEnd);
4570 
4571  if (IVal < 0)
4572  return OutOfRangeError(ValRange);
4573 
4574  uint64_t Val = IVal;
4575 
4576 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
4577  if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
4578  return OutOfRangeError(RANGE); \
4579  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4580 
4581  if (ID == ".amdhsa_group_segment_fixed_size") {
4582  if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4583  return OutOfRangeError(ValRange);
4584  KD.group_segment_fixed_size = Val;
4585  } else if (ID == ".amdhsa_private_segment_fixed_size") {
4586  if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4587  return OutOfRangeError(ValRange);
4588  KD.private_segment_fixed_size = Val;
4589  } else if (ID == ".amdhsa_kernarg_size") {
4590  if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4591  return OutOfRangeError(ValRange);
4592  KD.kernarg_size = Val;
4593  } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4594  if (hasArchitectedFlatScratch())
4595  return Error(IDRange.Start,
4596  "directive is not supported with architected flat scratch",
4597  IDRange);
4598  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4599  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4600  Val, ValRange);
4601  if (Val)
4602  UserSGPRCount += 4;
4603  } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4604  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4605  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4606  ValRange);
4607  if (Val)
4608  UserSGPRCount += 2;
4609  } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4610  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4611  KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4612  ValRange);
4613  if (Val)
4614  UserSGPRCount += 2;
4615  } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4616  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4617  KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4618  Val, ValRange);
4619  if (Val)
4620  UserSGPRCount += 2;
4621  } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4622  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4623  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4624  ValRange);
4625  if (Val)
4626  UserSGPRCount += 2;
4627  } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4628  if (hasArchitectedFlatScratch())
4629  return Error(IDRange.Start,
4630  "directive is not supported with architected flat scratch",
4631  IDRange);
4632  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4633  KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4634  ValRange);
4635  if (Val)
4636  UserSGPRCount += 2;
4637  } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4638  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4639  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4640  Val, ValRange);
4641  if (Val)
4642  UserSGPRCount += 1;
4643  } else if (ID == ".amdhsa_wavefront_size32") {
4644  if (IVersion.Major < 10)
4645  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4646  EnableWavefrontSize32 = Val;
4647  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4648  KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4649  Val, ValRange);
4650  } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4651  if (hasArchitectedFlatScratch())
4652  return Error(IDRange.Start,
4653  "directive is not supported with architected flat scratch",
4654  IDRange);
4655  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4656  COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4657  } else if (ID == ".amdhsa_enable_private_segment") {
4658  if (!hasArchitectedFlatScratch())
4659  return Error(
4660  IDRange.Start,
4661  "directive is not supported without architected flat scratch",
4662  IDRange);
4663  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4664  COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4665  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4666  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4667  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4668  ValRange);
4669  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4670  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4671  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4672  ValRange);
4673  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4674  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4675  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4676  ValRange);
4677  } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4678  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4679  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4680  ValRange);
4681  } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4682  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4683  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4684  ValRange);
4685  } else if (ID == ".amdhsa_next_free_vgpr") {
4686  VGPRRange = ValRange;
4687  NextFreeVGPR = Val;
4688  } else if (ID == ".amdhsa_next_free_sgpr") {
4689  SGPRRange = ValRange;
4690  NextFreeSGPR = Val;
4691  } else if (ID == ".amdhsa_accum_offset") {
4692  if (!isGFX90A())
4693  return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4694  AccumOffset = Val;
4695  } else if (ID == ".amdhsa_reserve_vcc") {
4696  if (!isUInt<1>(Val))
4697  return OutOfRangeError(ValRange);
4698  ReserveVCC = Val;
4699  } else if (ID == ".amdhsa_reserve_flat_scratch") {
4700  if (IVersion.Major < 7)
4701  return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4702  if (hasArchitectedFlatScratch())
4703  return Error(IDRange.Start,
4704  "directive is not supported with architected flat scratch",
4705  IDRange);
4706  if (!isUInt<1>(Val))
4707  return OutOfRangeError(ValRange);
4708  ReserveFlatScr = Val;
4709  } else if (ID == ".amdhsa_reserve_xnack_mask") {
4710  if (IVersion.Major < 8)
4711  return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4712  if (!isUInt<1>(Val))
4713  return OutOfRangeError(ValRange);
4714  if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4715  return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4716  IDRange);
4717  } else if (ID == ".amdhsa_float_round_mode_32") {
4718  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4719  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4720  } else if (ID == ".amdhsa_float_round_mode_16_64") {
4721  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4722  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4723  } else if (ID == ".amdhsa_float_denorm_mode_32") {
4724  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4725  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4726  } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4727  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4728  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4729  ValRange);
4730  } else if (ID == ".amdhsa_dx10_clamp") {
4731  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4732  COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4733  } else if (ID == ".amdhsa_ieee_mode") {
4734  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4735  Val, ValRange);
4736  } else if (ID == ".amdhsa_fp16_overflow") {
4737  if (IVersion.Major < 9)
4738  return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4739  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4740  ValRange);
4741  } else if (ID == ".amdhsa_tg_split") {
4742  if (!isGFX90A())
4743  return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4744  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4745  ValRange);
4746  } else if (ID == ".amdhsa_workgroup_processor_mode") {
4747  if (IVersion.Major < 10)
4748  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4749  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4750  ValRange);
4751  } else if (ID == ".amdhsa_memory_ordered") {
4752  if (IVersion.Major < 10)
4753  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4754  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4755  ValRange);
4756  } else if (ID == ".amdhsa_forward_progress") {
4757  if (IVersion.Major < 10)
4758  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4759  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4760  ValRange);
4761  } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4762  PARSE_BITS_ENTRY(
4763  KD.compute_pgm_rsrc2,
4764  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4765  ValRange);
4766  } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4767  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4768  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4769  Val, ValRange);
4770  } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4771  PARSE_BITS_ENTRY(
4772  KD.compute_pgm_rsrc2,
4773  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4774  ValRange);
4775  } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4776  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4777  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4778  Val, ValRange);
4779  } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4780  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4781  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4782  Val, ValRange);
4783  } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4784  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4785  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4786  Val, ValRange);
4787  } else if (ID == ".amdhsa_exception_int_div_zero") {
4788  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4789  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4790  Val, ValRange);
4791  } else {
4792  return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4793  }
4794 
4795 #undef PARSE_BITS_ENTRY
4796  }
4797 
4798  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4799  return TokError(".amdhsa_next_free_vgpr directive is required");
4800 
4801  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4802  return TokError(".amdhsa_next_free_sgpr directive is required");
4803 
4804  unsigned VGPRBlocks;
4805  unsigned SGPRBlocks;
4806  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4807  getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4808  EnableWavefrontSize32, NextFreeVGPR,
4809  VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4810  SGPRBlocks))
4811  return true;
4812 
4813  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4814  VGPRBlocks))
4815  return OutOfRangeError(VGPRRange);
4816  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4817  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4818 
4819  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4820  SGPRBlocks))
4821  return OutOfRangeError(SGPRRange);
4822  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4823  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4824  SGPRBlocks);
4825 
4826  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4827  return TokError("too many user SGPRs enabled");
4828  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4829  UserSGPRCount);
4830 
4831  if (isGFX90A()) {
4832  if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4833  return TokError(".amdhsa_accum_offset directive is required");
4834  if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4835  return TokError("accum_offset should be in range [4..256] in "
4836  "increments of 4");
4837  if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4838  return TokError("accum_offset exceeds total VGPR allocation");
4839  AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4840  (AccumOffset / 4 - 1));
4841  }
4842 
4843  getTargetStreamer().EmitAmdhsaKernelDescriptor(
4844  getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4845  ReserveFlatScr);
4846  return false;
4847 }
4848 
4849 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4850  uint32_t Major;
4851  uint32_t Minor;
4852 
4853  if (ParseDirectiveMajorMinor(Major, Minor))
4854  return true;
4855 
4856  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4857  return false;
4858 }
4859 
4860 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4861  uint32_t Major;
4862  uint32_t Minor;
4863  uint32_t Stepping;
4864  StringRef VendorName;
4865  StringRef ArchName;
4866 
4867  // If this directive has no arguments, then use the ISA version for the
4868  // targeted GPU.
4869  if (isToken(AsmToken::EndOfStatement)) {
4870  IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4871  getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4872  ISA.Stepping,
4873  "AMD", "AMDGPU");
4874  return false;
4875  }
4876 
4877  if (ParseDirectiveMajorMinor(Major, Minor))
4878  return true;
4879 
4880  if (!trySkipToken(AsmToken::Comma))
4881  return TokError("stepping version number required, comma expected");
4882 
4883  if (ParseAsAbsoluteExpression(Stepping))
4884  return TokError("invalid stepping version");
4885 
4886  if (!trySkipToken(AsmToken::Comma))
4887  return TokError("vendor name required, comma expected");
4888 
4889  if (!parseString(VendorName, "invalid vendor name"))
4890  return true;
4891 
4892  if (!trySkipToken(AsmToken::Comma))
4893  return TokError("arch name required, comma expected");
4894 
4895  if (!parseString(ArchName, "invalid arch name"))
4896  return true;
4897 
4898  getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
4899  VendorName, ArchName);
4900  return false;
4901 }
4902 
4903 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4904  amd_kernel_code_t &Header) {
4905  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4906  // assembly for backwards compatibility.
4907  if (ID == "max_scratch_backing_memory_byte_size") {
4908  Parser.eatToEndOfStatement();
4909  return false;
4910  }
4911 
4912  SmallString<40> ErrStr;
4913  raw_svector_ostream Err(ErrStr);
4914  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4915  return TokError(Err.str());
4916  }
4917  Lex();
4918 
4919  if (ID == "enable_wavefront_size32") {
4920  if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4921  if (!isGFX10Plus())
4922  return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4923  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4924  return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4925  } else {
4926  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4927  return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4928  }
4929  }
4930 
4931  if (ID == "wavefront_size") {
4932  if (Header.wavefront_size == 5) {
4933  if (!isGFX10Plus())
4934  return TokError("wavefront_size=5 is only allowed on GFX10+");
4935  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4936  return TokError("wavefront_size=5 requires +WavefrontSize32");
4937  } else if (Header.wavefront_size == 6) {
4938  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4939  return TokError("wavefront_size=6 requires +WavefrontSize64");
4940  }
4941  }
4942 
4943  if (ID == "enable_wgp_mode") {
4944  if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4945  !isGFX10Plus())
4946  return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4947  }
4948 
4949  if (ID == "enable_mem_ordered") {
4950  if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4951  !isGFX10Plus())
4952  return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4953  }
4954 
4955  if (ID == "enable_fwd_progress") {
4956  if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4957  !isGFX10Plus())
4958  return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4959  }
4960 
4961  return false;
4962 }
4963 
4964 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4965  amd_kernel_code_t Header;
4966  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4967 
4968  while (true) {
4969  // Lex EndOfStatement. This is in a while loop, because lexing a comment
4970  // will set the current token to EndOfStatement.
4971  while(trySkipToken(AsmToken::EndOfStatement));
4972 
4973  StringRef ID;
4974  if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4975  return true;
4976 
4977  if (ID == ".end_amd_kernel_code_t")
4978  break;
4979 
4980  if (ParseAMDKernelCodeTValue(ID, Header))
4981  return true;
4982  }
4983 
4984  getTargetStreamer().EmitAMDKernelCodeT(Header);
4985 
4986  return false;
4987 }
4988 
4989 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4990  StringRef KernelName;
4991  if (!parseId(KernelName, "expected symbol name"))
4992  return true;
4993 
4994  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4995  ELF::STT_AMDGPU_HSA_KERNEL);
4996 
4997  KernelScope.initialize(getContext());
4998  return false;
4999 }
5000 
5001 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5002  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5003  return Error(getLoc(),
5004  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5005  "architectures");
5006  }
5007 
5008  auto TargetIDDirective = getLexer().getTok().getStringContents();
5009  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5010  return Error(getParser().getTok().getLoc(), "target id must match options");
5011 
5012  getTargetStreamer().EmitISAVersion();
5013  Lex();
5014 
5015  return false;
5016 }
5017 
5018 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5019  const char *AssemblerDirectiveBegin;
5020  const char *AssemblerDirectiveEnd;
5021  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5022  isHsaAbiVersion3Or4(&getSTI())
5023  ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5024  HSAMD::V3::AssemblerDirectiveEnd)
5025  : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5026  HSAMD::AssemblerDirectiveEnd);
5027 
5028  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5029  return Error(getLoc(),
5030  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5031  "not available on non-amdhsa OSes")).str());
5032  }
5033 
5034  std::string HSAMetadataString;
5035  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5036  HSAMetadataString))
5037  return true;
5038 
5039  if (isHsaAbiVersion3Or4(&getSTI())) {
5040  if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5041  return Error(getLoc(), "invalid HSA metadata");
5042  } else {
5043  if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5044  return Error(getLoc(), "invalid HSA metadata");
5045  }
5046 
5047  return false;
5048 }
5049 
5050 /// Common code to parse out a block of text (typically YAML) between start and
5051 /// end directives.
5052 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5053  const char *AssemblerDirectiveEnd,
5054  std::string &CollectString) {
5055 
5056  raw_string_ostream CollectStream(CollectString);
5057 
5058  getLexer().setSkipSpace(false);
5059 
5060  bool FoundEnd = false;
5061  while (!isToken(AsmToken::Eof)) {
5062  while (isToken(AsmToken::Space)) {
5063  CollectStream << getTokenStr();
5064  Lex();
5065  }
5066 
5067  if (trySkipId(AssemblerDirectiveEnd)) {
5068  FoundEnd = true;
5069  break;
5070  }
5071 
5072  CollectStream << Parser.parseStringToEndOfStatement()
5073  << getContext().getAsmInfo()->getSeparatorString();
5074 
5075  Parser.eatToEndOfStatement();
5076  }
5077 
5078  getLexer().setSkipSpace(true);
5079 
5080  if (isToken(AsmToken::Eof) && !FoundEnd) {
5081  return TokError(Twine("expected directive ") +
5082  Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5083  }
5084 
5085  CollectStream.flush();
5086  return false;
5087 }
5088 
5089 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5090 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5091  std::string String;
5092  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5093  AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5094  return true;
5095 
5096  auto PALMetadata = getTargetStreamer().getPALMetadata();
5097  if (!PALMetadata->setFromString(String))
5098  return Error(getLoc(), "invalid PAL metadata");
5099  return false;
5100 }
5101 
5102 /// Parse the assembler directive for old linear-format PAL metadata.
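/// The directive body is expected to be a comma-separated, even-length list of
/// register/value pairs, for example (illustrative numbers only):
///   0x2c0a, 0x0, 0x2c0b, 0x42
/// Each pair is forwarded to PALMetadata->setRegister() below.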
5103 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5104  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5105  return Error(getLoc(),
5106  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5107  "not available on non-amdpal OSes")).str());
5108  }
5109 
5110  auto PALMetadata = getTargetStreamer().getPALMetadata();
5111  PALMetadata->setLegacy();
5112  for (;;) {
5113  uint32_t Key, Value;
5114  if (ParseAsAbsoluteExpression(Key)) {
5115  return TokError(Twine("invalid value in ") +
5116  Twine(PALMD::AssemblerDirective));
5117  }
5118  if (!trySkipToken(AsmToken::Comma)) {
5119  return TokError(Twine("expected an even number of values in ") +
5120  Twine(PALMD::AssemblerDirective));
5121  }
5122  if (ParseAsAbsoluteExpression(Value)) {
5123  return TokError(Twine("invalid value in ") +
5124  Twine(PALMD::AssemblerDirective));
5125  }
5126  PALMetadata->setRegister(Key, Value);
5127  if (!trySkipToken(AsmToken::Comma))
5128  break;
5129  }
5130  return false;
5131 }
5132 
5133 /// ParseDirectiveAMDGPULDS
5134 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
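/// A representative use (illustrative name and values):
///   .amdgpu_lds my_lds_var, 512, 16
/// reserves 512 bytes of LDS for my_lds_var with 16-byte alignment; the
/// alignment argument is optional and defaults to 4 below.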
5135 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5136  if (getParser().checkForValidSection())
5137  return true;
5138 
5139  StringRef Name;
5140  SMLoc NameLoc = getLoc();
5141  if (getParser().parseIdentifier(Name))
5142  return TokError("expected identifier in directive");
5143 
5144  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5145  if (parseToken(AsmToken::Comma, "expected ','"))
5146  return true;
5147 
5148  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5149 
5150  int64_t Size;
5151  SMLoc SizeLoc = getLoc();
5152  if (getParser().parseAbsoluteExpression(Size))
5153  return true;
5154  if (Size < 0)
5155  return Error(SizeLoc, "size must be non-negative");
5156  if (Size > LocalMemorySize)
5157  return Error(SizeLoc, "size is too large");
5158 
5159  int64_t Alignment = 4;
5160  if (trySkipToken(AsmToken::Comma)) {
5161  SMLoc AlignLoc = getLoc();
5162  if (getParser().parseAbsoluteExpression(Alignment))
5163  return true;
5164  if (Alignment < 0 || !isPowerOf2_64(Alignment))
5165  return Error(AlignLoc, "alignment must be a power of two");
5166 
5167  // Alignment larger than the size of LDS is possible in theory, as long
5168  // as the linker manages to place the symbol at address 0, but we do want
5169  // to make sure the alignment fits nicely into a 32-bit integer.
5170  if (Alignment >= 1u << 31)
5171  return Error(AlignLoc, "alignment is too large");
5172  }
5173 
5174  if (parseToken(AsmToken::EndOfStatement,
5175  "unexpected token in '.amdgpu_lds' directive"))
5176  return true;
5177 
5178  Symbol->redefineIfPossible();
5179  if (!Symbol->isUndefined())
5180  return Error(NameLoc, "invalid symbol redefinition");
5181 
5182  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5183  return false;
5184 }
5185 
5186 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5187  StringRef IDVal = DirectiveID.getString();
5188 
5189  if (isHsaAbiVersion3Or4(&getSTI())) {
5190  if (IDVal == ".amdhsa_kernel")
5191  return ParseDirectiveAMDHSAKernel();
5192 
5193  // TODO: Restructure/combine with PAL metadata directive.
5194  if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
5195  return ParseDirectiveHSAMetadata();
5196  } else {
5197  if (IDVal == ".hsa_code_object_version")
5198  return ParseDirectiveHSACodeObjectVersion();
5199 
5200  if (IDVal == ".hsa_code_object_isa")
5201  return ParseDirectiveHSACodeObjectISA();
5202 
5203  if (IDVal == ".amd_kernel_code_t")
5204  return ParseDirectiveAMDKernelCodeT();
5205 
5206  if (IDVal == ".amdgpu_hsa_kernel")
5207  return ParseDirectiveAMDGPUHsaKernel();
5208 
5209  if (IDVal == ".amd_amdgpu_isa")
5210  return ParseDirectiveISAVersion();
5211 
5212  if (IDVal == HSAMD::AssemblerDirectiveBegin)
5213  return ParseDirectiveHSAMetadata();
5214  }
5215 
5216  if (IDVal == ".amdgcn_target")
5217  return ParseDirectiveAMDGCNTarget();
5218 
5219  if (IDVal == ".amdgpu_lds")
5220  return ParseDirectiveAMDGPULDS();
5221 
5222  if (IDVal == PALMD::AssemblerDirectiveBegin)
5223  return ParseDirectivePALMetadataBegin();
5224 
5225  if (IDVal == PALMD::AssemblerDirective)
5226  return ParseDirectivePALMetadata();
5227 
5228  return true;
5229 }
5230 
5231 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5232  unsigned RegNo) {
5233 
5234  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5235  R.isValid(); ++R) {
5236  if (*R == RegNo)
5237  return isGFX9Plus();
5238  }
5239 
5240  // GFX10 has 2 more SGPRs 104 and 105.
5241  for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5242  R.isValid(); ++R) {
5243  if (*R == RegNo)
5244  return hasSGPR104_SGPR105();
5245  }
5246 
5247  switch (RegNo) {
5248  case AMDGPU::SRC_SHARED_BASE:
5249  case AMDGPU::SRC_SHARED_LIMIT:
5250  case AMDGPU::SRC_PRIVATE_BASE:
5251  case AMDGPU::SRC_PRIVATE_LIMIT:
5252  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5253  return isGFX9Plus();
5254  case AMDGPU::TBA:
5255  case AMDGPU::TBA_LO:
5256  case AMDGPU::TBA_HI:
5257  case AMDGPU::TMA:
5258  case AMDGPU::TMA_LO:
5259  case AMDGPU::TMA_HI:
5260  return !isGFX9Plus();
5261  case AMDGPU::XNACK_MASK:
5262  case AMDGPU::XNACK_MASK_LO:
5263  case AMDGPU::XNACK_MASK_HI:
5264  return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5265  case AMDGPU::SGPR_NULL:
5266  return isGFX10Plus();
5267  default:
5268  break;
5269  }
5270 
5271  if (isCI())
5272  return true;
5273 
5274  if (isSI() || isGFX10Plus()) {
5275  // No flat_scr on SI.
5276  // On GFX10 flat scratch is not a valid register operand and can only be
5277  // accessed with s_setreg/s_getreg.
5278  switch (RegNo) {
5279  case AMDGPU::FLAT_SCR:
5280  case AMDGPU::FLAT_SCR_LO:
5281  case AMDGPU::FLAT_SCR_HI:
5282  return false;
5283  default:
5284  return true;
5285  }
5286  }
5287 
5288  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5289  // SI/CI have.
5290  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5291  R.isValid(); ++R) {
5292  if (*R == RegNo)
5293  return hasSGPR102_SGPR103();
5294  }
5295 
5296  return true;
5297 }
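// Illustrative consequences of the checks above (register names as written in
// assembly): "ttmp15" and "null" are accepted only on GFX9+ and GFX10+
// respectively, "tba"/"tma" only before GFX9, "xnack_mask" only on VI/GFX9
// targets with XNACK support, and "flat_scratch" is rejected on SI and on
// GFX10+, where it is reachable only through s_setreg/s_getreg.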
5298 
5299 OperandMatchResultTy
5300 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5301  OperandMode Mode) {
5302  // Try to parse with a custom parser
5303  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5304 
5305  // If we successfully parsed the operand or if there was an error parsing,
5306  // we are done.
5307  //
5308  // If we are parsing after we reach EndOfStatement then this means we
5309  // are appending default values to the Operands list. This is only done
5310  // by custom parser, so we shouldn't continue on to the generic parsing.
5311  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5312  isToken(AsmToken::EndOfStatement))
5313  return ResTy;
5314 
5315  SMLoc RBraceLoc;
5316  SMLoc LBraceLoc = getLoc();
5317  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5318  unsigned Prefix = Operands.size();
5319 
5320  for (;;) {
5321  auto Loc = getLoc();
5322  ResTy = parseReg(Operands);
5323  if (ResTy == MatchOperand_NoMatch)
5324  Error(Loc, "expected a register");
5325  if (ResTy != MatchOperand_Success)
5326  return MatchOperand_ParseFail;
5327 
5328  RBraceLoc = getLoc();
5329  if (trySkipToken(AsmToken::RBrac))
5330  break;
5331 
5332  if (!skipToken(AsmToken::Comma,
5333  "expected a comma or a closing square bracket")) {
5334  return MatchOperand_ParseFail;
5335  }
5336  }
5337 
5338  if (Operands.size() - Prefix > 1) {
5339  Operands.insert(Operands.begin() + Prefix,
5340  AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5341  Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5342  }
5343 
5344  return MatchOperand_Success;
5345  }
5346 
5347  return parseRegOrImm(Operands);
5348 }
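// Illustrative sketch of the NSA (non-sequential address) operand form handled
// above for GFX10+ image instructions; mnemonic and registers are made up:
//
//   image_sample v[0:3], [v4, v6, v9], s[0:7], s[8:11]
//
// When the bracketed list contains more than one register, the "[" and "]"
// tokens are kept as explicit operands around the parsed registers.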
5349 
5350 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5351  // Clear any forced encodings from the previous instruction.
5352  setForcedEncodingSize(0);
5353  setForcedDPP(false);
5354  setForcedSDWA(false);
5355 
5356  if (Name.endswith("_e64")) {
5357  setForcedEncodingSize(64);
5358  return Name.substr(0, Name.size() - 4);
5359  } else if (Name.endswith("_e32")) {
5360  setForcedEncodingSize(32);
5361  return Name.substr(0, Name.size() - 4);
5362  } else if (Name.endswith("_dpp")) {
5363  setForcedDPP(true);
5364  return Name.substr(0, Name.size() - 4);
5365  } else if (Name.endswith("_sdwa")) {
5366  setForcedSDWA(true);
5367  return Name.substr(0, Name.size() - 5);
5368  }
5369  return Name;
5370 }
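// Illustrative examples of the suffix stripping above (mnemonics are examples
// only):
//
//   v_add_f32_e64  -> "v_add_f32",  forced 64-bit encoding
//   v_add_f32_e32  -> "v_add_f32",  forced 32-bit encoding
//   v_mov_b32_dpp  -> "v_mov_b32",  forced DPP encoding
//   v_mov_b32_sdwa -> "v_mov_b32",  forced SDWA encoding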
5371 
5372 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5373  StringRef Name,
5374  SMLoc NameLoc, OperandVector &Operands) {
5375  // Add the instruction mnemonic
5376  Name = parseMnemonicSuffix(Name);
5377  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5378 
5379  bool IsMIMG = Name.startswith("image_");
5380 
5381  while (!trySkipToken(AsmToken::EndOfStatement)) {
5382  OperandMode Mode = OperandMode_Default;
5383  if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5384  Mode = OperandMode_NSA;
5385  CPolSeen = 0;
5386  OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5387 
5388  if (Res != MatchOperand_Success) {
5389  checkUnsupportedInstruction(Name, NameLoc);
5390  if (!Parser.hasPendingError()) {
5391  // FIXME: use real operand location rather than the current location.
5392  StringRef Msg =
5393  (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5394  "not a valid operand.";
5395  Error(getLoc(), Msg);
5396  }
5397  while (!trySkipToken(AsmToken::EndOfStatement)) {
5398  lex();
5399  }
5400  return true;
5401  }
5402 
5403  // Eat the comma or space if there is one.
5404  trySkipToken(AsmToken::Comma);
5405  }
5406 
5407  return false;
5408 }
5409 
5410 //===----------------------------------------------------------------------===//
5411 // Utility functions
5412 //===----------------------------------------------------------------------===//
5413 
5414 OperandMatchResultTy
5415 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5416 
5417  if (!trySkipId(Prefix, AsmToken::Colon))
5418  return MatchOperand_NoMatch;
5419 
5420  return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5421 }
5422 
5423 OperandMatchResultTy
5424 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5425  AMDGPUOperand::ImmTy ImmTy,
5426  bool (*ConvertResult)(int64_t&)) {
5427  SMLoc S = getLoc();
5428  int64_t Value = 0;
5429 
5430  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5431  if (Res != MatchOperand_Success)
5432  return Res;
5433 
5434  if (ConvertResult && !ConvertResult(Value)) {
5435  Error(S, "invalid " + StringRef(Prefix) + " value.");
5436  }
5437 
5438  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5439  return MatchOperand_Success;
5440 }
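// Illustrative sketch of the "prefix:value" operands handled above, e.g. with
// Prefix == "offset" (instruction and value are made up):
//
//   ds_read_b32 v0, v1 offset:16
//
// The prefix identifier and the ':' are consumed by trySkipId(), and the value
// may be any expression accepted by parseExpr().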
5441 
5442 OperandMatchResultTy
5443 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5444  OperandVector &Operands,
5445  AMDGPUOperand::ImmTy ImmTy,
5446  bool (*ConvertResult)(int64_t&)) {
5447  SMLoc S = getLoc();
5448  if (!trySkipId(Prefix, AsmToken::Colon))
5449  return MatchOperand_NoMatch;
5450 
5451  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5452  return MatchOperand_ParseFail;
5453 
5454  unsigned Val = 0;
5455  const unsigned MaxSize = 4;
5456 
5457  // FIXME: How to verify the number of elements matches the number of src
5458  // operands?
5459  for (int I = 0; ; ++I) {
5460  int64_t Op;
5461  SMLoc Loc = getLoc();
5462  if (!parseExpr(Op))
5463  return MatchOperand_ParseFail;
5464 
5465  if (Op != 0 && Op != 1) {
5466  Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5467  return MatchOperand_ParseFail;
5468  }
5469 
5470  Val |= (Op << I);
5471 
5472  if (trySkipToken(AsmToken::RBrac))
5473  break;
5474 
5475  if (I + 1 == MaxSize) {
5476  Error(getLoc(), "expected a closing square bracket");
5477  return MatchOperand_ParseFail;
5478  }
5479 
5480  if (!skipToken(AsmToken::Comma, "expected a comma"))
5481  return MatchOperand_ParseFail;
5482  }
5483 
5484  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5485  return MatchOperand_Success;
5486 }
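// Illustrative sketch of the "prefix:[...]" form handled above, assuming the
// caller passes a prefix such as "op_sel" (values are made up):
//
//   op_sel:[0,1]      or      op_sel:[0,0,1,1]
//
// Each element must be 0 or 1 and is packed into bit I of the resulting
// immediate; at most four elements are accepted.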
5487 
5488 OperandMatchResultTy
5489 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5490  AMDGPUOperand::ImmTy ImmTy) {
5491  int64_t Bit;
5492  SMLoc S = getLoc();
5493 
5494  if (trySkipId(Name)) {
5495  Bit = 1;
5496  } else if (trySkipId("no", Name)) {
5497  Bit = 0;
5498  } else {
5499  return MatchOperand_NoMatch;
5500  }
5501 
5502  if (Name == "r128" && !hasMIMG_R128()) {
5503  Error(S, "r128 modifier is not supported on this GPU");
5504  return MatchOperand_ParseFail;
5505  }
5506  if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5507  Error(S, "a16 modifier is not supported on this GPU");
5508  return MatchOperand_ParseFail;
5509  }
5510 
5511  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5512  ImmTy = AMDGPUOperand::ImmTyR128A16;
5513 
5514  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5515  return MatchOperand_Success;
5516 }
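// Illustrative sketch of the named-bit operands handled above: the bare name
// sets the bit and a "no"-prefixed spelling clears it, e.g.:
//
//   gds  / nogds
//   tfe  / notfe
//   r128 / nor128   (rejected when the subtarget has no MIMG r128 support)
//   a16  / noa16    (rejected unless GFX9 or a GPU with GFX10-style a16)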
5517 
5518 OperandMatchResultTy
5519 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5520  unsigned CPolOn = 0;
5521  unsigned CPolOff = 0;
5522  SMLoc S = getLoc();
5523 
5524  if (trySkipId("glc"))
5525  CPolOn = AMDGPU::CPol::GLC;
5526  else if (trySkipId("noglc"))
5527  CPolOff = AMDGPU::CPol::GLC;
5528  else if (trySkipId("slc"))
5529  CPolOn = AMDGPU::CPol::SLC;
5530  else if (trySkipId("noslc"))
5531  CPolOff = AMDGPU::CPol::SLC;
5532  else if (trySkipId("dlc"))
5533  CPolOn = AMDGPU::CPol::DLC;
5534  else if (trySkipId("nodlc"))
5535  CPolOff = AMDGPU::CPol::DLC;
5536  else if (trySkipId("scc"))
5537  CPolOn = AMDGPU::CPol::SCC;
5538  else if (trySkipId("noscc"))
5539  CPolOff = AMDGPU::CPol::SCC;
5540  else
5541  return MatchOperand_NoMatch;
5542 
5543  if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5544  Error(S, "dlc modifier is not supported on this GPU");
5545  return MatchOperand_ParseFail;
5546  }
5547 
5548  if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5549  Error(S, "scc modifier is not supported on this GPU");
5550  return MatchOperand_ParseFail;
5551  }
5552 
5553  if (CPolSeen & (CPolOn | CPolOff)) {
5554  Error(S, "duplicate cache policy modifier");
5555  return MatchOperand_ParseFail;
5556  }
5557 
5558  CPolSeen |= (CPolOn | CPolOff);
5559 
5560  for (unsigned I = 1; I != Operands.size(); ++I) {
5561  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5562  if (Op.isCPol()) {
5563  Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5564  return MatchOperand_Success;
5565  }
5566  }
5567 
5568  Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5569  AMDGPUOperand::ImmTyCPol));
5570 
5571  return MatchOperand_Success;
5572 }
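// Illustrative sketch of the cache-policy modifiers handled above (made-up
// instruction; any subset may appear, each at most once):
//
//   buffer_load_dword v0, off, s[0:3], 0 glc slc dlc
//
// The "no"-prefixed forms (noglc, noslc, nodlc, noscc) clear the corresponding
// bit; dlc requires GFX10+, scc requires GFX90A, and repeated modifiers are
// rejected.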
5573 
5574 static void addOptionalImmOperand(
5575  MCInst& Inst, const OperandVector& Operands,
5576  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5577  AMDGPUOperand::ImmTy ImmT,
5578  int64_t Default = 0) {
5579  auto i = OptionalIdx.find(ImmT);
5580  if (i != OptionalIdx.end()) {
5581  unsigned Idx = i->second;
5582  ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5583  } else {
5584  Inst.addOperand(MCOperand::createImm(Default));
5585  }
5586 }
5587 
5588 OperandMatchResultTy
5589 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5590  StringRef &Value,
5591  SMLoc &StringLoc) {
5592  if (!trySkipId(Prefix, AsmToken::Colon))
5593  return MatchOperand_NoMatch;
5594 
5595  StringLoc = getLoc();
5596  return parseId(Value, "expected an identifier") ? MatchOperand_Success
5597  : MatchOperand_ParseFail;
5598 }
5599 
5600 //===----------------------------------------------------------------------===//
5601 // MTBUF format
5602 //===----------------------------------------------------------------------===//
5603 
5604 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5605  int64_t MaxVal,
5606  int64_t &Fmt) {
5607  int64_t Val;
5608  SMLoc Loc = getLoc();
5609 
5610  auto Res = parseIntWithPrefix(Pref, Val);
5611  if (Res == MatchOperand_ParseFail)
5612  return false;
5613  if (Res == MatchOperand_NoMatch)
5614  return true;
5615 
5616  if (Val < 0 || Val > MaxVal) {
5617  Error(Loc, Twine("out of range ", StringRef(Pref)));
5618  return false;
5619  }
5620 
5621  Fmt = Val;
5622  return true;
5623 }
5624 
5625 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5626 // values to live in a joint format operand in the MCInst encoding.
5627 OperandMatchResultTy
5628 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5629  using namespace llvm::AMDGPU::MTBUFFormat;
5630 
5631  int64_t Dfmt = DFMT_UNDEF;
5632  int64_t Nfmt = NFMT_UNDEF;
5633 
5634  // dfmt and nfmt can appear in either order, and each is optional.
5635  for (int I = 0; I < 2; ++I) {
5636  if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5637  return MatchOperand_ParseFail;
5638 
5639  if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5640  return MatchOperand_ParseFail;
5641  }
5642  // Skip optional comma between dfmt/nfmt
5643  // but guard against 2 commas following each other.
5644  if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5645  !peekToken().is(AsmToken::Comma)) {
5646  trySkipToken(AsmToken::Comma);
5647  }
5648  }
5649 
5650  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5651  return MatchOperand_NoMatch;
5652 
5653  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5654  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5655 
5656  Format = encodeDfmtNfmt(Dfmt, Nfmt);
5657  return MatchOperand_Success;
5658 }
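// Illustrative sketch of the legacy split-format syntax handled above (made-up
// instruction and values):
//
//   tbuffer_load_format_x v0, off, s[0:3], dfmt:4, nfmt:2, 0
//
// Either field may be omitted or the two may appear in either order; a missing
// field takes its default before the pair is packed with encodeDfmtNfmt().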
5659 
5660 OperandMatchResultTy
5661 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5662  using namespace llvm::AMDGPU::MTBUFFormat;
5663 
5664  int64_t Fmt = UFMT_UNDEF;
5665 
5666  if (!tryParseFmt("format", UFMT_MAX, Fmt))
5667  return MatchOperand_ParseFail;
5668 
5669  if (Fmt == UFMT_UNDEF)
5670  return MatchOperand_NoMatch;
5671 
5672  Format = Fmt;
5673  return MatchOperand_Success;
5674 }
5675 
5676 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5677  int64_t &Nfmt,
5678  StringRef FormatStr,
5679  SMLoc Loc) {
5680  using namespace llvm::AMDGPU::MTBUFFormat;
5681  int64_t Format;
5682 
5683  Format = getDfmt(FormatStr);
5684  if (Format != DFMT_UNDEF) {
5685  Dfmt = Format;
5686  return true;
5687  }
5688 
5689  Format = getNfmt(FormatStr, getSTI());
5690  if (Format != NFMT_UNDEF) {
5691  Nfmt = Format;
5692  return true;
5693  }
5694 
5695  Error(Loc, "unsupported format");
5696  return false;
5697 }
5698 
5699 OperandMatchResultTy
5700 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5701  SMLoc FormatLoc,
5702  int64_t &Format) {
5703  using namespace llvm::AMDGPU::MTBUFFormat;
5704 
5705  int64_t Dfmt = DFMT_UNDEF;
5706  int64_t Nfmt = NFMT_UNDEF;
5707  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5708  return MatchOperand_ParseFail;
5709 
5710  if (trySkipToken(AsmToken::Comma)) {
5711  StringRef Str;
5712  SMLoc Loc = getLoc();
5713  if (!parseId(Str, "expected a format string") ||
5714  !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5715  return MatchOperand_ParseFail;
5716  }
5717  if (Dfmt == DFMT_UNDEF) {
5718  Error(Loc, "duplicate numeric format");
5719  return MatchOperand_ParseFail;
5720  } else if (Nfmt == NFMT_UNDEF) {
5721  Error(Loc, "duplicate data format");
5722  return MatchOperand_ParseFail;
5723  }
5724  }
5725 
5726  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5727  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5728 
5729  if (isGFX10Plus()) {
5730  auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5731  if (Ufmt == UFMT_UNDEF) {
5732  Error(FormatLoc, "unsupported format");
5733  return MatchOperand_ParseFail;
5734  }
5735  Format = Ufmt;
5736  } else {
5737  Format = encodeDfmtNfmt(Dfmt, Nfmt);
5738  }
5739 
5740  return MatchOperand_Success;
5741 }
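// Illustrative sketch of the symbolic split-format syntax handled above (names
// follow the MTBUF format tables; exact spellings depend on the subtarget):
//
//   format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]
//   format:[BUF_NUM_FORMAT_UINT]
//
// On GFX10+ the dfmt/nfmt pair is converted to the equivalent unified format,
// and combinations with no unified counterpart are rejected.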
5742 
5743 OperandMatchResultTy
5744 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5745  SMLoc Loc,
5746  int64_t &Format) {
5747  using namespace llvm::AMDGPU::MTBUFFormat;
5748 
5749  auto Id = getUnifiedFormat(FormatStr);
5750  if (Id == UFMT_UNDEF)
5751  return MatchOperand_NoMatch;
5752 
5753  if (!isGFX10Plus()) {
5754  Error(Loc, "unified format is not supported on this GPU");
5755  return MatchOperand_ParseFail;
5756  }
5757 
5758  Format = Id;
5759  return MatchOperand_Success;
5760 }
5761 
5762 OperandMatchResultTy
5763 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5764  using namespace llvm::AMDGPU::MTBUFFormat;
5765  SMLoc Loc = getLoc();
5766 
5767  if (!parseExpr(Format))
5768  return MatchOperand_ParseFail;
5769  if (!isValidFormatEncoding(Format, getSTI())) {
5770  Error(Loc, "out of range format");
5771  return MatchOperand_ParseFail;
5772  }
5773 
5774  return MatchOperand_Success;
5775 }
5776 
5777 OperandMatchResultTy
5778 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5779  using namespace llvm::AMDGPU::MTBUFFormat;
5780 
5781  if (!trySkipId("format", AsmToken::Colon))
5782  return MatchOperand_NoMatch;
5783 
5784  if (trySkipToken(AsmToken::LBrac)) {
5785  StringRef FormatStr;
5786  SMLoc Loc = getLoc();
5787  if (!parseId(FormatStr, "expected a format string"))
5788  return MatchOperand_ParseFail;
5789 
5790  auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5791  if (Res == MatchOperand_NoMatch)
5792  Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5793  if (Res != MatchOperand_Success)
5794  return Res;
5795 
5796  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5797  return MatchOperand_ParseFail;
5798 
5799  return MatchOperand_Success;
5800  }
5801 
5802  return parseNumericFormat(Format);
5803 }
5804 
5805 OperandMatchResultTy
5806 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5807  using namespace llvm::AMDGPU::MTBUFFormat;
5808 
5809  int64_t Format = getDefaultFormatEncoding(getSTI());
5810  OperandMatchResultTy Res;
5811  SMLoc Loc = getLoc();
5812 
5813  // Parse legacy format syntax.
5814  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5815  if (Res == MatchOperand_ParseFail)
5816  return Res;
5817 
5818  bool FormatFound = (Res == MatchOperand_Success);
5819 
5820  Operands.push_back(
5821  AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5822 
5823  if (FormatFound)
5824  trySkipToken(AsmToken::Comma);
5825 
5826  if (isToken(AsmToken::EndOfStatement)) {
5827  // We are expecting an soffset operand,
5828  // but let matcher handle the error.
5829  return MatchOperand_Success;
5830  }
5831 
5832  // Parse soffset.
5833  Res = parseRegOrImm(Operands);
5834  if (Res != MatchOperand_Success)
5835  return Res;
5836 
5837  trySkipToken(AsmToken::Comma);
5838 
5839  if (!FormatFound) {
5840  Res = parseSymbolicOrNumericFormat(Format);
5841  if (Res == MatchOperand_ParseFail)
5842  return Res;
5843  if (Res == MatchOperand_Success) {
5844  auto Size = Operands.size();
5845  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5846  assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5847  Op.setImm(Format);
5848  }
5849  return MatchOperand_Success;
5850  }
5851 
5852  if (isId("format") && peekToken().is(AsmToken::Colon)) {
5853  Error(getLoc(), "duplicate format");
5854  return MatchOperand_ParseFail;
5855  }
5856  return MatchOperand_Success;
5857 }
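// Illustrative sketch of the two accepted positions of the format operand
// relative to soffset (made-up instruction and values):
//
//   tbuffer_load_format_x v0, off, s[0:3], format:22, 0   // format, then soffset
//   tbuffer_load_format_x v0, off, s[0:3], 0 format:22    // soffset, then format
//
// When the format follows soffset, the placeholder FORMAT immediate pushed
// earlier is patched in place via Op.setImm(Format).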
5858 
5859 //===----------------------------------------------------------------------===//
5860 // ds
5861 //===----------------------------------------------------------------------===//
5862 
5863 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5864  const OperandVector &Operands) {
5865  OptionalImmIndexMap OptionalIdx;
5866 
5867  for (unsigned i = 1, e = Operands.size(); i != e; ++