AMDGPUAsmParser.cpp
1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
19 #include "llvm/ADT/APFloat.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/MC/MCAsmInfo.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCExpr.h"
26 #include "llvm/MC/MCInst.h"
30 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/Support/Casting.h"
37 
38 using namespace llvm;
39 using namespace llvm::AMDGPU;
40 using namespace llvm::amdhsa;
41 
42 namespace {
43 
44 class AMDGPUAsmParser;
45 
46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
47 
48 //===----------------------------------------------------------------------===//
49 // Operand
50 //===----------------------------------------------------------------------===//
51 
52 class AMDGPUOperand : public MCParsedAsmOperand {
53  enum KindTy {
54  Token,
55  Immediate,
56  Register,
57  Expression,
58  } Kind;
59 
60  SMLoc StartLoc, EndLoc;
61  const AMDGPUAsmParser *AsmParser;
62 
63 public:
64  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
65  : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
66 
67  using Ptr = std::unique_ptr<AMDGPUOperand>;
68 
69  struct Modifiers {
70  bool Abs = false;
71  bool Neg = false;
72  bool Sext = false;
73 
74  bool hasFPModifiers() const { return Abs || Neg; }
75  bool hasIntModifiers() const { return Sext; }
76  bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
77 
78  int64_t getFPModifiersOperand() const {
79  int64_t Operand = 0;
80  Operand |= Abs ? SISrcMods::ABS : 0u;
81  Operand |= Neg ? SISrcMods::NEG : 0u;
82  return Operand;
83  }
84 
85  int64_t getIntModifiersOperand() const {
86  int64_t Operand = 0;
87  Operand |= Sext ? SISrcMods::SEXT : 0u;
88  return Operand;
89  }
90 
91  int64_t getModifiersOperand() const {
92  assert(!(hasFPModifiers() && hasIntModifiers())
93  && "fp and int modifiers should not be used simultaneously");
94  if (hasFPModifiers()) {
95  return getFPModifiersOperand();
96  } else if (hasIntModifiers()) {
97  return getIntModifiersOperand();
98  } else {
99  return 0;
100  }
101  }
102 
103  friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
104  };
105 
106  enum ImmTy {
107  ImmTyNone,
108  ImmTyGDS,
109  ImmTyLDS,
110  ImmTyOffen,
111  ImmTyIdxen,
112  ImmTyAddr64,
113  ImmTyOffset,
114  ImmTyInstOffset,
115  ImmTyOffset0,
116  ImmTyOffset1,
117  ImmTyCPol,
118  ImmTySWZ,
119  ImmTyTFE,
120  ImmTyD16,
121  ImmTyClampSI,
122  ImmTyOModSI,
123  ImmTyDPP8,
124  ImmTyDppCtrl,
125  ImmTyDppRowMask,
126  ImmTyDppBankMask,
127  ImmTyDppBoundCtrl,
128  ImmTyDppFi,
129  ImmTySdwaDstSel,
130  ImmTySdwaSrc0Sel,
131  ImmTySdwaSrc1Sel,
132  ImmTySdwaDstUnused,
133  ImmTyDMask,
134  ImmTyDim,
135  ImmTyUNorm,
136  ImmTyDA,
137  ImmTyR128A16,
138  ImmTyA16,
139  ImmTyLWE,
140  ImmTyExpTgt,
141  ImmTyExpCompr,
142  ImmTyExpVM,
143  ImmTyFORMAT,
144  ImmTyHwreg,
145  ImmTyOff,
146  ImmTySendMsg,
147  ImmTyInterpSlot,
148  ImmTyInterpAttr,
149  ImmTyAttrChan,
150  ImmTyOpSel,
151  ImmTyOpSelHi,
152  ImmTyNegLo,
153  ImmTyNegHi,
154  ImmTySwizzle,
155  ImmTyGprIdxMode,
156  ImmTyHigh,
157  ImmTyBLGP,
158  ImmTyCBSZ,
159  ImmTyABID,
160  ImmTyEndpgm,
161  };
162 
163  enum ImmKindTy {
164  ImmKindTyNone,
165  ImmKindTyLiteral,
166  ImmKindTyConst,
167  };
168 
169 private:
170  struct TokOp {
171  const char *Data;
172  unsigned Length;
173  };
174 
175  struct ImmOp {
176  int64_t Val;
177  ImmTy Type;
178  bool IsFPImm;
179  mutable ImmKindTy Kind;
180  Modifiers Mods;
181  };
182 
183  struct RegOp {
184  unsigned RegNo;
185  Modifiers Mods;
186  };
187 
188  union {
189  TokOp Tok;
190  ImmOp Imm;
191  RegOp Reg;
192  const MCExpr *Expr;
193  };
194 
195 public:
196  bool isToken() const override {
197  if (Kind == Token)
198  return true;
199 
200  // When parsing operands, we can't always tell if something was meant to be
201  // a token, like 'gds', or an expression that references a global variable.
202  // In this case, we assume the string is an expression, and if we need to
203  // interpret it as a token, then we treat the symbol name as the token.
204  return isSymbolRefExpr();
205  }
206 
207  bool isSymbolRefExpr() const {
208  return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
209  }
210 
211  bool isImm() const override {
212  return Kind == Immediate;
213  }
214 
215  void setImmKindNone() const {
216  assert(isImm());
217  Imm.Kind = ImmKindTyNone;
218  }
219 
220  void setImmKindLiteral() const {
221  assert(isImm());
222  Imm.Kind = ImmKindTyLiteral;
223  }
224 
225  void setImmKindConst() const {
226  assert(isImm());
227  Imm.Kind = ImmKindTyConst;
228  }
229 
230  bool IsImmKindLiteral() const {
231  return isImm() && Imm.Kind == ImmKindTyLiteral;
232  }
233 
234  bool isImmKindConst() const {
235  return isImm() && Imm.Kind == ImmKindTyConst;
236  }
237 
238  bool isInlinableImm(MVT type) const;
239  bool isLiteralImm(MVT type) const;
240 
241  bool isRegKind() const {
242  return Kind == Register;
243  }
244 
245  bool isReg() const override {
246  return isRegKind() && !hasModifiers();
247  }
248 
249  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
250  return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
251  }
252 
253  bool isRegOrImmWithInt16InputMods() const {
254  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
255  }
256 
257  bool isRegOrImmWithInt32InputMods() const {
258  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
259  }
260 
261  bool isRegOrImmWithInt64InputMods() const {
262  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
263  }
264 
265  bool isRegOrImmWithFP16InputMods() const {
266  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
267  }
268 
269  bool isRegOrImmWithFP32InputMods() const {
270  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
271  }
272 
273  bool isRegOrImmWithFP64InputMods() const {
274  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
275  }
276 
277  bool isVReg() const {
278  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
279  isRegClass(AMDGPU::VReg_64RegClassID) ||
280  isRegClass(AMDGPU::VReg_96RegClassID) ||
281  isRegClass(AMDGPU::VReg_128RegClassID) ||
282  isRegClass(AMDGPU::VReg_160RegClassID) ||
283  isRegClass(AMDGPU::VReg_192RegClassID) ||
284  isRegClass(AMDGPU::VReg_256RegClassID) ||
285  isRegClass(AMDGPU::VReg_512RegClassID) ||
286  isRegClass(AMDGPU::VReg_1024RegClassID);
287  }
288 
289  bool isVReg32() const {
290  return isRegClass(AMDGPU::VGPR_32RegClassID);
291  }
292 
293  bool isVReg32OrOff() const {
294  return isOff() || isVReg32();
295  }
296 
297  bool isNull() const {
298  return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
299  }
300 
301  bool isVRegWithInputMods() const;
302 
303  bool isSDWAOperand(MVT type) const;
304  bool isSDWAFP16Operand() const;
305  bool isSDWAFP32Operand() const;
306  bool isSDWAInt16Operand() const;
307  bool isSDWAInt32Operand() const;
308 
309  bool isImmTy(ImmTy ImmT) const {
310  return isImm() && Imm.Type == ImmT;
311  }
312 
313  bool isImmModifier() const {
314  return isImm() && Imm.Type != ImmTyNone;
315  }
316 
317  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
318  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
319  bool isDMask() const { return isImmTy(ImmTyDMask); }
320  bool isDim() const { return isImmTy(ImmTyDim); }
321  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
322  bool isDA() const { return isImmTy(ImmTyDA); }
323  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
324  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
325  bool isLWE() const { return isImmTy(ImmTyLWE); }
326  bool isOff() const { return isImmTy(ImmTyOff); }
327  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
328  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
329  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
330  bool isOffen() const { return isImmTy(ImmTyOffen); }
331  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
332  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
333  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
334  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
335  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
336 
337  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
338  bool isGDS() const { return isImmTy(ImmTyGDS); }
339  bool isLDS() const { return isImmTy(ImmTyLDS); }
340  bool isCPol() const { return isImmTy(ImmTyCPol); }
341  bool isSWZ() const { return isImmTy(ImmTySWZ); }
342  bool isTFE() const { return isImmTy(ImmTyTFE); }
343  bool isD16() const { return isImmTy(ImmTyD16); }
344  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
345  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
346  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
347  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
348  bool isFI() const { return isImmTy(ImmTyDppFi); }
349  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
350  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
351  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
352  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
353  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
354  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
355  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
356  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
357  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
358  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
359  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
360  bool isHigh() const { return isImmTy(ImmTyHigh); }
361 
362  bool isMod() const {
363  return isClampSI() || isOModSI();
364  }
365 
366  bool isRegOrImm() const {
367  return isReg() || isImm();
368  }
369 
370  bool isRegClass(unsigned RCID) const;
371 
372  bool isInlineValue() const;
373 
374  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
375  return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
376  }
377 
378  bool isSCSrcB16() const {
379  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
380  }
381 
382  bool isSCSrcV2B16() const {
383  return isSCSrcB16();
384  }
385 
386  bool isSCSrcB32() const {
387  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
388  }
389 
390  bool isSCSrcB64() const {
391  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
392  }
393 
394  bool isBoolReg() const;
395 
396  bool isSCSrcF16() const {
397  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
398  }
399 
400  bool isSCSrcV2F16() const {
401  return isSCSrcF16();
402  }
403 
404  bool isSCSrcF32() const {
405  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
406  }
407 
408  bool isSCSrcF64() const {
409  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
410  }
411 
412  bool isSSrcB32() const {
413  return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
414  }
415 
416  bool isSSrcB16() const {
417  return isSCSrcB16() || isLiteralImm(MVT::i16);
418  }
419 
420  bool isSSrcV2B16() const {
421  llvm_unreachable("cannot happen");
422  return isSSrcB16();
423  }
424 
425  bool isSSrcB64() const {
426  // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
427  // See isVSrc64().
428  return isSCSrcB64() || isLiteralImm(MVT::i64);
429  }
430 
431  bool isSSrcF32() const {
432  return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
433  }
434 
435  bool isSSrcF64() const {
436  return isSCSrcB64() || isLiteralImm(MVT::f64);
437  }
438 
439  bool isSSrcF16() const {
440  return isSCSrcB16() || isLiteralImm(MVT::f16);
441  }
442 
443  bool isSSrcV2F16() const {
444  llvm_unreachable("cannot happen");
445  return isSSrcF16();
446  }
447 
448  bool isSSrcV2FP32() const {
449  llvm_unreachable("cannot happen");
450  return isSSrcF32();
451  }
452 
453  bool isSCSrcV2FP32() const {
454  llvm_unreachable("cannot happen");
455  return isSCSrcF32();
456  }
457 
458  bool isSSrcV2INT32() const {
459  llvm_unreachable("cannot happen");
460  return isSSrcB32();
461  }
462 
463  bool isSCSrcV2INT32() const {
464  llvm_unreachable("cannot happen");
465  return isSCSrcB32();
466  }
467 
468  bool isSSrcOrLdsB32() const {
469  return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
470  isLiteralImm(MVT::i32) || isExpr();
471  }
472 
473  bool isVCSrcB32() const {
474  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
475  }
476 
477  bool isVCSrcB64() const {
478  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
479  }
480 
481  bool isVCSrcB16() const {
482  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
483  }
484 
485  bool isVCSrcV2B16() const {
486  return isVCSrcB16();
487  }
488 
489  bool isVCSrcF32() const {
490  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
491  }
492 
493  bool isVCSrcF64() const {
494  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
495  }
496 
497  bool isVCSrcF16() const {
498  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
499  }
500 
501  bool isVCSrcV2F16() const {
502  return isVCSrcF16();
503  }
504 
505  bool isVSrcB32() const {
506  return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
507  }
508 
509  bool isVSrcB64() const {
510  return isVCSrcF64() || isLiteralImm(MVT::i64);
511  }
512 
513  bool isVSrcB16() const {
514  return isVCSrcB16() || isLiteralImm(MVT::i16);
515  }
516 
517  bool isVSrcV2B16() const {
518  return isVSrcB16() || isLiteralImm(MVT::v2i16);
519  }
520 
521  bool isVCSrcV2FP32() const {
522  return isVCSrcF64();
523  }
524 
525  bool isVSrcV2FP32() const {
526  return isVSrcF64() || isLiteralImm(MVT::v2f32);
527  }
528 
529  bool isVCSrcV2INT32() const {
530  return isVCSrcB64();
531  }
532 
533  bool isVSrcV2INT32() const {
534  return isVSrcB64() || isLiteralImm(MVT::v2i32);
535  }
536 
537  bool isVSrcF32() const {
538  return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
539  }
540 
541  bool isVSrcF64() const {
542  return isVCSrcF64() || isLiteralImm(MVT::f64);
543  }
544 
545  bool isVSrcF16() const {
546  return isVCSrcF16() || isLiteralImm(MVT::f16);
547  }
548 
549  bool isVSrcV2F16() const {
550  return isVSrcF16() || isLiteralImm(MVT::v2f16);
551  }
552 
553  bool isVISrcB32() const {
554  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
555  }
556 
557  bool isVISrcB16() const {
558  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
559  }
560 
561  bool isVISrcV2B16() const {
562  return isVISrcB16();
563  }
564 
565  bool isVISrcF32() const {
566  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
567  }
568 
569  bool isVISrcF16() const {
570  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
571  }
572 
573  bool isVISrcV2F16() const {
574  return isVISrcF16() || isVISrcB32();
575  }
576 
577  bool isVISrc_64B64() const {
578  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
579  }
580 
581  bool isVISrc_64F64() const {
582  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
583  }
584 
585  bool isVISrc_64V2FP32() const {
586  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
587  }
588 
589  bool isVISrc_64V2INT32() const {
590  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
591  }
592 
593  bool isVISrc_256B64() const {
594  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
595  }
596 
597  bool isVISrc_256F64() const {
598  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
599  }
600 
601  bool isVISrc_128B16() const {
602  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
603  }
604 
605  bool isVISrc_128V2B16() const {
606  return isVISrc_128B16();
607  }
608 
609  bool isVISrc_128B32() const {
610  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
611  }
612 
613  bool isVISrc_128F32() const {
614  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
615  }
616 
617  bool isVISrc_256V2FP32() const {
618  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
619  }
620 
621  bool isVISrc_256V2INT32() const {
622  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
623  }
624 
625  bool isVISrc_512B32() const {
626  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
627  }
628 
629  bool isVISrc_512B16() const {
630  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
631  }
632 
633  bool isVISrc_512V2B16() const {
634  return isVISrc_512B16();
635  }
636 
637  bool isVISrc_512F32() const {
638  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
639  }
640 
641  bool isVISrc_512F16() const {
642  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
643  }
644 
645  bool isVISrc_512V2F16() const {
646  return isVISrc_512F16() || isVISrc_512B32();
647  }
648 
649  bool isVISrc_1024B32() const {
650  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
651  }
652 
653  bool isVISrc_1024B16() const {
654  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
655  }
656 
657  bool isVISrc_1024V2B16() const {
658  return isVISrc_1024B16();
659  }
660 
661  bool isVISrc_1024F32() const {
662  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
663  }
664 
665  bool isVISrc_1024F16() const {
666  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
667  }
668 
669  bool isVISrc_1024V2F16() const {
670  return isVISrc_1024F16() || isVISrc_1024B32();
671  }
672 
673  bool isAISrcB32() const {
674  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
675  }
676 
677  bool isAISrcB16() const {
678  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
679  }
680 
681  bool isAISrcV2B16() const {
682  return isAISrcB16();
683  }
684 
685  bool isAISrcF32() const {
686  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
687  }
688 
689  bool isAISrcF16() const {
690  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
691  }
692 
693  bool isAISrcV2F16() const {
694  return isAISrcF16() || isAISrcB32();
695  }
696 
697  bool isAISrc_64B64() const {
698  return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
699  }
700 
701  bool isAISrc_64F64() const {
702  return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
703  }
704 
705  bool isAISrc_128B32() const {
706  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
707  }
708 
709  bool isAISrc_128B16() const {
710  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
711  }
712 
713  bool isAISrc_128V2B16() const {
714  return isAISrc_128B16();
715  }
716 
717  bool isAISrc_128F32() const {
718  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
719  }
720 
721  bool isAISrc_128F16() const {
722  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
723  }
724 
725  bool isAISrc_128V2F16() const {
726  return isAISrc_128F16() || isAISrc_128B32();
727  }
728 
729  bool isVISrc_128F16() const {
730  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
731  }
732 
733  bool isVISrc_128V2F16() const {
734  return isVISrc_128F16() || isVISrc_128B32();
735  }
736 
737  bool isAISrc_256B64() const {
738  return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
739  }
740 
741  bool isAISrc_256F64() const {
742  return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
743  }
744 
745  bool isAISrc_512B32() const {
746  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
747  }
748 
749  bool isAISrc_512B16() const {
750  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
751  }
752 
753  bool isAISrc_512V2B16() const {
754  return isAISrc_512B16();
755  }
756 
757  bool isAISrc_512F32() const {
758  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
759  }
760 
761  bool isAISrc_512F16() const {
762  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
763  }
764 
765  bool isAISrc_512V2F16() const {
766  return isAISrc_512F16() || isAISrc_512B32();
767  }
768 
769  bool isAISrc_1024B32() const {
770  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
771  }
772 
773  bool isAISrc_1024B16() const {
774  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
775  }
776 
777  bool isAISrc_1024V2B16() const {
778  return isAISrc_1024B16();
779  }
780 
781  bool isAISrc_1024F32() const {
782  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
783  }
784 
785  bool isAISrc_1024F16() const {
786  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
787  }
788 
789  bool isAISrc_1024V2F16() const {
790  return isAISrc_1024F16() || isAISrc_1024B32();
791  }
792 
793  bool isKImmFP32() const {
794  return isLiteralImm(MVT::f32);
795  }
796 
797  bool isKImmFP16() const {
798  return isLiteralImm(MVT::f16);
799  }
800 
801  bool isMem() const override {
802  return false;
803  }
804 
805  bool isExpr() const {
806  return Kind == Expression;
807  }
808 
809  bool isSoppBrTarget() const {
810  return isExpr() || isImm();
811  }
812 
813  bool isSWaitCnt() const;
814  bool isHwreg() const;
815  bool isSendMsg() const;
816  bool isSwizzle() const;
817  bool isSMRDOffset8() const;
818  bool isSMEMOffset() const;
819  bool isSMRDLiteralOffset() const;
820  bool isDPP8() const;
821  bool isDPPCtrl() const;
822  bool isBLGP() const;
823  bool isCBSZ() const;
824  bool isABID() const;
825  bool isGPRIdxMode() const;
826  bool isS16Imm() const;
827  bool isU16Imm() const;
828  bool isEndpgm() const;
829 
830  StringRef getExpressionAsToken() const {
831  assert(isExpr());
832  const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
833  return S->getSymbol().getName();
834  }
835 
836  StringRef getToken() const {
837  assert(isToken());
838 
839  if (Kind == Expression)
840  return getExpressionAsToken();
841 
842  return StringRef(Tok.Data, Tok.Length);
843  }
844 
845  int64_t getImm() const {
846  assert(isImm());
847  return Imm.Val;
848  }
849 
850  void setImm(int64_t Val) {
851  assert(isImm());
852  Imm.Val = Val;
853  }
854 
855  ImmTy getImmTy() const {
856  assert(isImm());
857  return Imm.Type;
858  }
859 
860  unsigned getReg() const override {
861  assert(isRegKind());
862  return Reg.RegNo;
863  }
864 
865  SMLoc getStartLoc() const override {
866  return StartLoc;
867  }
868 
869  SMLoc getEndLoc() const override {
870  return EndLoc;
871  }
872 
873  SMRange getLocRange() const {
874  return SMRange(StartLoc, EndLoc);
875  }
876 
877  Modifiers getModifiers() const {
878  assert(isRegKind() || isImmTy(ImmTyNone));
879  return isRegKind() ? Reg.Mods : Imm.Mods;
880  }
881 
882  void setModifiers(Modifiers Mods) {
883  assert(isRegKind() || isImmTy(ImmTyNone));
884  if (isRegKind())
885  Reg.Mods = Mods;
886  else
887  Imm.Mods = Mods;
888  }
889 
890  bool hasModifiers() const {
891  return getModifiers().hasModifiers();
892  }
893 
894  bool hasFPModifiers() const {
895  return getModifiers().hasFPModifiers();
896  }
897 
898  bool hasIntModifiers() const {
899  return getModifiers().hasIntModifiers();
900  }
901 
902  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
903 
904  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
905 
906  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
907 
908  template <unsigned Bitwidth>
909  void addKImmFPOperands(MCInst &Inst, unsigned N) const;
910 
911  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
912  addKImmFPOperands<16>(Inst, N);
913  }
914 
915  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
916  addKImmFPOperands<32>(Inst, N);
917  }
918 
919  void addRegOperands(MCInst &Inst, unsigned N) const;
920 
921  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
922  addRegOperands(Inst, N);
923  }
924 
925  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
926  if (isRegKind())
927  addRegOperands(Inst, N);
928  else if (isExpr())
929  Inst.addOperand(MCOperand::createExpr(Expr));
930  else
931  addImmOperands(Inst, N);
932  }
933 
934  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
935  Modifiers Mods = getModifiers();
936  Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
937  if (isRegKind()) {
938  addRegOperands(Inst, N);
939  } else {
940  addImmOperands(Inst, N, false);
941  }
942  }
943 
944  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
945  assert(!hasIntModifiers());
946  addRegOrImmWithInputModsOperands(Inst, N);
947  }
948 
949  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
950  assert(!hasFPModifiers());
951  addRegOrImmWithInputModsOperands(Inst, N);
952  }
953 
954  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
955  Modifiers Mods = getModifiers();
956  Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
957  assert(isRegKind());
958  addRegOperands(Inst, N);
959  }
960 
961  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
962  assert(!hasIntModifiers());
963  addRegWithInputModsOperands(Inst, N);
964  }
965 
966  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
967  assert(!hasFPModifiers());
968  addRegWithInputModsOperands(Inst, N);
969  }
970 
971  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
972  if (isImm())
973  addImmOperands(Inst, N);
974  else {
975  assert(isExpr());
976  Inst.addOperand(MCOperand::createExpr(Expr));
977  }
978  }
979 
980  static void printImmTy(raw_ostream& OS, ImmTy Type) {
981  switch (Type) {
982  case ImmTyNone: OS << "None"; break;
983  case ImmTyGDS: OS << "GDS"; break;
984  case ImmTyLDS: OS << "LDS"; break;
985  case ImmTyOffen: OS << "Offen"; break;
986  case ImmTyIdxen: OS << "Idxen"; break;
987  case ImmTyAddr64: OS << "Addr64"; break;
988  case ImmTyOffset: OS << "Offset"; break;
989  case ImmTyInstOffset: OS << "InstOffset"; break;
990  case ImmTyOffset0: OS << "Offset0"; break;
991  case ImmTyOffset1: OS << "Offset1"; break;
992  case ImmTyCPol: OS << "CPol"; break;
993  case ImmTySWZ: OS << "SWZ"; break;
994  case ImmTyTFE: OS << "TFE"; break;
995  case ImmTyD16: OS << "D16"; break;
996  case ImmTyFORMAT: OS << "FORMAT"; break;
997  case ImmTyClampSI: OS << "ClampSI"; break;
998  case ImmTyOModSI: OS << "OModSI"; break;
999  case ImmTyDPP8: OS << "DPP8"; break;
1000  case ImmTyDppCtrl: OS << "DppCtrl"; break;
1001  case ImmTyDppRowMask: OS << "DppRowMask"; break;
1002  case ImmTyDppBankMask: OS << "DppBankMask"; break;
1003  case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1004  case ImmTyDppFi: OS << "FI"; break;
1005  case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1006  case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1007  case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1008  case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1009  case ImmTyDMask: OS << "DMask"; break;
1010  case ImmTyDim: OS << "Dim"; break;
1011  case ImmTyUNorm: OS << "UNorm"; break;
1012  case ImmTyDA: OS << "DA"; break;
1013  case ImmTyR128A16: OS << "R128A16"; break;
1014  case ImmTyA16: OS << "A16"; break;
1015  case ImmTyLWE: OS << "LWE"; break;
1016  case ImmTyOff: OS << "Off"; break;
1017  case ImmTyExpTgt: OS << "ExpTgt"; break;
1018  case ImmTyExpCompr: OS << "ExpCompr"; break;
1019  case ImmTyExpVM: OS << "ExpVM"; break;
1020  case ImmTyHwreg: OS << "Hwreg"; break;
1021  case ImmTySendMsg: OS << "SendMsg"; break;
1022  case ImmTyInterpSlot: OS << "InterpSlot"; break;
1023  case ImmTyInterpAttr: OS << "InterpAttr"; break;
1024  case ImmTyAttrChan: OS << "AttrChan"; break;
1025  case ImmTyOpSel: OS << "OpSel"; break;
1026  case ImmTyOpSelHi: OS << "OpSelHi"; break;
1027  case ImmTyNegLo: OS << "NegLo"; break;
1028  case ImmTyNegHi: OS << "NegHi"; break;
1029  case ImmTySwizzle: OS << "Swizzle"; break;
1030  case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1031  case ImmTyHigh: OS << "High"; break;
1032  case ImmTyBLGP: OS << "BLGP"; break;
1033  case ImmTyCBSZ: OS << "CBSZ"; break;
1034  case ImmTyABID: OS << "ABID"; break;
1035  case ImmTyEndpgm: OS << "Endpgm"; break;
1036  }
1037  }
1038 
1039  void print(raw_ostream &OS) const override {
1040  switch (Kind) {
1041  case Register:
1042  OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1043  break;
1044  case Immediate:
1045  OS << '<' << getImm();
1046  if (getImmTy() != ImmTyNone) {
1047  OS << " type: "; printImmTy(OS, getImmTy());
1048  }
1049  OS << " mods: " << Imm.Mods << '>';
1050  break;
1051  case Token:
1052  OS << '\'' << getToken() << '\'';
1053  break;
1054  case Expression:
1055  OS << "<expr " << *Expr << '>';
1056  break;
1057  }
1058  }
1059 
1060  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1061  int64_t Val, SMLoc Loc,
1062  ImmTy Type = ImmTyNone,
1063  bool IsFPImm = false) {
1064  auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1065  Op->Imm.Val = Val;
1066  Op->Imm.IsFPImm = IsFPImm;
1067  Op->Imm.Kind = ImmKindTyNone;
1068  Op->Imm.Type = Type;
1069  Op->Imm.Mods = Modifiers();
1070  Op->StartLoc = Loc;
1071  Op->EndLoc = Loc;
1072  return Op;
1073  }
1074 
1075  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1076  StringRef Str, SMLoc Loc,
1077  bool HasExplicitEncodingSize = true) {
1078  auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1079  Res->Tok.Data = Str.data();
1080  Res->Tok.Length = Str.size();
1081  Res->StartLoc = Loc;
1082  Res->EndLoc = Loc;
1083  return Res;
1084  }
1085 
1086  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1087  unsigned RegNo, SMLoc S,
1088  SMLoc E) {
1089  auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1090  Op->Reg.RegNo = RegNo;
1091  Op->Reg.Mods = Modifiers();
1092  Op->StartLoc = S;
1093  Op->EndLoc = E;
1094  return Op;
1095  }
1096 
1097  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1098  const class MCExpr *Expr, SMLoc S) {
1099  auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1100  Op->Expr = Expr;
1101  Op->StartLoc = S;
1102  Op->EndLoc = S;
1103  return Op;
1104  }
1105 };
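// Illustrative note (not part of the original file): parsed operands are built
// through the static factories above. A parser callback would typically do
// something like
//   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
//                                               AMDGPUOperand::ImmTyOffset));
// or AMDGPUOperand::CreateReg(this, RegNo, S, E) for register operands; the
// exact call sites appear later in the file and are only sketched here.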
1106 
1107 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1108  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1109  return OS;
1110 }
1111 
1112 //===----------------------------------------------------------------------===//
1113 // AsmParser
1114 //===----------------------------------------------------------------------===//
1115 
1116 // Holds info related to the current kernel, e.g. the count of SGPRs used.
1117 // Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the next
1118 // .amdgpu_hsa_kernel directive or at EOF.
1119 class KernelScopeInfo {
1120  int SgprIndexUnusedMin = -1;
1121  int VgprIndexUnusedMin = -1;
1122  MCContext *Ctx = nullptr;
1123 
1124  void usesSgprAt(int i) {
1125  if (i >= SgprIndexUnusedMin) {
1126  SgprIndexUnusedMin = ++i;
1127  if (Ctx) {
1128  MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1129  Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1130  }
1131  }
1132  }
1133 
1134  void usesVgprAt(int i) {
1135  if (i >= VgprIndexUnusedMin) {
1136  VgprIndexUnusedMin = ++i;
1137  if (Ctx) {
1138  MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1139  Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1140  }
1141  }
1142  }
1143 
1144 public:
1145  KernelScopeInfo() = default;
1146 
1147  void initialize(MCContext &Context) {
1148  Ctx = &Context;
1149  usesSgprAt(SgprIndexUnusedMin = -1);
1150  usesVgprAt(VgprIndexUnusedMin = -1);
1151  }
1152 
1153  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1154  switch (RegKind) {
1155  case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1156  case IS_AGPR: // fall through
1157  case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1158  default: break;
1159  }
1160  }
1161 };
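// Illustrative example (assumed, not from the original file): if a kernel body
// references s[0:3], the register parser calls
//   KernelScope.usesRegister(IS_SGPR, /*DwordRegIndex=*/0, /*RegWidth=*/4);
// which invokes usesSgprAt(3) and updates the .kernel.sgpr_count symbol to 4.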
1162 
1163 class AMDGPUAsmParser : public MCTargetAsmParser {
1164  MCAsmParser &Parser;
1165 
1166  // Number of extra operands parsed after the first optional operand.
1167  // This may be necessary to skip hardcoded mandatory operands.
1168  static const unsigned MAX_OPR_LOOKAHEAD = 8;
1169 
1170  unsigned ForcedEncodingSize = 0;
1171  bool ForcedDPP = false;
1172  bool ForcedSDWA = false;
1173  KernelScopeInfo KernelScope;
1174  unsigned CPolSeen;
1175 
1176  /// @name Auto-generated Match Functions
1177  /// {
1178 
1179 #define GET_ASSEMBLER_HEADER
1180 #include "AMDGPUGenAsmMatcher.inc"
1181 
1182  /// }
1183 
1184 private:
1185  bool ParseAsAbsoluteExpression(uint32_t &Ret);
1186  bool OutOfRangeError(SMRange Range);
1187  /// Calculate VGPR/SGPR blocks required for given target, reserved
1188  /// registers, and user-specified NextFreeXGPR values.
1189  ///
1190  /// \param Features [in] Target features, used for bug corrections.
1191  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1192  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1193  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1194  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1195  /// descriptor field, if valid.
1196  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1197  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1198  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1199  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1200  /// \param VGPRBlocks [out] Result VGPR block count.
1201  /// \param SGPRBlocks [out] Result SGPR block count.
1202  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1203  bool FlatScrUsed, bool XNACKUsed,
1204  Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1205  SMRange VGPRRange, unsigned NextFreeSGPR,
1206  SMRange SGPRRange, unsigned &VGPRBlocks,
1207  unsigned &SGPRBlocks);
1208  bool ParseDirectiveAMDGCNTarget();
1209  bool ParseDirectiveAMDHSAKernel();
1210  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1211  bool ParseDirectiveHSACodeObjectVersion();
1212  bool ParseDirectiveHSACodeObjectISA();
1213  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1214  bool ParseDirectiveAMDKernelCodeT();
1215  // TODO: Possibly make subtargetHasRegister const.
1216  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1217  bool ParseDirectiveAMDGPUHsaKernel();
1218 
1219  bool ParseDirectiveISAVersion();
1220  bool ParseDirectiveHSAMetadata();
1221  bool ParseDirectivePALMetadataBegin();
1222  bool ParseDirectivePALMetadata();
1223  bool ParseDirectiveAMDGPULDS();
1224 
1225  /// Common code to parse out a block of text (typically YAML) between start and
1226  /// end directives.
1227  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1228  const char *AssemblerDirectiveEnd,
1229  std::string &CollectString);
1230 
1231  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1232  RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1233  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1234  unsigned &RegNum, unsigned &RegWidth,
1235  bool RestoreOnFailure = false);
1236  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1237  unsigned &RegNum, unsigned &RegWidth,
1238  SmallVectorImpl<AsmToken> &Tokens);
1239  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1240  unsigned &RegWidth,
1241  SmallVectorImpl<AsmToken> &Tokens);
1242  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1243  unsigned &RegWidth,
1244  SmallVectorImpl<AsmToken> &Tokens);
1245  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1246  unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1247  bool ParseRegRange(unsigned& Num, unsigned& Width);
1248  unsigned getRegularReg(RegisterKind RegKind,
1249  unsigned RegNum,
1250  unsigned RegWidth,
1251  SMLoc Loc);
1252 
1253  bool isRegister();
1254  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1255  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1256  void initializeGprCountSymbol(RegisterKind RegKind);
1257  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1258  unsigned RegWidth);
1259  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1260  bool IsAtomic, bool IsLds = false);
1261  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1262  bool IsGdsHardcoded);
1263 
1264 public:
1265  enum AMDGPUMatchResultTy {
1266  Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1267  };
1268  enum OperandMode {
1269  OperandMode_Default,
1270  OperandMode_NSA,
1271  };
1272 
1273  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1274 
1275  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1276  const MCInstrInfo &MII,
1277  const MCTargetOptions &Options)
1278  : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1279  MCAsmParserExtension::Initialize(Parser);
1280 
1281  if (getFeatureBits().none()) {
1282  // Set default features.
1283  copySTI().ToggleFeature("southern-islands");
1284  }
1285 
1286  setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1287 
1288  {
1289  // TODO: make these pre-defined variables read-only.
1290  // Currently there is no suitable machinery in core llvm-mc for this.
1291  // MCSymbol::isRedefinable is intended for another purpose, and
1292  // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1293  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1294  MCContext &Ctx = getContext();
1295  if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1296  MCSymbol *Sym =
1297  Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1298  Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1299  Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1300  Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1301  Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1302  Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1303  } else {
1304  MCSymbol *Sym =
1305  Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1306  Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1307  Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1308  Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1309  Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1310  Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1311  }
1312  if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1313  initializeGprCountSymbol(IS_VGPR);
1314  initializeGprCountSymbol(IS_SGPR);
1315  } else
1316  KernelScope.initialize(getContext());
1317  }
1318  }
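  // Note (added for clarity): the constructor above predefines assembler symbols
  // describing the target ISA version. With the HSA v3/v4 ABI these are the
  // .amdgcn.gfx_generation_{number,minor,stepping} symbols; otherwise the legacy
  // .option.machine_version_* symbols are used. For example, a gfx90a target
  // would set them to 9, 0 and 10 respectively (illustrative values).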
1319 
1320  bool hasMIMG_R128() const {
1321  return AMDGPU::hasMIMG_R128(getSTI());
1322  }
1323 
1324  bool hasPackedD16() const {
1325  return AMDGPU::hasPackedD16(getSTI());
1326  }
1327 
1328  bool hasGFX10A16() const {
1329  return AMDGPU::hasGFX10A16(getSTI());
1330  }
1331 
1332  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1333 
1334  bool isSI() const {
1335  return AMDGPU::isSI(getSTI());
1336  }
1337 
1338  bool isCI() const {
1339  return AMDGPU::isCI(getSTI());
1340  }
1341 
1342  bool isVI() const {
1343  return AMDGPU::isVI(getSTI());
1344  }
1345 
1346  bool isGFX9() const {
1347  return AMDGPU::isGFX9(getSTI());
1348  }
1349 
1350  bool isGFX90A() const {
1351  return AMDGPU::isGFX90A(getSTI());
1352  }
1353 
1354  bool isGFX9Plus() const {
1355  return AMDGPU::isGFX9Plus(getSTI());
1356  }
1357 
1358  bool isGFX10() const {
1359  return AMDGPU::isGFX10(getSTI());
1360  }
1361 
1362  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1363 
1364  bool isGFX10_BEncoding() const {
1365  return AMDGPU::isGFX10_BEncoding(getSTI());
1366  }
1367 
1368  bool hasInv2PiInlineImm() const {
1369  return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1370  }
1371 
1372  bool hasFlatOffsets() const {
1373  return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1374  }
1375 
1376  bool hasArchitectedFlatScratch() const {
1377  return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1378  }
1379 
1380  bool hasSGPR102_SGPR103() const {
1381  return !isVI() && !isGFX9();
1382  }
1383 
1384  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1385 
1386  bool hasIntClamp() const {
1387  return getFeatureBits()[AMDGPU::FeatureIntClamp];
1388  }
1389 
1390  AMDGPUTargetStreamer &getTargetStreamer() {
1391  MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1392  return static_cast<AMDGPUTargetStreamer &>(TS);
1393  }
1394 
1395  const MCRegisterInfo *getMRI() const {
1396  // We need this const_cast because for some reason getContext() is not const
1397  // in MCAsmParser.
1398  return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1399  }
1400 
1401  const MCInstrInfo *getMII() const {
1402  return &MII;
1403  }
1404 
1405  const FeatureBitset &getFeatureBits() const {
1406  return getSTI().getFeatureBits();
1407  }
1408 
1409  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1410  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1411  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1412 
1413  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1414  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1415  bool isForcedDPP() const { return ForcedDPP; }
1416  bool isForcedSDWA() const { return ForcedSDWA; }
1417  ArrayRef<unsigned> getMatchedVariants() const;
1418  StringRef getMatchedVariantName() const;
1419 
1420  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1421  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1422  bool RestoreOnFailure);
1423  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1424  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1425  SMLoc &EndLoc) override;
1426  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1427  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1428  unsigned Kind) override;
1429  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1430  OperandVector &Operands, MCStreamer &Out,
1431  uint64_t &ErrorInfo,
1432  bool MatchingInlineAsm) override;
1433  bool ParseDirective(AsmToken DirectiveID) override;
1434  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1435  OperandMode Mode = OperandMode_Default);
1436  StringRef parseMnemonicSuffix(StringRef Name);
1437  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1438  SMLoc NameLoc, OperandVector &Operands) override;
1439  //bool ProcessInstruction(MCInst &Inst);
1440 
1441  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1442 
1443  OperandMatchResultTy
1444  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1445  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1446  bool (*ConvertResult)(int64_t &) = nullptr);
1447 
1448  OperandMatchResultTy
1449  parseOperandArrayWithPrefix(const char *Prefix,
1450  OperandVector &Operands,
1451  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1452  bool (*ConvertResult)(int64_t&) = nullptr);
1453 
1454  OperandMatchResultTy
1455  parseNamedBit(StringRef Name, OperandVector &Operands,
1456  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1457  OperandMatchResultTy parseCPol(OperandVector &Operands);
1458  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1459  StringRef &Value,
1460  SMLoc &StringLoc);
1461 
1462  bool isModifier();
1463  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1464  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1465  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1466  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1467  bool parseSP3NegModifier();
1468  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1469  OperandMatchResultTy parseReg(OperandVector &Operands);
1470  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1471  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1472  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1473  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1474  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1475  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1476  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1477  OperandMatchResultTy parseUfmt(int64_t &Format);
1478  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1479  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1480  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1481  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1482  OperandMatchResultTy parseNumericFormat(int64_t &Format);
1483  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1484  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1485 
1486  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1487  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1488  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1489  void cvtExp(MCInst &Inst, const OperandVector &Operands);
1490 
1491  bool parseCnt(int64_t &IntVal);
1492  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1493  OperandMatchResultTy parseHwreg(OperandVector &Operands);
1494 
1495 private:
1496  struct OperandInfoTy {
1497  SMLoc Loc;
1498  int64_t Id;
1499  bool IsSymbolic = false;
1500  bool IsDefined = false;
1501 
1502  OperandInfoTy(int64_t Id_) : Id(Id_) {}
1503  };
1504 
1505  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1506  bool validateSendMsg(const OperandInfoTy &Msg,
1507  const OperandInfoTy &Op,
1508  const OperandInfoTy &Stream);
1509 
1510  bool parseHwregBody(OperandInfoTy &HwReg,
1511  OperandInfoTy &Offset,
1512  OperandInfoTy &Width);
1513  bool validateHwreg(const OperandInfoTy &HwReg,
1514  const OperandInfoTy &Offset,
1515  const OperandInfoTy &Width);
1516 
1517  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1518  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1519 
1520  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1521  const OperandVector &Operands) const;
1522  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1523  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1524  SMLoc getLitLoc(const OperandVector &Operands) const;
1525  SMLoc getConstLoc(const OperandVector &Operands) const;
1526 
1527  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1528  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1529  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1530  bool validateSOPLiteral(const MCInst &Inst) const;
1531  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1532  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1533  bool validateIntClampSupported(const MCInst &Inst);
1534  bool validateMIMGAtomicDMask(const MCInst &Inst);
1535  bool validateMIMGGatherDMask(const MCInst &Inst);
1536  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1537  bool validateMIMGDataSize(const MCInst &Inst);
1538  bool validateMIMGAddrSize(const MCInst &Inst);
1539  bool validateMIMGD16(const MCInst &Inst);
1540  bool validateMIMGDim(const MCInst &Inst);
1541  bool validateMIMGMSAA(const MCInst &Inst);
1542  bool validateOpSel(const MCInst &Inst);
1543  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1544  bool validateVccOperand(unsigned Reg) const;
1545  bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
1546  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1547  bool validateAGPRLdSt(const MCInst &Inst) const;
1548  bool validateVGPRAlign(const MCInst &Inst) const;
1549  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1550  bool validateDivScale(const MCInst &Inst);
1551  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1552  const SMLoc &IDLoc);
1553  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1554  unsigned getConstantBusLimit(unsigned Opcode) const;
1555  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1556  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1557  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1558 
1559  bool isSupportedMnemo(StringRef Mnemo,
1560  const FeatureBitset &FBS);
1561  bool isSupportedMnemo(StringRef Mnemo,
1562  const FeatureBitset &FBS,
1563  ArrayRef<unsigned> Variants);
1564  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1565 
1566  bool isId(const StringRef Id) const;
1567  bool isId(const AsmToken &Token, const StringRef Id) const;
1568  bool isToken(const AsmToken::TokenKind Kind) const;
1569  bool trySkipId(const StringRef Id);
1570  bool trySkipId(const StringRef Pref, const StringRef Id);
1571  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1572  bool trySkipToken(const AsmToken::TokenKind Kind);
1573  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1574  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1575  bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1576 
1577  void peekTokens(MutableArrayRef<AsmToken> Tokens);
1578  AsmToken::TokenKind getTokenKind() const;
1579  bool parseExpr(int64_t &Imm, StringRef Expected = "");
1580  bool parseExpr(OperandVector &Operands);
1581  StringRef getTokenStr() const;
1582  AsmToken peekToken();
1583  AsmToken getToken() const;
1584  SMLoc getLoc() const;
1585  void lex();
1586 
1587 public:
1588  void onBeginOfFile() override;
1589 
1590  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1591  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1592 
1593  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1594  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1595  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1596  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1597  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1598  OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1599 
1600  bool parseSwizzleOperand(int64_t &Op,
1601  const unsigned MinVal,
1602  const unsigned MaxVal,
1603  const StringRef ErrMsg,
1604  SMLoc &Loc);
1605  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1606  const unsigned MinVal,
1607  const unsigned MaxVal,
1608  const StringRef ErrMsg);
1609  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1610  bool parseSwizzleOffset(int64_t &Imm);
1611  bool parseSwizzleMacro(int64_t &Imm);
1612  bool parseSwizzleQuadPerm(int64_t &Imm);
1613  bool parseSwizzleBitmaskPerm(int64_t &Imm);
1614  bool parseSwizzleBroadcast(int64_t &Imm);
1615  bool parseSwizzleSwap(int64_t &Imm);
1616  bool parseSwizzleReverse(int64_t &Imm);
1617 
1618  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1619  int64_t parseGPRIdxMacro();
1620 
1621  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1622  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1623  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1624  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1625 
1626  AMDGPUOperand::Ptr defaultCPol() const;
1627 
1628  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1629  AMDGPUOperand::Ptr defaultSMEMOffset() const;
1630  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1631  AMDGPUOperand::Ptr defaultFlatOffset() const;
1632 
1633  OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1634 
1635  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1636  OptionalImmIndexMap &OptionalIdx);
1637  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1638  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1639  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1640  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1641  OptionalImmIndexMap &OptionalIdx);
1642 
1643  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1644 
1645  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1646  bool IsAtomic = false);
1647  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1648  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1649 
1650  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1651 
1652  bool parseDimId(unsigned &Encoding);
1653  OperandMatchResultTy parseDim(OperandVector &Operands);
1654  OperandMatchResultTy parseDPP8(OperandVector &Operands);
1655  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1656  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1657  int64_t parseDPPCtrlSel(StringRef Ctrl);
1658  int64_t parseDPPCtrlPerm();
1659  AMDGPUOperand::Ptr defaultRowMask() const;
1660  AMDGPUOperand::Ptr defaultBankMask() const;
1661  AMDGPUOperand::Ptr defaultBoundCtrl() const;
1662  AMDGPUOperand::Ptr defaultFI() const;
1663  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1664  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1665 
1666  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1667  AMDGPUOperand::ImmTy Type);
1668  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1669  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1670  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1671  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1672  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1673  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1674  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1675  uint64_t BasicInstType,
1676  bool SkipDstVcc = false,
1677  bool SkipSrcVcc = false);
1678 
1679  AMDGPUOperand::Ptr defaultBLGP() const;
1680  AMDGPUOperand::Ptr defaultCBSZ() const;
1681  AMDGPUOperand::Ptr defaultABID() const;
1682 
1683  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1684  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1685 };
1686 
1687 struct OptionalOperand {
1688  const char *Name;
1689  AMDGPUOperand::ImmTy Type;
1690  bool IsBit;
1691  bool (*ConvertResult)(int64_t&);
1692 };
1693 
1694 } // end anonymous namespace
1695 
1696 // May be called with integer type with equivalent bitwidth.
1697 static const fltSemantics *getFltSemantics(unsigned Size) {
1698  switch (Size) {
1699  case 4:
1700  return &APFloat::IEEEsingle();
1701  case 8:
1702  return &APFloat::IEEEdouble();
1703  case 2:
1704  return &APFloat::IEEEhalf();
1705  default:
1706  llvm_unreachable("unsupported fp type");
1707  }
1708 }
1709 
1710 static const fltSemantics *getFltSemantics(MVT VT) {
1711  return getFltSemantics(VT.getSizeInBits() / 8);
1712 }
1713 
1714 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1715  switch (OperandType) {
1716  case AMDGPU::OPERAND_REG_IMM_INT32:
1717  case AMDGPU::OPERAND_REG_IMM_FP32:
1718  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1719  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1720  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1721  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1722  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1723  case AMDGPU::OPERAND_REG_IMM_V2FP32:
1724  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1725  case AMDGPU::OPERAND_REG_IMM_V2INT32:
1726  return &APFloat::IEEEsingle();
1727  case AMDGPU::OPERAND_REG_IMM_INT64:
1728  case AMDGPU::OPERAND_REG_IMM_FP64:
1729  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1730  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1731  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1732  return &APFloat::IEEEdouble();
1733  case AMDGPU::OPERAND_REG_IMM_INT16:
1734  case AMDGPU::OPERAND_REG_IMM_FP16:
1735  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1736  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1737  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1738  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1739  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1740  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1741  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1742  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1743  case AMDGPU::OPERAND_REG_IMM_V2INT16:
1744  case AMDGPU::OPERAND_REG_IMM_V2FP16:
1745  return &APFloat::IEEEhalf();
1746  default:
1747  llvm_unreachable("unsupported fp type");
1748  }
1749 }
1750 
1751 //===----------------------------------------------------------------------===//
1752 // Operand
1753 //===----------------------------------------------------------------------===//
1754 
1755 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1756  bool Lost;
1757 
1758  // Convert literal to single precision
1759  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1760  APFloat::rmNearestTiesToEven,
1761  &Lost);
1762  // We allow precision loss but not overflow or underflow
1763  if (Status != APFloat::opOK &&
1764  Lost &&
1765  ((Status & APFloat::opOverflow) != 0 ||
1766  (Status & APFloat::opUnderflow) != 0)) {
1767  return false;
1768  }
1769 
1770  return true;
1771 }
1772 
1773 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1774  return isUIntN(Size, Val) || isIntN(Size, Val);
1775 }
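// Illustrative example (not part of the original file): for Size == 16,
// isSafeTruncation(0xFFFF, 16) and isSafeTruncation(-1, 16) both hold (the
// value fits as an unsigned or as a signed 16-bit integer respectively),
// while isSafeTruncation(0x1FFFF, 16) fails because the value fits as neither.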
1776 
1777 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1778  if (VT.getScalarType() == MVT::i16) {
1779  // FP immediate values are broken.
1780  return isInlinableIntLiteral(Val);
1781  }
1782 
1783  // f16/v2f16 operands work correctly for all values.
1784  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1785 }
1786 
1787 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1788 
1789  // This is a hack to enable named inline values like
1790  // shared_base with both 32-bit and 64-bit operands.
1791  // Note that these values are defined as
1792  // 32-bit operands only.
1793  if (isInlineValue()) {
1794  return true;
1795  }
1796 
1797  if (!isImmTy(ImmTyNone)) {
1798  // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1799  return false;
1800  }
1801  // TODO: We should avoid using host float here. It would be better to
1802  // check the float bit values which is what a few other places do.
1803  // We've had bot failures before due to weird NaN support on mips hosts.
1804 
1805  APInt Literal(64, Imm.Val);
1806 
1807  if (Imm.IsFPImm) { // We got fp literal token
1808  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1809  return AMDGPU::isInlinableLiteral64(Imm.Val,
1810  AsmParser->hasInv2PiInlineImm());
1811  }
1812 
1813  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1814  if (!canLosslesslyConvertToFPType(FPLiteral, type))
1815  return false;
1816 
1817  if (type.getScalarSizeInBits() == 16) {
1818  return isInlineableLiteralOp16(
1819  static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1820  type, AsmParser->hasInv2PiInlineImm());
1821  }
1822 
1823  // Check if single precision literal is inlinable
1824  return AMDGPU::isInlinableLiteral32(
1825  static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1826  AsmParser->hasInv2PiInlineImm());
1827  }
1828 
1829  // We got int literal token.
1830  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1831  return AMDGPU::isInlinableLiteral64(Imm.Val,
1832  AsmParser->hasInv2PiInlineImm());
1833  }
1834 
1835  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1836  return false;
1837  }
1838 
1839  if (type.getScalarSizeInBits() == 16) {
1840  return isInlineableLiteralOp16(
1841  static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1842  type, AsmParser->hasInv2PiInlineImm());
1843  }
1844 
1845  return AMDGPU::isInlinableLiteral32(
1846  static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1847  AsmParser->hasInv2PiInlineImm());
1848 }
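// Editorial summary (hedged, not from the original file): the hardware
// inline-constant set roughly comprises the integers -16..64 and the fp
// values +-0.0, +-0.5, +-1.0, +-2.0, +-4.0, plus 1/(2*pi) when
// hasInv2PiInlineImm() is true; AMDGPU::isInlinableLiteral{16,32,64} encode
// the exact rules. Example: "v_add_f32 v0, 2.0, v1" needs no literal dword,
// while "v_add_f32 v0, 3.0, v1" does.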
1849 
1850 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1851  // Check that this immediate can be added as literal
1852  if (!isImmTy(ImmTyNone)) {
1853  return false;
1854  }
1855 
1856  if (!Imm.IsFPImm) {
1857  // We got int literal token.
1858 
1859  if (type == MVT::f64 && hasFPModifiers()) {
1860  // Cannot apply fp modifiers to int literals preserving the same semantics
1861  // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1862  // disable these cases.
1863  return false;
1864  }
1865 
1866  unsigned Size = type.getSizeInBits();
1867  if (Size == 64)
1868  Size = 32;
1869 
1870  // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1871  // types.
1872  return isSafeTruncation(Imm.Val, Size);
1873  }
1874 
1875  // We got fp literal token
1876  if (type == MVT::f64) { // Expected 64-bit fp operand
1877  // We would set low 64-bits of the literal to zeroes but we accept such literals
1878  return true;
1879  }
1880 
1881  if (type == MVT::i64) { // Expected 64-bit int operand
1882  // We don't allow fp literals in 64-bit integer instructions. It is
1883  // unclear how we should encode them.
1884  return false;
1885  }
1886 
1887  // We allow fp literals with f16x2 operands assuming that the specified
1888  // literal goes into the lower half and the upper half is zero. We also
1889  // require that the literal may be losslessly converted to f16.
1890  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1891  (type == MVT::v2i16)? MVT::i16 :
1892  (type == MVT::v2f32)? MVT::f32 : type;
1893 
1894  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1895  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1896 }
1897 
1898 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1899  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1900 }
1901 
1902 bool AMDGPUOperand::isVRegWithInputMods() const {
1903  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1904  // GFX90A allows DPP on 64-bit operands.
1905  (isRegClass(AMDGPU::VReg_64RegClassID) &&
1906  AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1907 }
1908 
1909 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1910  if (AsmParser->isVI())
1911  return isVReg32();
1912  else if (AsmParser->isGFX9Plus())
1913  return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1914  else
1915  return false;
1916 }
1917 
1918 bool AMDGPUOperand::isSDWAFP16Operand() const {
1919  return isSDWAOperand(MVT::f16);
1920 }
1921 
1922 bool AMDGPUOperand::isSDWAFP32Operand() const {
1923  return isSDWAOperand(MVT::f32);
1924 }
1925 
1926 bool AMDGPUOperand::isSDWAInt16Operand() const {
1927  return isSDWAOperand(MVT::i16);
1928 }
1929 
1930 bool AMDGPUOperand::isSDWAInt32Operand() const {
1931  return isSDWAOperand(MVT::i32);
1932 }
1933 
1934 bool AMDGPUOperand::isBoolReg() const {
1935  auto FB = AsmParser->getFeatureBits();
1936  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1937  (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1938 }
1939 
1940 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1941 {
1942  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1943  assert(Size == 2 || Size == 4 || Size == 8);
1944 
1945  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1946 
1947  if (Imm.Mods.Abs) {
1948  Val &= ~FpSignMask;
1949  }
1950  if (Imm.Mods.Neg) {
1951  Val ^= FpSignMask;
1952  }
1953 
1954  return Val;
1955 }
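// Illustrative example (not part of the original file): for a 32-bit operand
// FpSignMask is 0x80000000, so with Imm.Mods.Abs the encoding of -3.0
// (0xC0400000) becomes 3.0 (0x40400000), and with Imm.Mods.Neg the sign bit
// is simply flipped.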
1956 
1957 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1958  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1959  Inst.getNumOperands())) {
1960  addLiteralImmOperand(Inst, Imm.Val,
1961  ApplyModifiers &
1962  isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1963  } else {
1964  assert(!isImmTy(ImmTyNone) || !hasModifiers());
1965  Inst.addOperand(MCOperand::createImm(Imm.Val));
1966  setImmKindNone();
1967  }
1968 }
1969 
1970 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1971  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1972  auto OpNum = Inst.getNumOperands();
1973  // Check that this operand accepts literals
1974  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1975 
1976  if (ApplyModifiers) {
1977  assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1978  const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1979  Val = applyInputFPModifiers(Val, Size);
1980  }
1981 
1982  APInt Literal(64, Val);
1983  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1984 
1985  if (Imm.IsFPImm) { // We got fp literal token
1986  switch (OpTy) {
1987  case AMDGPU::OPERAND_REG_IMM_INT64:
1988  case AMDGPU::OPERAND_REG_IMM_FP64:
1989  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1990  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1991  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1992  if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1993  AsmParser->hasInv2PiInlineImm())) {
1994  Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1995  setImmKindConst();
1996  return;
1997  }
1998 
1999  // Non-inlineable
2000  if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2001  // For fp operands we check if low 32 bits are zeros
2002  if (Literal.getLoBits(32) != 0) {
2003  const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2004  "Can't encode literal as exact 64-bit floating-point operand. "
2005  "Low 32-bits will be set to zero");
2006  }
2007 
2008  Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2009  setImmKindLiteral();
2010  return;
2011  }
2012 
2013  // We don't allow fp literals in 64-bit integer instructions. It is
2014  // unclear how we should encode them. This case should be checked earlier
2015  // in predicate methods (isLiteralImm())
2016  llvm_unreachable("fp literal in 64-bit integer instruction.");
2017 
2018  case AMDGPU::OPERAND_REG_IMM_INT32:
2019  case AMDGPU::OPERAND_REG_IMM_FP32:
2020  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2021  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2022  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2023  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2024  case AMDGPU::OPERAND_REG_IMM_INT16:
2025  case AMDGPU::OPERAND_REG_IMM_FP16:
2026  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2027  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2028  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2029  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2030  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2031  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2032  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2033  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2034  case AMDGPU::OPERAND_REG_IMM_V2INT16:
2035  case AMDGPU::OPERAND_REG_IMM_V2FP16:
2036  case AMDGPU::OPERAND_REG_IMM_V2FP32:
2037  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2038  case AMDGPU::OPERAND_REG_IMM_V2INT32:
2039  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: {
2040  bool lost;
2041  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2042  // Convert literal to single precision
2043  FPLiteral.convert(*getOpFltSemantics(OpTy),
2044  APFloat::rmNearestTiesToEven, &lost);
2045  // We allow precision loss but not overflow or underflow. This should be
2046  // checked earlier in isLiteralImm()
2047 
2048  uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2049  Inst.addOperand(MCOperand::createImm(ImmVal));
2050  setImmKindLiteral();
2051  return;
2052  }
2053  default:
2054  llvm_unreachable("invalid operand size");
2055  }
2056 
2057  return;
2058  }
2059 
2060  // We got int literal token.
2061  // Only sign extend inline immediates.
2062  switch (OpTy) {
2063  case AMDGPU::OPERAND_REG_IMM_INT32:
2064  case AMDGPU::OPERAND_REG_IMM_FP32:
2065  case AMDGPU::OPERAND_REG_IMM_V2FP32:
2066  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2067  case AMDGPU::OPERAND_REG_IMM_V2INT32:
2068  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2069  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2070  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2071  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2072  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2073  case AMDGPU::OPERAND_REG_IMM_V2INT16:
2074  case AMDGPU::OPERAND_REG_IMM_V2FP16:
2075  if (isSafeTruncation(Val, 32) &&
2076  AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2077  AsmParser->hasInv2PiInlineImm())) {
2078  Inst.addOperand(MCOperand::createImm(Val));
2079  setImmKindConst();
2080  return;
2081  }
2082 
2083  Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2084  setImmKindLiteral();
2085  return;
2086 
2087  case AMDGPU::OPERAND_REG_IMM_INT64:
2088  case AMDGPU::OPERAND_REG_IMM_FP64:
2089  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2090  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2091  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2092  if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2093  Inst.addOperand(MCOperand::createImm(Val));
2094  setImmKindConst();
2095  return;
2096  }
2097 
2098  Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2099  setImmKindLiteral();
2100  return;
2101 
2102  case AMDGPU::OPERAND_REG_IMM_INT16:
2103  case AMDGPU::OPERAND_REG_IMM_FP16:
2104  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2105  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2106  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2107  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2108  if (isSafeTruncation(Val, 16) &&
2109  AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2110  AsmParser->hasInv2PiInlineImm())) {
2111  Inst.addOperand(MCOperand::createImm(Val));
2112  setImmKindConst();
2113  return;
2114  }
2115 
2116  Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2117  setImmKindLiteral();
2118  return;
2119 
2120  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2121  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2122  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2123  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2124  assert(isSafeTruncation(Val, 16));
2125  assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2126  AsmParser->hasInv2PiInlineImm()));
2127 
2128  Inst.addOperand(MCOperand::createImm(Val));
2129  return;
2130  }
2131  default:
2132  llvm_unreachable("invalid operand size");
2133  }
2134 }
2135 
2136 template <unsigned Bitwidth>
2137 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2138  APInt Literal(64, Imm.Val);
2139  setImmKindNone();
2140 
2141  if (!Imm.IsFPImm) {
2142  // We got int literal token.
2143  Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2144  return;
2145  }
2146 
2147  bool Lost;
2148  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2149  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2150  APFloat::rmNearestTiesToEven, &Lost);
2151  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2152 }
2153 
2154 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2155  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2156 }
2157 
2158 static bool isInlineValue(unsigned Reg) {
2159  switch (Reg) {
2160  case AMDGPU::SRC_SHARED_BASE:
2161  case AMDGPU::SRC_SHARED_LIMIT:
2162  case AMDGPU::SRC_PRIVATE_BASE:
2163  case AMDGPU::SRC_PRIVATE_LIMIT:
2164  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2165  return true;
2166  case AMDGPU::SRC_VCCZ:
2167  case AMDGPU::SRC_EXECZ:
2168  case AMDGPU::SRC_SCC:
2169  return true;
2170  case AMDGPU::SGPR_NULL:
2171  return true;
2172  default:
2173  return false;
2174  }
2175 }
2176 
2177 bool AMDGPUOperand::isInlineValue() const {
2178  return isRegKind() && ::isInlineValue(getReg());
2179 }
2180 
2181 //===----------------------------------------------------------------------===//
2182 // AsmParser
2183 //===----------------------------------------------------------------------===//
2184 
2185 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2186  if (Is == IS_VGPR) {
2187  switch (RegWidth) {
2188  default: return -1;
2189  case 1: return AMDGPU::VGPR_32RegClassID;
2190  case 2: return AMDGPU::VReg_64RegClassID;
2191  case 3: return AMDGPU::VReg_96RegClassID;
2192  case 4: return AMDGPU::VReg_128RegClassID;
2193  case 5: return AMDGPU::VReg_160RegClassID;
2194  case 6: return AMDGPU::VReg_192RegClassID;
2195  case 7: return AMDGPU::VReg_224RegClassID;
2196  case 8: return AMDGPU::VReg_256RegClassID;
2197  case 16: return AMDGPU::VReg_512RegClassID;
2198  case 32: return AMDGPU::VReg_1024RegClassID;
2199  }
2200  } else if (Is == IS_TTMP) {
2201  switch (RegWidth) {
2202  default: return -1;
2203  case 1: return AMDGPU::TTMP_32RegClassID;
2204  case 2: return AMDGPU::TTMP_64RegClassID;
2205  case 4: return AMDGPU::TTMP_128RegClassID;
2206  case 8: return AMDGPU::TTMP_256RegClassID;
2207  case 16: return AMDGPU::TTMP_512RegClassID;
2208  }
2209  } else if (Is == IS_SGPR) {
2210  switch (RegWidth) {
2211  default: return -1;
2212  case 1: return AMDGPU::SGPR_32RegClassID;
2213  case 2: return AMDGPU::SGPR_64RegClassID;
2214  case 3: return AMDGPU::SGPR_96RegClassID;
2215  case 4: return AMDGPU::SGPR_128RegClassID;
2216  case 5: return AMDGPU::SGPR_160RegClassID;
2217  case 6: return AMDGPU::SGPR_192RegClassID;
2218  case 7: return AMDGPU::SGPR_224RegClassID;
2219  case 8: return AMDGPU::SGPR_256RegClassID;
2220  case 16: return AMDGPU::SGPR_512RegClassID;
2221  }
2222  } else if (Is == IS_AGPR) {
2223  switch (RegWidth) {
2224  default: return -1;
2225  case 1: return AMDGPU::AGPR_32RegClassID;
2226  case 2: return AMDGPU::AReg_64RegClassID;
2227  case 3: return AMDGPU::AReg_96RegClassID;
2228  case 4: return AMDGPU::AReg_128RegClassID;
2229  case 5: return AMDGPU::AReg_160RegClassID;
2230  case 6: return AMDGPU::AReg_192RegClassID;
2231  case 7: return AMDGPU::AReg_224RegClassID;
2232  case 8: return AMDGPU::AReg_256RegClassID;
2233  case 16: return AMDGPU::AReg_512RegClassID;
2234  case 32: return AMDGPU::AReg_1024RegClassID;
2235  }
2236  }
2237  return -1;
2238 }
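// Editorial note (illustrative): RegWidth counts 32-bit registers, so e.g.
// v[0:1] maps to VReg_64RegClassID and s[0:3] to SGPR_128RegClassID; widths
// without a matching class (such as 9..15 dwords) fall through to the -1
// "unsupported" result above.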
2239 
2240 static unsigned getSpecialRegForName(StringRef RegName) {
2241  return StringSwitch<unsigned>(RegName)
2242  .Case("exec", AMDGPU::EXEC)
2243  .Case("vcc", AMDGPU::VCC)
2244  .Case("flat_scratch", AMDGPU::FLAT_SCR)
2245  .Case("xnack_mask", AMDGPU::XNACK_MASK)
2246  .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2247  .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2248  .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2249  .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2250  .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2251  .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2252  .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2253  .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2254  .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2255  .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2256  .Case("lds_direct", AMDGPU::LDS_DIRECT)
2257  .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2258  .Case("m0", AMDGPU::M0)
2259  .Case("vccz", AMDGPU::SRC_VCCZ)
2260  .Case("src_vccz", AMDGPU::SRC_VCCZ)
2261  .Case("execz", AMDGPU::SRC_EXECZ)
2262  .Case("src_execz", AMDGPU::SRC_EXECZ)
2263  .Case("scc", AMDGPU::SRC_SCC)
2264  .Case("src_scc", AMDGPU::SRC_SCC)
2265  .Case("tba", AMDGPU::TBA)
2266  .Case("tma", AMDGPU::TMA)
2267  .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2268  .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2269  .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2270  .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2271  .Case("vcc_lo", AMDGPU::VCC_LO)
2272  .Case("vcc_hi", AMDGPU::VCC_HI)
2273  .Case("exec_lo", AMDGPU::EXEC_LO)
2274  .Case("exec_hi", AMDGPU::EXEC_HI)
2275  .Case("tma_lo", AMDGPU::TMA_LO)
2276  .Case("tma_hi", AMDGPU::TMA_HI)
2277  .Case("tba_lo", AMDGPU::TBA_LO)
2278  .Case("tba_hi", AMDGPU::TBA_HI)
2279  .Case("pc", AMDGPU::PC_REG)
2280  .Case("null", AMDGPU::SGPR_NULL)
2281  .Default(AMDGPU::NoRegister);
2282 }
2283 
2284 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2285  SMLoc &EndLoc, bool RestoreOnFailure) {
2286  auto R = parseRegister();
2287  if (!R) return true;
2288  assert(R->isReg());
2289  RegNo = R->getReg();
2290  StartLoc = R->getStartLoc();
2291  EndLoc = R->getEndLoc();
2292  return false;
2293 }
2294 
2295 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2296  SMLoc &EndLoc) {
2297  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2298 }
2299 
2300 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2301  SMLoc &StartLoc,
2302  SMLoc &EndLoc) {
2303  bool Result =
2304  ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2305  bool PendingErrors = getParser().hasPendingError();
2306  getParser().clearPendingErrors();
2307  if (PendingErrors)
2308  return MatchOperand_ParseFail;
2309  if (Result)
2310  return MatchOperand_NoMatch;
2311  return MatchOperand_Success;
2312 }
2313 
2314 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2315  RegisterKind RegKind, unsigned Reg1,
2316  SMLoc Loc) {
2317  switch (RegKind) {
2318  case IS_SPECIAL:
2319  if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2320  Reg = AMDGPU::EXEC;
2321  RegWidth = 2;
2322  return true;
2323  }
2324  if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2325  Reg = AMDGPU::FLAT_SCR;
2326  RegWidth = 2;
2327  return true;
2328  }
2329  if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2330  Reg = AMDGPU::XNACK_MASK;
2331  RegWidth = 2;
2332  return true;
2333  }
2334  if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2335  Reg = AMDGPU::VCC;
2336  RegWidth = 2;
2337  return true;
2338  }
2339  if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2340  Reg = AMDGPU::TBA;
2341  RegWidth = 2;
2342  return true;
2343  }
2344  if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2345  Reg = AMDGPU::TMA;
2346  RegWidth = 2;
2347  return true;
2348  }
2349  Error(Loc, "register does not fit in the list");
2350  return false;
2351  case IS_VGPR:
2352  case IS_SGPR:
2353  case IS_AGPR:
2354  case IS_TTMP:
2355  if (Reg1 != Reg + RegWidth) {
2356  Error(Loc, "registers in a list must have consecutive indices");
2357  return false;
2358  }
2359  RegWidth++;
2360  return true;
2361  default:
2362  llvm_unreachable("unexpected register kind");
2363  }
2364 }
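// Illustrative example (not part of the original file): parsing the list
// [s0,s1,s2,s3] grows RegWidth from 1 to 4 one register at a time, while
// [exec_lo,exec_hi] takes the IS_SPECIAL path above and collapses the pair
// into the single EXEC register.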
2365 
2366 struct RegInfo {
2367  StringLiteral Name;
2368  RegisterKind Kind;
2369 };
2370 
2371 static constexpr RegInfo RegularRegisters[] = {
2372  {{"v"}, IS_VGPR},
2373  {{"s"}, IS_SGPR},
2374  {{"ttmp"}, IS_TTMP},
2375  {{"acc"}, IS_AGPR},
2376  {{"a"}, IS_AGPR},
2377 };
2378 
2379 static bool isRegularReg(RegisterKind Kind) {
2380  return Kind == IS_VGPR ||
2381  Kind == IS_SGPR ||
2382  Kind == IS_TTMP ||
2383  Kind == IS_AGPR;
2384 }
2385 
2386 static const RegInfo* getRegularRegInfo(StringRef Str) {
2387  for (const RegInfo &Reg : RegularRegisters)
2388  if (Str.startswith(Reg.Name))
2389  return &Reg;
2390  return nullptr;
2391 }
2392 
2393 static bool getRegNum(StringRef Str, unsigned& Num) {
2394  return !Str.getAsInteger(10, Num);
2395 }
2396 
2397 bool
2398 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2399  const AsmToken &NextToken) const {
2400 
2401  // A list of consecutive registers: [s0,s1,s2,s3]
2402  if (Token.is(AsmToken::LBrac))
2403  return true;
2404 
2405  if (!Token.is(AsmToken::Identifier))
2406  return false;
2407 
2408  // A single register like s0 or a range of registers like s[0:1]
2409 
2410  StringRef Str = Token.getString();
2411  const RegInfo *Reg = getRegularRegInfo(Str);
2412  if (Reg) {
2413  StringRef RegName = Reg->Name;
2414  StringRef RegSuffix = Str.substr(RegName.size());
2415  if (!RegSuffix.empty()) {
2416  unsigned Num;
2417  // A single register with an index: rXX
2418  if (getRegNum(RegSuffix, Num))
2419  return true;
2420  } else {
2421  // A range of registers: r[XX:YY].
2422  if (NextToken.is(AsmToken::LBrac))
2423  return true;
2424  }
2425  }
2426 
2427  return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2428 }
2429 
2430 bool
2431 AMDGPUAsmParser::isRegister()
2432 {
2433  return isRegister(getToken(), peekToken());
2434 }
2435 
2436 unsigned
2437 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2438  unsigned RegNum,
2439  unsigned RegWidth,
2440  SMLoc Loc) {
2441 
2442  assert(isRegularReg(RegKind));
2443 
2444  unsigned AlignSize = 1;
2445  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2446  // SGPR and TTMP registers must be aligned.
2447  // Max required alignment is 4 dwords.
2448  AlignSize = std::min(RegWidth, 4u);
2449  }
2450 
2451  if (RegNum % AlignSize != 0) {
2452  Error(Loc, "invalid register alignment");
2453  return AMDGPU::NoRegister;
2454  }
2455 
2456  unsigned RegIdx = RegNum / AlignSize;
2457  int RCID = getRegClass(RegKind, RegWidth);
2458  if (RCID == -1) {
2459  Error(Loc, "invalid or unsupported register size");
2460  return AMDGPU::NoRegister;
2461  }
2462 
2463  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2464  const MCRegisterClass RC = TRI->getRegClass(RCID);
2465  if (RegIdx >= RC.getNumRegs()) {
2466  Error(Loc, "register index is out of range");
2467  return AMDGPU::NoRegister;
2468  }
2469 
2470  return RC.getRegister(RegIdx);
2471 }
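// Illustrative example (not part of the original file): s[2:3] is accepted
// (width 2, index 2 is 2-aligned) but s[3:4] and s[2:5] are rejected with
// "invalid register alignment", since SGPR/TTMP tuples must start at an index
// aligned to min(width, 4). VGPR/AGPR tuples (AlignSize == 1) have no such
// restriction.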
2472 
2473 bool
2474 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2475  int64_t RegLo, RegHi;
2476  if (!skipToken(AsmToken::LBrac, "missing register index"))
2477  return false;
2478 
2479  SMLoc FirstIdxLoc = getLoc();
2480  SMLoc SecondIdxLoc;
2481 
2482  if (!parseExpr(RegLo))
2483  return false;
2484 
2485  if (trySkipToken(AsmToken::Colon)) {
2486  SecondIdxLoc = getLoc();
2487  if (!parseExpr(RegHi))
2488  return false;
2489  } else {
2490  RegHi = RegLo;
2491  }
2492 
2493  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2494  return false;
2495 
2496  if (!isUInt<32>(RegLo)) {
2497  Error(FirstIdxLoc, "invalid register index");
2498  return false;
2499  }
2500 
2501  if (!isUInt<32>(RegHi)) {
2502  Error(SecondIdxLoc, "invalid register index");
2503  return false;
2504  }
2505 
2506  if (RegLo > RegHi) {
2507  Error(FirstIdxLoc, "first register index should not exceed second index");
2508  return false;
2509  }
2510 
2511  Num = static_cast<unsigned>(RegLo);
2512  Width = (RegHi - RegLo) + 1;
2513  return true;
2514 }
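// Illustrative example (not part of the original file): after the leading
// "s"/"v" has been consumed, "[0:3]" yields Num = 0 and Width = 4, while a
// degenerate range such as "[5]" (no colon) yields Num = 5 and Width = 1.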
2515 
2516 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2517  unsigned &RegNum, unsigned &RegWidth,
2518  SmallVectorImpl<AsmToken> &Tokens) {
2519  assert(isToken(AsmToken::Identifier));
2520  unsigned Reg = getSpecialRegForName(getTokenStr());
2521  if (Reg) {
2522  RegNum = 0;
2523  RegWidth = 1;
2524  RegKind = IS_SPECIAL;
2525  Tokens.push_back(getToken());
2526  lex(); // skip register name
2527  }
2528  return Reg;
2529 }
2530 
2531 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2532  unsigned &RegNum, unsigned &RegWidth,
2533  SmallVectorImpl<AsmToken> &Tokens) {
2534  assert(isToken(AsmToken::Identifier));
2535  StringRef RegName = getTokenStr();
2536  auto Loc = getLoc();
2537 
2538  const RegInfo *RI = getRegularRegInfo(RegName);
2539  if (!RI) {
2540  Error(Loc, "invalid register name");
2541  return AMDGPU::NoRegister;
2542  }
2543 
2544  Tokens.push_back(getToken());
2545  lex(); // skip register name
2546 
2547  RegKind = RI->Kind;
2548  StringRef RegSuffix = RegName.substr(RI->Name.size());
2549  if (!RegSuffix.empty()) {
2550  // Single 32-bit register: vXX.
2551  if (!getRegNum(RegSuffix, RegNum)) {
2552  Error(Loc, "invalid register index");
2553  return AMDGPU::NoRegister;
2554  }
2555  RegWidth = 1;
2556  } else {
2557  // Range of registers: v[XX:YY]. ":YY" is optional.
2558  if (!ParseRegRange(RegNum, RegWidth))
2559  return AMDGPU::NoRegister;
2560  }
2561 
2562  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2563 }
2564 
2565 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2566  unsigned &RegWidth,
2567  SmallVectorImpl<AsmToken> &Tokens) {
2568  unsigned Reg = AMDGPU::NoRegister;
2569  auto ListLoc = getLoc();
2570 
2571  if (!skipToken(AsmToken::LBrac,
2572  "expected a register or a list of registers")) {
2573  return AMDGPU::NoRegister;
2574  }
2575 
2576  // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2577 
2578  auto Loc = getLoc();
2579  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2580  return AMDGPU::NoRegister;
2581  if (RegWidth != 1) {
2582  Error(Loc, "expected a single 32-bit register");
2583  return AMDGPU::NoRegister;
2584  }
2585 
2586  for (; trySkipToken(AsmToken::Comma); ) {
2587  RegisterKind NextRegKind;
2588  unsigned NextReg, NextRegNum, NextRegWidth;
2589  Loc = getLoc();
2590 
2591  if (!ParseAMDGPURegister(NextRegKind, NextReg,
2592  NextRegNum, NextRegWidth,
2593  Tokens)) {
2594  return AMDGPU::NoRegister;
2595  }
2596  if (NextRegWidth != 1) {
2597  Error(Loc, "expected a single 32-bit register");
2598  return AMDGPU::NoRegister;
2599  }
2600  if (NextRegKind != RegKind) {
2601  Error(Loc, "registers in a list must be of the same kind");
2602  return AMDGPU::NoRegister;
2603  }
2604  if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2605  return AMDGPU::NoRegister;
2606  }
2607 
2608  if (!skipToken(AsmToken::RBrac,
2609  "expected a comma or a closing square bracket")) {
2610  return AMDGPU::NoRegister;
2611  }
2612 
2613  if (isRegularReg(RegKind))
2614  Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2615 
2616  return Reg;
2617 }
2618 
2619 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2620  unsigned &RegNum, unsigned &RegWidth,
2621  SmallVectorImpl<AsmToken> &Tokens) {
2622  auto Loc = getLoc();
2623  Reg = AMDGPU::NoRegister;
2624 
2625  if (isToken(AsmToken::Identifier)) {
2626  Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2627  if (Reg == AMDGPU::NoRegister)
2628  Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2629  } else {
2630  Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2631  }
2632 
2633  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2634  if (Reg == AMDGPU::NoRegister) {
2635  assert(Parser.hasPendingError());
2636  return false;
2637  }
2638 
2639  if (!subtargetHasRegister(*TRI, Reg)) {
2640  if (Reg == AMDGPU::SGPR_NULL) {
2641  Error(Loc, "'null' operand is not supported on this GPU");
2642  } else {
2643  Error(Loc, "register not available on this GPU");
2644  }
2645  return false;
2646  }
2647 
2648  return true;
2649 }
2650 
2651 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2652  unsigned &RegNum, unsigned &RegWidth,
2653  bool RestoreOnFailure /*=false*/) {
2654  Reg = AMDGPU::NoRegister;
2655 
2656  SmallVector<AsmToken, 1> Tokens;
2657  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2658  if (RestoreOnFailure) {
2659  while (!Tokens.empty()) {
2660  getLexer().UnLex(Tokens.pop_back_val());
2661  }
2662  }
2663  return true;
2664  }
2665  return false;
2666 }
2667 
2668 Optional<StringRef>
2669 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2670  switch (RegKind) {
2671  case IS_VGPR:
2672  return StringRef(".amdgcn.next_free_vgpr");
2673  case IS_SGPR:
2674  return StringRef(".amdgcn.next_free_sgpr");
2675  default:
2676  return None;
2677  }
2678 }
2679 
2680 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2681  auto SymbolName = getGprCountSymbolName(RegKind);
2682  assert(SymbolName && "initializing invalid register kind");
2683  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2684  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2685 }
2686 
2687 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2688  unsigned DwordRegIndex,
2689  unsigned RegWidth) {
2690  // Symbols are only defined for GCN targets
2691  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2692  return true;
2693 
2694  auto SymbolName = getGprCountSymbolName(RegKind);
2695  if (!SymbolName)
2696  return true;
2697  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2698 
2699  int64_t NewMax = DwordRegIndex + RegWidth - 1;
2700  int64_t OldCount;
2701 
2702  if (!Sym->isVariable())
2703  return !Error(getLoc(),
2704  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2705  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2706  return !Error(
2707  getLoc(),
2708  ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2709 
2710  if (OldCount <= NewMax)
2711  Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2712 
2713  return true;
2714 }
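// Illustrative example (not part of the original file): after parsing v7
// (DwordRegIndex = 7, RegWidth = 1) the .amdgcn.next_free_vgpr symbol is
// raised to at least 8, i.e. one past the highest register index seen so far.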
2715 
2716 std::unique_ptr<AMDGPUOperand>
2717 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2718  const auto &Tok = getToken();
2719  SMLoc StartLoc = Tok.getLoc();
2720  SMLoc EndLoc = Tok.getEndLoc();
2721  RegisterKind RegKind;
2722  unsigned Reg, RegNum, RegWidth;
2723 
2724  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2725  return nullptr;
2726  }
2727  if (isHsaAbiVersion3Or4(&getSTI())) {
2728  if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2729  return nullptr;
2730  } else
2731  KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2732  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2733 }
2734 
2735 OperandMatchResultTy
2736 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2737  // TODO: add syntactic sugar for 1/(2*PI)
2738 
2739  assert(!isRegister());
2740  assert(!isModifier());
2741 
2742  const auto& Tok = getToken();
2743  const auto& NextTok = peekToken();
2744  bool IsReal = Tok.is(AsmToken::Real);
2745  SMLoc S = getLoc();
2746  bool Negate = false;
2747 
2748  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2749  lex();
2750  IsReal = true;
2751  Negate = true;
2752  }
2753 
2754  if (IsReal) {
2755  // Floating-point expressions are not supported.
2756  // Can only allow floating-point literals with an
2757  // optional sign.
2758 
2759  StringRef Num = getTokenStr();
2760  lex();
2761 
2762  APFloat RealVal(APFloat::IEEEdouble());
2763  auto roundMode = APFloat::rmNearestTiesToEven;
2764  if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2765  return MatchOperand_ParseFail;
2766  }
2767  if (Negate)
2768  RealVal.changeSign();
2769 
2770  Operands.push_back(
2771  AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2772  AMDGPUOperand::ImmTyNone, true));
2773 
2774  return MatchOperand_Success;
2775 
2776  } else {
2777  int64_t IntVal;
2778  const MCExpr *Expr;
2779  SMLoc S = getLoc();
2780 
2781  if (HasSP3AbsModifier) {
2782  // This is a workaround for handling expressions
2783  // as arguments of SP3 'abs' modifier, for example:
2784  // |1.0|
2785  // |-1|
2786  // |1+x|
2787  // This syntax is not compatible with syntax of standard
2788  // MC expressions (due to the trailing '|').
2789  SMLoc EndLoc;
2790  if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2791  return MatchOperand_ParseFail;
2792  } else {
2793  if (Parser.parseExpression(Expr))
2794  return MatchOperand_ParseFail;
2795  }
2796 
2797  if (Expr->evaluateAsAbsolute(IntVal)) {
2798  Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2799  } else {
2800  Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2801  }
2802 
2803  return MatchOperand_Success;
2804  }
2805 
2806  return MatchOperand_NoMatch;
2807 }
2808 
2809 OperandMatchResultTy
2810 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2811  if (!isRegister())
2812  return MatchOperand_NoMatch;
2813 
2814  if (auto R = parseRegister()) {
2815  assert(R->isReg());
2816  Operands.push_back(std::move(R));
2817  return MatchOperand_Success;
2818  }
2819  return MatchOperand_ParseFail;
2820 }
2821 
2822 OperandMatchResultTy
2823 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2824  auto res = parseReg(Operands);
2825  if (res != MatchOperand_NoMatch) {
2826  return res;
2827  } else if (isModifier()) {
2828  return MatchOperand_NoMatch;
2829  } else {
2830  return parseImm(Operands, HasSP3AbsMod);
2831  }
2832 }
2833 
2834 bool
2835 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2836  if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2837  const auto &str = Token.getString();
2838  return str == "abs" || str == "neg" || str == "sext";
2839  }
2840  return false;
2841 }
2842 
2843 bool
2844 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2845  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2846 }
2847 
2848 bool
2849 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2850  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2851 }
2852 
2853 bool
2854 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2855  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2856 }
2857 
2858 // Check if this is an operand modifier or an opcode modifier
2859 // which may look like an expression but it is not. We should
2860 // avoid parsing these modifiers as expressions. Currently
2861 // recognized sequences are:
2862 // |...|
2863 // abs(...)
2864 // neg(...)
2865 // sext(...)
2866 // -reg
2867 // -|...|
2868 // -abs(...)
2869 // name:...
2870 // Note that simple opcode modifiers like 'gds' may be parsed as
2871 // expressions; this is a special case. See getExpressionAsToken.
2872 //
2873 bool
2874 AMDGPUAsmParser::isModifier() {
2875 
2876  AsmToken Tok = getToken();
2877  AsmToken NextToken[2];
2878  peekTokens(NextToken);
2879 
2880  return isOperandModifier(Tok, NextToken[0]) ||
2881  (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2882  isOpcodeModifierWithVal(Tok, NextToken[0]);
2883 }
2884 
2885 // Check if the current token is an SP3 'neg' modifier.
2886 // Currently this modifier is allowed in the following context:
2887 //
2888 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2889 // 2. Before an 'abs' modifier: -abs(...)
2890 // 3. Before an SP3 'abs' modifier: -|...|
2891 //
2892 // In all other cases "-" is handled as a part
2893 // of an expression that follows the sign.
2894 //
2895 // Note: When "-" is followed by an integer literal,
2896 // this is interpreted as integer negation rather
2897 // than a floating-point NEG modifier applied to N.
2898  // Besides being counter-intuitive, such use of a floating-point
2899 // NEG modifier would have resulted in different meaning
2900 // of integer literals used with VOP1/2/C and VOP3,
2901 // for example:
2902 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2903 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2904 // Negative fp literals with preceding "-" are
2905  // handled likewise for uniformity
2906 //
2907 bool
2908 AMDGPUAsmParser::parseSP3NegModifier() {
2909 
2910  AsmToken NextToken[2];
2911  peekTokens(NextToken);
2912 
2913  if (isToken(AsmToken::Minus) &&
2914  (isRegister(NextToken[0], NextToken[1]) ||
2915  NextToken[0].is(AsmToken::Pipe) ||
2916  isId(NextToken[0], "abs"))) {
2917  lex();
2918  return true;
2919  }
2920 
2921  return false;
2922 }
2923 
2924 OperandMatchResultTy
2925 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2926  bool AllowImm) {
2927  bool Neg, SP3Neg;
2928  bool Abs, SP3Abs;
2929  SMLoc Loc;
2930 
2931  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2932  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2933  Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2934  return MatchOperand_ParseFail;
2935  }
2936 
2937  SP3Neg = parseSP3NegModifier();
2938 
2939  Loc = getLoc();
2940  Neg = trySkipId("neg");
2941  if (Neg && SP3Neg) {
2942  Error(Loc, "expected register or immediate");
2943  return MatchOperand_ParseFail;
2944  }
2945  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2946  return MatchOperand_ParseFail;
2947 
2948  Abs = trySkipId("abs");
2949  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2950  return MatchOperand_ParseFail;
2951 
2952  Loc = getLoc();
2953  SP3Abs = trySkipToken(AsmToken::Pipe);
2954  if (Abs && SP3Abs) {
2955  Error(Loc, "expected register or immediate");
2956  return MatchOperand_ParseFail;
2957  }
2958 
2959  OperandMatchResultTy Res;
2960  if (AllowImm) {
2961  Res = parseRegOrImm(Operands, SP3Abs);
2962  } else {
2963  Res = parseReg(Operands);
2964  }
2965  if (Res != MatchOperand_Success) {
2966  return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2967  }
2968 
2969  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2970  return MatchOperand_ParseFail;
2971  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2972  return MatchOperand_ParseFail;
2973  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2974  return MatchOperand_ParseFail;
2975 
2976  AMDGPUOperand::Modifiers Mods;
2977  Mods.Abs = Abs || SP3Abs;
2978  Mods.Neg = Neg || SP3Neg;
2979 
2980  if (Mods.hasFPModifiers()) {
2981  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2982  if (Op.isExpr()) {
2983  Error(Op.getStartLoc(), "expected an absolute expression");
2984  return MatchOperand_ParseFail;
2985  }
2986  Op.setModifiers(Mods);
2987  }
2988  return MatchOperand_Success;
2989 }
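// Examples of operands accepted here (illustrative, not from the original
// file): "v0", "-v0", "|v1|", "-|v1|", "abs(v2)", "neg(2.0)" and
// "neg(abs(v3))"; doubling up the same modifier, e.g. "abs(|v0|)" or "--1",
// is rejected by the checks above.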
2990 
2991 OperandMatchResultTy
2992 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2993  bool AllowImm) {
2994  bool Sext = trySkipId("sext");
2995  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2996  return MatchOperand_ParseFail;
2997 
2998  OperandMatchResultTy Res;
2999  if (AllowImm) {
3000  Res = parseRegOrImm(Operands);
3001  } else {
3002  Res = parseReg(Operands);
3003  }
3004  if (Res != MatchOperand_Success) {
3005  return Sext? MatchOperand_ParseFail : Res;
3006  }
3007 
3008  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3009  return MatchOperand_ParseFail;
3010 
3011  AMDGPUOperand::Modifiers Mods;
3012  Mods.Sext = Sext;
3013 
3014  if (Mods.hasIntModifiers()) {
3015  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3016  if (Op.isExpr()) {
3017  Error(Op.getStartLoc(), "expected an absolute expression");
3018  return MatchOperand_ParseFail;
3019  }
3020  Op.setModifiers(Mods);
3021  }
3022 
3023  return MatchOperand_Success;
3024 }
3025 
3026 OperandMatchResultTy
3027 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3028  return parseRegOrImmWithFPInputMods(Operands, false);
3029 }
3030 
3031 OperandMatchResultTy
3032 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3033  return parseRegOrImmWithIntInputMods(Operands, false);
3034 }
3035 
3036 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3037  auto Loc = getLoc();
3038  if (trySkipId("off")) {
3039  Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3040  AMDGPUOperand::ImmTyOff, false));
3041  return MatchOperand_Success;
3042  }
3043 
3044  if (!isRegister())
3045  return MatchOperand_NoMatch;
3046 
3047  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3048  if (Reg) {
3049  Operands.push_back(std::move(Reg));
3050  return MatchOperand_Success;
3051  }
3052 
3053  return MatchOperand_ParseFail;
3054 
3055 }
3056 
3057 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3058  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3059 
3060  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3061  (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3062  (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3063  (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3064  return Match_InvalidOperand;
3065 
3066  if ((TSFlags & SIInstrFlags::VOP3) &&
3067  (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3068  getForcedEncodingSize() != 64)
3069  return Match_PreferE32;
3070 
3071  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3072  Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3073  // v_mac_f32/16 allow only dst_sel == DWORD;
3074  auto OpNum =
3075  AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3076  const auto &Op = Inst.getOperand(OpNum);
3077  if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3078  return Match_InvalidOperand;
3079  }
3080  }
3081 
3082  return Match_Success;
3083 }
3084 
3085 static ArrayRef<unsigned> getAllVariants() {
3086  static const unsigned Variants[] = {
3087  AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3088  AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3089  };
3090 
3091  return makeArrayRef(Variants);
3092 }
3093 
3094 // What asm variants we should check
3095 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3096  if (getForcedEncodingSize() == 32) {
3097  static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3098  return makeArrayRef(Variants);
3099  }
3100 
3101  if (isForcedVOP3()) {
3102  static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3103  return makeArrayRef(Variants);
3104  }
3105 
3106  if (isForcedSDWA()) {
3107  static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3108  AMDGPUAsmVariants::SDWA9};
3109  return makeArrayRef(Variants);
3110  }
3111 
3112  if (isForcedDPP()) {
3113  static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3114  return makeArrayRef(Variants);
3115  }
3116 
3117  return getAllVariants();
3118 }
3119 
3120 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3121  if (getForcedEncodingSize() == 32)
3122  return "e32";
3123 
3124  if (isForcedVOP3())
3125  return "e64";
3126 
3127  if (isForcedSDWA())
3128  return "sdwa";
3129 
3130  if (isForcedDPP())
3131  return "dpp";
3132 
3133  return "";
3134 }
3135 
3136 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3137  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3138  const unsigned Num = Desc.getNumImplicitUses();
3139  for (unsigned i = 0; i < Num; ++i) {
3140  unsigned Reg = Desc.ImplicitUses[i];
3141  switch (Reg) {
3142  case AMDGPU::FLAT_SCR:
3143  case AMDGPU::VCC:
3144  case AMDGPU::VCC_LO:
3145  case AMDGPU::VCC_HI:
3146  case AMDGPU::M0:
3147  return Reg;
3148  default:
3149  break;
3150  }
3151  }
3152  return AMDGPU::NoRegister;
3153 }
3154 
3155 // NB: This code is correct only when used to check constant
3156  // bus limitations because GFX7 supports no f16 inline constants.
3157 // Note that there are no cases when a GFX7 opcode violates
3158 // constant bus limitations due to the use of an f16 constant.
3159 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3160  unsigned OpIdx) const {
3161  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3162 
3163  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3164  return false;
3165  }
3166 
3167  const MCOperand &MO = Inst.getOperand(OpIdx);
3168 
3169  int64_t Val = MO.getImm();
3170  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3171 
3172  switch (OpSize) { // expected operand size
3173  case 8:
3174  return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3175  case 4:
3176  return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3177  case 2: {
3178  const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3179  if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3180  OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3181  OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3182  return AMDGPU::isInlinableIntLiteral(Val);
3183 
3184  if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3185  OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3186  OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3187  return AMDGPU::isInlinableIntLiteralV216(Val);
3188 
3189  if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3190  OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3191  OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3192  return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3193 
3194  return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3195  }
3196  default:
3197  llvm_unreachable("invalid operand size");
3198  }
3199 }
3200 
3201 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3202  if (!isGFX10Plus())
3203  return 1;
3204 
3205  switch (Opcode) {
3206  // 64-bit shift instructions can use only one scalar value input
3207  case AMDGPU::V_LSHLREV_B64_e64:
3208  case AMDGPU::V_LSHLREV_B64_gfx10:
3209  case AMDGPU::V_LSHRREV_B64_e64:
3210  case AMDGPU::V_LSHRREV_B64_gfx10:
3211  case AMDGPU::V_ASHRREV_I64_e64:
3212  case AMDGPU::V_ASHRREV_I64_gfx10:
3213  case AMDGPU::V_LSHL_B64_e64:
3214  case AMDGPU::V_LSHR_B64_e64:
3215  case AMDGPU::V_ASHR_I64_e64:
3216  return 1;
3217  default:
3218  return 2;
3219  }
3220 }
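// Editorial summary (hedged, not from the original file): a VALU instruction
// can read only a limited number of scalar values (SGPRs and/or a literal)
// through the constant bus. Pre-GFX10 targets allow one such read; GFX10+
// generally allows two, except for the 64-bit shifts listed above. For
// example, "v_add_f32_e64 v0, s0, s1" is accepted only on GFX10+, while
// "v_lshlrev_b64 v[0:1], s0, s[2:3]" exceeds the limit even there.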
3221 
3222 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3223  const MCOperand &MO = Inst.getOperand(OpIdx);
3224  if (MO.isImm()) {
3225  return !isInlineConstant(Inst, OpIdx);
3226  } else if (MO.isReg()) {
3227  auto Reg = MO.getReg();
3228  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3229  auto PReg = mc2PseudoReg(Reg);
3230  return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3231  } else {
3232  return true;
3233  }
3234 }
3235 
3236 bool
3237 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3238  const OperandVector &Operands) {
3239  const unsigned Opcode = Inst.getOpcode();
3240  const MCInstrDesc &Desc = MII.get(Opcode);
3241  unsigned LastSGPR = AMDGPU::NoRegister;
3242  unsigned ConstantBusUseCount = 0;
3243  unsigned NumLiterals = 0;
3244  unsigned LiteralSize;
3245 
3246  if (Desc.TSFlags &
3247  (SIInstrFlags::VOPC |
3248  SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3249  SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3250  SIInstrFlags::SDWA)) {
3251  // Check special imm operands (used by madmk, etc)
3252  if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3253  ++ConstantBusUseCount;
3254  }
3255 
3256  SmallDenseSet<unsigned> SGPRsUsed;
3257  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3258  if (SGPRUsed != AMDGPU::NoRegister) {
3259  SGPRsUsed.insert(SGPRUsed);
3260  ++ConstantBusUseCount;
3261  }
3262 
3263  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3264  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3265  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3266 
3267  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3268 
3269  for (int OpIdx : OpIndices) {
3270  if (OpIdx == -1) break;
3271 
3272  const MCOperand &MO = Inst.getOperand(OpIdx);
3273  if (usesConstantBus(Inst, OpIdx)) {
3274  if (MO.isReg()) {
3275  LastSGPR = mc2PseudoReg(MO.getReg());
3276  // Pairs of registers with a partial intersection like these
3277  // s0, s[0:1]
3278  // flat_scratch_lo, flat_scratch
3279  // flat_scratch_lo, flat_scratch_hi
3280  // are theoretically valid but they are disabled anyway.
3281  // Note that this code mimics SIInstrInfo::verifyInstruction
3282  if (!SGPRsUsed.count(LastSGPR)) {
3283  SGPRsUsed.insert(LastSGPR);
3284  ++ConstantBusUseCount;
3285  }
3286  } else { // Expression or a literal
3287 
3288  if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3289  continue; // special operand like VINTERP attr_chan
3290 
3291  // An instruction may use only one literal.
3292  // This has been validated on the previous step.
3293  // See validateVOP3Literal.
3294  // This literal may be used as more than one operand.
3295  // If all these operands are of the same size,
3296  // this literal counts as one scalar value.
3297  // Otherwise it counts as 2 scalar values.
3298  // See "GFX10 Shader Programming", section 3.6.2.3.
3299 
3300  unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3301  if (Size < 4) Size = 4;
3302 
3303  if (NumLiterals == 0) {
3304  NumLiterals = 1;
3305  LiteralSize = Size;
3306  } else if (LiteralSize != Size) {
3307  NumLiterals = 2;
3308  }
3309  }
3310  }
3311  }
3312  }
3313  ConstantBusUseCount += NumLiterals;
3314 
3315  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3316  return true;
3317 
3318  SMLoc LitLoc = getLitLoc(Operands);
3319  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3320  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3321  Error(Loc, "invalid operand (violates constant bus restrictions)");
3322  return false;
3323 }
3324 
3325 bool
3326 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3327  const OperandVector &Operands) {
3328  const unsigned Opcode = Inst.getOpcode();
3329  const MCInstrDesc &Desc = MII.get(Opcode);
3330 
3331  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3332  if (DstIdx == -1 ||
3333  Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3334  return true;
3335  }
3336 
3337  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3338 
3339  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3340  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3341  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3342 
3343  assert(DstIdx != -1);
3344  const MCOperand &Dst = Inst.getOperand(DstIdx);
3345  assert(Dst.isReg());
3346  const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3347 
3348  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3349 
3350  for (int SrcIdx : SrcIndices) {
3351  if (SrcIdx == -1) break;
3352  const MCOperand &Src = Inst.getOperand(SrcIdx);
3353  if (Src.isReg()) {
3354  const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3355  if (isRegIntersect(DstReg, SrcReg, TRI)) {
3356  Error(getRegLoc(SrcReg, Operands),
3357  "destination must be different than all sources");
3358  return false;
3359  }
3360  }
3361  }
3362 
3363  return true;
3364 }
3365 
3366 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3367 
3368  const unsigned Opc = Inst.getOpcode();
3369  const MCInstrDesc &Desc = MII.get(Opc);
3370 
3371  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3372  int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3373  assert(ClampIdx != -1);
3374  return Inst.getOperand(ClampIdx).getImm() == 0;
3375  }
3376 
3377  return true;
3378 }
3379 
3380 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3381 
3382  const unsigned Opc = Inst.getOpcode();
3383  const MCInstrDesc &Desc = MII.get(Opc);
3384 
3385  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3386  return true;
3387 
3388  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3389  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3390  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3391 
3392  assert(VDataIdx != -1);
3393 
3394  if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3395  return true;
3396 
3397  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3398  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3399  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3400  if (DMask == 0)
3401  DMask = 1;
3402 
3403  unsigned DataSize =
3404  (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3405  if (hasPackedD16()) {
3406  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3407  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3408  DataSize = (DataSize + 1) / 2;
3409  }
3410 
3411  return (VDataSize / 4) == DataSize + TFESize;
3412 }
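// Illustrative example (not part of the original file): an image load with
// dmask = 0x7 returns three components, so vdata must be a 3-register tuple
// (or 4 with tfe enabled); with packed D16 the three 16-bit components fit in
// (3 + 1) / 2 = 2 registers instead.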
3413 
3414 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3415  const unsigned Opc = Inst.getOpcode();
3416  const MCInstrDesc &Desc = MII.get(Opc);
3417 
3418  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3419  return true;
3420 
3421  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3422 
3423  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3424  AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3425  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3426  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3427  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3428  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3429 
3430  assert(VAddr0Idx != -1);
3431  assert(SrsrcIdx != -1);
3432  assert(SrsrcIdx > VAddr0Idx);
3433 
3434  if (DimIdx == -1)
3435  return true; // intersect_ray
3436 
3437  unsigned Dim = Inst.getOperand(DimIdx).getImm();
3438  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3439  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3440  unsigned ActualAddrSize =
3441  IsNSA ? SrsrcIdx - VAddr0Idx
3442  : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3443  bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3444 
3445  unsigned ExpectedAddrSize =
3446  AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3447 
3448  if (!IsNSA) {
3449  if (ExpectedAddrSize > 8)
3450  ExpectedAddrSize = 16;
3451 
3452  // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3453  // This provides backward compatibility for assembly created
3454  // before 160b/192b/224b types were directly supported.
3455  if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3456  return true;
3457  }
3458 
3459  return ActualAddrSize == ExpectedAddrSize;
3460 }
3461 
3462 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3463 
3464  const unsigned Opc = Inst.getOpcode();
3465  const MCInstrDesc &Desc = MII.get(Opc);
3466 
3467  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3468  return true;
3469  if (!Desc.mayLoad() || !Desc.mayStore())
3470  return true; // Not atomic
3471 
3472  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3473  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3474 
3475  // This is an incomplete check because image_atomic_cmpswap
3476  // may only use 0x3 and 0xf while other atomic operations
3477  // may use 0x1 and 0x3. However these limitations are
3478  // verified when we check that dmask matches dst size.
3479  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3480 }
3481 
3482 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3483 
3484  const unsigned Opc = Inst.getOpcode();
3485  const MCInstrDesc &Desc = MII.get(Opc);
3486 
3487  if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3488  return true;
3489 
3490  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3491  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3492 
3493  // GATHER4 instructions use dmask in a different fashion compared to
3494  // other MIMG instructions. The only useful DMASK values are
3495  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3496  // (red,red,red,red) etc.) The ISA document doesn't mention
3497  // this.
3498  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3499 }
3500 
3501 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3502  const unsigned Opc = Inst.getOpcode();
3503  const MCInstrDesc &Desc = MII.get(Opc);
3504 
3505  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3506  return true;
3507 
3508  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3509  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3510  AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3511 
3512  if (!BaseOpcode->MSAA)
3513  return true;
3514 
3515  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3516  assert(DimIdx != -1);
3517 
3518  unsigned Dim = Inst.getOperand(DimIdx).getImm();
3519  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3520 
3521  return DimInfo->MSAA;
3522 }
3523 
3524 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3525 {
3526  switch (Opcode) {
3527  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3528  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3529  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3530  return true;
3531  default:
3532  return false;
3533  }
3534 }
3535 
3536 // movrels* opcodes should only allow VGPRs as src0.
3537 // This is specified in .td description for vop1/vop3,
3538 // but sdwa is handled differently. See isSDWAOperand.
3539 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3540  const OperandVector &Operands) {
3541 
3542  const unsigned Opc = Inst.getOpcode();
3543  const MCInstrDesc &Desc = MII.get(Opc);
3544 
3545  if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3546  return true;
3547 
3548  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3549  assert(Src0Idx != -1);
3550 
3551  SMLoc ErrLoc;
3552  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3553  if (Src0.isReg()) {
3554  auto Reg = mc2PseudoReg(Src0.getReg());
3555  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3556  if (!isSGPR(Reg, TRI))
3557  return true;
3558  ErrLoc = getRegLoc(Reg, Operands);
3559  } else {
3560  ErrLoc = getConstLoc(Operands);
3561  }
3562 
3563  Error(ErrLoc, "source operand must be a VGPR");
3564  return false;
3565 }
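// Illustrative note (not part of the original source):
//   v_movrels_b32_sdwa v0, v1 src0_sel:DWORD   ; accepted, src0 is a VGPR
//   v_movrels_b32_sdwa v0, s1 src0_sel:DWORD   ; rejected: "source operand must be a VGPR"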
3566 
3567 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3568  const OperandVector &Operands) {
3569 
3570  const unsigned Opc = Inst.getOpcode();
3571 
3572  if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3573  return true;
3574 
3575  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3576  assert(Src0Idx != -1);
3577 
3578  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3579  if (!Src0.isReg())
3580  return true;
3581 
3582  auto Reg = mc2PseudoReg(Src0.getReg());
3583  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3584  if (isSGPR(Reg, TRI)) {
3585  Error(getRegLoc(Reg, Operands),
3586  "source operand must be either a VGPR or an inline constant");
3587  return false;
3588  }
3589 
3590  return true;
3591 }
3592 
3593 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3594  switch (Inst.getOpcode()) {
3595  default:
3596  return true;
3597  case V_DIV_SCALE_F32_gfx6_gfx7:
3598  case V_DIV_SCALE_F32_vi:
3599  case V_DIV_SCALE_F32_gfx10:
3600  case V_DIV_SCALE_F64_gfx6_gfx7:
3601  case V_DIV_SCALE_F64_vi:
3602  case V_DIV_SCALE_F64_gfx10:
3603  break;
3604  }
3605 
3606  // TODO: Check that src0 = src1 or src2.
3607 
3608  for (auto Name : {AMDGPU::OpName::src0_modifiers,
3609  AMDGPU::OpName::src1_modifiers,
3610  AMDGPU::OpName::src2_modifiers}) {
3611  if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Opc, Name))
3612  .getImm() &
3613  SISrcMods::ABS) {
3614  return false;
3615  }
3616  }
3617 
3618  return true;
3619 }
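// Illustrative note (not part of the original source): VOP3B encodings such as
// v_div_scale_f32 have no ABS bits, so an operand written as |v1| in
//   v_div_scale_f32 v0, vcc, |v1|, v2, v3
// is rejected ("ABS not allowed in VOP3B instructions"), while the plain form
// without the modifier is accepted.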
3620 
3621 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3622 
3623  const unsigned Opc = Inst.getOpcode();
3624  const MCInstrDesc &Desc = MII.get(Opc);
3625 
3626  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3627  return true;
3628 
3629  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3630  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3631  if (isCI() || isSI())
3632  return false;
3633  }
3634 
3635  return true;
3636 }
3637 
3638 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3639  const unsigned Opc = Inst.getOpcode();
3640  const MCInstrDesc &Desc = MII.get(Opc);
3641 
3642  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3643  return true;
3644 
3645  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3646  if (DimIdx < 0)
3647  return true;
3648 
3649  long Imm = Inst.getOperand(DimIdx).getImm();
3650  if (Imm < 0 || Imm >= 8)
3651  return false;
3652 
3653  return true;
3654 }
3655 
3656 static bool IsRevOpcode(const unsigned Opcode)
3657 {
3658  switch (Opcode) {
3659  case AMDGPU::V_SUBREV_F32_e32:
3660  case AMDGPU::V_SUBREV_F32_e64:
3661  case AMDGPU::V_SUBREV_F32_e32_gfx10:
3662  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3663  case AMDGPU::V_SUBREV_F32_e32_vi:
3664  case AMDGPU::V_SUBREV_F32_e64_gfx10:
3665  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3666  case AMDGPU::V_SUBREV_F32_e64_vi:
3667 
3668  case AMDGPU::V_SUBREV_CO_U32_e32:
3669  case AMDGPU::V_SUBREV_CO_U32_e64:
3670  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3671  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3672 
3673  case AMDGPU::V_SUBBREV_U32_e32:
3674  case AMDGPU::V_SUBBREV_U32_e64:
3675  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3676  case AMDGPU::V_SUBBREV_U32_e32_vi:
3677  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3678  case AMDGPU::V_SUBBREV_U32_e64_vi:
3679 
3680  case AMDGPU::V_SUBREV_U32_e32:
3681  case AMDGPU::V_SUBREV_U32_e64:
3682  case AMDGPU::V_SUBREV_U32_e32_gfx9:
3683  case AMDGPU::V_SUBREV_U32_e32_vi:
3684  case AMDGPU::V_SUBREV_U32_e64_gfx9:
3685  case AMDGPU::V_SUBREV_U32_e64_vi:
3686 
3687  case AMDGPU::V_SUBREV_F16_e32:
3688  case AMDGPU::V_SUBREV_F16_e64:
3689  case AMDGPU::V_SUBREV_F16_e32_gfx10:
3690  case AMDGPU::V_SUBREV_F16_e32_vi:
3691  case AMDGPU::V_SUBREV_F16_e64_gfx10:
3692  case AMDGPU::V_SUBREV_F16_e64_vi:
3693 
3694  case AMDGPU::V_SUBREV_U16_e32:
3695  case AMDGPU::V_SUBREV_U16_e64:
3696  case AMDGPU::V_SUBREV_U16_e32_vi:
3697  case AMDGPU::V_SUBREV_U16_e64_vi:
3698 
3699  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3700  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3701  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3702 
3703  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3704  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3705 
3706  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3707  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3708 
3709  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3710  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3711 
3712  case AMDGPU::V_LSHRREV_B32_e32:
3713  case AMDGPU::V_LSHRREV_B32_e64:
3714  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3715  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3716  case AMDGPU::V_LSHRREV_B32_e32_vi:
3717  case AMDGPU::V_LSHRREV_B32_e64_vi:
3718  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3719  case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3720 
3721  case AMDGPU::V_ASHRREV_I32_e32:
3722  case AMDGPU::V_ASHRREV_I32_e64:
3723  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3724  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3725  case AMDGPU::V_ASHRREV_I32_e32_vi:
3726  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3727  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3728  case AMDGPU::V_ASHRREV_I32_e64_vi:
3729 
3730  case AMDGPU::V_LSHLREV_B32_e32:
3731  case AMDGPU::V_LSHLREV_B32_e64:
3732  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3733  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3734  case AMDGPU::V_LSHLREV_B32_e32_vi:
3735  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3736  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3737  case AMDGPU::V_LSHLREV_B32_e64_vi:
3738 
3739  case AMDGPU::V_LSHLREV_B16_e32:
3740  case AMDGPU::V_LSHLREV_B16_e64:
3741  case AMDGPU::V_LSHLREV_B16_e32_vi:
3742  case AMDGPU::V_LSHLREV_B16_e64_vi:
3743  case AMDGPU::V_LSHLREV_B16_gfx10:
3744 
3745  case AMDGPU::V_LSHRREV_B16_e32:
3746  case AMDGPU::V_LSHRREV_B16_e64:
3747  case AMDGPU::V_LSHRREV_B16_e32_vi:
3748  case AMDGPU::V_LSHRREV_B16_e64_vi:
3749  case AMDGPU::V_LSHRREV_B16_gfx10:
3750 
3751  case AMDGPU::V_ASHRREV_I16_e32:
3752  case AMDGPU::V_ASHRREV_I16_e64:
3753  case AMDGPU::V_ASHRREV_I16_e32_vi:
3754  case AMDGPU::V_ASHRREV_I16_e64_vi:
3755  case AMDGPU::V_ASHRREV_I16_gfx10:
3756 
3757  case AMDGPU::V_LSHLREV_B64_e64:
3758  case AMDGPU::V_LSHLREV_B64_gfx10:
3759  case AMDGPU::V_LSHLREV_B64_vi:
3760 
3761  case AMDGPU::V_LSHRREV_B64_e64:
3762  case AMDGPU::V_LSHRREV_B64_gfx10:
3763  case AMDGPU::V_LSHRREV_B64_vi:
3764 
3765  case AMDGPU::V_ASHRREV_I64_e64:
3766  case AMDGPU::V_ASHRREV_I64_gfx10:
3767  case AMDGPU::V_ASHRREV_I64_vi:
3768 
3769  case AMDGPU::V_PK_LSHLREV_B16:
3770  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3771  case AMDGPU::V_PK_LSHLREV_B16_vi:
3772 
3773  case AMDGPU::V_PK_LSHRREV_B16:
3774  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3775  case AMDGPU::V_PK_LSHRREV_B16_vi:
3776  case AMDGPU::V_PK_ASHRREV_I16:
3777  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3778  case AMDGPU::V_PK_ASHRREV_I16_vi:
3779  return true;
3780  default:
3781  return false;
3782  }
3783 }
3784 
3785 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3786 
3787  using namespace SIInstrFlags;
3788  const unsigned Opcode = Inst.getOpcode();
3789  const MCInstrDesc &Desc = MII.get(Opcode);
3790 
3791  // lds_direct register is defined so that it can be used
3792  // with 9-bit operands only. Ignore encodings which do not accept these.
3793  const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3794  if ((Desc.TSFlags & Enc) == 0)
3795  return None;
3796 
3797  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3798  auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3799  if (SrcIdx == -1)
3800  break;
3801  const auto &Src = Inst.getOperand(SrcIdx);
3802  if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3803 
3804  if (isGFX90A())
3805  return StringRef("lds_direct is not supported on this GPU");
3806 
3807  if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3808  return StringRef("lds_direct cannot be used with this instruction");
3809 
3810  if (SrcName != OpName::src0)
3811  return StringRef("lds_direct may be used as src0 only");
3812  }
3813  }
3814 
3815  return None;
3816 }
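// Illustrative note (not part of the original source):
//   v_add_f32 v0, lds_direct, v1   ; accepted, lds_direct is src0
//   v_add_f32 v0, v1, lds_direct   ; rejected: lds_direct may be used as src0 only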
3817 
3818 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3819  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3820  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3821  if (Op.isFlatOffset())
3822  return Op.getStartLoc();
3823  }
3824  return getLoc();
3825 }
3826 
3827 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3828  const OperandVector &Operands) {
3829  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3830  if ((TSFlags & SIInstrFlags::FLAT) == 0)
3831  return true;
3832 
3833  auto Opcode = Inst.getOpcode();
3834  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3835  assert(OpNum != -1);
3836 
3837  const auto &Op = Inst.getOperand(OpNum);
3838  if (!hasFlatOffsets() && Op.getImm() != 0) {
3839  Error(getFlatOffsetLoc(Operands),
3840  "flat offset modifier is not supported on this GPU");
3841  return false;
3842  }
3843 
3844  // For FLAT segment the offset must be positive;
3845  // MSB is ignored and forced to zero.
3846  if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3847  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3848  if (!isIntN(OffsetSize, Op.getImm())) {
3849  Error(getFlatOffsetLoc(Operands),
3850  Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3851  return false;
3852  }
3853  } else {
3854  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3855  if (!isUIntN(OffsetSize, Op.getImm())) {
3856  Error(getFlatOffsetLoc(Operands),
3857  Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3858  return false;
3859  }
3860  }
3861 
3862  return true;
3863 }
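// Illustrative note (not part of the original source): a segment access such
// as global_load_dword may take a negative offset as long as it fits the
// signed field reported by getNumFlatOffsetBits, while a plain flat access
// must use a non-negative offset that fits the unsigned field; on targets
// without flat offsets any non-zero offset is rejected.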
3864 
3865 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3866  // Start with second operand because SMEM Offset cannot be dst or src0.
3867  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3868  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3869  if (Op.isSMEMOffset())
3870  return Op.getStartLoc();
3871  }
3872  return getLoc();
3873 }
3874 
3875 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3876  const OperandVector &Operands) {
3877  if (isCI() || isSI())
3878  return true;
3879 
3880  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3881  if ((TSFlags & SIInstrFlags::SMRD) == 0)
3882  return true;
3883 
3884  auto Opcode = Inst.getOpcode();
3885  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3886  if (OpNum == -1)
3887  return true;
3888 
3889  const auto &Op = Inst.getOperand(OpNum);
3890  if (!Op.isImm())
3891  return true;
3892 
3893  uint64_t Offset = Op.getImm();
3894  bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3895  if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3896  AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3897  return true;
3898 
3899  Error(getSMEMOffsetLoc(Operands),
3900  (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3901  "expected a 21-bit signed offset");
3902 
3903  return false;
3904 }
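// Illustrative note (not part of the original source), roughly:
//   s_load_dword s0, s[0:1], -0x8   ; accepted on gfx9+ (21-bit signed offset)
//                                   ; rejected on gfx8 (20-bit unsigned offset)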
3905 
3906 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3907  unsigned Opcode = Inst.getOpcode();
3908  const MCInstrDesc &Desc = MII.get(Opcode);
3909  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3910  return true;
3911 
3912  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3913  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3914 
3915  const int OpIndices[] = { Src0Idx, Src1Idx };
3916 
3917  unsigned NumExprs = 0;
3918  unsigned NumLiterals = 0;
3919  uint32_t LiteralValue;
3920 
3921  for (int OpIdx : OpIndices) {
3922  if (OpIdx == -1) break;
3923 
3924  const MCOperand &MO = Inst.getOperand(OpIdx);
3925  // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3926  if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3927  if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3928  uint32_t Value = static_cast<uint32_t>(MO.getImm());
3929  if (NumLiterals == 0 || LiteralValue != Value) {
3930  LiteralValue = Value;
3931  ++NumLiterals;
3932  }
3933  } else if (MO.isExpr()) {
3934  ++NumExprs;
3935  }
3936  }
3937  }
3938 
3939  return NumLiterals + NumExprs <= 1;
3940 }
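// Illustrative note (not part of the original source): SOP2/SOPC have a single
// 32-bit literal slot, so
//   s_add_u32 s0, 0x12345678, 0x12345678   ; accepted, the same literal is reused
//   s_add_u32 s0, 0x12345678, 0x9abcdef0   ; rejected, two distinct literals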
3941 
3942 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3943  const unsigned Opc = Inst.getOpcode();
3944  if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3945  Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3946  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3947  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3948 
3949  if (OpSel & ~3)
3950  return false;
3951  }
3952  return true;
3953 }
3954 
3955 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
3956  const OperandVector &Operands) {
3957  const unsigned Opc = Inst.getOpcode();
3958  int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
3959  if (DppCtrlIdx < 0)
3960  return true;
3961  unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
3962 
3963  if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
3964  // DPP64 is supported for row_newbcast only.
3965  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3966  if (Src0Idx >= 0 &&
3967  getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
3968  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
3969  Error(S, "64 bit dpp only supports row_newbcast");
3970  return false;
3971  }
3972  }
3973 
3974  return true;
3975 }
3976 
3977 // Check if VCC register matches wavefront size
3978 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3979  auto FB = getFeatureBits();
3980  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3981  (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3982 }
3983 
3984 // VOP3 literal is only allowed in GFX10+ and only one can be used
3985 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3986  const OperandVector &Operands) {
3987  unsigned Opcode = Inst.getOpcode();
3988  const MCInstrDesc &Desc = MII.get(Opcode);
3989  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3990  return true;
3991 
3992  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3993  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3994  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3995 
3996  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3997 
3998  unsigned NumExprs = 0;
3999  unsigned NumLiterals = 0;
4000  uint32_t LiteralValue;
4001 
4002  for (int OpIdx : OpIndices) {
4003  if (OpIdx == -1) break;
4004 
4005  const MCOperand &MO = Inst.getOperand(OpIdx);
4006  if (!MO.isImm() && !MO.isExpr())
4007  continue;
4008  if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4009  continue;
4010 
4011  if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4012  getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4013  Error(getConstLoc(Operands),
4014  "inline constants are not allowed for this operand");
4015  return false;
4016  }
4017 
4018  if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4019  uint32_t Value = static_cast<uint32_t>(MO.getImm());
4020  if (NumLiterals == 0 || LiteralValue != Value) {
4021  LiteralValue = Value;
4022  ++NumLiterals;
4023  }
4024  } else if (MO.isExpr()) {
4025  ++NumExprs;
4026  }
4027  }
4028  NumLiterals += NumExprs;
4029 
4030  if (!NumLiterals)
4031  return true;
4032 
4033  if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4034  Error(getLitLoc(Operands), "literal operands are not supported");
4035  return false;
4036  }
4037 
4038  if (NumLiterals > 1) {
4039  Error(getLitLoc(Operands), "only one literal operand is allowed");
4040  return false;
4041  }
4042 
4043  return true;
4044 }
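// Illustrative note (not part of the original source), assuming a GFX10 target:
//   v_add3_u32 v0, v1, v2, 0x1234       ; accepted, one VOP3 literal
//   v_add3_u32 v0, 0x1234, v2, 0x5678   ; rejected, only one literal is allowed
// On targets without FeatureVOP3Literal any VOP3 literal operand is rejected.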
4045 
4046 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4047 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4048  const MCRegisterInfo *MRI) {
4049  int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4050  if (OpIdx < 0)
4051  return -1;
4052 
4053  const MCOperand &Op = Inst.getOperand(OpIdx);
4054  if (!Op.isReg())
4055  return -1;
4056 
4057  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4058  auto Reg = Sub ? Sub : Op.getReg();
4059  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4060  return AGPR32.contains(Reg) ? 1 : 0;
4061 }
4062 
4063 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4064  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4065  if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4066  SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4067  SIInstrFlags::DS)) == 0)
4068  return true;
4069 
4070  uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4071  : AMDGPU::OpName::vdata;
4072 
4073  const MCRegisterInfo *MRI = getMRI();
4074  int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4075  int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4076 
4077  if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4078  int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4079  if (Data2Areg >= 0 && Data2Areg != DataAreg)
4080  return false;
4081  }
4082 
4083  auto FB = getFeatureBits();
4084  if (FB[AMDGPU::FeatureGFX90AInsts]) {
4085  if (DataAreg < 0 || DstAreg < 0)
4086  return true;
4087  return DstAreg == DataAreg;
4088  }
4089 
4090  return DstAreg < 1 && DataAreg < 1;
4091 }
4092 
4093 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4094  auto FB = getFeatureBits();
4095  if (!FB[AMDGPU::FeatureGFX90AInsts])
4096  return true;
4097 
4098  const MCRegisterInfo *MRI = getMRI();
4099  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4100  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4101  for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4102  const MCOperand &Op = Inst.getOperand(I);
4103  if (!Op.isReg())
4104  continue;
4105 
4106  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4107  if (!Sub)
4108  continue;
4109 
4110  if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4111  return false;
4112  if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4113  return false;
4114  }
4115 
4116  return true;
4117 }
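// Illustrative note (not part of the original source): on gfx90a a 64-bit
// operand written as v[1:2] starts at an odd register and is rejected, while
// v[2:3] satisfies the even alignment requirement checked above.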
4118 
4119 // gfx90a has an undocumented limitation:
4120 // DS_GWS opcodes must use even aligned registers.
4121 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4122  const OperandVector &Operands) {
4123  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4124  return true;
4125 
4126  int Opc = Inst.getOpcode();
4127  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4128  Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4129  return true;
4130 
4131  const MCRegisterInfo *MRI = getMRI();
4132  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4133  int Data0Pos =
4134  AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4135  assert(Data0Pos != -1);
4136  auto Reg = Inst.getOperand(Data0Pos).getReg();
4137  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4138  if (RegIdx & 1) {
4139  SMLoc RegLoc = getRegLoc(Reg, Operands);
4140  Error(RegLoc, "vgpr must be even aligned");
4141  return false;
4142  }
4143 
4144  return true;
4145 }
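// Illustrative note (not part of the original source), roughly:
//   ds_gws_init v2 gds   ; accepted on gfx90a, data register is even
//   ds_gws_init v3 gds   ; rejected: "vgpr must be even aligned"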
4146 
4147 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4148  const OperandVector &Operands,
4149  const SMLoc &IDLoc) {
4150  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4151  AMDGPU::OpName::cpol);
4152  if (CPolPos == -1)
4153  return true;
4154 
4155  unsigned CPol = Inst.getOperand(CPolPos).getImm();
4156 
4157  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4158  if ((TSFlags & (SIInstrFlags::SMRD)) &&
4159  (CPol & ~(CPol::GLC | CPol::DLC))) {
4160  Error(IDLoc, "invalid cache policy for SMRD instruction");
4161  return false;
4162  }
4163 
4164  if (isGFX90A() && (CPol & CPol::SCC)) {
4165  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4166  StringRef CStr(S.getPointer());
4167  S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4168  Error(S, "scc is not supported on this GPU");
4169  return false;
4170  }
4171 
4172  if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
4173  return true;
4174 
4175  if (TSFlags & SIInstrFlags::IsAtomicRet) {
4176  if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4177  Error(IDLoc, "instruction must use glc");
4178  return false;
4179  }
4180  } else {
4181  if (CPol & CPol::GLC) {
4182  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4183  StringRef CStr(S.getPointer());
4184  S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4185  Error(S, "instruction must not use glc");
4186  return false;
4187  }
4188  }
4189 
4190  return true;
4191 }
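// Illustrative note (not part of the original source): a returning atomic such
// as "global_atomic_add v0, v[0:1], v2, off glc" must carry glc, while the
// non-returning form "global_atomic_add v[0:1], v2, off" must not; on gfx90a
// the scc bit is additionally rejected for all instructions.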
4192 
4193 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4194  const SMLoc &IDLoc,
4195  const OperandVector &Operands) {
4196  if (auto ErrMsg = validateLdsDirect(Inst)) {
4197  Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4198  return false;
4199  }
4200  if (!validateSOPLiteral(Inst)) {
4201  Error(getLitLoc(Operands),
4202  "only one literal operand is allowed");
4203  return false;
4204  }
4205  if (!validateVOP3Literal(Inst, Operands)) {
4206  return false;
4207  }
4208  if (!validateConstantBusLimitations(Inst, Operands)) {
4209  return false;
4210  }
4211  if (!validateEarlyClobberLimitations(Inst, Operands)) {
4212  return false;
4213  }
4214  if (!validateIntClampSupported(Inst)) {
4215  Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4216  "integer clamping is not supported on this GPU");
4217  return false;
4218  }
4219  if (!validateOpSel(Inst)) {
4220  Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4221  "invalid op_sel operand");
4222  return false;
4223  }
4224  if (!validateDPP(Inst, Operands)) {
4225  return false;
4226  }
4227  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4228  if (!validateMIMGD16(Inst)) {
4229  Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4230  "d16 modifier is not supported on this GPU");
4231  return false;
4232  }
4233  if (!validateMIMGDim(Inst)) {
4234  Error(IDLoc, "dim modifier is required on this GPU");
4235  return false;
4236  }
4237  if (!validateMIMGMSAA(Inst)) {
4238  Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4239  "invalid dim; must be MSAA type");
4240  return false;
4241  }
4242  if (!validateMIMGDataSize(Inst)) {
4243  Error(IDLoc,
4244  "image data size does not match dmask and tfe");
4245  return false;
4246  }
4247  if (!validateMIMGAddrSize(Inst)) {
4248  Error(IDLoc,
4249  "image address size does not match dim and a16");
4250  return false;
4251  }
4252  if (!validateMIMGAtomicDMask(Inst)) {
4253  Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4254  "invalid atomic image dmask");
4255  return false;
4256  }
4257  if (!validateMIMGGatherDMask(Inst)) {
4258  Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4259  "invalid image_gather dmask: only one bit must be set");
4260  return false;
4261  }
4262  if (!validateMovrels(Inst, Operands)) {
4263  return false;
4264  }
4265  if (!validateFlatOffset(Inst, Operands)) {
4266  return false;
4267  }
4268  if (!validateSMEMOffset(Inst, Operands)) {
4269  return false;
4270  }
4271  if (!validateMAIAccWrite(Inst, Operands)) {
4272  return false;
4273  }
4274  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4275  return false;
4276  }
4277 
4278  if (!validateAGPRLdSt(Inst)) {
4279  Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4280  ? "invalid register class: data and dst should be all VGPR or AGPR"
4281  : "invalid register class: agpr loads and stores not supported on this GPU"
4282  );
4283  return false;
4284  }
4285  if (!validateVGPRAlign(Inst)) {
4286  Error(IDLoc,
4287  "invalid register class: vgpr tuples must be 64 bit aligned");
4288  return false;
4289  }
4290  if (!validateGWS(Inst, Operands)) {
4291  return false;
4292  }
4293 
4294  if (!validateDivScale(Inst)) {
4295  Error(IDLoc, "ABS not allowed in VOP3B instructions");
4296  return false;
4297  }
4298  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4299  return false;
4300  }
4301 
4302  return true;
4303 }
4304 
4305 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4306  const FeatureBitset &FBS,
4307  unsigned VariantID = 0);
4308 
4309 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4310  const FeatureBitset &AvailableFeatures,
4311  unsigned VariantID);
4312 
4313 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4314  const FeatureBitset &FBS) {
4315  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4316 }
4317 
4318 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4319  const FeatureBitset &FBS,
4320  ArrayRef<unsigned> Variants) {
4321  for (auto Variant : Variants) {
4322  if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4323  return true;
4324  }
4325 
4326  return false;
4327 }
4328 
4329 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4330  const SMLoc &IDLoc) {
4331  FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4332 
4333  // Check if requested instruction variant is supported.
4334  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4335  return false;
4336 
4337  // This instruction is not supported.
4338  // Clear any other pending errors because they are no longer relevant.
4339  getParser().clearPendingErrors();
4340 
4341  // Requested instruction variant is not supported.
4342  // Check if any other variants are supported.
4343  StringRef VariantName = getMatchedVariantName();
4344  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4345  return Error(IDLoc,
4346  Twine(VariantName,
4347  " variant of this instruction is not supported"));
4348  }
4349 
4350  // Finally check if this instruction is supported on any other GPU.
4351  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4352  return Error(IDLoc, "instruction not supported on this GPU");
4353  }
4354 
4355  // Instruction not supported on any GPU. Probably a typo.
4356  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4357  return Error(IDLoc, "invalid instruction" + Suggestion);
4358 }
4359 
4360 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4361  OperandVector &Operands,
4362  MCStreamer &Out,
4363  uint64_t &ErrorInfo,
4364  bool MatchingInlineAsm) {
4365  MCInst Inst;
4366  unsigned Result = Match_Success;
4367  for (auto Variant : getMatchedVariants()) {
4368  uint64_t EI;
4369  auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4370  Variant);
4371  // We order match statuses from least to most specific and use the most
4372  // specific status as the result:
4373  // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4374  if ((R == Match_Success) ||
4375  (R == Match_PreferE32) ||
4376  (R == Match_MissingFeature && Result != Match_PreferE32) ||
4377  (R == Match_InvalidOperand && Result != Match_MissingFeature
4378  && Result != Match_PreferE32) ||
4379  (R == Match_MnemonicFail && Result != Match_InvalidOperand
4380  && Result != Match_MissingFeature
4381  && Result != Match_PreferE32)) {
4382  Result = R;
4383  ErrorInfo = EI;
4384  }
4385  if (R == Match_Success)
4386  break;
4387  }
4388 
4389  if (Result == Match_Success) {
4390  if (!validateInstruction(Inst, IDLoc, Operands)) {
4391  return true;
4392  }
4393  Inst.setLoc(IDLoc);
4394  Out.emitInstruction(Inst, getSTI());
4395  return false;
4396  }
4397 
4398  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4399  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4400  return true;
4401  }
4402 
4403  switch (Result) {
4404  default: break;
4405  case Match_MissingFeature:
4406  // It has been verified that the specified instruction
4407  // mnemonic is valid. A match was found but it requires
4408  // features which are not supported on this GPU.
4409  return Error(IDLoc, "operands are not valid for this GPU or mode");
4410 
4411  case Match_InvalidOperand: {
4412  SMLoc ErrorLoc = IDLoc;
4413  if (ErrorInfo != ~0ULL) {
4414  if (ErrorInfo >= Operands.size()) {
4415  return Error(IDLoc, "too few operands for instruction");
4416  }
4417  ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4418  if (ErrorLoc == SMLoc())
4419  ErrorLoc = IDLoc;
4420  }
4421  return Error(ErrorLoc, "invalid operand for instruction");
4422  }
4423 
4424  case Match_PreferE32:
4425  return Error(IDLoc, "internal error: instruction without _e64 suffix "
4426  "should be encoded as e32");
4427  case Match_MnemonicFail:
4428  llvm_unreachable("Invalid instructions should have been handled already");
4429  }
4430  llvm_unreachable("Implement any new match types added!");
4431 }
4432 
4433 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4434  int64_t Tmp = -1;
4435  if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4436  return true;
4437  }
4438  if (getParser().parseAbsoluteExpression(Tmp)) {
4439  return true;
4440  }
4441  Ret = static_cast<uint32_t>(Tmp);
4442  return false;
4443 }
4444 
4445 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4446  uint32_t &Minor) {
4447  if (ParseAsAbsoluteExpression(Major))
4448  return TokError("invalid major version");
4449 
4450  if (!trySkipToken(AsmToken::Comma))
4451  return TokError("minor version number required, comma expected");
4452 
4453  if (ParseAsAbsoluteExpression(Minor))
4454  return TokError("invalid minor version");
4455 
4456  return false;
4457 }
4458 
4459 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4460  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4461  return TokError("directive only supported for amdgcn architecture");
4462 
4463  std::string TargetIDDirective;
4464  SMLoc TargetStart = getTok().getLoc();
4465  if (getParser().parseEscapedString(TargetIDDirective))
4466  return true;
4467 
4468  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4469  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4470  return getParser().Error(TargetRange.Start,
4471  (Twine(".amdgcn_target directive's target id ") +
4472  Twine(TargetIDDirective) +
4473  Twine(" does not match the specified target id ") +
4474  Twine(getTargetStreamer().getTargetID()->toString())).str());
4475 
4476  return false;
4477 }
4478 
4479 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4480  return Error(Range.Start, "value out of range", Range);
4481 }
4482 
4483 bool AMDGPUAsmParser::calculateGPRBlocks(
4484  const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4485  bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4486  SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4487  unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4488  // TODO(scott.linder): These calculations are duplicated from
4489  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4490  IsaVersion Version = getIsaVersion(getSTI().getCPU());
4491 
4492  unsigned NumVGPRs = NextFreeVGPR;
4493  unsigned NumSGPRs = NextFreeSGPR;
4494 
4495  if (Version.Major >= 10)
4496  NumSGPRs = 0;
4497  else {
4498  unsigned MaxAddressableNumSGPRs =
4499  IsaInfo::getAddressableNumSGPRs(&getSTI());
4500 
4501  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4502  NumSGPRs > MaxAddressableNumSGPRs)
4503  return OutOfRangeError(SGPRRange);
4504 
4505  NumSGPRs +=
4506  IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4507 
4508  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4509  NumSGPRs > MaxAddressableNumSGPRs)
4510  return OutOfRangeError(SGPRRange);
4511 
4512  if (Features.test(FeatureSGPRInitBug))
4513  NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4514  }
4515 
4516  VGPRBlocks =
4517  IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4518  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4519 
4520  return false;
4521 }
4522 
4523 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4524  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4525  return TokError("directive only supported for amdgcn architecture");
4526 
4527  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4528  return TokError("directive only supported for amdhsa OS");
4529 
4530  StringRef KernelName;
4531  if (getParser().parseIdentifier(KernelName))
4532  return true;
4533 
4534  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4535 
4536  StringSet<> Seen;
4537 
4538  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4539 
4540  SMRange VGPRRange;
4541  uint64_t NextFreeVGPR = 0;
4542  uint64_t AccumOffset = 0;
4543  SMRange SGPRRange;
4544  uint64_t NextFreeSGPR = 0;
4545  unsigned UserSGPRCount = 0;
4546  bool ReserveVCC = true;
4547  bool ReserveFlatScr = true;
4548  Optional<bool> EnableWavefrontSize32;
4549 
4550  while (true) {
4551  while (trySkipToken(AsmToken::EndOfStatement));
4552 
4553  StringRef ID;
4554  SMRange IDRange = getTok().getLocRange();
4555  if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4556  return true;
4557 
4558  if (ID == ".end_amdhsa_kernel")
4559  break;
4560 
4561  if (Seen.find(ID) != Seen.end())
4562  return TokError(".amdhsa_ directives cannot be repeated");
4563  Seen.insert(ID);
4564 
4565  SMLoc ValStart = getLoc();
4566  int64_t IVal;
4567  if (getParser().parseAbsoluteExpression(IVal))
4568  return true;
4569  SMLoc ValEnd = getLoc();
4570  SMRange ValRange = SMRange(ValStart, ValEnd);
4571 
4572  if (IVal < 0)
4573  return OutOfRangeError(ValRange);
4574 
4575  uint64_t Val = IVal;
4576 
4577 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
4578  if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
4579  return OutOfRangeError(RANGE); \
4580  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4581 
4582  if (ID == ".amdhsa_group_segment_fixed_size") {
4583  if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4584  return OutOfRangeError(ValRange);
4585  KD.group_segment_fixed_size = Val;
4586  } else if (ID == ".amdhsa_private_segment_fixed_size") {
4587  if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4588  return OutOfRangeError(ValRange);
4589  KD.private_segment_fixed_size = Val;
4590  } else if (ID == ".amdhsa_kernarg_size") {
4591  if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4592  return OutOfRangeError(ValRange);
4593  KD.kernarg_size = Val;
4594  } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4596  return Error(IDRange.Start,
4597  "directive is not supported with architected flat scratch",
4598  IDRange);
4600  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4601  Val, ValRange);
4602  if (Val)
4603  UserSGPRCount += 4;
4604  } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4606  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4607  ValRange);
4608  if (Val)
4609  UserSGPRCount += 2;
4610  } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4612  KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4613  ValRange);
4614  if (Val)
4615  UserSGPRCount += 2;
4616  } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4618  KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4619  Val, ValRange);
4620  if (Val)
4621  UserSGPRCount += 2;
4622  } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4624  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4625  ValRange);
4626  if (Val)
4627  UserSGPRCount += 2;
4628  } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4630  return Error(IDRange.Start,
4631  "directive is not supported with architected flat scratch",
4632  IDRange);
4634  KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4635  ValRange);
4636  if (Val)
4637  UserSGPRCount += 2;
4638  } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4640  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4641  Val, ValRange);
4642  if (Val)
4643  UserSGPRCount += 1;
4644  } else if (ID == ".amdhsa_wavefront_size32") {
4645  if (IVersion.Major < 10)
4646  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4647  EnableWavefrontSize32 = Val;
4649  KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4650  Val, ValRange);
4651  } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4653  return Error(IDRange.Start,
4654  "directive is not supported with architected flat scratch",
4655  IDRange);
4657  COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4658  } else if (ID == ".amdhsa_enable_private_segment") {
4660  return Error(
4661  IDRange.Start,
4662  "directive is not supported without architected flat scratch",
4663  IDRange);
4665  COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4666  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4668  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4669  ValRange);
4670  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4672  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4673  ValRange);
4674  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4676  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4677  ValRange);
4678  } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4680  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4681  ValRange);
4682  } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4684  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4685  ValRange);
4686  } else if (ID == ".amdhsa_next_free_vgpr") {
4687  VGPRRange = ValRange;
4688  NextFreeVGPR = Val;
4689  } else if (ID == ".amdhsa_next_free_sgpr") {
4690  SGPRRange = ValRange;
4691  NextFreeSGPR = Val;
4692  } else if (ID == ".amdhsa_accum_offset") {
4693  if (!isGFX90A())
4694  return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4695  AccumOffset = Val;
4696  } else if (ID == ".amdhsa_reserve_vcc") {
4697  if (!isUInt<1>(Val))
4698  return OutOfRangeError(ValRange);
4699  ReserveVCC = Val;
4700  } else if (ID == ".amdhsa_reserve_flat_scratch") {
4701  if (IVersion.Major < 7)
4702  return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4704  return Error(IDRange.Start,
4705  "directive is not supported with architected flat scratch",
4706  IDRange);
4707  if (!isUInt<1>(Val))
4708  return OutOfRangeError(ValRange);
4709  ReserveFlatScr = Val;
4710  } else if (ID == ".amdhsa_reserve_xnack_mask") {
4711  if (IVersion.Major < 8)
4712  return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4713  if (!isUInt<1>(Val))
4714  return OutOfRangeError(ValRange);
4715  if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4716  return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4717  IDRange);
4718  } else if (ID == ".amdhsa_float_round_mode_32") {
4720  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4721  } else if (ID == ".amdhsa_float_round_mode_16_64") {
4723  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4724  } else if (ID == ".amdhsa_float_denorm_mode_32") {
4726  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4727  } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4729  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4730  ValRange);
4731  } else if (ID == ".amdhsa_dx10_clamp") {
4733  COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4734  } else if (ID == ".amdhsa_ieee_mode") {
4735  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4736  Val, ValRange);
4737  } else if (ID == ".amdhsa_fp16_overflow") {
4738  if (IVersion.Major < 9)
4739  return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4740  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4741  ValRange);
4742  } else if (ID == ".amdhsa_tg_split") {
4743  if (!isGFX90A())
4744  return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4745  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4746  ValRange);
4747  } else if (ID == ".amdhsa_workgroup_processor_mode") {
4748  if (IVersion.Major < 10)
4749  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4750  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4751  ValRange);
4752  } else if (ID == ".amdhsa_memory_ordered") {
4753  if (IVersion.Major < 10)
4754  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4755  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4756  ValRange);
4757  } else if (ID == ".amdhsa_forward_progress") {
4758  if (IVersion.Major < 10)
4759  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4760  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4761  ValRange);
4762  } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4764  KD.compute_pgm_rsrc2,
4765  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4766  ValRange);
4767  } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4769  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4770  Val, ValRange);
4771  } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4773  KD.compute_pgm_rsrc2,
4774  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4775  ValRange);
4776  } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4778  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4779  Val, ValRange);
4780  } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4782  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4783  Val, ValRange);
4784  } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4786  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4787  Val, ValRange);
4788  } else if (ID == ".amdhsa_exception_int_div_zero") {
4790  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4791  Val, ValRange);
4792  } else {
4793  return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4794  }
4795 
4796 #undef PARSE_BITS_ENTRY
4797  }
4798 
4799  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4800  return TokError(".amdhsa_next_free_vgpr directive is required");
4801 
4802  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4803  return TokError(".amdhsa_next_free_sgpr directive is required");
4804 
4805  unsigned VGPRBlocks;
4806  unsigned SGPRBlocks;
4807  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4808  getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4809  EnableWavefrontSize32, NextFreeVGPR,
4810  VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4811  SGPRBlocks))
4812  return true;
4813 
4814  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4815  VGPRBlocks))
4816  return OutOfRangeError(VGPRRange);
4817  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4818  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4819 
4820  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4821  SGPRBlocks))
4822  return OutOfRangeError(SGPRRange);
4823  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4824  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4825  SGPRBlocks);
4826 
4827  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4828  return TokError("too many user SGPRs enabled");
4829  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4830  UserSGPRCount);
4831 
4832  if (isGFX90A()) {
4833  if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4834  return TokError(".amdhsa_accum_offset directive is required");
4835  if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4836  return TokError("accum_offset should be in range [4..256] in "
4837  "increments of 4");
4838  if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4839  return TokError("accum_offset exceeds total VGPR allocation");
4840  AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4841  (AccumOffset / 4 - 1));
4842  }
4843 
4844  getTargetStreamer().EmitAmdhsaKernelDescriptor(
4845  getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4846  ReserveFlatScr);
4847  return false;
4848 }
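// Illustrative note (not part of the original source), a minimal use of the
// directive parsed above:
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are mandatory, and on
// gfx90a .amdhsa_accum_offset is required as well.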
4849 
4850 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4851  uint32_t Major;
4852  uint32_t Minor;
4853 
4854  if (ParseDirectiveMajorMinor(Major, Minor))
4855  return true;
4856 
4857  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4858  return false;
4859 }
4860 
4861 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4862  uint32_t Major;
4863  uint32_t Minor;
4864  uint32_t Stepping;
4865  StringRef VendorName;
4866  StringRef ArchName;
4867 
4868  // If this directive has no arguments, then use the ISA version for the
4869  // targeted GPU.
4870  if (isToken(AsmToken::EndOfStatement)) {
4871  IsaVersion ISA = getIsaVersion(getSTI().getCPU());
4872  getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4873  ISA.Stepping,
4874  "AMD", "AMDGPU");
4875  return false;
4876  }
4877 
4878  if (ParseDirectiveMajorMinor(Major, Minor))
4879  return true;
4880 
4881  if (!trySkipToken(AsmToken::Comma))
4882  return TokError("stepping version number required, comma expected");
4883 
4884  if (ParseAsAbsoluteExpression(Stepping))
4885  return TokError("invalid stepping version");
4886 
4887  if (!trySkipToken(AsmToken::Comma))
4888  return TokError("vendor name required, comma expected");
4889 
4890  if (!parseString(VendorName, "invalid vendor name"))
4891  return true;
4892 
4893  if (!trySkipToken(AsmToken::Comma))
4894  return TokError("arch name required, comma expected");
4895 
4896  if (!parseString(ArchName, "invalid arch name"))
4897  return true;
4898 
4899  getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
4900  VendorName, ArchName);
4901  return false;
4902 }
4903 
4904 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4905  amd_kernel_code_t &Header) {
4906  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4907  // assembly for backwards compatibility.
4908  if (ID == "max_scratch_backing_memory_byte_size") {
4909  Parser.eatToEndOfStatement();
4910  return false;
4911  }
4912 
4913  SmallString<40> ErrStr;
4914  raw_svector_ostream Err(ErrStr);
4915  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4916  return TokError(Err.str());
4917  }
4918  Lex();
4919 
4920  if (ID == "enable_wavefront_size32") {
4922  if (!isGFX10Plus())
4923  return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4924  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4925  return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4926  } else {
4927  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4928  return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4929  }
4930  }
4931 
4932  if (ID == "wavefront_size") {
4933  if (Header.wavefront_size == 5) {
4934  if (!isGFX10Plus())
4935  return TokError("wavefront_size=5 is only allowed on GFX10+");
4936  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4937  return TokError("wavefront_size=5 requires +WavefrontSize32");
4938  } else if (Header.wavefront_size == 6) {
4939  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4940  return TokError("wavefront_size=6 requires +WavefrontSize64");
4941  }
4942  }
4943 
4944  if (ID == "enable_wgp_mode") {
4946  !isGFX10Plus())
4947  return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4948  }
4949 
4950  if (ID == "enable_mem_ordered") {
4952  !isGFX10Plus())
4953  return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4954  }
4955 
4956  if (ID == "enable_fwd_progress") {
4958  !isGFX10Plus())
4959  return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4960  }
4961 
4962  return false;
4963 }
4964 
4965 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4966  amd_kernel_code_t Header;
4967  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4968 
4969  while (true) {
4970  // Lex EndOfStatement. This is in a while loop, because lexing a comment
4971  // will set the current token to EndOfStatement.
4972  while(trySkipToken(AsmToken::EndOfStatement));
4973 
4974  StringRef ID;
4975  if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4976  return true;
4977 
4978  if (ID == ".end_amd_kernel_code_t")
4979  break;
4980 
4981  if (ParseAMDKernelCodeTValue(ID, Header))
4982  return true;
4983  }
4984 
4985  getTargetStreamer().EmitAMDKernelCodeT(Header);
4986 
4987  return false;
4988 }
4989 
4990 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4991  StringRef KernelName;
4992  if (!parseId(KernelName, "expected symbol name"))
4993  return true;
4994 
4995  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4996  ELF::STT_AMDGPU_HSA_KERNEL);
4997 
4998  KernelScope.initialize(getContext());
4999  return false;
5000 }
5001 
5002 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5003  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5004  return Error(getLoc(),
5005  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5006  "architectures");
5007  }
5008 
5009  auto TargetIDDirective = getLexer().getTok().getStringContents();
5010  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5011  return Error(getParser().getTok().getLoc(), "target id must match options");
5012 
5013  getTargetStreamer().EmitISAVersion();
5014  Lex();
5015 
5016  return false;
5017 }
5018 
5019 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5020  const char *AssemblerDirectiveBegin;
5021  const char *AssemblerDirectiveEnd;
5022  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5023  isHsaAbiVersion3Or4(&getSTI())
5024  ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5025  HSAMD::V3::AssemblerDirectiveEnd)
5026  : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5027  HSAMD::AssemblerDirectiveEnd);
5028 
5029  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5030  return Error(getLoc(),
5031  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5032  "not available on non-amdhsa OSes")).str());
5033  }
5034 
5035  std::string HSAMetadataString;
5036  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5037  HSAMetadataString))
5038  return true;
5039 
5040  if (isHsaAbiVersion3Or4(&getSTI())) {
5041  if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5042  return Error(getLoc(), "invalid HSA metadata");
5043  } else {
5044  if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5045  return Error(getLoc(), "invalid HSA metadata");
5046  }
5047 
5048  return false;
5049 }
5050 
5051 /// Common code to parse out a block of text (typically YAML) between start and
5052 /// end directives.
5053 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5054  const char *AssemblerDirectiveEnd,
5055  std::string &CollectString) {
5056 
5057  raw_string_ostream CollectStream(CollectString);
5058 
5059  getLexer().setSkipSpace(false);
5060 
5061  bool FoundEnd = false;
5062  while (!isToken(AsmToken::Eof)) {
5063  while (isToken(AsmToken::Space)) {
5064  CollectStream << getTokenStr();
5065  Lex();
5066  }
5067 
5068  if (trySkipId(AssemblerDirectiveEnd)) {
5069  FoundEnd = true;
5070  break;
5071  }
5072 
5073  CollectStream << Parser.parseStringToEndOfStatement()
5074  << getContext().getAsmInfo()->getSeparatorString();
5075 
5076  Parser.eatToEndOfStatement();
5077  }
5078 
5079  getLexer().setSkipSpace(true);
5080 
5081  if (isToken(AsmToken::Eof) && !FoundEnd) {
5082  return TokError(Twine("expected directive ") +
5083  Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5084  }
5085 
5086  CollectStream.flush();
5087  return false;
5088 }
5089 
5090 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5091 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5092  std::string String;
5093  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5094  AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5095  return true;
5096 
5097  auto PALMetadata = getTargetStreamer().getPALMetadata();
5098  if (!PALMetadata->setFromString(String))
5099  return Error(getLoc(), "invalid PAL metadata");
5100  return false;
5101 }
5102 
5103 /// Parse the assembler directive for old linear-format PAL metadata.
5104 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5105  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5106  return Error(getLoc(),
5107  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5108  "not available on non-amdpal OSes")).str());
5109  }
5110 
5111  auto PALMetadata = getTargetStreamer().getPALMetadata();
5112  PALMetadata->setLegacy();
5113  for (;;) {
5114  uint32_t Key, Value;
5115  if (ParseAsAbsoluteExpression(Key)) {
5116  return TokError(Twine("invalid value in ") +
5117  Twine(PALMD::AssemblerDirective));
5118  }
5119  if (!trySkipToken(AsmToken::Comma)) {
5120  return TokError(Twine("expected an even number of values in ") +
5121  Twine(PALMD::AssemblerDirective));
5122  }
5123  if (ParseAsAbsoluteExpression(Value)) {
5124  return TokError(Twine("invalid value in ") +
5125  Twine(PALMD::AssemblerDirective));
5126  }
5127  PALMetadata->setRegister(Key, Value);
5128  if (!trySkipToken(AsmToken::Comma))
5129  break;
5130  }
5131  return false;
5132 }
5133 
5134 /// ParseDirectiveAMDGPULDS
5135 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5136 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5137  if (getParser().checkForValidSection())
5138  return true;
5139 
5140  StringRef Name;
5141  SMLoc NameLoc = getLoc();
5142  if (getParser().parseIdentifier(Name))
5143  return TokError("expected identifier in directive");
5144 
5145  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5146  if (parseToken(AsmToken::Comma, "expected ','"))
5147  return true;
5148 
5149  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5150 
5151  int64_t Size;
5152  SMLoc SizeLoc = getLoc();
5153  if (getParser().parseAbsoluteExpression(Size))
5154  return true;
5155  if (Size < 0)
5156  return Error(SizeLoc, "size must be non-negative");
5157  if (Size > LocalMemorySize)
5158  return Error(SizeLoc, "size is too large");
5159 
5160  int64_t Alignment = 4;
5161  if (trySkipToken(AsmToken::Comma)) {
5162  SMLoc AlignLoc = getLoc();
5163  if (getParser().parseAbsoluteExpression(Alignment))
5164  return true;
5165  if (Alignment < 0 || !isPowerOf2_64(Alignment))
5166  return Error(AlignLoc, "alignment must be a power of two");
5167 
5168  // Alignment larger than the size of LDS is possible in theory, as long
5169  // as the linker manages to place the symbol at address 0, but we do want
5170  // to make sure the alignment fits nicely into a 32-bit integer.
5171  if (Alignment >= 1u << 31)
5172  return Error(AlignLoc, "alignment is too large");
5173  }
5174 
5175  if (parseToken(AsmToken::EndOfStatement,
5176  "unexpected token in '.amdgpu_lds' directive"))
5177  return true;
5178 
5179  Symbol->redefineIfPossible();
5180  if (!Symbol->isUndefined())
5181  return Error(NameLoc, "invalid symbol redefinition");
5182 
5183  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5184  return false;
5185 }
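// Illustrative note (not part of the original source), following the grammar
// above:
//   .amdgpu_lds my_buffer, 4096, 16
// declares a 4096-byte LDS symbol aligned to 16 bytes; the alignment argument
// is optional and defaults to 4.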
5186 
5187 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5188  StringRef IDVal = DirectiveID.getString();
5189 
5190  if (isHsaAbiVersion3Or4(&getSTI())) {
5191  if (IDVal == ".amdhsa_kernel")
5192  return ParseDirectiveAMDHSAKernel();
5193 
5194  // TODO: Restructure/combine with PAL metadata directive.
5195  if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
5196  return ParseDirectiveHSAMetadata();
5197  } else {
5198  if (IDVal == ".hsa_code_object_version")
5199  return ParseDirectiveHSACodeObjectVersion();
5200 
5201  if (IDVal == ".hsa_code_object_isa")
5202  return ParseDirectiveHSACodeObjectISA();
5203 
5204  if (IDVal == ".amd_kernel_code_t")
5205  return ParseDirectiveAMDKernelCodeT();
5206 
5207  if (IDVal == ".amdgpu_hsa_kernel")
5208  return ParseDirectiveAMDGPUHsaKernel();
5209 
5210  if (IDVal == ".amd_amdgpu_isa")
5211  return ParseDirectiveISAVersion();
5212 
5213  if (IDVal == HSAMD::AssemblerDirectiveBegin)
5214  return ParseDirectiveHSAMetadata();
5215  }
5216 
5217  if (IDVal == ".amdgcn_target")
5218  return ParseDirectiveAMDGCNTarget();
5219 
5220  if (IDVal == ".amdgpu_lds")
5221  return ParseDirectiveAMDGPULDS();
5222 
5223  if (IDVal == PALMD::AssemblerDirectiveBegin)
5224  return ParseDirectivePALMetadataBegin();
5225 
5226  if (IDVal == PALMD::AssemblerDirective)
5227  return ParseDirectivePALMetadata();
5228 
5229  return true;
5230 }
5231 
5232 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5233  unsigned RegNo) {
5234 
5235  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5236  R.isValid(); ++R) {
5237  if (*R == RegNo)
5238  return isGFX9Plus();
5239  }
5240 
5241  // GFX10 has 2 more SGPRs 104 and 105.
5242  for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5243  R.isValid(); ++R) {
5244  if (*R == RegNo)
5245  return hasSGPR104_SGPR105();
5246  }
5247 
5248  switch (RegNo) {
5249  case AMDGPU::SRC_SHARED_BASE:
5250  case AMDGPU::SRC_SHARED_LIMIT:
5251  case AMDGPU::SRC_PRIVATE_BASE:
5252  case AMDGPU::SRC_PRIVATE_LIMIT:
5253  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5254  return isGFX9Plus();
5255  case AMDGPU::TBA:
5256  case AMDGPU::TBA_LO:
5257  case AMDGPU::TBA_HI:
5258  case AMDGPU::TMA:
5259  case AMDGPU::TMA_LO:
5260  case AMDGPU::TMA_HI:
5261  return !isGFX9Plus();
5262  case AMDGPU::XNACK_MASK:
5263  case AMDGPU::XNACK_MASK_LO:
5264  case AMDGPU::XNACK_MASK_HI:
5265  return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5266  case AMDGPU::SGPR_NULL:
5267  return isGFX10Plus();
5268  default:
5269  break;
5270  }
5271 
5272  if (isCI())
5273  return true;
5274 
5275  if (isSI() || isGFX10Plus()) {
5276  // No flat_scr on SI.
5277  // On GFX10 flat scratch is not a valid register operand and can only be
5278  // accessed with s_setreg/s_getreg.
5279  switch (RegNo) {
5280  case AMDGPU::FLAT_SCR:
5281  case AMDGPU::FLAT_SCR_LO:
5282  case AMDGPU::FLAT_SCR_HI:
5283  return false;
5284  default:
5285  return true;
5286  }
5287  }
5288 
5289  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5290  // SI/CI have.
5291  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5292  R.isValid(); ++R) {
5293  if (*R == RegNo)
5294  return hasSGPR102_SGPR103();
5295  }
5296 
5297  return true;
5298 }
5299 
5300 OperandMatchResultTy
5301 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5302  OperandMode Mode) {
5303  // Try to parse with a custom parser
5304  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5305 
5306  // If we successfully parsed the operand or if there was an error parsing,
5307  // we are done.
5308  //
5309  // If we are parsing after we reach EndOfStatement then this means we
5310  // are appending default values to the Operands list. This is only done
5311  // by custom parser, so we shouldn't continue on to the generic parsing.
5312  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5313  isToken(AsmToken::EndOfStatement))
5314  return ResTy;
5315 
5316  SMLoc RBraceLoc;
5317  SMLoc LBraceLoc = getLoc();
5318  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5319  unsigned Prefix = Operands.size();
5320 
5321  for (;;) {
5322  auto Loc = getLoc();
5323  ResTy = parseReg(Operands);
5324  if (ResTy == MatchOperand_NoMatch)
5325  Error(Loc, "expected a register");
5326  if (ResTy != MatchOperand_Success)
5327  return MatchOperand_ParseFail;
5328 
5329  RBraceLoc = getLoc();
5330  if (trySkipToken(AsmToken::RBrac))
5331  break;
5332 
5333  if (!skipToken(AsmToken::Comma,
5334  "expected a comma or a closing square bracket")) {
5335  return MatchOperand_ParseFail;
5336  }
5337  }
5338 
5339  if (Operands.size() - Prefix > 1) {
5340  Operands.insert(Operands.begin() + Prefix,
5341  AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5342  Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5343  }
5344 
5345  return MatchOperand_Success;
5346  }
5347 
5348  return parseRegOrImm(Operands);
5349 }
5350 
5351 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5352  // Clear any forced encodings from the previous instruction.
5353  setForcedEncodingSize(0);
5354  setForcedDPP(false);
5355  setForcedSDWA(false);
5356 
5357  if (Name.endswith("_e64")) {
5358  setForcedEncodingSize(64);
5359  return Name.substr(0, Name.size() - 4);
5360  } else if (Name.endswith("_e32")) {
5361  setForcedEncodingSize(32);
5362  return Name.substr(0, Name.size() - 4);
5363  } else if (Name.endswith("_dpp")) {
5364  setForcedDPP(true);
5365  return Name.substr(0, Name.size() - 4);
5366  } else if (Name.endswith("_sdwa")) {
5367  setForcedSDWA(true);
5368  return Name.substr(0, Name.size() - 5);
5369  }
5370  return Name;
5371 }
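// For example (mnemonic chosen for illustration), "v_add_f32_e64" is split
// into the base mnemonic "v_add_f32" with a forced 64-bit encoding, while
// "v_mov_b32_sdwa" forces SDWA and strips the 5-character suffix.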
5372 
5373 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5374  StringRef Name,
5375  SMLoc NameLoc, OperandVector &Operands) {
5376  // Add the instruction mnemonic
5377  Name = parseMnemonicSuffix(Name);
5378  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5379 
5380  bool IsMIMG = Name.startswith("image_");
5381 
5382  while (!trySkipToken(AsmToken::EndOfStatement)) {
5383  OperandMode Mode = OperandMode_Default;
5384  if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5385  Mode = OperandMode_NSA;
5386  CPolSeen = 0;
5387  OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5388 
5389  if (Res != MatchOperand_Success) {
5390  checkUnsupportedInstruction(Name, NameLoc);
5391  if (!Parser.hasPendingError()) {
5392  // FIXME: use real operand location rather than the current location.
5393  StringRef Msg =
5394  (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5395  "not a valid operand.";
5396  Error(getLoc(), Msg);
5397  }
5398  while (!trySkipToken(AsmToken::EndOfStatement)) {
5399  lex();
5400  }
5401  return true;
5402  }
5403 
5404  // Eat the comma or space if there is one.
5405  trySkipToken(AsmToken::Comma);
5406  }
5407 
5408  return false;
5409 }
5410 
5411 //===----------------------------------------------------------------------===//
5412 // Utility functions
5413 //===----------------------------------------------------------------------===//
5414 
5415 OperandMatchResultTy
5416 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5417 
5418  if (!trySkipId(Prefix, AsmToken::Colon))
5419  return MatchOperand_NoMatch;
5420 
5421  return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5422 }
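// For example, given the input "offset:16" this helper skips the identifier
// and the ':' and leaves IntVal == 16 ("offset" is only an illustrative
// prefix; callers pass their own Prefix string).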
5423 
5424 OperandMatchResultTy
5425 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5426  AMDGPUOperand::ImmTy ImmTy,
5427  bool (*ConvertResult)(int64_t&)) {
5428  SMLoc S = getLoc();
5429  int64_t Value = 0;
5430 
5431  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5432  if (Res != MatchOperand_Success)
5433  return Res;
5434 
5435  if (ConvertResult && !ConvertResult(Value)) {
5436  Error(S, "invalid " + StringRef(Prefix) + " value.");
5437  }
5438 
5439  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5440  return MatchOperand_Success;
5441 }
5442 
5443 OperandMatchResultTy
5444 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5445  OperandVector &Operands,
5446  AMDGPUOperand::ImmTy ImmTy,
5447  bool (*ConvertResult)(int64_t&)) {
5448  SMLoc S = getLoc();
5449  if (!trySkipId(Prefix, AsmToken::Colon))
5450  return MatchOperand_NoMatch;
5451 
5452  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5453  return MatchOperand_ParseFail;
5454 
5455  unsigned Val = 0;
5456  const unsigned MaxSize = 4;
5457 
5458  // FIXME: How to verify the number of elements matches the number of src
5459  // operands?
5460  for (int I = 0; ; ++I) {
5461  int64_t Op;
5462  SMLoc Loc = getLoc();
5463  if (!parseExpr(Op))
5464  return MatchOperand_ParseFail;
5465 
5466  if (Op != 0 && Op != 1) {
5467  Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5468  return MatchOperand_ParseFail;
5469  }
5470 
5471  Val |= (Op << I);
5472 
5473  if (trySkipToken(AsmToken::RBrac))
5474  break;
5475 
5476  if (I + 1 == MaxSize) {
5477  Error(getLoc(), "expected a closing square bracket");
5478  return MatchOperand_ParseFail;
5479  }
5480 
5481  if (!skipToken(AsmToken::Comma, "expected a comma"))
5482  return MatchOperand_ParseFail;
5483  }
5484 
5485  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5486  return MatchOperand_Success;
5487 }
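// Illustrative example (the prefix name is an assumption): parsing
// "op_sel:[0,1,1]" accumulates Val = (0 << 0) | (1 << 1) | (1 << 2) = 6,
// which is then wrapped into a single immediate operand.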
5488 
5489 OperandMatchResultTy
5490 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5491  AMDGPUOperand::ImmTy ImmTy) {
5492  int64_t Bit;
5493  SMLoc S = getLoc();
5494 
5495  if (trySkipId(Name)) {
5496  Bit = 1;
5497  } else if (trySkipId("no", Name)) {
5498  Bit = 0;
5499  } else {
5500  return MatchOperand_NoMatch;
5501  }
5502 
5503  if (Name == "r128" && !hasMIMG_R128()) {
5504  Error(S, "r128 modifier is not supported on this GPU");
5505  return MatchOperand_ParseFail;
5506  }
5507  if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5508  Error(S, "a16 modifier is not supported on this GPU");
5509  return MatchOperand_ParseFail;
5510  }
5511 
5512  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5513  ImmTy = AMDGPUOperand::ImmTyR128A16;
5514 
5515  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5516  return MatchOperand_Success;
5517 }
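// For example, with Name == "gds" the token "gds" yields an immediate of 1
// and "nogds" yields 0; any other token leaves the operand list untouched
// and reports no match.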
5518 
5519 OperandMatchResultTy
5520 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5521  unsigned CPolOn = 0;
5522  unsigned CPolOff = 0;
5523  SMLoc S = getLoc();
5524 
5525  if (trySkipId("glc"))
5526  CPolOn = AMDGPU::CPol::GLC;
5527  else if (trySkipId("noglc"))
5528  CPolOff = AMDGPU::CPol::GLC;
5529  else if (trySkipId("slc"))
5530  CPolOn = AMDGPU::CPol::SLC;
5531  else if (trySkipId("noslc"))
5532  CPolOff = AMDGPU::CPol::SLC;
5533  else if (trySkipId("dlc"))
5534  CPolOn = AMDGPU::CPol::DLC;
5535  else if (trySkipId("nodlc"))
5536  CPolOff = AMDGPU::CPol::DLC;
5537  else if (trySkipId("scc"))
5538  CPolOn = AMDGPU::CPol::SCC;
5539  else if (trySkipId("noscc"))
5540  CPolOff = AMDGPU::CPol::SCC;
5541  else
5542  return MatchOperand_NoMatch;
5543 
5544  if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5545  Error(S, "dlc modifier is not supported on this GPU");
5546  return MatchOperand_ParseFail;
5547  }
5548 
5549  if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5550  Error(S, "scc modifier is not supported on this GPU");
5551  return MatchOperand_ParseFail;
5552  }
5553 
5554  if (CPolSeen & (CPolOn | CPolOff)) {
5555  Error(S, "duplicate cache policy modifier");
5556  return MatchOperand_ParseFail;
5557  }
5558 
5559  CPolSeen |= (CPolOn | CPolOff);
5560 
5561  for (unsigned I = 1; I != Operands.size(); ++I) {
5562  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5563  if (Op.isCPol()) {
5564  Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5565  return MatchOperand_Success;
5566  }
5567  }
5568 
5569  Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5570  AMDGPUOperand::ImmTyCPol));
5571 
5572  return MatchOperand_Success;
5573 }
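// Illustrative behaviour: "glc slc" on one instruction accumulates
// GLC | SLC into a single ImmTyCPol operand, while a later "noglc" would
// clear the GLC bit; repeating the same modifier (e.g. "glc glc") is
// rejected as a duplicate via CPolSeen.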
5574 
5575 static void addOptionalImmOperand(
5576  MCInst& Inst, const OperandVector& Operands,
5577  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5578  AMDGPUOperand::ImmTy ImmT,
5579  int64_t Default = 0) {
5580  auto i = OptionalIdx.find(ImmT);
5581  if (i != OptionalIdx.end()) {
5582  unsigned Idx = i->second;
5583  ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5584  } else {
5585  Inst.addOperand(MCOperand::createImm(Default));
5586  }
5587 }
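// Typical use from a cvt* conversion routine (illustrative; the actual call
// sites live elsewhere in this file):
//   addOptionalImmOperand(Inst, Operands, OptionalIdx,
//                         AMDGPUOperand::ImmTyOffset);
// i.e. emit the parsed optional operand if present, otherwise the default.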
5588 
5589 OperandMatchResultTy
5590 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5591  StringRef &Value,
5592  SMLoc &StringLoc) {
5593  if (!trySkipId(Prefix, AsmToken::Colon))
5594  return MatchOperand_NoMatch;
5595 
5596  StringLoc = getLoc();
5597  return parseId(Value, "expected an identifier") ? MatchOperand_Success
5598  : MatchOperand_ParseFail;
5599 }
5600 
5601 //===----------------------------------------------------------------------===//
5602 // MTBUF format
5603 //===----------------------------------------------------------------------===//
5604 
5605 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5606  int64_t MaxVal,
5607  int64_t &Fmt) {
5608  int64_t Val;
5609  SMLoc Loc = getLoc();
5610 
5611  auto Res = parseIntWithPrefix(Pref, Val);
5612  if (Res == MatchOperand_ParseFail)
5613  return false;
5614  if (Res == MatchOperand_NoMatch)
5615  return true;
5616 
5617  if (Val < 0 || Val > MaxVal) {
5618  Error(Loc, Twine("out of range ", StringRef(Pref)));
5619  return false;
5620  }
5621 
5622  Fmt = Val;
5623  return true;
5624 }
5625 
5626 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5627 // values to live in a joint format operand in the MCInst encoding.
5628 OperandMatchResultTy
5629 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5630  using namespace llvm::AMDGPU::MTBUFFormat;
5631 
5632  int64_t Dfmt = DFMT_UNDEF;
5633  int64_t Nfmt = NFMT_UNDEF;
5634 
5635  // dfmt and nfmt can appear in either order, and each is optional.
5636  for (int I = 0; I < 2; ++I) {
5637  if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5638  return MatchOperand_ParseFail;
5639 
5640  if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5641  return MatchOperand_ParseFail;
5642  }
5643  // Skip optional comma between dfmt/nfmt
5644  // but guard against 2 commas following each other.
5645  if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5646  !peekToken().is(AsmToken::Comma)) {
5647  trySkipToken(AsmToken::Comma);
5648  }
5649  }
5650 
5651  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5652  return MatchOperand_NoMatch;
5653 
5654  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5655  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5656 
5657  Format = encodeDfmtNfmt(Dfmt, Nfmt);
5658  return MatchOperand_Success;
5659 }
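// For example, "dfmt:4, nfmt:2" (values chosen for illustration) parses both
// fields in either order and folds them into Format via encodeDfmtNfmt(4, 2);
// if only one field is present the other falls back to its default value.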
5660 
5661 OperandMatchResultTy
5662 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5663  using namespace llvm::AMDGPU::MTBUFFormat;
5664 
5665  int64_t Fmt = UFMT_UNDEF;
5666 
5667  if (!tryParseFmt("format", UFMT_MAX, Fmt))
5668  return MatchOperand_ParseFail;
5669 
5670  if (Fmt == UFMT_UNDEF)
5671  return MatchOperand_NoMatch;
5672 
5673  Format = Fmt;
5674  return MatchOperand_Success;
5675 }
5676 
5677 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5678  int64_t &Nfmt,
5679  StringRef FormatStr,
5680  SMLoc Loc) {
5681  using namespace llvm::AMDGPU::MTBUFFormat;
5682  int64_t Format;
5683 
5684  Format = getDfmt(FormatStr);
5685  if (Format != DFMT_UNDEF) {
5686  Dfmt = Format;
5687  return true;
5688  }
5689 
5690  Format = getNfmt(FormatStr, getSTI());
5691  if (Format != NFMT_UNDEF) {
5692  Nfmt = Format;
5693  return true;
5694  }
5695 
5696  Error(Loc, "unsupported format");
5697  return false;
5698 }
5699 
5700 OperandMatchResultTy
5701 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5702  SMLoc FormatLoc,
5703  int64_t &Format) {
5704  using namespace llvm::AMDGPU::MTBUFFormat;
5705 
5706  int64_t Dfmt = DFMT_UNDEF;
5707  int64_t Nfmt = NFMT_UNDEF;
5708  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5709  return MatchOperand_ParseFail;
5710 
5711  if (trySkipToken(AsmToken::Comma)) {
5712  StringRef Str;
5713  SMLoc Loc = getLoc();
5714  if (!parseId(Str, "expected a format string") ||
5715  !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5716  return MatchOperand_ParseFail;
5717  }
5718  if (Dfmt == DFMT_UNDEF) {
5719  Error(Loc, "duplicate numeric format");
5720  return MatchOperand_ParseFail;
5721  } else if (Nfmt == NFMT_UNDEF) {
5722  Error(Loc, "duplicate data format");
5723  return MatchOperand_ParseFail;
5724  }
5725  }
5726 
5727  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5728  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5729 
5730  if (isGFX10Plus()) {
5731  auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5732  if (Ufmt == UFMT_UNDEF) {
5733  Error(FormatLoc, "unsupported format");
5734  return MatchOperand_ParseFail;
5735  }
5736  Format = Ufmt;
5737  } else {
5738  Format = encodeDfmtNfmt(Dfmt, Nfmt);
5739  }
5740 
5741  return MatchOperand_Success;
5742 }
5743 
5744 OperandMatchResultTy
5745 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5746  SMLoc Loc,
5747  int64_t &Format) {
5748  using namespace llvm::AMDGPU::MTBUFFormat;
5749 
5750  auto Id = getUnifiedFormat(FormatStr);
5751  if (Id == UFMT_UNDEF)
5752  return MatchOperand_NoMatch;
5753 
5754  if (!isGFX10Plus()) {
5755  Error(Loc, "unified format is not supported on this GPU");
5756  return MatchOperand_ParseFail;
5757  }
5758 
5759  Format = Id;
5760  return MatchOperand_Success;
5761 }
5762 
5763 OperandMatchResultTy
5764 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5765  using namespace llvm::AMDGPU::MTBUFFormat;
5766  SMLoc Loc = getLoc();
5767 
5768  if (!parseExpr(Format))
5769  return MatchOperand_ParseFail;
5770  if (!isValidFormatEncoding(Format, getSTI())) {
5771  Error(Loc, "out of range format");
5772  return MatchOperand_ParseFail;
5773  }
5774 
5775  return MatchOperand_Success;
5776 }
5777 
5778 OperandMatchResultTy
5779 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5780  using namespace llvm::AMDGPU::MTBUFFormat;
5781 
5782  if (!trySkipId("format", AsmToken::Colon))
5783  return MatchOperand_NoMatch;
5784 
5785  if (trySkipToken(AsmToken::LBrac)) {
5786  StringRef FormatStr;
5787  SMLoc Loc = getLoc();
5788  if (!parseId(FormatStr, "expected a format string"))
5789  return MatchOperand_ParseFail;
5790 
5791  auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5792  if (Res == MatchOperand_NoMatch)
5793  Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5794  if (Res != MatchOperand_Success)
5795  return Res;
5796 
5797  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5798  return MatchOperand_ParseFail;
5799 
5800  return MatchOperand_Success;
5801  }
5802 
5803  return parseNumericFormat(Format);
5804 }
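// Illustrative forms accepted here (the symbolic names are assumptions based
// on the MTBUF format tables, not taken from this listing):
//   format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]   // symbolic
//   format:22                                           // numeric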
5805 
5806 OperandMatchResultTy
5807 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5808  using namespace llvm::AMDGPU::MTBUFFormat;
5809 
5810  int64_t Format = getDefaultFormatEncoding(getSTI());
5811  OperandMatchResultTy Res;
5812  SMLoc Loc = getLoc();
5813 
5814  // Parse legacy format syntax.
5815  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5816  if (Res == MatchOperand_ParseFail)
5817  return Res;
5818 
5819  bool FormatFound = (Res == MatchOperand_Success);
5820 
5821  Operands.push_back(
5822  AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5823 
5824  if (FormatFound)
5825  trySkipToken(AsmToken::Comma);
5826 
5827  if (isToken(AsmToken::EndOfStatement)) {
5828  // We are expecting an soffset operand,
5829  // but let matcher handle the error.
5830  return MatchOperand_Success;
5831  }
5832 
5833  // Parse soffset.
5834  Res = parseRegOrImm(Operands);
5835  if (Res != MatchOperand_Success)
5836  return Res;
5837 
5838  trySkipToken(AsmToken::Comma);
5839 
5840  if (!FormatFound) {
5841  Res = parseSymbolicOrNumericFormat(Format);
5842  if (Res == MatchOperand_ParseFail)
5843  return Res;
5844  if (Res == MatchOperand_Success) {
5845  auto Size = Operands.size();
5846  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5847  assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5848  Op.setImm(Format);
5849  }
5850  return MatchOperand_Success;
5851  }
5852 
5853  if (isId("format") && peekToken().is(AsmToken::Colon)) {
5854  Error(getLoc(), "duplicate format");
5855  return MatchOperand_ParseFail;
5856  }
5857  return MatchOperand_Success;
5858 }
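// Net effect (illustrative): the format modifier may appear either before or
// after the soffset operand, e.g. "..., format:22, s0" and "..., s0,
// format:22"; in the latter case the placeholder ImmTyFORMAT operand pushed
// above is patched with the late-parsed value.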
5859 
5860 //===----------------------------------------------------------------------===//
5861 // ds
5862 //===----------------------------------------------------------------------===//
5863 
5864 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5865  const OperandVector &Operands) {
5866  OptionalImmIndexMap OptionalIdx;
5867 
5868  for (unsigned i = 1, e =