AMDGPUAsmParser.cpp
1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
19 #include "llvm/ADT/APFloat.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/MC/MCAsmInfo.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCExpr.h"
26 #include "llvm/MC/MCInst.h"
30 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/Support/Casting.h"
37 
38 using namespace llvm;
39 using namespace llvm::AMDGPU;
40 using namespace llvm::amdhsa;
41 
42 namespace {
43 
44 class AMDGPUAsmParser;
45 
46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
47 
48 //===----------------------------------------------------------------------===//
49 // Operand
50 //===----------------------------------------------------------------------===//
51 
52 class AMDGPUOperand : public MCParsedAsmOperand {
53  enum KindTy {
54  Token,
55  Immediate,
56  Register,
58  } Kind;
59 
60  SMLoc StartLoc, EndLoc;
61  const AMDGPUAsmParser *AsmParser;
62 
63 public:
64  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
65  : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
66 
67  using Ptr = std::unique_ptr<AMDGPUOperand>;
68 
69  struct Modifiers {
70  bool Abs = false;
71  bool Neg = false;
72  bool Sext = false;
73 
74  bool hasFPModifiers() const { return Abs || Neg; }
75  bool hasIntModifiers() const { return Sext; }
76  bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
77 
78  int64_t getFPModifiersOperand() const {
79  int64_t Operand = 0;
80  Operand |= Abs ? SISrcMods::ABS : 0u;
81  Operand |= Neg ? SISrcMods::NEG : 0u;
82  return Operand;
83  }
84 
85  int64_t getIntModifiersOperand() const {
86  int64_t Operand = 0;
87  Operand |= Sext ? SISrcMods::SEXT : 0u;
88  return Operand;
89  }
90 
91  int64_t getModifiersOperand() const {
92  assert(!(hasFPModifiers() && hasIntModifiers())
93  && "fp and int modifiers should not be used simultaneously");
94  if (hasFPModifiers()) {
95  return getFPModifiersOperand();
96  } else if (hasIntModifiers()) {
97  return getIntModifiersOperand();
98  } else {
99  return 0;
100  }
101  }
102 
103  friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
104  };
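 
  // Illustrative sketch (not part of the upstream code): for a VOP3 source
  // written with SP3 modifiers such as "-|v0|", parsing sets Abs = Neg = true,
  // and the encoded modifier operand is built as follows:
  //
  //   Modifiers Mods;
  //   Mods.Abs = true;
  //   Mods.Neg = true;
  //   int64_t Enc = Mods.getModifiersOperand(); // SISrcMods::ABS | SISrcMods::NEG
  //
  // This value becomes the src*_modifiers immediate that precedes the source
  // operand in the MCInst (see addRegOrImmWithInputModsOperands below).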
105 
106  enum ImmTy {
107  ImmTyNone,
108  ImmTyGDS,
109  ImmTyLDS,
110  ImmTyOffen,
111  ImmTyIdxen,
112  ImmTyAddr64,
113  ImmTyOffset,
114  ImmTyInstOffset,
115  ImmTyOffset0,
116  ImmTyOffset1,
117  ImmTyCPol,
118  ImmTySWZ,
119  ImmTyTFE,
120  ImmTyD16,
121  ImmTyClampSI,
122  ImmTyOModSI,
123  ImmTyDPP8,
124  ImmTyDppCtrl,
125  ImmTyDppRowMask,
126  ImmTyDppBankMask,
127  ImmTyDppBoundCtrl,
128  ImmTyDppFi,
129  ImmTySdwaDstSel,
130  ImmTySdwaSrc0Sel,
131  ImmTySdwaSrc1Sel,
132  ImmTySdwaDstUnused,
133  ImmTyDMask,
134  ImmTyDim,
135  ImmTyUNorm,
136  ImmTyDA,
137  ImmTyR128A16,
138  ImmTyA16,
139  ImmTyLWE,
140  ImmTyExpTgt,
141  ImmTyExpCompr,
142  ImmTyExpVM,
143  ImmTyFORMAT,
144  ImmTyHwreg,
145  ImmTyOff,
146  ImmTySendMsg,
147  ImmTyInterpSlot,
148  ImmTyInterpAttr,
149  ImmTyAttrChan,
150  ImmTyOpSel,
151  ImmTyOpSelHi,
152  ImmTyNegLo,
153  ImmTyNegHi,
154  ImmTySwizzle,
155  ImmTyGprIdxMode,
156  ImmTyHigh,
157  ImmTyBLGP,
158  ImmTyCBSZ,
159  ImmTyABID,
160  ImmTyEndpgm,
161  };
162 
163  enum ImmKindTy {
164  ImmKindTyNone,
165  ImmKindTyLiteral,
166  ImmKindTyConst,
167  };
168 
169 private:
170  struct TokOp {
171  const char *Data;
172  unsigned Length;
173  };
174 
175  struct ImmOp {
176  int64_t Val;
177  ImmTy Type;
178  bool IsFPImm;
179  mutable ImmKindTy Kind;
180  Modifiers Mods;
181  };
182 
183  struct RegOp {
184  unsigned RegNo;
185  Modifiers Mods;
186  };
187 
188  union {
189  TokOp Tok;
190  ImmOp Imm;
191  RegOp Reg;
192  const MCExpr *Expr;
193  };
194 
195 public:
196  bool isToken() const override {
197  if (Kind == Token)
198  return true;
199 
200  // When parsing operands, we can't always tell if something was meant to be
201  // a token, like 'gds', or an expression that references a global variable.
202  // In this case, we assume the string is an expression, and if we need to
203  // interpret it as a token, then we treat the symbol name as the token.
204  return isSymbolRefExpr();
205  }
206 
207  bool isSymbolRefExpr() const {
208  return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
209  }
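 
  // Illustrative sketch: an identifier operand such as "gds" may first be
  // parsed as an MCSymbolRefExpr to a symbol named "gds". For such an operand:
  //
  //   Op.isExpr();    // true  (Kind == Expression, a symbol reference)
  //   Op.isToken();   // also true, via isSymbolRefExpr()
  //   Op.getToken();  // "gds" - the symbol name is used as the token text
  //
  // so the matcher can still treat the operand as the keyword it was meant
  // to be.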
210 
211  bool isImm() const override {
212  return Kind == Immediate;
213  }
214 
215  void setImmKindNone() const {
216  assert(isImm());
217  Imm.Kind = ImmKindTyNone;
218  }
219 
220  void setImmKindLiteral() const {
221  assert(isImm());
222  Imm.Kind = ImmKindTyLiteral;
223  }
224 
225  void setImmKindConst() const {
226  assert(isImm());
227  Imm.Kind = ImmKindTyConst;
228  }
229 
230  bool IsImmKindLiteral() const {
231  return isImm() && Imm.Kind == ImmKindTyLiteral;
232  }
233 
234  bool isImmKindConst() const {
235  return isImm() && Imm.Kind == ImmKindTyConst;
236  }
237 
238  bool isInlinableImm(MVT type) const;
239  bool isLiteralImm(MVT type) const;
240 
241  bool isRegKind() const {
242  return Kind == Register;
243  }
244 
245  bool isReg() const override {
246  return isRegKind() && !hasModifiers();
247  }
248 
249  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
250  return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
251  }
252 
253  bool isRegOrImmWithInt16InputMods() const {
254  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
255  }
256 
257  bool isRegOrImmWithInt32InputMods() const {
258  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
259  }
260 
261  bool isRegOrImmWithInt64InputMods() const {
262  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
263  }
264 
265  bool isRegOrImmWithFP16InputMods() const {
266  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
267  }
268 
269  bool isRegOrImmWithFP32InputMods() const {
270  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
271  }
272 
273  bool isRegOrImmWithFP64InputMods() const {
274  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
275  }
276 
277  bool isVReg() const {
278  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
279  isRegClass(AMDGPU::VReg_64RegClassID) ||
280  isRegClass(AMDGPU::VReg_96RegClassID) ||
281  isRegClass(AMDGPU::VReg_128RegClassID) ||
282  isRegClass(AMDGPU::VReg_160RegClassID) ||
283  isRegClass(AMDGPU::VReg_192RegClassID) ||
284  isRegClass(AMDGPU::VReg_256RegClassID) ||
285  isRegClass(AMDGPU::VReg_512RegClassID) ||
286  isRegClass(AMDGPU::VReg_1024RegClassID);
287  }
288 
289  bool isVReg32() const {
290  return isRegClass(AMDGPU::VGPR_32RegClassID);
291  }
292 
293  bool isVReg32OrOff() const {
294  return isOff() || isVReg32();
295  }
296 
297  bool isNull() const {
298  return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
299  }
300 
301  bool isVRegWithInputMods() const;
302 
303  bool isSDWAOperand(MVT type) const;
304  bool isSDWAFP16Operand() const;
305  bool isSDWAFP32Operand() const;
306  bool isSDWAInt16Operand() const;
307  bool isSDWAInt32Operand() const;
308 
309  bool isImmTy(ImmTy ImmT) const {
310  return isImm() && Imm.Type == ImmT;
311  }
312 
313  bool isImmModifier() const {
314  return isImm() && Imm.Type != ImmTyNone;
315  }
316 
317  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
318  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
319  bool isDMask() const { return isImmTy(ImmTyDMask); }
320  bool isDim() const { return isImmTy(ImmTyDim); }
321  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
322  bool isDA() const { return isImmTy(ImmTyDA); }
323  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
324  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
325  bool isLWE() const { return isImmTy(ImmTyLWE); }
326  bool isOff() const { return isImmTy(ImmTyOff); }
327  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
328  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
329  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
330  bool isOffen() const { return isImmTy(ImmTyOffen); }
331  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
332  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
333  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
334  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
335  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
336 
337  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
338  bool isGDS() const { return isImmTy(ImmTyGDS); }
339  bool isLDS() const { return isImmTy(ImmTyLDS); }
340  bool isCPol() const { return isImmTy(ImmTyCPol); }
341  bool isSWZ() const { return isImmTy(ImmTySWZ); }
342  bool isTFE() const { return isImmTy(ImmTyTFE); }
343  bool isD16() const { return isImmTy(ImmTyD16); }
344  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
345  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
346  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
347  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
348  bool isFI() const { return isImmTy(ImmTyDppFi); }
349  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
350  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
351  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
352  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
353  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
354  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
355  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
356  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
357  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
358  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
359  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
360  bool isHigh() const { return isImmTy(ImmTyHigh); }
361 
362  bool isMod() const {
363  return isClampSI() || isOModSI();
364  }
365 
366  bool isRegOrImm() const {
367  return isReg() || isImm();
368  }
369 
370  bool isRegClass(unsigned RCID) const;
371 
372  bool isInlineValue() const;
373 
374  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
375  return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
376  }
377 
378  bool isSCSrcB16() const {
379  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
380  }
381 
382  bool isSCSrcV2B16() const {
383  return isSCSrcB16();
384  }
385 
386  bool isSCSrcB32() const {
387  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
388  }
389 
390  bool isSCSrcB64() const {
391  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
392  }
393 
394  bool isBoolReg() const;
395 
396  bool isSCSrcF16() const {
397  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
398  }
399 
400  bool isSCSrcV2F16() const {
401  return isSCSrcF16();
402  }
403 
404  bool isSCSrcF32() const {
405  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
406  }
407 
408  bool isSCSrcF64() const {
409  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
410  }
411 
412  bool isSSrcB32() const {
413  return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
414  }
415 
416  bool isSSrcB16() const {
417  return isSCSrcB16() || isLiteralImm(MVT::i16);
418  }
419 
420  bool isSSrcV2B16() const {
421  llvm_unreachable("cannot happen");
422  return isSSrcB16();
423  }
424 
425  bool isSSrcB64() const {
426  // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
427  // See isVSrc64().
428  return isSCSrcB64() || isLiteralImm(MVT::i64);
429  }
430 
431  bool isSSrcF32() const {
432  return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
433  }
434 
435  bool isSSrcF64() const {
436  return isSCSrcB64() || isLiteralImm(MVT::f64);
437  }
438 
439  bool isSSrcF16() const {
440  return isSCSrcB16() || isLiteralImm(MVT::f16);
441  }
442 
443  bool isSSrcV2F16() const {
444  llvm_unreachable("cannot happen");
445  return isSSrcF16();
446  }
447 
448  bool isSSrcV2FP32() const {
449  llvm_unreachable("cannot happen");
450  return isSSrcF32();
451  }
452 
453  bool isSCSrcV2FP32() const {
454  llvm_unreachable("cannot happen");
455  return isSCSrcF32();
456  }
457 
458  bool isSSrcV2INT32() const {
459  llvm_unreachable("cannot happen");
460  return isSSrcB32();
461  }
462 
463  bool isSCSrcV2INT32() const {
464  llvm_unreachable("cannot happen");
465  return isSCSrcB32();
466  }
467 
468  bool isSSrcOrLdsB32() const {
469  return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
470  isLiteralImm(MVT::i32) || isExpr();
471  }
472 
473  bool isVCSrcB32() const {
474  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
475  }
476 
477  bool isVCSrcB64() const {
478  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
479  }
480 
481  bool isVCSrcB16() const {
482  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
483  }
484 
485  bool isVCSrcV2B16() const {
486  return isVCSrcB16();
487  }
488 
489  bool isVCSrcF32() const {
490  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
491  }
492 
493  bool isVCSrcF64() const {
494  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
495  }
496 
497  bool isVCSrcF16() const {
498  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
499  }
500 
501  bool isVCSrcV2F16() const {
502  return isVCSrcF16();
503  }
504 
505  bool isVSrcB32() const {
506  return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
507  }
508 
509  bool isVSrcB64() const {
510  return isVCSrcF64() || isLiteralImm(MVT::i64);
511  }
512 
513  bool isVSrcB16() const {
514  return isVCSrcB16() || isLiteralImm(MVT::i16);
515  }
516 
517  bool isVSrcV2B16() const {
518  return isVSrcB16() || isLiteralImm(MVT::v2i16);
519  }
520 
521  bool isVCSrcV2FP32() const {
522  return isVCSrcF64();
523  }
524 
525  bool isVSrcV2FP32() const {
526  return isVSrcF64() || isLiteralImm(MVT::v2f32);
527  }
528 
529  bool isVCSrcV2INT32() const {
530  return isVCSrcB64();
531  }
532 
533  bool isVSrcV2INT32() const {
534  return isVSrcB64() || isLiteralImm(MVT::v2i32);
535  }
536 
537  bool isVSrcF32() const {
538  return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
539  }
540 
541  bool isVSrcF64() const {
542  return isVCSrcF64() || isLiteralImm(MVT::f64);
543  }
544 
545  bool isVSrcF16() const {
546  return isVCSrcF16() || isLiteralImm(MVT::f16);
547  }
548 
549  bool isVSrcV2F16() const {
550  return isVSrcF16() || isLiteralImm(MVT::v2f16);
551  }
552 
553  bool isVISrcB32() const {
554  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
555  }
556 
557  bool isVISrcB16() const {
558  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
559  }
560 
561  bool isVISrcV2B16() const {
562  return isVISrcB16();
563  }
564 
565  bool isVISrcF32() const {
566  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
567  }
568 
569  bool isVISrcF16() const {
570  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
571  }
572 
573  bool isVISrcV2F16() const {
574  return isVISrcF16() || isVISrcB32();
575  }
576 
577  bool isVISrc_64B64() const {
578  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
579  }
580 
581  bool isVISrc_64F64() const {
582  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
583  }
584 
585  bool isVISrc_64V2FP32() const {
586  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
587  }
588 
589  bool isVISrc_64V2INT32() const {
590  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
591  }
592 
593  bool isVISrc_256B64() const {
594  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
595  }
596 
597  bool isVISrc_256F64() const {
598  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
599  }
600 
601  bool isVISrc_128B16() const {
602  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
603  }
604 
605  bool isVISrc_128V2B16() const {
606  return isVISrc_128B16();
607  }
608 
609  bool isVISrc_128B32() const {
610  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
611  }
612 
613  bool isVISrc_128F32() const {
614  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
615  }
616 
617  bool isVISrc_256V2FP32() const {
618  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
619  }
620 
621  bool isVISrc_256V2INT32() const {
622  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
623  }
624 
625  bool isVISrc_512B32() const {
626  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
627  }
628 
629  bool isVISrc_512B16() const {
630  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
631  }
632 
633  bool isVISrc_512V2B16() const {
634  return isVISrc_512B16();
635  }
636 
637  bool isVISrc_512F32() const {
638  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
639  }
640 
641  bool isVISrc_512F16() const {
642  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
643  }
644 
645  bool isVISrc_512V2F16() const {
646  return isVISrc_512F16() || isVISrc_512B32();
647  }
648 
649  bool isVISrc_1024B32() const {
650  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
651  }
652 
653  bool isVISrc_1024B16() const {
654  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
655  }
656 
657  bool isVISrc_1024V2B16() const {
658  return isVISrc_1024B16();
659  }
660 
661  bool isVISrc_1024F32() const {
662  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
663  }
664 
665  bool isVISrc_1024F16() const {
666  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
667  }
668 
669  bool isVISrc_1024V2F16() const {
670  return isVISrc_1024F16() || isVISrc_1024B32();
671  }
672 
673  bool isAISrcB32() const {
674  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
675  }
676 
677  bool isAISrcB16() const {
678  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
679  }
680 
681  bool isAISrcV2B16() const {
682  return isAISrcB16();
683  }
684 
685  bool isAISrcF32() const {
686  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
687  }
688 
689  bool isAISrcF16() const {
690  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
691  }
692 
693  bool isAISrcV2F16() const {
694  return isAISrcF16() || isAISrcB32();
695  }
696 
697  bool isAISrc_64B64() const {
698  return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
699  }
700 
701  bool isAISrc_64F64() const {
702  return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
703  }
704 
705  bool isAISrc_128B32() const {
706  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
707  }
708 
709  bool isAISrc_128B16() const {
710  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
711  }
712 
713  bool isAISrc_128V2B16() const {
714  return isAISrc_128B16();
715  }
716 
717  bool isAISrc_128F32() const {
718  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
719  }
720 
721  bool isAISrc_128F16() const {
722  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
723  }
724 
725  bool isAISrc_128V2F16() const {
726  return isAISrc_128F16() || isAISrc_128B32();
727  }
728 
729  bool isVISrc_128F16() const {
730  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
731  }
732 
733  bool isVISrc_128V2F16() const {
734  return isVISrc_128F16() || isVISrc_128B32();
735  }
736 
737  bool isAISrc_256B64() const {
738  return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
739  }
740 
741  bool isAISrc_256F64() const {
742  return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
743  }
744 
745  bool isAISrc_512B32() const {
746  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
747  }
748 
749  bool isAISrc_512B16() const {
750  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
751  }
752 
753  bool isAISrc_512V2B16() const {
754  return isAISrc_512B16();
755  }
756 
757  bool isAISrc_512F32() const {
758  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
759  }
760 
761  bool isAISrc_512F16() const {
762  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
763  }
764 
765  bool isAISrc_512V2F16() const {
766  return isAISrc_512F16() || isAISrc_512B32();
767  }
768 
769  bool isAISrc_1024B32() const {
770  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
771  }
772 
773  bool isAISrc_1024B16() const {
774  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
775  }
776 
777  bool isAISrc_1024V2B16() const {
778  return isAISrc_1024B16();
779  }
780 
781  bool isAISrc_1024F32() const {
782  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
783  }
784 
785  bool isAISrc_1024F16() const {
786  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
787  }
788 
789  bool isAISrc_1024V2F16() const {
790  return isAISrc_1024F16() || isAISrc_1024B32();
791  }
792 
793  bool isKImmFP32() const {
794  return isLiteralImm(MVT::f32);
795  }
796 
797  bool isKImmFP16() const {
798  return isLiteralImm(MVT::f16);
799  }
800 
801  bool isMem() const override {
802  return false;
803  }
804 
805  bool isExpr() const {
806  return Kind == Expression;
807  }
808 
809  bool isSoppBrTarget() const {
810  return isExpr() || isImm();
811  }
812 
813  bool isSWaitCnt() const;
814  bool isHwreg() const;
815  bool isSendMsg() const;
816  bool isSwizzle() const;
817  bool isSMRDOffset8() const;
818  bool isSMEMOffset() const;
819  bool isSMRDLiteralOffset() const;
820  bool isDPP8() const;
821  bool isDPPCtrl() const;
822  bool isBLGP() const;
823  bool isCBSZ() const;
824  bool isABID() const;
825  bool isGPRIdxMode() const;
826  bool isS16Imm() const;
827  bool isU16Imm() const;
828  bool isEndpgm() const;
829 
830  StringRef getExpressionAsToken() const {
831  assert(isExpr());
832  const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
833  return S->getSymbol().getName();
834  }
835 
836  StringRef getToken() const {
837  assert(isToken());
838 
839  if (Kind == Expression)
840  return getExpressionAsToken();
841 
842  return StringRef(Tok.Data, Tok.Length);
843  }
844 
845  int64_t getImm() const {
846  assert(isImm());
847  return Imm.Val;
848  }
849 
850  void setImm(int64_t Val) {
851  assert(isImm());
852  Imm.Val = Val;
853  }
854 
855  ImmTy getImmTy() const {
856  assert(isImm());
857  return Imm.Type;
858  }
859 
860  unsigned getReg() const override {
861  assert(isRegKind());
862  return Reg.RegNo;
863  }
864 
865  SMLoc getStartLoc() const override {
866  return StartLoc;
867  }
868 
869  SMLoc getEndLoc() const override {
870  return EndLoc;
871  }
872 
873  SMRange getLocRange() const {
874  return SMRange(StartLoc, EndLoc);
875  }
876 
877  Modifiers getModifiers() const {
878  assert(isRegKind() || isImmTy(ImmTyNone));
879  return isRegKind() ? Reg.Mods : Imm.Mods;
880  }
881 
882  void setModifiers(Modifiers Mods) {
883  assert(isRegKind() || isImmTy(ImmTyNone));
884  if (isRegKind())
885  Reg.Mods = Mods;
886  else
887  Imm.Mods = Mods;
888  }
889 
890  bool hasModifiers() const {
891  return getModifiers().hasModifiers();
892  }
893 
894  bool hasFPModifiers() const {
895  return getModifiers().hasFPModifiers();
896  }
897 
898  bool hasIntModifiers() const {
899  return getModifiers().hasIntModifiers();
900  }
901 
902  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
903 
904  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
905 
906  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
907 
908  template <unsigned Bitwidth>
909  void addKImmFPOperands(MCInst &Inst, unsigned N) const;
910 
911  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
912  addKImmFPOperands<16>(Inst, N);
913  }
914 
915  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
916  addKImmFPOperands<32>(Inst, N);
917  }
918 
919  void addRegOperands(MCInst &Inst, unsigned N) const;
920 
921  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
922  addRegOperands(Inst, N);
923  }
924 
925  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
926  if (isRegKind())
927  addRegOperands(Inst, N);
928  else if (isExpr())
929  Inst.addOperand(MCOperand::createExpr(Expr));
930  else
931  addImmOperands(Inst, N);
932  }
933 
934  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
935  Modifiers Mods = getModifiers();
936  Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
937  if (isRegKind()) {
938  addRegOperands(Inst, N);
939  } else {
940  addImmOperands(Inst, N, false);
941  }
942  }
943 
944  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
945  assert(!hasIntModifiers());
946  addRegOrImmWithInputModsOperands(Inst, N);
947  }
948 
949  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
950  assert(!hasFPModifiers());
951  addRegOrImmWithInputModsOperands(Inst, N);
952  }
953 
954  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
955  Modifiers Mods = getModifiers();
956  Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
957  assert(isRegKind());
958  addRegOperands(Inst, N);
959  }
960 
961  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
962  assert(!hasIntModifiers());
963  addRegWithInputModsOperands(Inst, N);
964  }
965 
966  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
967  assert(!hasFPModifiers());
968  addRegWithInputModsOperands(Inst, N);
969  }
970 
971  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
972  if (isImm())
973  addImmOperands(Inst, N);
974  else {
975  assert(isExpr());
976  Inst.addOperand(MCOperand::createExpr(Expr));
977  }
978  }
979 
980  static void printImmTy(raw_ostream& OS, ImmTy Type) {
981  switch (Type) {
982  case ImmTyNone: OS << "None"; break;
983  case ImmTyGDS: OS << "GDS"; break;
984  case ImmTyLDS: OS << "LDS"; break;
985  case ImmTyOffen: OS << "Offen"; break;
986  case ImmTyIdxen: OS << "Idxen"; break;
987  case ImmTyAddr64: OS << "Addr64"; break;
988  case ImmTyOffset: OS << "Offset"; break;
989  case ImmTyInstOffset: OS << "InstOffset"; break;
990  case ImmTyOffset0: OS << "Offset0"; break;
991  case ImmTyOffset1: OS << "Offset1"; break;
992  case ImmTyCPol: OS << "CPol"; break;
993  case ImmTySWZ: OS << "SWZ"; break;
994  case ImmTyTFE: OS << "TFE"; break;
995  case ImmTyD16: OS << "D16"; break;
996  case ImmTyFORMAT: OS << "FORMAT"; break;
997  case ImmTyClampSI: OS << "ClampSI"; break;
998  case ImmTyOModSI: OS << "OModSI"; break;
999  case ImmTyDPP8: OS << "DPP8"; break;
1000  case ImmTyDppCtrl: OS << "DppCtrl"; break;
1001  case ImmTyDppRowMask: OS << "DppRowMask"; break;
1002  case ImmTyDppBankMask: OS << "DppBankMask"; break;
1003  case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1004  case ImmTyDppFi: OS << "FI"; break;
1005  case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1006  case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1007  case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1008  case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1009  case ImmTyDMask: OS << "DMask"; break;
1010  case ImmTyDim: OS << "Dim"; break;
1011  case ImmTyUNorm: OS << "UNorm"; break;
1012  case ImmTyDA: OS << "DA"; break;
1013  case ImmTyR128A16: OS << "R128A16"; break;
1014  case ImmTyA16: OS << "A16"; break;
1015  case ImmTyLWE: OS << "LWE"; break;
1016  case ImmTyOff: OS << "Off"; break;
1017  case ImmTyExpTgt: OS << "ExpTgt"; break;
1018  case ImmTyExpCompr: OS << "ExpCompr"; break;
1019  case ImmTyExpVM: OS << "ExpVM"; break;
1020  case ImmTyHwreg: OS << "Hwreg"; break;
1021  case ImmTySendMsg: OS << "SendMsg"; break;
1022  case ImmTyInterpSlot: OS << "InterpSlot"; break;
1023  case ImmTyInterpAttr: OS << "InterpAttr"; break;
1024  case ImmTyAttrChan: OS << "AttrChan"; break;
1025  case ImmTyOpSel: OS << "OpSel"; break;
1026  case ImmTyOpSelHi: OS << "OpSelHi"; break;
1027  case ImmTyNegLo: OS << "NegLo"; break;
1028  case ImmTyNegHi: OS << "NegHi"; break;
1029  case ImmTySwizzle: OS << "Swizzle"; break;
1030  case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1031  case ImmTyHigh: OS << "High"; break;
1032  case ImmTyBLGP: OS << "BLGP"; break;
1033  case ImmTyCBSZ: OS << "CBSZ"; break;
1034  case ImmTyABID: OS << "ABID"; break;
1035  case ImmTyEndpgm: OS << "Endpgm"; break;
1036  }
1037  }
1038 
1039  void print(raw_ostream &OS) const override {
1040  switch (Kind) {
1041  case Register:
1042  OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1043  break;
1044  case Immediate:
1045  OS << '<' << getImm();
1046  if (getImmTy() != ImmTyNone) {
1047  OS << " type: "; printImmTy(OS, getImmTy());
1048  }
1049  OS << " mods: " << Imm.Mods << '>';
1050  break;
1051  case Token:
1052  OS << '\'' << getToken() << '\'';
1053  break;
1054  case Expression:
1055  OS << "<expr " << *Expr << '>';
1056  break;
1057  }
1058  }
1059 
1060  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1061  int64_t Val, SMLoc Loc,
1062  ImmTy Type = ImmTyNone,
1063  bool IsFPImm = false) {
1064  auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1065  Op->Imm.Val = Val;
1066  Op->Imm.IsFPImm = IsFPImm;
1067  Op->Imm.Kind = ImmKindTyNone;
1068  Op->Imm.Type = Type;
1069  Op->Imm.Mods = Modifiers();
1070  Op->StartLoc = Loc;
1071  Op->EndLoc = Loc;
1072  return Op;
1073  }
1074 
1075  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1076  StringRef Str, SMLoc Loc,
1077  bool HasExplicitEncodingSize = true) {
1078  auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1079  Res->Tok.Data = Str.data();
1080  Res->Tok.Length = Str.size();
1081  Res->StartLoc = Loc;
1082  Res->EndLoc = Loc;
1083  return Res;
1084  }
1085 
1086  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1087  unsigned RegNo, SMLoc S,
1088  SMLoc E) {
1089  auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1090  Op->Reg.RegNo = RegNo;
1091  Op->Reg.Mods = Modifiers();
1092  Op->StartLoc = S;
1093  Op->EndLoc = E;
1094  return Op;
1095  }
1096 
1097  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1098  const class MCExpr *Expr, SMLoc S) {
1099  auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1100  Op->Expr = Expr;
1101  Op->StartLoc = S;
1102  Op->EndLoc = S;
1103  return Op;
1104  }
1105 };
1106 
1107 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1108  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1109  return OS;
1110 }
1111 
1112 //===----------------------------------------------------------------------===//
1113 // AsmParser
1114 //===----------------------------------------------------------------------===//
1115 
1116 // Holds info related to the current kernel, e.g. count of SGPRs used.
1117 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1118 // .amdgpu_hsa_kernel or at EOF.
1119 class KernelScopeInfo {
1120  int SgprIndexUnusedMin = -1;
1121  int VgprIndexUnusedMin = -1;
1122  MCContext *Ctx = nullptr;
1123 
1124  void usesSgprAt(int i) {
1125  if (i >= SgprIndexUnusedMin) {
1126  SgprIndexUnusedMin = ++i;
1127  if (Ctx) {
1128  MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1129  Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1130  }
1131  }
1132  }
1133 
1134  void usesVgprAt(int i) {
1135  if (i >= VgprIndexUnusedMin) {
1136  VgprIndexUnusedMin = ++i;
1137  if (Ctx) {
1138  MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1139  Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1140  }
1141  }
1142  }
1143 
1144 public:
1145  KernelScopeInfo() = default;
1146 
1147  void initialize(MCContext &Context) {
1148  Ctx = &Context;
1149  usesSgprAt(SgprIndexUnusedMin = -1);
1150  usesVgprAt(VgprIndexUnusedMin = -1);
1151  }
1152 
1153  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1154  switch (RegKind) {
1155  case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1156  case IS_AGPR: // fall through
1157  case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1158  default: break;
1159  }
1160  }
1161 };
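 
// Illustrative sketch: after parsing a reference to v[7:8] inside an
// .amdgpu_hsa_kernel scope, the parser calls
//
//   KernelScope.usesRegister(IS_VGPR, /*DwordRegIndex=*/7, /*RegWidth=*/2);
//
// which raises VgprIndexUnusedMin to 9 and updates the variable symbol
// ".kernel.vgpr_count", so the running register counts for the current kernel
// remain available until the next .amdgpu_hsa_kernel directive or EOF.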
1162 
1163 class AMDGPUAsmParser : public MCTargetAsmParser {
1164  MCAsmParser &Parser;
1165 
1166  // Number of extra operands parsed after the first optional operand.
1167  // This may be necessary to skip hardcoded mandatory operands.
1168  static const unsigned MAX_OPR_LOOKAHEAD = 8;
1169 
1170  unsigned ForcedEncodingSize = 0;
1171  bool ForcedDPP = false;
1172  bool ForcedSDWA = false;
1173  KernelScopeInfo KernelScope;
1174  unsigned CPolSeen;
1175 
1176  /// @name Auto-generated Match Functions
1177  /// {
1178 
1179 #define GET_ASSEMBLER_HEADER
1180 #include "AMDGPUGenAsmMatcher.inc"
1181 
1182  /// }
1183 
1184 private:
1185  bool ParseAsAbsoluteExpression(uint32_t &Ret);
1186  bool OutOfRangeError(SMRange Range);
1187  /// Calculate VGPR/SGPR blocks required for given target, reserved
1188  /// registers, and user-specified NextFreeXGPR values.
1189  ///
1190  /// \param Features [in] Target features, used for bug corrections.
1191  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1192  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1193  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1194  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1195  /// descriptor field, if valid.
1196  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1197  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1198  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1199  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1200  /// \param VGPRBlocks [out] Result VGPR block count.
1201  /// \param SGPRBlocks [out] Result SGPR block count.
1202  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1203  bool FlatScrUsed, bool XNACKUsed,
1204  Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1205  SMRange VGPRRange, unsigned NextFreeSGPR,
1206  SMRange SGPRRange, unsigned &VGPRBlocks,
1207  unsigned &SGPRBlocks);
1208  bool ParseDirectiveAMDGCNTarget();
1209  bool ParseDirectiveAMDHSAKernel();
1210  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1211  bool ParseDirectiveHSACodeObjectVersion();
1212  bool ParseDirectiveHSACodeObjectISA();
1213  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1214  bool ParseDirectiveAMDKernelCodeT();
1215  // TODO: Possibly make subtargetHasRegister const.
1216  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1217  bool ParseDirectiveAMDGPUHsaKernel();
1218 
1219  bool ParseDirectiveISAVersion();
1220  bool ParseDirectiveHSAMetadata();
1221  bool ParseDirectivePALMetadataBegin();
1222  bool ParseDirectivePALMetadata();
1223  bool ParseDirectiveAMDGPULDS();
1224 
1225  /// Common code to parse out a block of text (typically YAML) between start and
1226  /// end directives.
1227  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1228  const char *AssemblerDirectiveEnd,
1229  std::string &CollectString);
1230 
1231  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1232  RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1233  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1234  unsigned &RegNum, unsigned &RegWidth,
1235  bool RestoreOnFailure = false);
1236  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1237  unsigned &RegNum, unsigned &RegWidth,
1238  SmallVectorImpl<AsmToken> &Tokens);
1239  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1240  unsigned &RegWidth,
1241  SmallVectorImpl<AsmToken> &Tokens);
1242  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1243  unsigned &RegWidth,
1244  SmallVectorImpl<AsmToken> &Tokens);
1245  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1246  unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1247  bool ParseRegRange(unsigned& Num, unsigned& Width);
1248  unsigned getRegularReg(RegisterKind RegKind,
1249  unsigned RegNum,
1250  unsigned RegWidth,
1251  SMLoc Loc);
1252 
1253  bool isRegister();
1254  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1255  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1256  void initializeGprCountSymbol(RegisterKind RegKind);
1257  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1258  unsigned RegWidth);
1259  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1260  bool IsAtomic, bool IsLds = false);
1261  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1262  bool IsGdsHardcoded);
1263 
1264 public:
1265  enum AMDGPUMatchResultTy {
1266  Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1267  };
1268  enum OperandMode {
1269  OperandMode_Default,
1270  OperandMode_NSA,
1271  };
1272 
1273  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1274 
1275  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1276  const MCInstrInfo &MII,
1277  const MCTargetOptions &Options)
1278  : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1279  MCAsmParserExtension::Initialize(Parser);
1280 
1281  if (getFeatureBits().none()) {
1282  // Set default features.
1283  copySTI().ToggleFeature("southern-islands");
1284  }
1285 
1286  setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1287 
1288  {
1289  // TODO: make those pre-defined variables read-only.
 1290  // Currently there is no suitable machinery in core llvm-mc for this.
 1291  // MCSymbol::isRedefinable is intended for another purpose, and
 1292  // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1293  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1294  MCContext &Ctx = getContext();
1295  if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1296  MCSymbol *Sym =
1297  Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1298  Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1299  Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1300  Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1301  Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1302  Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1303  } else {
1304  MCSymbol *Sym =
1305  Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1306  Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1307  Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1308  Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1309  Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1310  Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1311  }
1312  if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1313  initializeGprCountSymbol(IS_VGPR);
1314  initializeGprCountSymbol(IS_SGPR);
1315  } else
1316  KernelScope.initialize(getContext());
1317  }
1318  }
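 
  // Illustrative sketch (hypothetical .s input): the variable symbols defined
  // above can be referenced from assembler expressions, e.g. with the legacy
  // (non-HSA-v3/v4) names:
  //
  //   .if .option.machine_version_major >= 9
  //     s_nop 0
  //   .endif
  //
  // Under the HSA code-object v3/v4 ABI the corresponding names are
  // .amdgcn.gfx_generation_number / _minor / _stepping.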
1319 
1320  bool hasMIMG_R128() const {
1321  return AMDGPU::hasMIMG_R128(getSTI());
1322  }
1323 
1324  bool hasPackedD16() const {
1325  return AMDGPU::hasPackedD16(getSTI());
1326  }
1327 
1328  bool hasGFX10A16() const {
1329  return AMDGPU::hasGFX10A16(getSTI());
1330  }
1331 
1332  bool isSI() const {
1333  return AMDGPU::isSI(getSTI());
1334  }
1335 
1336  bool isCI() const {
1337  return AMDGPU::isCI(getSTI());
1338  }
1339 
1340  bool isVI() const {
1341  return AMDGPU::isVI(getSTI());
1342  }
1343 
1344  bool isGFX9() const {
1345  return AMDGPU::isGFX9(getSTI());
1346  }
1347 
1348  bool isGFX90A() const {
1349  return AMDGPU::isGFX90A(getSTI());
1350  }
1351 
1352  bool isGFX9Plus() const {
1353  return AMDGPU::isGFX9Plus(getSTI());
1354  }
1355 
1356  bool isGFX10() const {
1357  return AMDGPU::isGFX10(getSTI());
1358  }
1359 
1360  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1361 
1362  bool isGFX10_BEncoding() const {
1363  return AMDGPU::isGFX10_BEncoding(getSTI());
1364  }
1365 
1366  bool hasInv2PiInlineImm() const {
1367  return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1368  }
1369 
1370  bool hasFlatOffsets() const {
1371  return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1372  }
1373 
1374  bool hasSGPR102_SGPR103() const {
1375  return !isVI() && !isGFX9();
1376  }
1377 
1378  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1379 
1380  bool hasIntClamp() const {
1381  return getFeatureBits()[AMDGPU::FeatureIntClamp];
1382  }
1383 
1384  AMDGPUTargetStreamer &getTargetStreamer() {
1385  MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1386  return static_cast<AMDGPUTargetStreamer &>(TS);
1387  }
1388 
1389  const MCRegisterInfo *getMRI() const {
1390  // We need this const_cast because for some reason getContext() is not const
1391  // in MCAsmParser.
1392  return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1393  }
1394 
1395  const MCInstrInfo *getMII() const {
1396  return &MII;
1397  }
1398 
1399  const FeatureBitset &getFeatureBits() const {
1400  return getSTI().getFeatureBits();
1401  }
1402 
1403  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1404  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1405  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1406 
1407  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1408  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1409  bool isForcedDPP() const { return ForcedDPP; }
1410  bool isForcedSDWA() const { return ForcedSDWA; }
1411  ArrayRef<unsigned> getMatchedVariants() const;
1412  StringRef getMatchedVariantName() const;
1413 
1414  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1415  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1416  bool RestoreOnFailure);
1417  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1418  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1419  SMLoc &EndLoc) override;
1420  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1421  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1422  unsigned Kind) override;
1423  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1424  OperandVector &Operands, MCStreamer &Out,
1425  uint64_t &ErrorInfo,
1426  bool MatchingInlineAsm) override;
1427  bool ParseDirective(AsmToken DirectiveID) override;
1428  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1429  OperandMode Mode = OperandMode_Default);
1430  StringRef parseMnemonicSuffix(StringRef Name);
1431  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1432  SMLoc NameLoc, OperandVector &Operands) override;
1433  //bool ProcessInstruction(MCInst &Inst);
1434 
1435  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1436 
1437  OperandMatchResultTy
1438  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1439  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1440  bool (*ConvertResult)(int64_t &) = nullptr);
1441 
1442  OperandMatchResultTy
1443  parseOperandArrayWithPrefix(const char *Prefix,
1444  OperandVector &Operands,
1445  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1446  bool (*ConvertResult)(int64_t&) = nullptr);
1447 
1448  OperandMatchResultTy
1449  parseNamedBit(StringRef Name, OperandVector &Operands,
1450  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1451  OperandMatchResultTy parseCPol(OperandVector &Operands);
1452  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1453  StringRef &Value,
1454  SMLoc &StringLoc);
1455 
1456  bool isModifier();
1457  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1458  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1459  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1460  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1461  bool parseSP3NegModifier();
1462  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1463  OperandMatchResultTy parseReg(OperandVector &Operands);
1464  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1465  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1466  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1467  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1468  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1469  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1470  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1471  OperandMatchResultTy parseUfmt(int64_t &Format);
1472  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1473  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1474  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1475  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1476  OperandMatchResultTy parseNumericFormat(int64_t &Format);
1477  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1478  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1479 
1480  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1481  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1482  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1483  void cvtExp(MCInst &Inst, const OperandVector &Operands);
1484 
1485  bool parseCnt(int64_t &IntVal);
1486  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1487  OperandMatchResultTy parseHwreg(OperandVector &Operands);
1488 
1489 private:
1490  struct OperandInfoTy {
1491  SMLoc Loc;
1492  int64_t Id;
1493  bool IsSymbolic = false;
1494  bool IsDefined = false;
1495 
1496  OperandInfoTy(int64_t Id_) : Id(Id_) {}
1497  };
1498 
1499  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1500  bool validateSendMsg(const OperandInfoTy &Msg,
1501  const OperandInfoTy &Op,
1502  const OperandInfoTy &Stream);
1503 
1504  bool parseHwregBody(OperandInfoTy &HwReg,
1505  OperandInfoTy &Offset,
1506  OperandInfoTy &Width);
1507  bool validateHwreg(const OperandInfoTy &HwReg,
1508  const OperandInfoTy &Offset,
1509  const OperandInfoTy &Width);
1510 
1511  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1512  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1513 
1514  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1515  const OperandVector &Operands) const;
1516  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1517  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1518  SMLoc getLitLoc(const OperandVector &Operands) const;
1519  SMLoc getConstLoc(const OperandVector &Operands) const;
1520 
1521  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1522  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1523  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1524  bool validateSOPLiteral(const MCInst &Inst) const;
1525  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1526  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1527  bool validateIntClampSupported(const MCInst &Inst);
1528  bool validateMIMGAtomicDMask(const MCInst &Inst);
1529  bool validateMIMGGatherDMask(const MCInst &Inst);
1530  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1531  bool validateMIMGDataSize(const MCInst &Inst);
1532  bool validateMIMGAddrSize(const MCInst &Inst);
1533  bool validateMIMGD16(const MCInst &Inst);
1534  bool validateMIMGDim(const MCInst &Inst);
1535  bool validateMIMGMSAA(const MCInst &Inst);
1536  bool validateOpSel(const MCInst &Inst);
1537  bool validateVccOperand(unsigned Reg) const;
1538  bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
1539  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1540  bool validateAGPRLdSt(const MCInst &Inst) const;
1541  bool validateVGPRAlign(const MCInst &Inst) const;
1542  bool validateDivScale(const MCInst &Inst);
1543  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1544  const SMLoc &IDLoc);
1545  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1546  unsigned getConstantBusLimit(unsigned Opcode) const;
1547  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1548  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1549  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1550 
1551  bool isSupportedMnemo(StringRef Mnemo,
1552  const FeatureBitset &FBS);
1553  bool isSupportedMnemo(StringRef Mnemo,
1554  const FeatureBitset &FBS,
1555  ArrayRef<unsigned> Variants);
1556  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1557 
1558  bool isId(const StringRef Id) const;
1559  bool isId(const AsmToken &Token, const StringRef Id) const;
1560  bool isToken(const AsmToken::TokenKind Kind) const;
1561  bool trySkipId(const StringRef Id);
1562  bool trySkipId(const StringRef Pref, const StringRef Id);
1563  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1564  bool trySkipToken(const AsmToken::TokenKind Kind);
1565  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1566  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1567  bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1568 
1569  void peekTokens(MutableArrayRef<AsmToken> Tokens);
1570  AsmToken::TokenKind getTokenKind() const;
1571  bool parseExpr(int64_t &Imm, StringRef Expected = "");
1572  bool parseExpr(OperandVector &Operands);
1573  StringRef getTokenStr() const;
1574  AsmToken peekToken();
1575  AsmToken getToken() const;
1576  SMLoc getLoc() const;
1577  void lex();
1578 
1579 public:
1580  void onBeginOfFile() override;
1581 
1582  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1583  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1584 
1585  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1586  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1587  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1588  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1589  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1590  OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1591 
1592  bool parseSwizzleOperand(int64_t &Op,
1593  const unsigned MinVal,
1594  const unsigned MaxVal,
1595  const StringRef ErrMsg,
1596  SMLoc &Loc);
1597  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1598  const unsigned MinVal,
1599  const unsigned MaxVal,
1600  const StringRef ErrMsg);
1601  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1602  bool parseSwizzleOffset(int64_t &Imm);
1603  bool parseSwizzleMacro(int64_t &Imm);
1604  bool parseSwizzleQuadPerm(int64_t &Imm);
1605  bool parseSwizzleBitmaskPerm(int64_t &Imm);
1606  bool parseSwizzleBroadcast(int64_t &Imm);
1607  bool parseSwizzleSwap(int64_t &Imm);
1608  bool parseSwizzleReverse(int64_t &Imm);
1609 
1610  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1611  int64_t parseGPRIdxMacro();
1612 
1613  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1614  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1615  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1616  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1617 
1618  AMDGPUOperand::Ptr defaultCPol() const;
1619 
1620  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1621  AMDGPUOperand::Ptr defaultSMEMOffset() const;
1622  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1623  AMDGPUOperand::Ptr defaultFlatOffset() const;
1624 
1625  OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1626 
1627  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1628  OptionalImmIndexMap &OptionalIdx);
1629  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1630  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1631  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1632 
1633  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1634 
1635  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1636  bool IsAtomic = false);
1637  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1638  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1639 
1640  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1641 
1642  bool parseDimId(unsigned &Encoding);
1643  OperandMatchResultTy parseDim(OperandVector &Operands);
1644  OperandMatchResultTy parseDPP8(OperandVector &Operands);
1645  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1646  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1647  int64_t parseDPPCtrlSel(StringRef Ctrl);
1648  int64_t parseDPPCtrlPerm();
1649  AMDGPUOperand::Ptr defaultRowMask() const;
1650  AMDGPUOperand::Ptr defaultBankMask() const;
1651  AMDGPUOperand::Ptr defaultBoundCtrl() const;
1652  AMDGPUOperand::Ptr defaultFI() const;
1653  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1654  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1655 
1656  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1657  AMDGPUOperand::ImmTy Type);
1658  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1659  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1660  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1661  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1662  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1663  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1664  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1665  uint64_t BasicInstType,
1666  bool SkipDstVcc = false,
1667  bool SkipSrcVcc = false);
1668 
1669  AMDGPUOperand::Ptr defaultBLGP() const;
1670  AMDGPUOperand::Ptr defaultCBSZ() const;
1671  AMDGPUOperand::Ptr defaultABID() const;
1672 
1673  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1674  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1675 };
1676 
1677 struct OptionalOperand {
1678  const char *Name;
1679  AMDGPUOperand::ImmTy Type;
1680  bool IsBit;
1681  bool (*ConvertResult)(int64_t&);
1682 };
1683 
1684 } // end anonymous namespace
1685 
1686 // May be called with an integer type of equivalent bit width.
1687 static const fltSemantics *getFltSemantics(unsigned Size) {
1688  switch (Size) {
1689  case 4:
1690  return &APFloat::IEEEsingle();
1691  case 8:
1692  return &APFloat::IEEEdouble();
1693  case 2:
1694  return &APFloat::IEEEhalf();
1695  default:
1696  llvm_unreachable("unsupported fp type");
1697  }
1698 }
1699 
1700 static const fltSemantics *getFltSemantics(MVT VT) {
1701  return getFltSemantics(VT.getSizeInBits() / 8);
1702 }
1703 
1704 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1705  switch (OperandType) {
1706  case AMDGPU::OPERAND_REG_IMM_INT32:
1707  case AMDGPU::OPERAND_REG_IMM_FP32:
1708  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1709  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1710  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1711  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1712  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1713  case AMDGPU::OPERAND_REG_IMM_V2FP32:
1714  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1715  case AMDGPU::OPERAND_REG_IMM_V2INT32:
1716  return &APFloat::IEEEsingle();
1717  case AMDGPU::OPERAND_REG_IMM_INT64:
1718  case AMDGPU::OPERAND_REG_IMM_FP64:
1719  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1720  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1721  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1722  return &APFloat::IEEEdouble();
1723  case AMDGPU::OPERAND_REG_IMM_INT16:
1724  case AMDGPU::OPERAND_REG_IMM_FP16:
1725  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1726  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1727  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1728  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1729  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1730  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1731  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1732  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1733  case AMDGPU::OPERAND_REG_IMM_V2INT16:
1734  case AMDGPU::OPERAND_REG_IMM_V2FP16:
1735  return &APFloat::IEEEhalf();
1736  default:
1737  llvm_unreachable("unsupported fp type");
1738  }
1739 }
1740 
1741 //===----------------------------------------------------------------------===//
1742 // Operand
1743 //===----------------------------------------------------------------------===//
1744 
1745 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1746  bool Lost;
1747 
1748  // Convert literal to single precision
1749  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1750  APFloat::rmNearestTiesToEven,
1751  &Lost);
1752  // We allow precision loss but not overflow or underflow
1753  if (Status != APFloat::opOK &&
1754  Lost &&
1755  ((Status & APFloat::opOverflow) != 0 ||
1756  (Status & APFloat::opUnderflow) != 0)) {
1757  return false;
1758  }
1759 
1760  return true;
1761 }
1762 
1763 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1764  return isUIntN(Size, Val) || isIntN(Size, Val);
1765 }
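// Illustrative values (added for clarity, not part of the original source),
// assuming Size == 16:
//   isSafeTruncation(-1, 16)      -> true  (representable as a signed 16-bit value)
//   isSafeTruncation(0xFFFF, 16)  -> true  (representable as an unsigned 16-bit value)
//   isSafeTruncation(0x1FFFF, 16) -> false (fits neither form)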
1766 
1767 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1768  if (VT.getScalarType() == MVT::i16) {
1769  // FP immediate values are broken.
1770  return isInlinableIntLiteral(Val);
1771  }
1772 
1773  // f16/v2f16 operands work correctly for all values.
1774  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1775 }
1776 
1777 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1778 
1779  // This is a hack to enable named inline values like
1780  // shared_base with both 32-bit and 64-bit operands.
1781  // Note that these values are defined as
1782  // 32-bit operands only.
1783  if (isInlineValue()) {
1784  return true;
1785  }
1786 
1787  if (!isImmTy(ImmTyNone)) {
1788  // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1789  return false;
1790  }
1791  // TODO: We should avoid using host float here. It would be better to
1792  // check the float bit values which is what a few other places do.
1793  // We've had bot failures before due to weird NaN support on mips hosts.
1794 
1795  APInt Literal(64, Imm.Val);
1796 
1797  if (Imm.IsFPImm) { // We got fp literal token
1798  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1799  return AMDGPU::isInlinableLiteral64(Imm.Val,
1800  AsmParser->hasInv2PiInlineImm());
1801  }
1802 
1803  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1804  if (!canLosslesslyConvertToFPType(FPLiteral, type))
1805  return false;
1806 
1807  if (type.getScalarSizeInBits() == 16) {
1808  return isInlineableLiteralOp16(
1809  static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1810  type, AsmParser->hasInv2PiInlineImm());
1811  }
1812 
1813  // Check if single precision literal is inlinable
1814  return AMDGPU::isInlinableLiteral32(
1815  static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1816  AsmParser->hasInv2PiInlineImm());
1817  }
1818 
1819  // We got int literal token.
1820  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1821  return AMDGPU::isInlinableLiteral64(Imm.Val,
1822  AsmParser->hasInv2PiInlineImm());
1823  }
1824 
1825  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1826  return false;
1827  }
1828 
1829  if (type.getScalarSizeInBits() == 16) {
1830  return isInlineableLiteralOp16(
1831  static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1832  type, AsmParser->hasInv2PiInlineImm());
1833  }
1834 
1835  return AMDGPU::isInlinableLiteral32(
1836  static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1837  AsmParser->hasInv2PiInlineImm());
1838 }
1839 
1840 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1841  // Check that this immediate can be added as literal
1842  if (!isImmTy(ImmTyNone)) {
1843  return false;
1844  }
1845 
1846  if (!Imm.IsFPImm) {
1847  // We got int literal token.
1848 
1849  if (type == MVT::f64 && hasFPModifiers()) {
1850  // Cannot apply fp modifiers to int literals while preserving the same semantics
1851  // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1852  // disable these cases.
1853  return false;
1854  }
1855 
1856  unsigned Size = type.getSizeInBits();
1857  if (Size == 64)
1858  Size = 32;
1859 
1860  // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1861  // types.
1862  return isSafeTruncation(Imm.Val, Size);
1863  }
1864 
1865  // We got fp literal token
1866  if (type == MVT::f64) { // Expected 64-bit fp operand
1867  // The low 32 bits of the literal will be set to zero, but such literals are accepted
1868  return true;
1869  }
1870 
1871  if (type == MVT::i64) { // Expected 64-bit int operand
1872  // We don't allow fp literals in 64-bit integer instructions. It is
1873  // unclear how we should encode them.
1874  return false;
1875  }
1876 
1877  // We allow fp literals with f16x2 operands assuming that the specified
1878  // literal goes into the lower half and the upper half is zero. We also
1879  // require that the literal may be losslessly converted to f16.
1880  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1881  (type == MVT::v2i16)? MVT::i16 :
1882  (type == MVT::v2f32)? MVT::f32 : type;
1883 
1884  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1885  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1886 }
1887 
1888 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1889  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1890 }
1891 
1892 bool AMDGPUOperand::isVRegWithInputMods() const {
1893  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1894  // GFX90A allows DPP on 64-bit operands.
1895  (isRegClass(AMDGPU::VReg_64RegClassID) &&
1896  AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1897 }
1898 
1899 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1900  if (AsmParser->isVI())
1901  return isVReg32();
1902  else if (AsmParser->isGFX9Plus())
1903  return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1904  else
1905  return false;
1906 }
1907 
1908 bool AMDGPUOperand::isSDWAFP16Operand() const {
1909  return isSDWAOperand(MVT::f16);
1910 }
1911 
1912 bool AMDGPUOperand::isSDWAFP32Operand() const {
1913  return isSDWAOperand(MVT::f32);
1914 }
1915 
1916 bool AMDGPUOperand::isSDWAInt16Operand() const {
1917  return isSDWAOperand(MVT::i16);
1918 }
1919 
1920 bool AMDGPUOperand::isSDWAInt32Operand() const {
1921  return isSDWAOperand(MVT::i32);
1922 }
1923 
1924 bool AMDGPUOperand::isBoolReg() const {
1925  return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1926  (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1927 }
1928 
1929 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1930 {
1931  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1932  assert(Size == 2 || Size == 4 || Size == 8);
1933 
1934  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1935 
1936  if (Imm.Mods.Abs) {
1937  Val &= ~FpSignMask;
1938  }
1939  if (Imm.Mods.Neg) {
1940  Val ^= FpSignMask;
1941  }
1942 
1943  return Val;
1944 }
1945 
1946 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1947  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1948  Inst.getNumOperands())) {
1949  addLiteralImmOperand(Inst, Imm.Val,
1950  ApplyModifiers &
1951  isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1952  } else {
1953  assert(!isImmTy(ImmTyNone) || !hasModifiers());
1954  Inst.addOperand(MCOperand::createImm(Imm.Val));
1955  setImmKindNone();
1956  }
1957 }
1958 
1959 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1960  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1961  auto OpNum = Inst.getNumOperands();
1962  // Check that this operand accepts literals
1963  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1964 
1965  if (ApplyModifiers) {
1966  assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1967  const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1968  Val = applyInputFPModifiers(Val, Size);
1969  }
1970 
1971  APInt Literal(64, Val);
1972  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1973 
1974  if (Imm.IsFPImm) { // We got fp literal token
1975  switch (OpTy) {
1976  case AMDGPU::OPERAND_REG_IMM_INT64:
1977  case AMDGPU::OPERAND_REG_IMM_FP64:
1978  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1979  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1980  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1981  if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1982  AsmParser->hasInv2PiInlineImm())) {
1983  Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1984  setImmKindConst();
1985  return;
1986  }
1987 
1988  // Non-inlineable
1989  if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1990  // For fp operands we check if low 32 bits are zeros
1991  if (Literal.getLoBits(32) != 0) {
1992  const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1993  "Can't encode literal as exact 64-bit floating-point operand. "
1994  "Low 32-bits will be set to zero");
1995  }
1996 
1997  Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1998  setImmKindLiteral();
1999  return;
2000  }
2001 
2002  // We don't allow fp literals in 64-bit integer instructions. It is
2003  // unclear how we should encode them. This case should be checked earlier
2004  // in predicate methods (isLiteralImm())
2005  llvm_unreachable("fp literal in 64-bit integer instruction.");
2006 
2029  bool lost;
2030  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2031  // Convert literal to single precision
2032  FPLiteral.convert(*getOpFltSemantics(OpTy),
2033  APFloat::rmNearestTiesToEven, &lost);
2034  // We allow precision loss but not overflow or underflow. This should be
2035  // checked earlier in isLiteralImm()
2036 
2037  uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2038  Inst.addOperand(MCOperand::createImm(ImmVal));
2039  setImmKindLiteral();
2040  return;
2041  }
2042  default:
2043  llvm_unreachable("invalid operand size");
2044  }
2045 
2046  return;
2047  }
2048 
2049  // We got int literal token.
2050  // Only sign extend inline immediates.
2051  switch (OpTy) {
2064  if (isSafeTruncation(Val, 32) &&
2065  AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2066  AsmParser->hasInv2PiInlineImm())) {
2067  Inst.addOperand(MCOperand::createImm(Val));
2068  setImmKindConst();
2069  return;
2070  }
2071 
2072  Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2073  setImmKindLiteral();
2074  return;
2075 
2076  case AMDGPU::OPERAND_REG_IMM_INT64:
2077  case AMDGPU::OPERAND_REG_IMM_FP64:
2078  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2079  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2080  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2081  if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2082  Inst.addOperand(MCOperand::createImm(Val));
2083  setImmKindConst();
2084  return;
2085  }
2086 
2087  Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2088  setImmKindLiteral();
2089  return;
2090 
2097  if (isSafeTruncation(Val, 16) &&
2098  AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2099  AsmParser->hasInv2PiInlineImm())) {
2100  Inst.addOperand(MCOperand::createImm(Val));
2101  setImmKindConst();
2102  return;
2103  }
2104 
2105  Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2106  setImmKindLiteral();
2107  return;
2108 
2113  assert(isSafeTruncation(Val, 16));
2114  assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2115  AsmParser->hasInv2PiInlineImm()));
2116 
2117  Inst.addOperand(MCOperand::createImm(Val));
2118  return;
2119  }
2120  default:
2121  llvm_unreachable("invalid operand size");
2122  }
2123 }
2124 
2125 template <unsigned Bitwidth>
2126 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2127  APInt Literal(64, Imm.Val);
2128  setImmKindNone();
2129 
2130  if (!Imm.IsFPImm) {
2131  // We got int literal token.
2132  Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2133  return;
2134  }
2135 
2136  bool Lost;
2137  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2138  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2139  APFloat::rmNearestTiesToEven, &Lost);
2140  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2141 }
2142 
2143 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2144  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2145 }
2146 
2147 static bool isInlineValue(unsigned Reg) {
2148  switch (Reg) {
2149  case AMDGPU::SRC_SHARED_BASE:
2150  case AMDGPU::SRC_SHARED_LIMIT:
2151  case AMDGPU::SRC_PRIVATE_BASE:
2152  case AMDGPU::SRC_PRIVATE_LIMIT:
2153  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2154  return true;
2155  case AMDGPU::SRC_VCCZ:
2156  case AMDGPU::SRC_EXECZ:
2157  case AMDGPU::SRC_SCC:
2158  return true;
2159  case AMDGPU::SGPR_NULL:
2160  return true;
2161  default:
2162  return false;
2163  }
2164 }
2165 
2166 bool AMDGPUOperand::isInlineValue() const {
2167  return isRegKind() && ::isInlineValue(getReg());
2168 }
2169 
2170 //===----------------------------------------------------------------------===//
2171 // AsmParser
2172 //===----------------------------------------------------------------------===//
2173 
2174 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2175  if (Is == IS_VGPR) {
2176  switch (RegWidth) {
2177  default: return -1;
2178  case 1: return AMDGPU::VGPR_32RegClassID;
2179  case 2: return AMDGPU::VReg_64RegClassID;
2180  case 3: return AMDGPU::VReg_96RegClassID;
2181  case 4: return AMDGPU::VReg_128RegClassID;
2182  case 5: return AMDGPU::VReg_160RegClassID;
2183  case 6: return AMDGPU::VReg_192RegClassID;
2184  case 8: return AMDGPU::VReg_256RegClassID;
2185  case 16: return AMDGPU::VReg_512RegClassID;
2186  case 32: return AMDGPU::VReg_1024RegClassID;
2187  }
2188  } else if (Is == IS_TTMP) {
2189  switch (RegWidth) {
2190  default: return -1;
2191  case 1: return AMDGPU::TTMP_32RegClassID;
2192  case 2: return AMDGPU::TTMP_64RegClassID;
2193  case 4: return AMDGPU::TTMP_128RegClassID;
2194  case 8: return AMDGPU::TTMP_256RegClassID;
2195  case 16: return AMDGPU::TTMP_512RegClassID;
2196  }
2197  } else if (Is == IS_SGPR) {
2198  switch (RegWidth) {
2199  default: return -1;
2200  case 1: return AMDGPU::SGPR_32RegClassID;
2201  case 2: return AMDGPU::SGPR_64RegClassID;
2202  case 3: return AMDGPU::SGPR_96RegClassID;
2203  case 4: return AMDGPU::SGPR_128RegClassID;
2204  case 5: return AMDGPU::SGPR_160RegClassID;
2205  case 6: return AMDGPU::SGPR_192RegClassID;
2206  case 8: return AMDGPU::SGPR_256RegClassID;
2207  case 16: return AMDGPU::SGPR_512RegClassID;
2208  }
2209  } else if (Is == IS_AGPR) {
2210  switch (RegWidth) {
2211  default: return -1;
2212  case 1: return AMDGPU::AGPR_32RegClassID;
2213  case 2: return AMDGPU::AReg_64RegClassID;
2214  case 3: return AMDGPU::AReg_96RegClassID;
2215  case 4: return AMDGPU::AReg_128RegClassID;
2216  case 5: return AMDGPU::AReg_160RegClassID;
2217  case 6: return AMDGPU::AReg_192RegClassID;
2218  case 8: return AMDGPU::AReg_256RegClassID;
2219  case 16: return AMDGPU::AReg_512RegClassID;
2220  case 32: return AMDGPU::AReg_1024RegClassID;
2221  }
2222  }
2223  return -1;
2224 }
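// Illustrative mapping (added for clarity, not part of the original source):
//   getRegClass(IS_VGPR, 4)  -> AMDGPU::VReg_128RegClassID
//   getRegClass(IS_SGPR, 16) -> AMDGPU::SGPR_512RegClassID
//   getRegClass(IS_VGPR, 7)  -> -1 (no register class exists for that width above)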
2225 
2226 static unsigned getSpecialRegForName(StringRef RegName) {
2227  return StringSwitch<unsigned>(RegName)
2228  .Case("exec", AMDGPU::EXEC)
2229  .Case("vcc", AMDGPU::VCC)
2230  .Case("flat_scratch", AMDGPU::FLAT_SCR)
2231  .Case("xnack_mask", AMDGPU::XNACK_MASK)
2232  .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2233  .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2234  .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2235  .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2236  .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2237  .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2238  .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2239  .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2240  .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2241  .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2242  .Case("lds_direct", AMDGPU::LDS_DIRECT)
2243  .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2244  .Case("m0", AMDGPU::M0)
2245  .Case("vccz", AMDGPU::SRC_VCCZ)
2246  .Case("src_vccz", AMDGPU::SRC_VCCZ)
2247  .Case("execz", AMDGPU::SRC_EXECZ)
2248  .Case("src_execz", AMDGPU::SRC_EXECZ)
2249  .Case("scc", AMDGPU::SRC_SCC)
2250  .Case("src_scc", AMDGPU::SRC_SCC)
2251  .Case("tba", AMDGPU::TBA)
2252  .Case("tma", AMDGPU::TMA)
2253  .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2254  .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2255  .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2256  .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2257  .Case("vcc_lo", AMDGPU::VCC_LO)
2258  .Case("vcc_hi", AMDGPU::VCC_HI)
2259  .Case("exec_lo", AMDGPU::EXEC_LO)
2260  .Case("exec_hi", AMDGPU::EXEC_HI)
2261  .Case("tma_lo", AMDGPU::TMA_LO)
2262  .Case("tma_hi", AMDGPU::TMA_HI)
2263  .Case("tba_lo", AMDGPU::TBA_LO)
2264  .Case("tba_hi", AMDGPU::TBA_HI)
2265  .Case("pc", AMDGPU::PC_REG)
2266  .Case("null", AMDGPU::SGPR_NULL)
2267  .Default(AMDGPU::NoRegister);
2268 }
2269 
2270 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2271  SMLoc &EndLoc, bool RestoreOnFailure) {
2272  auto R = parseRegister();
2273  if (!R) return true;
2274  assert(R->isReg());
2275  RegNo = R->getReg();
2276  StartLoc = R->getStartLoc();
2277  EndLoc = R->getEndLoc();
2278  return false;
2279 }
2280 
2281 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2282  SMLoc &EndLoc) {
2283  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2284 }
2285 
2286 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2287  SMLoc &StartLoc,
2288  SMLoc &EndLoc) {
2289  bool Result =
2290  ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2291  bool PendingErrors = getParser().hasPendingError();
2292  getParser().clearPendingErrors();
2293  if (PendingErrors)
2294  return MatchOperand_ParseFail;
2295  if (Result)
2296  return MatchOperand_NoMatch;
2297  return MatchOperand_Success;
2298 }
2299 
2300 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2301  RegisterKind RegKind, unsigned Reg1,
2302  SMLoc Loc) {
2303  switch (RegKind) {
2304  case IS_SPECIAL:
2305  if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2306  Reg = AMDGPU::EXEC;
2307  RegWidth = 2;
2308  return true;
2309  }
2310  if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2311  Reg = AMDGPU::FLAT_SCR;
2312  RegWidth = 2;
2313  return true;
2314  }
2315  if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2316  Reg = AMDGPU::XNACK_MASK;
2317  RegWidth = 2;
2318  return true;
2319  }
2320  if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2321  Reg = AMDGPU::VCC;
2322  RegWidth = 2;
2323  return true;
2324  }
2325  if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2326  Reg = AMDGPU::TBA;
2327  RegWidth = 2;
2328  return true;
2329  }
2330  if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2331  Reg = AMDGPU::TMA;
2332  RegWidth = 2;
2333  return true;
2334  }
2335  Error(Loc, "register does not fit in the list");
2336  return false;
2337  case IS_VGPR:
2338  case IS_SGPR:
2339  case IS_AGPR:
2340  case IS_TTMP:
2341  if (Reg1 != Reg + RegWidth) {
2342  Error(Loc, "registers in a list must have consecutive indices");
2343  return false;
2344  }
2345  RegWidth++;
2346  return true;
2347  default:
2348  llvm_unreachable("unexpected register kind");
2349  }
2350 }
2351 
2352 struct RegInfo {
2353  StringLiteral Name;
2354  RegisterKind Kind;
2355 };
2356 
2357 static constexpr RegInfo RegularRegisters[] = {
2358  {{"v"}, IS_VGPR},
2359  {{"s"}, IS_SGPR},
2360  {{"ttmp"}, IS_TTMP},
2361  {{"acc"}, IS_AGPR},
2362  {{"a"}, IS_AGPR},
2363 };
2364 
2365 static bool isRegularReg(RegisterKind Kind) {
2366  return Kind == IS_VGPR ||
2367  Kind == IS_SGPR ||
2368  Kind == IS_TTMP ||
2369  Kind == IS_AGPR;
2370 }
2371 
2372 static const RegInfo* getRegularRegInfo(StringRef Str) {
2373  for (const RegInfo &Reg : RegularRegisters)
2374  if (Str.startswith(Reg.Name))
2375  return &Reg;
2376  return nullptr;
2377 }
2378 
2379 static bool getRegNum(StringRef Str, unsigned& Num) {
2380  return !Str.getAsInteger(10, Num);
2381 }
2382 
2383 bool
2384 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2385  const AsmToken &NextToken) const {
2386 
2387  // A list of consecutive registers: [s0,s1,s2,s3]
2388  if (Token.is(AsmToken::LBrac))
2389  return true;
2390 
2391  if (!Token.is(AsmToken::Identifier))
2392  return false;
2393 
2394  // A single register like s0 or a range of registers like s[0:1]
2395 
2396  StringRef Str = Token.getString();
2397  const RegInfo *Reg = getRegularRegInfo(Str);
2398  if (Reg) {
2399  StringRef RegName = Reg->Name;
2400  StringRef RegSuffix = Str.substr(RegName.size());
2401  if (!RegSuffix.empty()) {
2402  unsigned Num;
2403  // A single register with an index: rXX
2404  if (getRegNum(RegSuffix, Num))
2405  return true;
2406  } else {
2407  // A range of registers: r[XX:YY].
2408  if (NextToken.is(AsmToken::LBrac))
2409  return true;
2410  }
2411  }
2412 
2413  return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2414 }
2415 
2416 bool
2417 AMDGPUAsmParser::isRegister()
2418 {
2419  return isRegister(getToken(), peekToken());
2420 }
2421 
2422 unsigned
2423 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2424  unsigned RegNum,
2425  unsigned RegWidth,
2426  SMLoc Loc) {
2427 
2428  assert(isRegularReg(RegKind));
2429 
2430  unsigned AlignSize = 1;
2431  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2432  // SGPR and TTMP registers must be aligned.
2433  // Max required alignment is 4 dwords.
2434  AlignSize = std::min(RegWidth, 4u);
2435  }
2436 
2437  if (RegNum % AlignSize != 0) {
2438  Error(Loc, "invalid register alignment");
2439  return AMDGPU::NoRegister;
2440  }
2441 
2442  unsigned RegIdx = RegNum / AlignSize;
2443  int RCID = getRegClass(RegKind, RegWidth);
2444  if (RCID == -1) {
2445  Error(Loc, "invalid or unsupported register size");
2446  return AMDGPU::NoRegister;
2447  }
2448 
2449  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2450  const MCRegisterClass RC = TRI->getRegClass(RCID);
2451  if (RegIdx >= RC.getNumRegs()) {
2452  Error(Loc, "register index is out of range");
2453  return AMDGPU::NoRegister;
2454  }
2455 
2456  return RC.getRegister(RegIdx);
2457 }
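// Illustrative examples (added for clarity, not part of the original source):
//   s[0:1] -> RegWidth = 2, AlignSize = 2, RegNum = 0 -> accepted (SGPR_64 group)
//   s[1:2] -> RegWidth = 2, AlignSize = 2, RegNum = 1 -> "invalid register alignment"
//   v[1:2] -> VGPRs keep AlignSize = 1 here, so an odd starting index is accepted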
2458 
2459 bool
2460 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2461  int64_t RegLo, RegHi;
2462  if (!skipToken(AsmToken::LBrac, "missing register index"))
2463  return false;
2464 
2465  SMLoc FirstIdxLoc = getLoc();
2466  SMLoc SecondIdxLoc;
2467 
2468  if (!parseExpr(RegLo))
2469  return false;
2470 
2471  if (trySkipToken(AsmToken::Colon)) {
2472  SecondIdxLoc = getLoc();
2473  if (!parseExpr(RegHi))
2474  return false;
2475  } else {
2476  RegHi = RegLo;
2477  }
2478 
2479  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2480  return false;
2481 
2482  if (!isUInt<32>(RegLo)) {
2483  Error(FirstIdxLoc, "invalid register index");
2484  return false;
2485  }
2486 
2487  if (!isUInt<32>(RegHi)) {
2488  Error(SecondIdxLoc, "invalid register index");
2489  return false;
2490  }
2491 
2492  if (RegLo > RegHi) {
2493  Error(FirstIdxLoc, "first register index should not exceed second index");
2494  return false;
2495  }
2496 
2497  Num = static_cast<unsigned>(RegLo);
2498  Width = (RegHi - RegLo) + 1;
2499  return true;
2500 }
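// Illustrative ranges (added for clarity, not part of the original source):
//   "[4:7]" -> Num = 4, Width = 4
//   "[5]"   -> Num = 5, Width = 1 (the ":YY" part is optional)
//   "[7:4]" -> rejected: "first register index should not exceed second index"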
2501 
2502 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2503  unsigned &RegNum, unsigned &RegWidth,
2504  SmallVectorImpl<AsmToken> &Tokens) {
2505  assert(isToken(AsmToken::Identifier));
2506  unsigned Reg = getSpecialRegForName(getTokenStr());
2507  if (Reg) {
2508  RegNum = 0;
2509  RegWidth = 1;
2510  RegKind = IS_SPECIAL;
2511  Tokens.push_back(getToken());
2512  lex(); // skip register name
2513  }
2514  return Reg;
2515 }
2516 
2517 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2518  unsigned &RegNum, unsigned &RegWidth,
2519  SmallVectorImpl<AsmToken> &Tokens) {
2520  assert(isToken(AsmToken::Identifier));
2521  StringRef RegName = getTokenStr();
2522  auto Loc = getLoc();
2523 
2524  const RegInfo *RI = getRegularRegInfo(RegName);
2525  if (!RI) {
2526  Error(Loc, "invalid register name");
2527  return AMDGPU::NoRegister;
2528  }
2529 
2530  Tokens.push_back(getToken());
2531  lex(); // skip register name
2532 
2533  RegKind = RI->Kind;
2534  StringRef RegSuffix = RegName.substr(RI->Name.size());
2535  if (!RegSuffix.empty()) {
2536  // Single 32-bit register: vXX.
2537  if (!getRegNum(RegSuffix, RegNum)) {
2538  Error(Loc, "invalid register index");
2539  return AMDGPU::NoRegister;
2540  }
2541  RegWidth = 1;
2542  } else {
2543  // Range of registers: v[XX:YY]. ":YY" is optional.
2544  if (!ParseRegRange(RegNum, RegWidth))
2545  return AMDGPU::NoRegister;
2546  }
2547 
2548  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2549 }
2550 
2551 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2552  unsigned &RegWidth,
2553  SmallVectorImpl<AsmToken> &Tokens) {
2554  unsigned Reg = AMDGPU::NoRegister;
2555  auto ListLoc = getLoc();
2556 
2557  if (!skipToken(AsmToken::LBrac,
2558  "expected a register or a list of registers")) {
2559  return AMDGPU::NoRegister;
2560  }
2561 
2562  // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2563 
2564  auto Loc = getLoc();
2565  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2566  return AMDGPU::NoRegister;
2567  if (RegWidth != 1) {
2568  Error(Loc, "expected a single 32-bit register");
2569  return AMDGPU::NoRegister;
2570  }
2571 
2572  for (; trySkipToken(AsmToken::Comma); ) {
2573  RegisterKind NextRegKind;
2574  unsigned NextReg, NextRegNum, NextRegWidth;
2575  Loc = getLoc();
2576 
2577  if (!ParseAMDGPURegister(NextRegKind, NextReg,
2578  NextRegNum, NextRegWidth,
2579  Tokens)) {
2580  return AMDGPU::NoRegister;
2581  }
2582  if (NextRegWidth != 1) {
2583  Error(Loc, "expected a single 32-bit register");
2584  return AMDGPU::NoRegister;
2585  }
2586  if (NextRegKind != RegKind) {
2587  Error(Loc, "registers in a list must be of the same kind");
2588  return AMDGPU::NoRegister;
2589  }
2590  if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2591  return AMDGPU::NoRegister;
2592  }
2593 
2594  if (!skipToken(AsmToken::RBrac,
2595  "expected a comma or a closing square bracket")) {
2596  return AMDGPU::NoRegister;
2597  }
2598 
2599  if (isRegularReg(RegKind))
2600  Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2601 
2602  return Reg;
2603 }
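// Illustrative lists (added for clarity, not part of the original source):
//   [s0,s1,s2,s3] -> IS_SGPR, RegNum = 0, RegWidth = 4 (same group as s[0:3])
//   [s0,s2]       -> rejected: "registers in a list must have consecutive indices"
//   [s0,v1]       -> rejected: "registers in a list must be of the same kind"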
2604 
2605 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2606  unsigned &RegNum, unsigned &RegWidth,
2607  SmallVectorImpl<AsmToken> &Tokens) {
2608  auto Loc = getLoc();
2609  Reg = AMDGPU::NoRegister;
2610 
2611  if (isToken(AsmToken::Identifier)) {
2612  Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2613  if (Reg == AMDGPU::NoRegister)
2614  Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2615  } else {
2616  Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2617  }
2618 
2619  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2620  if (Reg == AMDGPU::NoRegister) {
2621  assert(Parser.hasPendingError());
2622  return false;
2623  }
2624 
2625  if (!subtargetHasRegister(*TRI, Reg)) {
2626  if (Reg == AMDGPU::SGPR_NULL) {
2627  Error(Loc, "'null' operand is not supported on this GPU");
2628  } else {
2629  Error(Loc, "register not available on this GPU");
2630  }
2631  return false;
2632  }
2633 
2634  return true;
2635 }
2636 
2637 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2638  unsigned &RegNum, unsigned &RegWidth,
2639  bool RestoreOnFailure /*=false*/) {
2640  Reg = AMDGPU::NoRegister;
2641 
2642  SmallVector<AsmToken, 1> Tokens;
2643  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2644  if (RestoreOnFailure) {
2645  while (!Tokens.empty()) {
2646  getLexer().UnLex(Tokens.pop_back_val());
2647  }
2648  }
2649  return true;
2650  }
2651  return false;
2652 }
2653 
2654 Optional<StringRef>
2655 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2656  switch (RegKind) {
2657  case IS_VGPR:
2658  return StringRef(".amdgcn.next_free_vgpr");
2659  case IS_SGPR:
2660  return StringRef(".amdgcn.next_free_sgpr");
2661  default:
2662  return None;
2663  }
2664 }
2665 
2666 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2667  auto SymbolName = getGprCountSymbolName(RegKind);
2668  assert(SymbolName && "initializing invalid register kind");
2669  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2670  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2671 }
2672 
2673 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2674  unsigned DwordRegIndex,
2675  unsigned RegWidth) {
2676  // Symbols are only defined for GCN targets
2677  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2678  return true;
2679 
2680  auto SymbolName = getGprCountSymbolName(RegKind);
2681  if (!SymbolName)
2682  return true;
2683  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2684 
2685  int64_t NewMax = DwordRegIndex + RegWidth - 1;
2686  int64_t OldCount;
2687 
2688  if (!Sym->isVariable())
2689  return !Error(getLoc(),
2690  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2691  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2692  return !Error(
2693  getLoc(),
2694  ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2695 
2696  if (OldCount <= NewMax)
2697  Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2698 
2699  return true;
2700 }
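// Illustrative update (added for clarity, not part of the original source):
// with a code object v3+ target, parsing v[4:7] reaches
// updateGprCountSymbols(IS_VGPR, 4, 4), so NewMax = 7 and .amdgcn.next_free_vgpr
// is raised to 8 unless it already holds a larger value.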
2701 
2702 std::unique_ptr<AMDGPUOperand>
2703 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2704  const auto &Tok = getToken();
2705  SMLoc StartLoc = Tok.getLoc();
2706  SMLoc EndLoc = Tok.getEndLoc();
2707  RegisterKind RegKind;
2708  unsigned Reg, RegNum, RegWidth;
2709 
2710  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2711  return nullptr;
2712  }
2713  if (isHsaAbiVersion3Or4(&getSTI())) {
2714  if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2715  return nullptr;
2716  } else
2717  KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2718  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2719 }
2720 
2721 OperandMatchResultTy
2722 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2723  // TODO: add syntactic sugar for 1/(2*PI)
2724 
2725  assert(!isRegister());
2726  assert(!isModifier());
2727 
2728  const auto& Tok = getToken();
2729  const auto& NextTok = peekToken();
2730  bool IsReal = Tok.is(AsmToken::Real);
2731  SMLoc S = getLoc();
2732  bool Negate = false;
2733 
2734  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2735  lex();
2736  IsReal = true;
2737  Negate = true;
2738  }
2739 
2740  if (IsReal) {
2741  // Floating-point expressions are not supported.
2742  // Can only allow floating-point literals with an
2743  // optional sign.
2744 
2745  StringRef Num = getTokenStr();
2746  lex();
2747 
2748  APFloat RealVal(APFloat::IEEEdouble());
2749  auto roundMode = APFloat::rmNearestTiesToEven;
2750  if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2751  return MatchOperand_ParseFail;
2752  }
2753  if (Negate)
2754  RealVal.changeSign();
2755 
2756  Operands.push_back(
2757  AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2758  AMDGPUOperand::ImmTyNone, true));
2759 
2760  return MatchOperand_Success;
2761 
2762  } else {
2763  int64_t IntVal;
2764  const MCExpr *Expr;
2765  SMLoc S = getLoc();
2766 
2767  if (HasSP3AbsModifier) {
2768  // This is a workaround for handling expressions
2769  // as arguments of SP3 'abs' modifier, for example:
2770  // |1.0|
2771  // |-1|
2772  // |1+x|
2773  // This syntax is not compatible with syntax of standard
2774  // MC expressions (due to the trailing '|').
2775  SMLoc EndLoc;
2776  if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2777  return MatchOperand_ParseFail;
2778  } else {
2779  if (Parser.parseExpression(Expr))
2780  return MatchOperand_ParseFail;
2781  }
2782 
2783  if (Expr->evaluateAsAbsolute(IntVal)) {
2784  Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2785  } else {
2786  Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2787  }
2788 
2789  return MatchOperand_Success;
2790  }
2791 
2792  return MatchOperand_NoMatch;
2793 }
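// Illustrative inputs (added for clarity, not part of the original source;
// "sym" is a hypothetical symbol):
//   -1.5   -> fp literal; the leading '-' is folded into the value
//   0x1234 -> absolute expression, becomes an integer immediate operand
//   sym+1  -> not absolute, kept as an MCExpr operand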
2794 
2795 OperandMatchResultTy
2796 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2797  if (!isRegister())
2798  return MatchOperand_NoMatch;
2799 
2800  if (auto R = parseRegister()) {
2801  assert(R->isReg());
2802  Operands.push_back(std::move(R));
2803  return MatchOperand_Success;
2804  }
2805  return MatchOperand_ParseFail;
2806 }
2807 
2808 OperandMatchResultTy
2809 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2810  auto res = parseReg(Operands);
2811  if (res != MatchOperand_NoMatch) {
2812  return res;
2813  } else if (isModifier()) {
2814  return MatchOperand_NoMatch;
2815  } else {
2816  return parseImm(Operands, HasSP3AbsMod);
2817  }
2818 }
2819 
2820 bool
2821 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2822  if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2823  const auto &str = Token.getString();
2824  return str == "abs" || str == "neg" || str == "sext";
2825  }
2826  return false;
2827 }
2828 
2829 bool
2830 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2831  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2832 }
2833 
2834 bool
2835 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2836  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2837 }
2838 
2839 bool
2840 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2841  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2842 }
2843 
2844 // Check if this is an operand modifier or an opcode modifier
2845 // which may look like an expression but it is not. We should
2846 // avoid parsing these modifiers as expressions. Currently
2847 // recognized sequences are:
2848 // |...|
2849 // abs(...)
2850 // neg(...)
2851 // sext(...)
2852 // -reg
2853 // -|...|
2854 // -abs(...)
2855 // name:...
2856 // Note that simple opcode modifiers like 'gds' may be parsed as
2857 // expressions; this is a special case. See getExpressionAsToken.
2858 //
2859 bool
2860 AMDGPUAsmParser::isModifier() {
2861 
2862  AsmToken Tok = getToken();
2863  AsmToken NextToken[2];
2864  peekTokens(NextToken);
2865 
2866  return isOperandModifier(Tok, NextToken[0]) ||
2867  (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2868  isOpcodeModifierWithVal(Tok, NextToken[0]);
2869 }
2870 
2871 // Check if the current token is an SP3 'neg' modifier.
2872 // Currently this modifier is allowed in the following context:
2873 //
2874 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2875 // 2. Before an 'abs' modifier: -abs(...)
2876 // 3. Before an SP3 'abs' modifier: -|...|
2877 //
2878 // In all other cases "-" is handled as a part
2879 // of an expression that follows the sign.
2880 //
2881 // Note: When "-" is followed by an integer literal,
2882 // this is interpreted as integer negation rather
2883 // than a floating-point NEG modifier applied to N.
2884 // Besides being counter-intuitive, such use of a floating-point
2885 // NEG modifier would have resulted in different meaning
2886 // of integer literals used with VOP1/2/C and VOP3,
2887 // for example:
2888 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2889 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2890 // Negative fp literals with preceding "-" are
2891 // handled likewise for uniformity.
2892 //
2893 bool
2894 AMDGPUAsmParser::parseSP3NegModifier() {
2895 
2896  AsmToken NextToken[2];
2897  peekTokens(NextToken);
2898 
2899  if (isToken(AsmToken::Minus) &&
2900  (isRegister(NextToken[0], NextToken[1]) ||
2901  NextToken[0].is(AsmToken::Pipe) ||
2902  isId(NextToken[0], "abs"))) {
2903  lex();
2904  return true;
2905  }
2906 
2907  return false;
2908 }
2909 
2910 OperandMatchResultTy
2911 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2912  bool AllowImm) {
2913  bool Neg, SP3Neg;
2914  bool Abs, SP3Abs;
2915  SMLoc Loc;
2916 
2917  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2918  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2919  Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2920  return MatchOperand_ParseFail;
2921  }
2922 
2923  SP3Neg = parseSP3NegModifier();
2924 
2925  Loc = getLoc();
2926  Neg = trySkipId("neg");
2927  if (Neg && SP3Neg) {
2928  Error(Loc, "expected register or immediate");
2929  return MatchOperand_ParseFail;
2930  }
2931  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2932  return MatchOperand_ParseFail;
2933 
2934  Abs = trySkipId("abs");
2935  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2936  return MatchOperand_ParseFail;
2937 
2938  Loc = getLoc();
2939  SP3Abs = trySkipToken(AsmToken::Pipe);
2940  if (Abs && SP3Abs) {
2941  Error(Loc, "expected register or immediate");
2942  return MatchOperand_ParseFail;
2943  }
2944 
2945  OperandMatchResultTy Res;
2946  if (AllowImm) {
2947  Res = parseRegOrImm(Operands, SP3Abs);
2948  } else {
2949  Res = parseReg(Operands);
2950  }
2951  if (Res != MatchOperand_Success) {
2952  return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2953  }
2954 
2955  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2956  return MatchOperand_ParseFail;
2957  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2958  return MatchOperand_ParseFail;
2959  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2960  return MatchOperand_ParseFail;
2961 
2962  AMDGPUOperand::Modifiers Mods;
2963  Mods.Abs = Abs || SP3Abs;
2964  Mods.Neg = Neg || SP3Neg;
2965 
2966  if (Mods.hasFPModifiers()) {
2967  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2968  if (Op.isExpr()) {
2969  Error(Op.getStartLoc(), "expected an absolute expression");
2970  return MatchOperand_ParseFail;
2971  }
2972  Op.setModifiers(Mods);
2973  }
2974  return MatchOperand_Success;
2975 }
2976 
2977 OperandMatchResultTy
2978 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2979  bool AllowImm) {
2980  bool Sext = trySkipId("sext");
2981  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2982  return MatchOperand_ParseFail;
2983 
2984  OperandMatchResultTy Res;
2985  if (AllowImm) {
2986  Res = parseRegOrImm(Operands);
2987  } else {
2988  Res = parseReg(Operands);
2989  }
2990  if (Res != MatchOperand_Success) {
2991  return Sext? MatchOperand_ParseFail : Res;
2992  }
2993 
2994  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2995  return MatchOperand_ParseFail;
2996 
2997  AMDGPUOperand::Modifiers Mods;
2998  Mods.Sext = Sext;
2999 
3000  if (Mods.hasIntModifiers()) {
3001  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3002  if (Op.isExpr()) {
3003  Error(Op.getStartLoc(), "expected an absolute expression");
3004  return MatchOperand_ParseFail;
3005  }
3006  Op.setModifiers(Mods);
3007  }
3008 
3009  return MatchOperand_Success;
3010 }
3011 
3012 OperandMatchResultTy
3013 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3014  return parseRegOrImmWithFPInputMods(Operands, false);
3015 }
3016 
3017 OperandMatchResultTy
3018 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3019  return parseRegOrImmWithIntInputMods(Operands, false);
3020 }
3021 
3022 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3023  auto Loc = getLoc();
3024  if (trySkipId("off")) {
3025  Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3026  AMDGPUOperand::ImmTyOff, false));
3027  return MatchOperand_Success;
3028  }
3029 
3030  if (!isRegister())
3031  return MatchOperand_NoMatch;
3032 
3033  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3034  if (Reg) {
3035  Operands.push_back(std::move(Reg));
3036  return MatchOperand_Success;
3037  }
3038 
3039  return MatchOperand_ParseFail;
3040 
3041 }
3042 
3043 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3044  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3045 
3046  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3047  (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3048  (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3049  (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3050  return Match_InvalidOperand;
3051 
3052  if ((TSFlags & SIInstrFlags::VOP3) &&
3053  (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3054  getForcedEncodingSize() != 64)
3055  return Match_PreferE32;
3056 
3057  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3058  Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3059  // v_mac_f32/16 allow only dst_sel == DWORD;
3060  auto OpNum =
3061  AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3062  const auto &Op = Inst.getOperand(OpNum);
3063  if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3064  return Match_InvalidOperand;
3065  }
3066  }
3067 
3068  return Match_Success;
3069 }
3070 
3071 static ArrayRef<unsigned> getAllVariants() {
3072  static const unsigned Variants[] = {
3073  AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3074  AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3075  };
3076 
3077  return makeArrayRef(Variants);
3078 }
3079 
3080 // What asm variants we should check
3081 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3082  if (getForcedEncodingSize() == 32) {
3083  static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3084  return makeArrayRef(Variants);
3085  }
3086 
3087  if (isForcedVOP3()) {
3088  static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3089  return makeArrayRef(Variants);
3090  }
3091 
3092  if (isForcedSDWA()) {
3093  static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3094  AMDGPUAsmVariants::SDWA9};
3095  return makeArrayRef(Variants);
3096  }
3097 
3098  if (isForcedDPP()) {
3099  static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3100  return makeArrayRef(Variants);
3101  }
3102 
3103  return getAllVariants();
3104 }
3105 
3106 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3107  if (getForcedEncodingSize() == 32)
3108  return "e32";
3109 
3110  if (isForcedVOP3())
3111  return "e64";
3112 
3113  if (isForcedSDWA())
3114  return "sdwa";
3115 
3116  if (isForcedDPP())
3117  return "dpp";
3118 
3119  return "";
3120 }
3121 
3122 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3123  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3124  const unsigned Num = Desc.getNumImplicitUses();
3125  for (unsigned i = 0; i < Num; ++i) {
3126  unsigned Reg = Desc.ImplicitUses[i];
3127  switch (Reg) {
3128  case AMDGPU::FLAT_SCR:
3129  case AMDGPU::VCC:
3130  case AMDGPU::VCC_LO:
3131  case AMDGPU::VCC_HI:
3132  case AMDGPU::M0:
3133  return Reg;
3134  default:
3135  break;
3136  }
3137  }
3138  return AMDGPU::NoRegister;
3139 }
3140 
3141 // NB: This code is correct only when used to check constant
3142 // bus limitations because GFX7 supports no f16 inline constants.
3143 // Note that there are no cases when a GFX7 opcode violates
3144 // constant bus limitations due to the use of an f16 constant.
3145 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3146  unsigned OpIdx) const {
3147  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3148 
3149  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3150  return false;
3151  }
3152 
3153  const MCOperand &MO = Inst.getOperand(OpIdx);
3154 
3155  int64_t Val = MO.getImm();
3156  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3157 
3158  switch (OpSize) { // expected operand size
3159  case 8:
3160  return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3161  case 4:
3162  return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3163  case 2: {
3164  const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3168  return AMDGPU::isInlinableIntLiteral(Val);
3169 
3174 
3178  return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3179 
3180  return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3181  }
3182  default:
3183  llvm_unreachable("invalid operand size");
3184  }
3185 }
3186 
3187 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3188  if (!isGFX10Plus())
3189  return 1;
3190 
3191  switch (Opcode) {
3192  // 64-bit shift instructions can use only one scalar value input
3193  case AMDGPU::V_LSHLREV_B64_e64:
3194  case AMDGPU::V_LSHLREV_B64_gfx10:
3195  case AMDGPU::V_LSHRREV_B64_e64:
3196  case AMDGPU::V_LSHRREV_B64_gfx10:
3197  case AMDGPU::V_ASHRREV_I64_e64:
3198  case AMDGPU::V_ASHRREV_I64_gfx10:
3199  case AMDGPU::V_LSHL_B64_e64:
3200  case AMDGPU::V_LSHR_B64_e64:
3201  case AMDGPU::V_ASHR_I64_e64:
3202  return 1;
3203  default:
3204  return 2;
3205  }
3206 }
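// Illustrative consequence (added for clarity, not part of the original source):
// on GFX10+ something like
//   v_add_f32_e64 v0, s1, s2
// stays within the limit of 2 scalar inputs, while the 64-bit shifts listed
// above remain restricted to a single scalar input.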
3207 
3208 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3209  const MCOperand &MO = Inst.getOperand(OpIdx);
3210  if (MO.isImm()) {
3211  return !isInlineConstant(Inst, OpIdx);
3212  } else if (MO.isReg()) {
3213  auto Reg = MO.getReg();
3214  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3215  auto PReg = mc2PseudoReg(Reg);
3216  return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3217  } else {
3218  return true;
3219  }
3220 }
3221 
3222 bool
3223 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3224  const OperandVector &Operands) {
3225  const unsigned Opcode = Inst.getOpcode();
3226  const MCInstrDesc &Desc = MII.get(Opcode);
3227  unsigned LastSGPR = AMDGPU::NoRegister;
3228  unsigned ConstantBusUseCount = 0;
3229  unsigned NumLiterals = 0;
3230  unsigned LiteralSize;
3231 
3232  if (Desc.TSFlags &
3233  (SIInstrFlags::VOPC |
3234  SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3235  SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3236  SIInstrFlags::SDWA)) {
3237  // Check special imm operands (used by madmk, etc)
3238  if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3239  ++ConstantBusUseCount;
3240  }
3241 
3242  SmallDenseSet<unsigned> SGPRsUsed;
3243  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3244  if (SGPRUsed != AMDGPU::NoRegister) {
3245  SGPRsUsed.insert(SGPRUsed);
3246  ++ConstantBusUseCount;
3247  }
3248 
3249  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3250  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3251  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3252 
3253  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3254 
3255  for (int OpIdx : OpIndices) {
3256  if (OpIdx == -1) break;
3257 
3258  const MCOperand &MO = Inst.getOperand(OpIdx);
3259  if (usesConstantBus(Inst, OpIdx)) {
3260  if (MO.isReg()) {
3261  LastSGPR = mc2PseudoReg(MO.getReg());
3262  // Pairs of registers with a partial intersection like these
3263  // s0, s[0:1]
3264  // flat_scratch_lo, flat_scratch
3265  // flat_scratch_lo, flat_scratch_hi
3266  // are theoretically valid but they are disabled anyway.
3267  // Note that this code mimics SIInstrInfo::verifyInstruction
3268  if (!SGPRsUsed.count(LastSGPR)) {
3269  SGPRsUsed.insert(LastSGPR);
3270  ++ConstantBusUseCount;
3271  }
3272  } else { // Expression or a literal
3273 
3274  if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3275  continue; // special operand like VINTERP attr_chan
3276 
3277  // An instruction may use only one literal.
3278  // This has been validated on the previous step.
3279  // See validateVOP3Literal.
3280  // This literal may be used as more than one operand.
3281  // If all these operands are of the same size,
3282  // this literal counts as one scalar value.
3283  // Otherwise it counts as 2 scalar values.
3284  // See "GFX10 Shader Programming", section 3.6.2.3.
3285 
3286  unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3287  if (Size < 4) Size = 4;
3288 
3289  if (NumLiterals == 0) {
3290  NumLiterals = 1;
3291  LiteralSize = Size;
3292  } else if (LiteralSize != Size) {
3293  NumLiterals = 2;
3294  }
3295  }
3296  }
3297  }
3298  }
3299  ConstantBusUseCount += NumLiterals;
3300 
3301  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3302  return true;
3303 
3304  SMLoc LitLoc = getLitLoc(Operands);
3305  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3306  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3307  Error(Loc, "invalid operand (violates constant bus restrictions)");
3308  return false;
3309 }
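// Illustrative example (added for clarity, not part of the original source),
// assuming a pre-GFX10 target where getConstantBusLimit() returns 1:
//   v_add_f32_e64 v0, s1, s2  // two distinct SGPRs -> constant bus violation
//   v_add_f32_e64 v0, s1, s1  // the same SGPR is counted once -> accepted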
3310 
3311 bool
3312 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3313  const OperandVector &Operands) {
3314  const unsigned Opcode = Inst.getOpcode();
3315  const MCInstrDesc &Desc = MII.get(Opcode);
3316 
3317  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3318  if (DstIdx == -1 ||
3319  Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3320  return true;
3321  }
3322 
3323  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3324 
3325  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3326  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3327  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3328 
3329  assert(DstIdx != -1);
3330  const MCOperand &Dst = Inst.getOperand(DstIdx);
3331  assert(Dst.isReg());
3332  const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3333 
3334  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3335 
3336  for (int SrcIdx : SrcIndices) {
3337  if (SrcIdx == -1) break;
3338  const MCOperand &Src = Inst.getOperand(SrcIdx);
3339  if (Src.isReg()) {
3340  const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3341  if (isRegIntersect(DstReg, SrcReg, TRI)) {
3342  Error(getRegLoc(SrcReg, Operands),
3343  "destination must be different than all sources");
3344  return false;
3345  }
3346  }
3347  }
3348 
3349  return true;
3350 }
3351 
3352 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3353 
3354  const unsigned Opc = Inst.getOpcode();
3355  const MCInstrDesc &Desc = MII.get(Opc);
3356 
3357  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3358  int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3359  assert(ClampIdx != -1);
3360  return Inst.getOperand(ClampIdx).getImm() == 0;
3361  }
3362 
3363  return true;
3364 }
3365 
3366 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3367 
3368  const unsigned Opc = Inst.getOpcode();
3369  const MCInstrDesc &Desc = MII.get(Opc);
3370 
3371  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3372  return true;
3373 
3374  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3375  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3376  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3377 
3378  assert(VDataIdx != -1);
3379 
3380  if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3381  return true;
3382 
3383  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3384  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3385  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3386  if (DMask == 0)
3387  DMask = 1;
3388 
3389  unsigned DataSize =
3390  (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3391  if (hasPackedD16()) {
3392  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3393  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3394  DataSize = (DataSize + 1) / 2;
3395  }
3396 
3397  return (VDataSize / 4) == DataSize + TFESize;
3398 }
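// Worked example (added for clarity, not part of the original source):
// dmask = 0b0111 selects 3 components; with tfe set, TFESize = 1, so vdata must
// span 4 dwords (e.g. v[0:3]); with packed d16 the 3 components need only
// (3 + 1) / 2 = 2 dwords.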
3399 
3400 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3401  const unsigned Opc = Inst.getOpcode();
3402  const MCInstrDesc &Desc = MII.get(Opc);
3403 
3404  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3405  return true;
3406 
3408 
3409  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3410  AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3411  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3412  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3413  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3414 
3415  assert(VAddr0Idx != -1);
3416  assert(SrsrcIdx != -1);
3417  assert(SrsrcIdx > VAddr0Idx);
3418 
3419  if (DimIdx == -1)
3420  return true; // intersect_ray
3421 
3422  unsigned Dim = Inst.getOperand(DimIdx).getImm();
3423  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3424  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3425  unsigned VAddrSize =
3426  IsNSA ? SrsrcIdx - VAddr0Idx
3427  : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3428 
3429  unsigned AddrSize = BaseOpcode->NumExtraArgs +
3430  (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3431  (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3432  (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3433  if (!IsNSA) {
3434  if (AddrSize > 8)
3435  AddrSize = 16;
3436  else if (AddrSize > 4)
3437  AddrSize = 8;
3438  }
3439 
3440  return VAddrSize == AddrSize;
3441 }
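// Illustrative rounding (added for clarity, not part of the original source):
// for a non-NSA encoding an address of 5 dwords must be supplied in an 8-dword
// vaddr group, and anything above 8 dwords in a 16-dword group, matching the
// AddrSize adjustment above.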
3442 
3443 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3444 
3445  const unsigned Opc = Inst.getOpcode();
3446  const MCInstrDesc &Desc = MII.get(Opc);
3447 
3448  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3449  return true;
3450  if (!Desc.mayLoad() || !Desc.mayStore())
3451  return true; // Not atomic
3452 
3453  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3454  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3455 
3456  // This is an incomplete check because image_atomic_cmpswap
3457  // may only use 0x3 and 0xf while other atomic operations
3458  // may use 0x1 and 0x3. However these limitations are
3459  // verified when we check that dmask matches dst size.
3460  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3461 }
3462 
3463 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3464 
3465  const unsigned Opc = Inst.getOpcode();
3466  const MCInstrDesc &Desc = MII.get(Opc);
3467 
3468  if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3469  return true;
3470 
3471  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3472  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3473 
3474  // GATHER4 instructions use dmask in a different fashion compared to
3475  // other MIMG instructions. The only useful DMASK values are
3476  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3477  // (red,red,red,red) etc.) The ISA document doesn't mention
3478  // this.
3479  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3480 }
3481 
3482 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3483  const unsigned Opc = Inst.getOpcode();
3484  const MCInstrDesc &Desc = MII.get(Opc);
3485 
3486  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3487  return true;
3488 
3489  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3490  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3491  AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3492 
3493  if (!BaseOpcode->MSAA)
3494  return true;
3495 
3496  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3497  assert(DimIdx != -1);
3498 
3499  unsigned Dim = Inst.getOperand(DimIdx).getImm();
3500  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3501 
3502  return DimInfo->MSAA;
3503 }
3504 
3505 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3506 {
3507  switch (Opcode) {
3508  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3509  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3510  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3511  return true;
3512  default:
3513  return false;
3514  }
3515 }
3516 
3517 // movrels* opcodes should only allow VGPRs as src0.
3518 // This is specified in .td description for vop1/vop3,
3519 // but sdwa is handled differently. See isSDWAOperand.
3520 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3521  const OperandVector &Operands) {
3522 
3523  const unsigned Opc = Inst.getOpcode();
3524  const MCInstrDesc &Desc = MII.get(Opc);
3525 
3526  if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3527  return true;
3528 
3529  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3530  assert(Src0Idx != -1);
3531 
3532  SMLoc ErrLoc;
3533  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3534  if (Src0.isReg()) {
3535  auto Reg = mc2PseudoReg(Src0.getReg());
3536  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3537  if (!isSGPR(Reg, TRI))
3538  return true;
3539  ErrLoc = getRegLoc(Reg, Operands);
3540  } else {
3541  ErrLoc = getConstLoc(Operands);
3542  }
3543 
3544  Error(ErrLoc, "source operand must be a VGPR");
3545  return false;
3546 }
3547 
3548 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3549  const OperandVector &Operands) {
3550 
3551  const unsigned Opc = Inst.getOpcode();
3552 
3553  if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3554  return true;
3555 
3556  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3557  assert(Src0Idx != -1);
3558 
3559  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3560  if (!Src0.isReg())
3561  return true;
3562 
3563  auto Reg = mc2PseudoReg(Src0.getReg());
3564  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3565  if (isSGPR(Reg, TRI)) {
3566  Error(getRegLoc(Reg, Operands),
3567  "source operand must be either a VGPR or an inline constant");
3568  return false;
3569  }
3570 
3571  return true;
3572 }
3573 
3574 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3575  switch (Inst.getOpcode()) {
3576  default:
3577  return true;
3578  case V_DIV_SCALE_F32_gfx6_gfx7:
3579  case V_DIV_SCALE_F32_vi:
3580  case V_DIV_SCALE_F32_gfx10:
3581  case V_DIV_SCALE_F64_gfx6_gfx7:
3582  case V_DIV_SCALE_F64_vi:
3583  case V_DIV_SCALE_F64_gfx10:
3584  break;
3585  }
3586 
3587  // TODO: Check that src0 = src1 or src2.
3588 
3589  for (auto Name : {AMDGPU::OpName::src0_modifiers,
3590  AMDGPU::OpName::src2_modifiers,
3591  AMDGPU::OpName::src2_modifiers}) {
3592  if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3593  .getImm() &
3594  SISrcMods::ABS) {
3595  return false;
3596  }
3597  }
3598 
3599  return true;
3600 }
3601 
3602 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3603 
3604  const unsigned Opc = Inst.getOpcode();
3605  const MCInstrDesc &Desc = MII.get(Opc);
3606 
3607  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3608  return true;
3609 
3610  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3611  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3612  if (isCI() || isSI())
3613  return false;
3614  }
3615 
3616  return true;
3617 }
3618 
3619 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3620  const unsigned Opc = Inst.getOpcode();
3621  const MCInstrDesc &Desc = MII.get(Opc);
3622 
3623  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3624  return true;
3625 
3626  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3627  if (DimIdx < 0)
3628  return true;
3629 
3630  long Imm = Inst.getOperand(DimIdx).getImm();
3631  if (Imm < 0 || Imm >= 8)
3632  return false;
3633 
3634  return true;
3635 }
3636 
3637 static bool IsRevOpcode(const unsigned Opcode)
3638 {
3639  switch (Opcode) {
3640  case AMDGPU::V_SUBREV_F32_e32:
3641  case AMDGPU::V_SUBREV_F32_e64:
3642  case AMDGPU::V_SUBREV_F32_e32_gfx10:
3643  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3644  case AMDGPU::V_SUBREV_F32_e32_vi:
3645  case AMDGPU::V_SUBREV_F32_e64_gfx10:
3646  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3647  case AMDGPU::V_SUBREV_F32_e64_vi:
3648 
3649  case AMDGPU::V_SUBREV_CO_U32_e32:
3650  case AMDGPU::V_SUBREV_CO_U32_e64:
3651  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3652  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3653 
3654  case AMDGPU::V_SUBBREV_U32_e32:
3655  case AMDGPU::V_SUBBREV_U32_e64:
3656  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3657  case AMDGPU::V_SUBBREV_U32_e32_vi:
3658  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3659  case AMDGPU::V_SUBBREV_U32_e64_vi:
3660 
3661  case AMDGPU::V_SUBREV_U32_e32:
3662  case AMDGPU::V_SUBREV_U32_e64:
3663  case AMDGPU::V_SUBREV_U32_e32_gfx9:
3664  case AMDGPU::V_SUBREV_U32_e32_vi:
3665  case AMDGPU::V_SUBREV_U32_e64_gfx9:
3666  case AMDGPU::V_SUBREV_U32_e64_vi:
3667 
3668  case AMDGPU::V_SUBREV_F16_e32:
3669  case AMDGPU::V_SUBREV_F16_e64:
3670  case AMDGPU::V_SUBREV_F16_e32_gfx10:
3671  case AMDGPU::V_SUBREV_F16_e32_vi:
3672  case AMDGPU::V_SUBREV_F16_e64_gfx10:
3673  case AMDGPU::V_SUBREV_F16_e64_vi:
3674 
3675  case AMDGPU::V_SUBREV_U16_e32:
3676  case AMDGPU::V_SUBREV_U16_e64:
3677  case AMDGPU::V_SUBREV_U16_e32_vi:
3678  case AMDGPU::V_SUBREV_U16_e64_vi:
3679 
3680  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3681  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3682  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3683 
3684  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3685  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3686 
3687  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3688  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3689 
3690  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3691  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3692 
3693  case AMDGPU::V_LSHRREV_B32_e32:
3694  case AMDGPU::V_LSHRREV_B32_e64:
3695  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3696  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3697  case AMDGPU::V_LSHRREV_B32_e32_vi:
3698  case AMDGPU::V_LSHRREV_B32_e64_vi:
3699  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3700  case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3701 
3702  case AMDGPU::V_ASHRREV_I32_e32:
3703  case AMDGPU::V_ASHRREV_I32_e64:
3704  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3705  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3706  case AMDGPU::V_ASHRREV_I32_e32_vi:
3707  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3708  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3709  case AMDGPU::V_ASHRREV_I32_e64_vi:
3710 
3711  case AMDGPU::V_LSHLREV_B32_e32:
3712  case AMDGPU::V_LSHLREV_B32_e64:
3713  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3714  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3715  case AMDGPU::V_LSHLREV_B32_e32_vi:
3716  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3717  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3718  case AMDGPU::V_LSHLREV_B32_e64_vi:
3719 
3720  case AMDGPU::V_LSHLREV_B16_e32:
3721  case AMDGPU::V_LSHLREV_B16_e64:
3722  case AMDGPU::V_LSHLREV_B16_e32_vi:
3723  case AMDGPU::V_LSHLREV_B16_e64_vi:
3724  case AMDGPU::V_LSHLREV_B16_gfx10:
3725 
3726  case AMDGPU::V_LSHRREV_B16_e32:
3727  case AMDGPU::V_LSHRREV_B16_e64:
3728  case AMDGPU::V_LSHRREV_B16_e32_vi:
3729  case AMDGPU::V_LSHRREV_B16_e64_vi:
3730  case AMDGPU::V_LSHRREV_B16_gfx10:
3731 
3732  case AMDGPU::V_ASHRREV_I16_e32:
3733  case AMDGPU::V_ASHRREV_I16_e64:
3734  case AMDGPU::V_ASHRREV_I16_e32_vi:
3735  case AMDGPU::V_ASHRREV_I16_e64_vi:
3736  case AMDGPU::V_ASHRREV_I16_gfx10:
3737 
3738  case AMDGPU::V_LSHLREV_B64_e64:
3739  case AMDGPU::V_LSHLREV_B64_gfx10:
3740  case AMDGPU::V_LSHLREV_B64_vi:
3741 
3742  case AMDGPU::V_LSHRREV_B64_e64:
3743  case AMDGPU::V_LSHRREV_B64_gfx10:
3744  case AMDGPU::V_LSHRREV_B64_vi:
3745 
3746  case AMDGPU::V_ASHRREV_I64_e64:
3747  case AMDGPU::V_ASHRREV_I64_gfx10:
3748  case AMDGPU::V_ASHRREV_I64_vi:
3749 
3750  case AMDGPU::V_PK_LSHLREV_B16:
3751  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3752  case AMDGPU::V_PK_LSHLREV_B16_vi:
3753 
3754  case AMDGPU::V_PK_LSHRREV_B16:
3755  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3756  case AMDGPU::V_PK_LSHRREV_B16_vi:
3757  case AMDGPU::V_PK_ASHRREV_I16:
3758  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3759  case AMDGPU::V_PK_ASHRREV_I16_vi:
3760  return true;
3761  default:
3762  return false;
3763  }
3764 }
3765 
3766 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3767 
3768  using namespace SIInstrFlags;
3769  const unsigned Opcode = Inst.getOpcode();
3770  const MCInstrDesc &Desc = MII.get(Opcode);
3771 
3772  // lds_direct register is defined so that it can be used
3773  // with 9-bit operands only. Ignore encodings which do not accept these.
3774  const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3775  if ((Desc.TSFlags & Enc) == 0)
3776  return None;
3777 
3778  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3779  auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3780  if (SrcIdx == -1)
3781  break;
3782  const auto &Src = Inst.getOperand(SrcIdx);
3783  if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3784 
3785  if (isGFX90A())
3786  return StringRef("lds_direct is not supported on this GPU");
3787 
3788  if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3789  return StringRef("lds_direct cannot be used with this instruction");
3790 
3791  if (SrcName != OpName::src0)
3792  return StringRef("lds_direct may be used as src0 only");
3793  }
3794  }
3795 
3796  return None;
3797 }
3798 
3799 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3800  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3801  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3802  if (Op.isFlatOffset())
3803  return Op.getStartLoc();
3804  }
3805  return getLoc();
3806 }
3807 
3808 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3809  const OperandVector &Operands) {
3810  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3811  if ((TSFlags & SIInstrFlags::FLAT) == 0)
3812  return true;
3813 
3814  auto Opcode = Inst.getOpcode();
3815  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3816  assert(OpNum != -1);
3817 
3818  const auto &Op = Inst.getOperand(OpNum);
3819  if (!hasFlatOffsets() && Op.getImm() != 0) {
3820  Error(getFlatOffsetLoc(Operands),
3821  "flat offset modifier is not supported on this GPU");
3822  return false;
3823  }
3824 
3825  // For FLAT segment the offset must be positive;
3826  // MSB is ignored and forced to zero.
3827  if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3828  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3829  if (!isIntN(OffsetSize, Op.getImm())) {
3830  Error(getFlatOffsetLoc(Operands),
3831  Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3832  return false;
3833  }
3834  } else {
3835  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3836  if (!isUIntN(OffsetSize, Op.getImm())) {
3837  Error(getFlatOffsetLoc(Operands),
3838  Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3839  return false;
3840  }
3841  }
3842 
3843  return true;
3844 }
3845 
3846 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3847  // Start with second operand because SMEM Offset cannot be dst or src0.
3848  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3849  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3850  if (Op.isSMEMOffset())
3851  return Op.getStartLoc();
3852  }
3853  return getLoc();
3854 }
3855 
3856 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3857  const OperandVector &Operands) {
3858  if (isCI() || isSI())
3859  return true;
3860 
3861  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3862  if ((TSFlags & SIInstrFlags::SMRD) == 0)
3863  return true;
3864 
3865  auto Opcode = Inst.getOpcode();
3866  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3867  if (OpNum == -1)
3868  return true;
3869 
3870  const auto &Op = Inst.getOperand(OpNum);
3871  if (!Op.isImm())
3872  return true;
3873 
3874  uint64_t Offset = Op.getImm();
3875  bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3876  if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3877  AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3878  return true;
3879 
3880  Error(getSMEMOffsetLoc(Operands),
3881  (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3882  "expected a 21-bit signed offset");
3883 
3884  return false;
3885 }
3886 
3887 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3888  unsigned Opcode = Inst.getOpcode();
3889  const MCInstrDesc &Desc = MII.get(Opcode);
3890  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3891  return true;
3892 
3893  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3894  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3895 
3896  const int OpIndices[] = { Src0Idx, Src1Idx };
3897 
3898  unsigned NumExprs = 0;
3899  unsigned NumLiterals = 0;
3900  uint32_t LiteralValue;
3901 
3902  for (int OpIdx : OpIndices) {
3903  if (OpIdx == -1) break;
3904 
3905  const MCOperand &MO = Inst.getOperand(OpIdx);
3906  // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3907  if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3908  if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3909  uint32_t Value = static_cast<uint32_t>(MO.getImm());
3910  if (NumLiterals == 0 || LiteralValue != Value) {
3911  LiteralValue = Value;
3912  ++NumLiterals;
3913  }
3914  } else if (MO.isExpr()) {
3915  ++NumExprs;
3916  }
3917  }
3918  }
3919 
3920  return NumLiterals + NumExprs <= 1;
3921 }
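// Illustrative sketch of the rule above (operands assumed): an SOP2 such as
// "s_add_u32 s0, 0x11111111, 0x22222222" carries two distinct 32-bit literals
// and is rejected, whereas repeating the same literal value twice still
// counts as a single literal.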
3922 
3923 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3924  const unsigned Opc = Inst.getOpcode();
3925  if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3926  Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3927  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3928  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3929 
3930  if (OpSel & ~3)
3931  return false;
3932  }
3933  return true;
3934 }
3935 
3936 // Check if VCC register matches wavefront size
3937 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3938  auto FB = getFeatureBits();
3939  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3940  (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3941 }
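// For example (assuming standard register names): on a wave64 target "vcc"
// (AMDGPU::VCC) satisfies this check, while "vcc_lo" (AMDGPU::VCC_LO) is only
// accepted when FeatureWavefrontSize32 is enabled.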
3942 
3943 // VOP3 literal is only allowed in GFX10+ and only one can be used
3944 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3945  const OperandVector &Operands) {
3946  unsigned Opcode = Inst.getOpcode();
3947  const MCInstrDesc &Desc = MII.get(Opcode);
3948  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3949  return true;
3950 
3951  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3952  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3953  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3954 
3955  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3956 
3957  unsigned NumExprs = 0;
3958  unsigned NumLiterals = 0;
3959  uint32_t LiteralValue;
3960 
3961  for (int OpIdx : OpIndices) {
3962  if (OpIdx == -1) break;
3963 
3964  const MCOperand &MO = Inst.getOperand(OpIdx);
3965  if (!MO.isImm() && !MO.isExpr())
3966  continue;
3967  if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3968  continue;
3969 
3970  if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3971  getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
3972  Error(getConstLoc(Operands),
3973  "inline constants are not allowed for this operand");
3974  return false;
3975  }
3976 
3977  if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3978  uint32_t Value = static_cast<uint32_t>(MO.getImm());
3979  if (NumLiterals == 0 || LiteralValue != Value) {
3980  LiteralValue = Value;
3981  ++NumLiterals;
3982  }
3983  } else if (MO.isExpr()) {
3984  ++NumExprs;
3985  }
3986  }
3987  NumLiterals += NumExprs;
3988 
3989  if (!NumLiterals)
3990  return true;
3991 
3992  if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
3993  Error(getLitLoc(Operands), "literal operands are not supported");
3994  return false;
3995  }
3996 
3997  if (NumLiterals > 1) {
3998  Error(getLitLoc(Operands), "only one literal operand is allowed");
3999  return false;
4000  }
4001 
4002  return true;
4003 }
4004 
4005 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4006 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4007  const MCRegisterInfo *MRI) {
4008  int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4009  if (OpIdx < 0)
4010  return -1;
4011 
4012  const MCOperand &Op = Inst.getOperand(OpIdx);
4013  if (!Op.isReg())
4014  return -1;
4015 
4016  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4017  auto Reg = Sub ? Sub : Op.getReg();
4018  const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4019  return AGRP32.contains(Reg) ? 1 : 0;
4020 }
4021 
4022 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4023  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4024  if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4025  SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4026  SIInstrFlags::DS)) == 0)
4027  return true;
4028 
4029  uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4030  : AMDGPU::OpName::vdata;
4031 
4032  const MCRegisterInfo *MRI = getMRI();
4033  int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4034  int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4035 
4036  if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4037  int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4038  if (Data2Areg >= 0 && Data2Areg != DataAreg)
4039  return false;
4040  }
4041 
4042  auto FB = getFeatureBits();
4043  if (FB[AMDGPU::FeatureGFX90AInsts]) {
4044  if (DataAreg < 0 || DstAreg < 0)
4045  return true;
4046  return DstAreg == DataAreg;
4047  }
4048 
4049  return DstAreg < 1 && DataAreg < 1;
4050 }
4051 
4052 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4053  auto FB = getFeatureBits();
4054  if (!FB[AMDGPU::FeatureGFX90AInsts])
4055  return true;
4056 
4057  const MCRegisterInfo *MRI = getMRI();
4058  const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4059  const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4060  for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4061  const MCOperand &Op = Inst.getOperand(I);
4062  if (!Op.isReg())
4063  continue;
4064 
4065  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4066  if (!Sub)
4067  continue;
4068 
4069  if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4070  return false;
4071  if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4072  return false;
4073  }
4074 
4075  return true;
4076 }
4077 
4078 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4079  const OperandVector &Operands,
4080  const SMLoc &IDLoc) {
4081  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4082  AMDGPU::OpName::cpol);
4083  if (CPolPos == -1)
4084  return true;
4085 
4086  unsigned CPol = Inst.getOperand(CPolPos).getImm();
4087 
4088  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4089  if ((TSFlags & (SIInstrFlags::SMRD)) &&
4090  (CPol & ~(CPol::GLC | CPol::DLC))) {
4091  Error(IDLoc, "invalid cache policy for SMRD instruction");
4092  return false;
4093  }
4094 
4095  if (isGFX90A() && (CPol & CPol::SCC)) {
4096  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4097  StringRef CStr(S.getPointer());
4098  S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4099  Error(S, "scc is not supported on this GPU");
4100  return false;
4101  }
4102 
4103  if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
4104  return true;
4105 
4106  if (TSFlags & SIInstrFlags::IsAtomicRet) {
4107  if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4108  Error(IDLoc, "instruction must use glc");
4109  return false;
4110  }
4111  } else {
4112  if (CPol & CPol::GLC) {
4113  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4114  StringRef CStr(S.getPointer());
4115  S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4116  Error(S, "instruction must not use glc");
4117  return false;
4118  }
4119  }
4120 
4121  return true;
4122 }
4123 
4124 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4125  const SMLoc &IDLoc,
4126  const OperandVector &Operands) {
4127  if (auto ErrMsg = validateLdsDirect(Inst)) {
4128  Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4129  return false;
4130  }
4131  if (!validateSOPLiteral(Inst)) {
4132  Error(getLitLoc(Operands),
4133  "only one literal operand is allowed");
4134  return false;
4135  }
4136  if (!validateVOP3Literal(Inst, Operands)) {
4137  return false;
4138  }
4139  if (!validateConstantBusLimitations(Inst, Operands)) {
4140  return false;
4141  }
4142  if (!validateEarlyClobberLimitations(Inst, Operands)) {
4143  return false;
4144  }
4145  if (!validateIntClampSupported(Inst)) {
4146  Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4147  "integer clamping is not supported on this GPU");
4148  return false;
4149  }
4150  if (!validateOpSel(Inst)) {
4151  Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4152  "invalid op_sel operand");
4153  return false;
4154  }
4155  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4156  if (!validateMIMGD16(Inst)) {
4157  Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4158  "d16 modifier is not supported on this GPU");
4159  return false;
4160  }
4161  if (!validateMIMGDim(Inst)) {
4162  Error(IDLoc, "dim modifier is required on this GPU");
4163  return false;
4164  }
4165  if (!validateMIMGMSAA(Inst)) {
4166  Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4167  "invalid dim; must be MSAA type");
4168  return false;
4169  }
4170  if (!validateMIMGDataSize(Inst)) {
4171  Error(IDLoc,
4172  "image data size does not match dmask and tfe");
4173  return false;
4174  }
4175  if (!validateMIMGAddrSize(Inst)) {
4176  Error(IDLoc,
4177  "image address size does not match dim and a16");
4178  return false;
4179  }
4180  if (!validateMIMGAtomicDMask(Inst)) {
4181  Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4182  "invalid atomic image dmask");
4183  return false;
4184  }
4185  if (!validateMIMGGatherDMask(Inst)) {
4186  Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4187  "invalid image_gather dmask: only one bit must be set");
4188  return false;
4189  }
4190  if (!validateMovrels(Inst, Operands)) {
4191  return false;
4192  }
4193  if (!validateFlatOffset(Inst, Operands)) {
4194  return false;
4195  }
4196  if (!validateSMEMOffset(Inst, Operands)) {
4197  return false;
4198  }
4199  if (!validateMAIAccWrite(Inst, Operands)) {
4200  return false;
4201  }
4202  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4203  return false;
4204  }
4205 
4206  if (!validateAGPRLdSt(Inst)) {
4207  Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4208  ? "invalid register class: data and dst should be all VGPR or AGPR"
4209  : "invalid register class: agpr loads and stores not supported on this GPU"
4210  );
4211  return false;
4212  }
4213  if (!validateVGPRAlign(Inst)) {
4214  Error(IDLoc,
4215  "invalid register class: vgpr tuples must be 64 bit aligned");
4216  return false;
4217  }
4218 
4219  if (!validateDivScale(Inst)) {
4220  Error(IDLoc, "ABS not allowed in VOP3B instructions");
4221  return false;
4222  }
4223  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4224  return false;
4225  }
4226 
4227  return true;
4228 }
4229 
4230 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4231  const FeatureBitset &FBS,
4232  unsigned VariantID = 0);
4233 
4234 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4235  const FeatureBitset &AvailableFeatures,
4236  unsigned VariantID);
4237 
4238 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4239  const FeatureBitset &FBS) {
4240  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4241 }
4242 
4243 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4244  const FeatureBitset &FBS,
4245  ArrayRef<unsigned> Variants) {
4246  for (auto Variant : Variants) {
4247  if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4248  return true;
4249  }
4250 
4251  return false;
4252 }
4253 
4254 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4255  const SMLoc &IDLoc) {
4256  FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4257 
4258  // Check if requested instruction variant is supported.
4259  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4260  return false;
4261 
4262  // This instruction is not supported.
4263  // Clear any other pending errors because they are no longer relevant.
4264  getParser().clearPendingErrors();
4265 
4266  // Requested instruction variant is not supported.
4267  // Check if any other variants are supported.
4268  StringRef VariantName = getMatchedVariantName();
4269  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4270  return Error(IDLoc,
4271  Twine(VariantName,
4272  " variant of this instruction is not supported"));
4273  }
4274 
4275  // Finally check if this instruction is supported on any other GPU.
4276  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4277  return Error(IDLoc, "instruction not supported on this GPU");
4278  }
4279 
4280  // Instruction not supported on any GPU. Probably a typo.
4281  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4282  return Error(IDLoc, "invalid instruction" + Suggestion);
4283 }
4284 
4285 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4286  OperandVector &Operands,
4287  MCStreamer &Out,
4288  uint64_t &ErrorInfo,
4289  bool MatchingInlineAsm) {
4290  MCInst Inst;
4291  unsigned Result = Match_Success;
4292  for (auto Variant : getMatchedVariants()) {
4293  uint64_t EI;
4294  auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4295  Variant);
4296  // We order match statuses from least to most specific, and use the most
4297  // specific status as the result:
4298  // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4299  if ((R == Match_Success) ||
4300  (R == Match_PreferE32) ||
4301  (R == Match_MissingFeature && Result != Match_PreferE32) ||
4302  (R == Match_InvalidOperand && Result != Match_MissingFeature
4303  && Result != Match_PreferE32) ||
4304  (R == Match_MnemonicFail && Result != Match_InvalidOperand
4305  && Result != Match_MissingFeature
4306  && Result != Match_PreferE32)) {
4307  Result = R;
4308  ErrorInfo = EI;
4309  }
4310  if (R == Match_Success)
4311  break;
4312  }
4313 
4314  if (Result == Match_Success) {
4315  if (!validateInstruction(Inst, IDLoc, Operands)) {
4316  return true;
4317  }
4318  Inst.setLoc(IDLoc);
4319  Out.emitInstruction(Inst, getSTI());
4320  return false;
4321  }
4322 
4323  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4324  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4325  return true;
4326  }
4327 
4328  switch (Result) {
4329  default: break;
4330  case Match_MissingFeature:
4331  // It has been verified that the specified instruction
4332  // mnemonic is valid. A match was found but it requires
4333  // features which are not supported on this GPU.
4334  return Error(IDLoc, "operands are not valid for this GPU or mode");
4335 
4336  case Match_InvalidOperand: {
4337  SMLoc ErrorLoc = IDLoc;
4338  if (ErrorInfo != ~0ULL) {
4339  if (ErrorInfo >= Operands.size()) {
4340  return Error(IDLoc, "too few operands for instruction");
4341  }
4342  ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4343  if (ErrorLoc == SMLoc())
4344  ErrorLoc = IDLoc;
4345  }
4346  return Error(ErrorLoc, "invalid operand for instruction");
4347  }
4348 
4349  case Match_PreferE32:
4350  return Error(IDLoc, "internal error: instruction without _e64 suffix "
4351  "should be encoded as e32");
4352  case Match_MnemonicFail:
4353  llvm_unreachable("Invalid instructions should have been handled already");
4354  }
4355  llvm_unreachable("Implement any new match types added!");
4356 }
4357 
4358 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4359  int64_t Tmp = -1;
4360  if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4361  return true;
4362  }
4363  if (getParser().parseAbsoluteExpression(Tmp)) {
4364  return true;
4365  }
4366  Ret = static_cast<uint32_t>(Tmp);
4367  return false;
4368 }
4369 
4370 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4371  uint32_t &Minor) {
4372  if (ParseAsAbsoluteExpression(Major))
4373  return TokError("invalid major version");
4374 
4375  if (!trySkipToken(AsmToken::Comma))
4376  return TokError("minor version number required, comma expected");
4377 
4378  if (ParseAsAbsoluteExpression(Minor))
4379  return TokError("invalid minor version");
4380 
4381  return false;
4382 }
4383 
4384 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4385  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4386  return TokError("directive only supported for amdgcn architecture");
4387 
4388  std::string TargetIDDirective;
4389  SMLoc TargetStart = getTok().getLoc();
4390  if (getParser().parseEscapedString(TargetIDDirective))
4391  return true;
4392 
4393  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4394  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4395  return getParser().Error(TargetRange.Start,
4396  (Twine(".amdgcn_target directive's target id ") +
4397  Twine(TargetIDDirective) +
4398  Twine(" does not match the specified target id ") +
4399  Twine(getTargetStreamer().getTargetID()->toString())).str());
4400 
4401  return false;
4402 }
4403 
4404 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4405  return Error(Range.Start, "value out of range", Range);
4406 }
4407 
4408 bool AMDGPUAsmParser::calculateGPRBlocks(
4409  const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4410  bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4411  SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4412  unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4413  // TODO(scott.linder): These calculations are duplicated from
4414  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4415  IsaVersion Version = getIsaVersion(getSTI().getCPU());
4416 
4417  unsigned NumVGPRs = NextFreeVGPR;
4418  unsigned NumSGPRs = NextFreeSGPR;
4419 
4420  if (Version.Major >= 10)
4421  NumSGPRs = 0;
4422  else {
4423  unsigned MaxAddressableNumSGPRs =
4424  IsaInfo::getAddressableNumSGPRs(&getSTI());
4425 
4426  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4427  NumSGPRs > MaxAddressableNumSGPRs)
4428  return OutOfRangeError(SGPRRange);
4429 
4430  NumSGPRs +=
4431  IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4432 
4433  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4434  NumSGPRs > MaxAddressableNumSGPRs)
4435  return OutOfRangeError(SGPRRange);
4436 
4437  if (Features.test(FeatureSGPRInitBug))
4438  NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4439  }
4440 
4441  VGPRBlocks =
4442  IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4443  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4444 
4445  return false;
4446 }
4447 
4448 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4449  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4450  return TokError("directive only supported for amdgcn architecture");
4451 
4452  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4453  return TokError("directive only supported for amdhsa OS");
4454 
4455  StringRef KernelName;
4456  if (getParser().parseIdentifier(KernelName))
4457  return true;
4458 
4459  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4460 
4461  StringSet<> Seen;
4462 
4463  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4464 
4465  SMRange VGPRRange;
4466  uint64_t NextFreeVGPR = 0;
4467  uint64_t AccumOffset = 0;
4468  SMRange SGPRRange;
4469  uint64_t NextFreeSGPR = 0;
4470  unsigned UserSGPRCount = 0;
4471  bool ReserveVCC = true;
4472  bool ReserveFlatScr = true;
4473  Optional<bool> EnableWavefrontSize32;
4474 
4475  while (true) {
4476  while (trySkipToken(AsmToken::EndOfStatement));
4477 
4478  StringRef ID;
4479  SMRange IDRange = getTok().getLocRange();
4480  if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4481  return true;
4482 
4483  if (ID == ".end_amdhsa_kernel")
4484  break;
4485 
4486  if (Seen.find(ID) != Seen.end())
4487  return TokError(".amdhsa_ directives cannot be repeated");
4488  Seen.insert(ID);
4489 
4490  SMLoc ValStart = getLoc();
4491  int64_t IVal;
4492  if (getParser().parseAbsoluteExpression(IVal))
4493  return true;
4494  SMLoc ValEnd = getLoc();
4495  SMRange ValRange = SMRange(ValStart, ValEnd);
4496 
4497  if (IVal < 0)
4498  return OutOfRangeError(ValRange);
4499 
4500  uint64_t Val = IVal;
4501 
4502 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
4503  if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
4504  return OutOfRangeError(RANGE); \
4505  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
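// Sketch of a typical expansion, mirroring a use further below: for
// PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
// Val, ValRange) the value is first range-checked against the field's *_WIDTH
// constant and then packed into compute_pgm_rsrc1 via AMDHSA_BITS_SET.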
4506 
4507  if (ID == ".amdhsa_group_segment_fixed_size") {
4508  if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4509  return OutOfRangeError(ValRange);
4510  KD.group_segment_fixed_size = Val;
4511  } else if (ID == ".amdhsa_private_segment_fixed_size") {
4512  if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4513  return OutOfRangeError(ValRange);
4514  KD.private_segment_fixed_size = Val;
4515  } else if (ID == ".amdhsa_kernarg_size") {
4516  if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4517  return OutOfRangeError(ValRange);
4518  KD.kernarg_size = Val;
4519  } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4520  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4521  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4522  Val, ValRange);
4523  if (Val)
4524  UserSGPRCount += 4;
4525  } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4526  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4527  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4528  ValRange);
4529  if (Val)
4530  UserSGPRCount += 2;
4531  } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4532  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4533  KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4534  ValRange);
4535  if (Val)
4536  UserSGPRCount += 2;
4537  } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4538  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4539  KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4540  Val, ValRange);
4541  if (Val)
4542  UserSGPRCount += 2;
4543  } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4544  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4545  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4546  ValRange);
4547  if (Val)
4548  UserSGPRCount += 2;
4549  } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4550  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4551  KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4552  ValRange);
4553  if (Val)
4554  UserSGPRCount += 2;
4555  } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4556  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4557  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4558  Val, ValRange);
4559  if (Val)
4560  UserSGPRCount += 1;
4561  } else if (ID == ".amdhsa_wavefront_size32") {
4562  if (IVersion.Major < 10)
4563  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4564  EnableWavefrontSize32 = Val;
4565  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4566  KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4567  Val, ValRange);
4568  } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4569  PARSE_BITS_ENTRY(
4570  KD.compute_pgm_rsrc2,
4571  COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val,
4572  ValRange);
4573  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4574  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4575  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4576  ValRange);
4577  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4578  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4579  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4580  ValRange);
4581  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4582  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4583  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4584  ValRange);
4585  } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4586  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4587  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4588  ValRange);
4589  } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4590  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4591  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4592  ValRange);
4593  } else if (ID == ".amdhsa_next_free_vgpr") {
4594  VGPRRange = ValRange;
4595  NextFreeVGPR = Val;
4596  } else if (ID == ".amdhsa_next_free_sgpr") {
4597  SGPRRange = ValRange;
4598  NextFreeSGPR = Val;
4599  } else if (ID == ".amdhsa_accum_offset") {
4600  if (!isGFX90A())
4601  return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4602  AccumOffset = Val;
4603  } else if (ID == ".amdhsa_reserve_vcc") {
4604  if (!isUInt<1>(Val))
4605  return OutOfRangeError(ValRange);
4606  ReserveVCC = Val;
4607  } else if (ID == ".amdhsa_reserve_flat_scratch") {
4608  if (IVersion.Major < 7)
4609  return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4610  if (!isUInt<1>(Val))
4611  return OutOfRangeError(ValRange);
4612  ReserveFlatScr = Val;
4613  } else if (ID == ".amdhsa_reserve_xnack_mask") {
4614  if (IVersion.Major < 8)
4615  return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4616  if (!isUInt<1>(Val))
4617  return OutOfRangeError(ValRange);
4618  if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4619  return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4620  IDRange);
4621  } else if (ID == ".amdhsa_float_round_mode_32") {
4622  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4623  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4624  } else if (ID == ".amdhsa_float_round_mode_16_64") {
4625  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4626  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4627  } else if (ID == ".amdhsa_float_denorm_mode_32") {
4628  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4629  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4630  } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4631  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4632  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4633  ValRange);
4634  } else if (ID == ".amdhsa_dx10_clamp") {
4635  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4636  COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4637  } else if (ID == ".amdhsa_ieee_mode") {
4638  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4639  Val, ValRange);
4640  } else if (ID == ".amdhsa_fp16_overflow") {
4641  if (IVersion.Major < 9)
4642  return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4643  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4644  ValRange);
4645  } else if (ID == ".amdhsa_tg_split") {
4646  if (!isGFX90A())
4647  return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4648  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4649  ValRange);
4650  } else if (ID == ".amdhsa_workgroup_processor_mode") {
4651  if (IVersion.Major < 10)
4652  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4653  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4654  ValRange);
4655  } else if (ID == ".amdhsa_memory_ordered") {
4656  if (IVersion.Major < 10)
4657  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4658  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4659  ValRange);
4660  } else if (ID == ".amdhsa_forward_progress") {
4661  if (IVersion.Major < 10)
4662  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4663  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4664  ValRange);
4665  } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4666  PARSE_BITS_ENTRY(
4667  KD.compute_pgm_rsrc2,
4668  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4669  ValRange);
4670  } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4671  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4672  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4673  Val, ValRange);
4674  } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4675  PARSE_BITS_ENTRY(
4676  KD.compute_pgm_rsrc2,
4677  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4678  ValRange);
4679  } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4680  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4681  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4682  Val, ValRange);
4683  } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4684  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4685  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4686  Val, ValRange);
4687  } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4688  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4689  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4690  Val, ValRange);
4691  } else if (ID == ".amdhsa_exception_int_div_zero") {
4692  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4693  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4694  Val, ValRange);
4695  } else {
4696  return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4697  }
4698 
4699 #undef PARSE_BITS_ENTRY
4700  }
4701 
4702  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4703  return TokError(".amdhsa_next_free_vgpr directive is required");
4704 
4705  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4706  return TokError(".amdhsa_next_free_sgpr directive is required");
4707 
4708  unsigned VGPRBlocks;
4709  unsigned SGPRBlocks;
4710  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4711  getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4712  EnableWavefrontSize32, NextFreeVGPR,
4713  VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4714  SGPRBlocks))
4715  return true;
4716 
4717  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4718  VGPRBlocks))
4719  return OutOfRangeError(VGPRRange);
4720  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4721  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4722 
4723  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4724  SGPRBlocks))
4725  return OutOfRangeError(SGPRRange);
4726  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4727  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4728  SGPRBlocks);
4729 
4730  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4731  return TokError("too many user SGPRs enabled");
4732  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4733  UserSGPRCount);
4734 
4735  if (isGFX90A()) {
4736  if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4737  return TokError(".amdhsa_accum_offset directive is required");
4738  if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4739  return TokError("accum_offset should be in range [4..256] in "
4740  "increments of 4");
4741  if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4742  return TokError("accum_offset exceeds total VGPR allocation");
4743  AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4744  (AccumOffset / 4 - 1));
4745  }
4746 
4747  getTargetStreamer().EmitAmdhsaKernelDescriptor(
4748  getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4749  ReserveFlatScr);
4750  return false;
4751 }
4752 
4753 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4754  uint32_t Major;
4755  uint32_t Minor;
4756 
4757  if (ParseDirectiveMajorMinor(Major, Minor))
4758  return true;
4759 
4760  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4761  return false;
4762 }
4763 
4764 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4765  uint32_t Major;
4766  uint32_t Minor;
4767  uint32_t Stepping;
4768  StringRef VendorName;
4769  StringRef ArchName;
4770 
4771  // If this directive has no arguments, then use the ISA version for the
4772  // targeted GPU.
4773  if (isToken(AsmToken::EndOfStatement)) {
4774  IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4775  getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4776  ISA.Stepping,
4777  "AMD", "AMDGPU");
4778  return false;
4779  }
4780 
4781  if (ParseDirectiveMajorMinor(Major, Minor))
4782  return true;
4783 
4784  if (!trySkipToken(AsmToken::Comma))
4785  return TokError("stepping version number required, comma expected");
4786 
4787  if (ParseAsAbsoluteExpression(Stepping))
4788  return TokError("invalid stepping version");
4789 
4790  if (!trySkipToken(AsmToken::Comma))
4791  return TokError("vendor name required, comma expected");
4792 
4793  if (!parseString(VendorName, "invalid vendor name"))
4794  return true;
4795 
4796  if (!trySkipToken(AsmToken::Comma))
4797  return TokError("arch name required, comma expected");
4798 
4799  if (!parseString(ArchName, "invalid arch name"))
4800  return true;
4801 
4802  getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
4803  VendorName, ArchName);
4804  return false;
4805 }
4806 
4807 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4808  amd_kernel_code_t &Header) {
4809  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4810  // assembly for backwards compatibility.
4811  if (ID == "max_scratch_backing_memory_byte_size") {
4812  Parser.eatToEndOfStatement();
4813  return false;
4814  }
4815 
4816  SmallString<40> ErrStr;
4817  raw_svector_ostream Err(ErrStr);
4818  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4819  return TokError(Err.str());
4820  }
4821  Lex();
4822 
4823  if (ID == "enable_wavefront_size32") {
4824  if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4825  if (!isGFX10Plus())
4826  return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4827  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4828  return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4829  } else {
4830  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4831  return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4832  }
4833  }
4834 
4835  if (ID == "wavefront_size") {
4836  if (Header.wavefront_size == 5) {
4837  if (!isGFX10Plus())
4838  return TokError("wavefront_size=5 is only allowed on GFX10+");
4839  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4840  return TokError("wavefront_size=5 requires +WavefrontSize32");
4841  } else if (Header.wavefront_size == 6) {
4842  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4843  return TokError("wavefront_size=6 requires +WavefrontSize64");
4844  }
4845  }
4846 
4847  if (ID == "enable_wgp_mode") {
4848  if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4849  !isGFX10Plus())
4850  return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4851  }
4852 
4853  if (ID == "enable_mem_ordered") {
4854  if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4855  !isGFX10Plus())
4856  return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4857  }
4858 
4859  if (ID == "enable_fwd_progress") {
4860  if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4861  !isGFX10Plus())
4862  return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4863  }
4864 
4865  return false;
4866 }
4867 
4868 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4869  amd_kernel_code_t Header;
4870  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4871 
4872  while (true) {
4873  // Lex EndOfStatement. This is in a while loop, because lexing a comment
4874  // will set the current token to EndOfStatement.
4875  while(trySkipToken(AsmToken::EndOfStatement));
4876 
4877  StringRef ID;
4878  if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4879  return true;
4880 
4881  if (ID == ".end_amd_kernel_code_t")
4882  break;
4883 
4884  if (ParseAMDKernelCodeTValue(ID, Header))
4885  return true;
4886  }
4887 
4888  getTargetStreamer().EmitAMDKernelCodeT(Header);
4889 
4890  return false;
4891 }
4892 
4893 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4894  StringRef KernelName;
4895  if (!parseId(KernelName, "expected symbol name"))
4896  return true;
4897 
4898  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4899  ELF::STT_AMDGPU_HSA_KERNEL);
4900 
4901  KernelScope.initialize(getContext());
4902  return false;
4903 }
4904 
4905 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4906  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4907  return Error(getLoc(),
4908  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4909  "architectures");
4910  }
4911 
4912  auto TargetIDDirective = getLexer().getTok().getStringContents();
4913  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4914  return Error(getParser().getTok().getLoc(), "target id must match options");
4915 
4916  getTargetStreamer().EmitISAVersion();
4917  Lex();
4918 
4919  return false;
4920 }
4921 
4922 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4923  const char *AssemblerDirectiveBegin;
4924  const char *AssemblerDirectiveEnd;
4925  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4926  isHsaAbiVersion3Or4(&getSTI())
4927  ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4928  HSAMD::V3::AssemblerDirectiveEnd)
4929  : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4930  HSAMD::AssemblerDirectiveEnd);
4931 
4932  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4933  return Error(getLoc(),
4934  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4935  "not available on non-amdhsa OSes")).str());
4936  }
4937 
4938  std::string HSAMetadataString;
4939  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4940  HSAMetadataString))
4941  return true;
4942 
4943  if (isHsaAbiVersion3Or4(&getSTI())) {
4944  if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4945  return Error(getLoc(), "invalid HSA metadata");
4946  } else {
4947  if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4948  return Error(getLoc(), "invalid HSA metadata");
4949  }
4950 
4951  return false;
4952 }
4953 
4954 /// Common code to parse out a block of text (typically YAML) between start and
4955 /// end directives.
4956 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4957  const char *AssemblerDirectiveEnd,
4958  std::string &CollectString) {
4959 
4960  raw_string_ostream CollectStream(CollectString);
4961 
4962  getLexer().setSkipSpace(false);
4963 
4964  bool FoundEnd = false;
4965  while (!isToken(AsmToken::Eof)) {
4966  while (isToken(AsmToken::Space)) {
4967  CollectStream << getTokenStr();
4968  Lex();
4969  }
4970 
4971  if (trySkipId(AssemblerDirectiveEnd)) {
4972  FoundEnd = true;
4973  break;
4974  }
4975 
4976  CollectStream << Parser.parseStringToEndOfStatement()
4977  << getContext().getAsmInfo()->getSeparatorString();
4978 
4979  Parser.eatToEndOfStatement();
4980  }
4981 
4982  getLexer().setSkipSpace(true);
4983 
4984  if (isToken(AsmToken::Eof) && !FoundEnd) {
4985  return TokError(Twine("expected directive ") +
4986  Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4987  }
4988 
4989  CollectStream.flush();
4990  return false;
4991 }
4992 
4993 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4994 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4995  std::string String;
4996  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4997  AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4998  return true;
4999 
5000  auto PALMetadata = getTargetStreamer().getPALMetadata();
5001  if (!PALMetadata->setFromString(String))
5002  return Error(getLoc(), "invalid PAL metadata");
5003  return false;
5004 }
5005 
5006 /// Parse the assembler directive for old linear-format PAL metadata.
5007 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5008  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5009  return Error(getLoc(),
5010  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5011  "not available on non-amdpal OSes")).str());
5012  }
5013 
5014  auto PALMetadata = getTargetStreamer().getPALMetadata();
5015  PALMetadata->setLegacy();
5016  for (;;) {
5017  uint32_t Key, Value;
5018  if (ParseAsAbsoluteExpression(Key)) {
5019  return TokError(Twine("invalid value in ") +
5020  Twine(PALMD::AssemblerDirective));
5021  }
5022  if (!trySkipToken(AsmToken::Comma)) {
5023  return TokError(Twine("expected an even number of values in ") +
5024  Twine(PALMD::AssemblerDirective));
5025  }
5026  if (ParseAsAbsoluteExpression(Value)) {
5027  return TokError(Twine("invalid value in ") +
5028  Twine(PALMD::AssemblerDirective));
5029  }
5030  PALMetadata->setRegister(Key, Value);
5031  if (!trySkipToken(AsmToken::Comma))
5032  break;
5033  }
5034  return false;
5035 }
5036 
5037 /// ParseDirectiveAMDGPULDS
5038 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
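///
/// For instance, ".amdgpu_lds lds_buf, 4096, 16" (symbol name and values
/// assumed for illustration) reserves 4096 bytes of LDS for lds_buf with
/// 16-byte alignment; when the alignment operand is omitted it defaults to 4.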
5039 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5040  if (getParser().checkForValidSection())
5041  return true;
5042 
5043  StringRef Name;
5044  SMLoc NameLoc = getLoc();
5045  if (getParser().parseIdentifier(Name))
5046  return TokError("expected identifier in directive");
5047 
5048  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5049  if (parseToken(AsmToken::Comma, "expected ','"))
5050  return true;
5051 
5052  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5053 
5054  int64_t Size;
5055  SMLoc SizeLoc = getLoc();
5056  if (getParser().parseAbsoluteExpression(Size))
5057  return true;
5058  if (Size < 0)
5059  return Error(SizeLoc, "size must be non-negative");
5060  if (Size > LocalMemorySize)
5061  return Error(SizeLoc, "size is too large");
5062 
5063  int64_t Alignment = 4;
5064  if (trySkipToken(AsmToken::Comma)) {
5065  SMLoc AlignLoc = getLoc();
5066  if (getParser().parseAbsoluteExpression(Alignment))
5067  return true;
5068  if (Alignment < 0 || !isPowerOf2_64(Alignment))
5069  return Error(AlignLoc, "alignment must be a power of two");
5070 
5071  // Alignment larger than the size of LDS is possible in theory, as long
5072  // as the linker manages to place the symbol at address 0, but we do want
5073  // to make sure the alignment fits nicely into a 32-bit integer.
5074  if (Alignment >= 1u << 31)
5075  return Error(AlignLoc, "alignment is too large");
5076  }
5077 
5078  if (parseToken(AsmToken::EndOfStatement,
5079  "unexpected token in '.amdgpu_lds' directive"))
5080  return true;
5081 
5082  Symbol->redefineIfPossible();
5083  if (!Symbol->isUndefined())
5084  return Error(NameLoc, "invalid symbol redefinition");
5085 
5086  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5087  return false;
5088 }
5089 
5090 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5091  StringRef IDVal = DirectiveID.getString();
5092 
5093  if (isHsaAbiVersion3Or4(&getSTI())) {
5094  if (IDVal == ".amdhsa_kernel")
5095  return ParseDirectiveAMDHSAKernel();
5096 
5097  // TODO: Restructure/combine with PAL metadata directive.
5098  if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
5099  return ParseDirectiveHSAMetadata();
5100  } else {
5101  if (IDVal == ".hsa_code_object_version")
5102  return ParseDirectiveHSACodeObjectVersion();
5103 
5104  if (IDVal == ".hsa_code_object_isa")
5105  return ParseDirectiveHSACodeObjectISA();
5106 
5107  if (IDVal == ".amd_kernel_code_t")
5108  return ParseDirectiveAMDKernelCodeT();
5109 
5110  if (IDVal == ".amdgpu_hsa_kernel")
5111  return ParseDirectiveAMDGPUHsaKernel();
5112 
5113  if (IDVal == ".amd_amdgpu_isa")
5114  return ParseDirectiveISAVersion();
5115 
5116  if (IDVal == HSAMD::AssemblerDirectiveBegin)
5117  return ParseDirectiveHSAMetadata();
5118  }
5119 
5120  if (IDVal == ".amdgcn_target")
5121  return ParseDirectiveAMDGCNTarget();
5122 
5123  if (IDVal == ".amdgpu_lds")
5124  return ParseDirectiveAMDGPULDS();
5125 
5126  if (IDVal == PALMD::AssemblerDirectiveBegin)
5127  return ParseDirectivePALMetadataBegin();
5128 
5129  if (IDVal == PALMD::AssemblerDirective)
5130  return ParseDirectivePALMetadata();
5131 
5132  return true;
5133 }
5134 
5135 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5136  unsigned RegNo) {
5137 
5138  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5139  R.isValid(); ++R) {
5140  if (*R == RegNo)
5141  return isGFX9Plus();
5142  }
5143 
5144  // GFX10 has 2 more SGPRs 104 and 105.
5145  for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5146  R.isValid(); ++R) {
5147  if (*R == RegNo)
5148  return hasSGPR104_SGPR105();
5149  }
5150 
5151  switch (RegNo) {
5152  case AMDGPU::SRC_SHARED_BASE:
5153  case AMDGPU::SRC_SHARED_LIMIT:
5154  case AMDGPU::SRC_PRIVATE_BASE:
5155  case AMDGPU::SRC_PRIVATE_LIMIT:
5156  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5157  return isGFX9Plus();
5158  case AMDGPU::TBA:
5159  case AMDGPU::TBA_LO:
5160  case AMDGPU::TBA_HI:
5161  case AMDGPU::TMA:
5162  case AMDGPU::TMA_LO:
5163  case AMDGPU::TMA_HI:
5164  return !isGFX9Plus();
5165  case AMDGPU::XNACK_MASK:
5166  case AMDGPU::XNACK_MASK_LO:
5167  case AMDGPU::XNACK_MASK_HI:
5168  return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5169  case AMDGPU::SGPR_NULL:
5170  return isGFX10Plus();
5171  default:
5172  break;
5173  }
5174 
5175  if (isCI())
5176  return true;
5177 
5178  if (isSI() || isGFX10Plus()) {
5179  // No flat_scr on SI.
5180  // On GFX10 flat scratch is not a valid register operand and can only be
5181  // accessed with s_setreg/s_getreg.
5182  switch (RegNo) {
5183  case AMDGPU::FLAT_SCR:
5184  case AMDGPU::FLAT_SCR_LO:
5185  case AMDGPU::FLAT_SCR_HI:
5186  return false;
5187  default:
5188  return true;
5189  }
5190  }
5191 
5192  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5193  // SI/CI have.
5194  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5195  R.isValid(); ++R) {
5196  if (*R == RegNo)
5197  return hasSGPR102_SGPR103();
5198  }
5199 
5200  return true;
5201 }
5202 
5203 OperandMatchResultTy
5204 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5205  OperandMode Mode) {
5206  // Try to parse with a custom parser
5207  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5208 
5209  // If we successfully parsed the operand or if there was an error parsing,
5210  // we are done.
5211  //
5212  // If we are parsing after we reach EndOfStatement then this means we
5213  // are appending default values to the Operands list. This is only done
5214  // by custom parser, so we shouldn't continue on to the generic parsing.
5215  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5216  isToken(AsmToken::EndOfStatement))
5217  return ResTy;
5218 
5219  SMLoc RBraceLoc;
5220  SMLoc LBraceLoc = getLoc();
5221  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5222  unsigned Prefix = Operands.size();
5223 
5224  for (;;) {
5225  auto Loc = getLoc();
5226  ResTy = parseReg(Operands);
5227  if (ResTy == MatchOperand_NoMatch)
5228  Error(Loc, "expected a register");
5229  if (ResTy != MatchOperand_Success)
5230  return MatchOperand_ParseFail;
5231 
5232  RBraceLoc = getLoc();
5233  if (trySkipToken(AsmToken::RBrac))
5234  break;
5235 
5236  if (!skipToken(AsmToken::Comma,
5237  "expected a comma or a closing square bracket")) {
5238  return MatchOperand_ParseFail;
5239  }
5240  }
5241 
5242  if (Operands.size() - Prefix > 1) {
5243  Operands.insert(Operands.begin() + Prefix,
5244  AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5245  Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5246  }
5247 
5248  return MatchOperand_Success;
5249  }
5250 
5251  return parseRegOrImm(Operands);
5252 }
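// Example (illustrative, assumed GFX10+ syntax): in OperandMode_NSA the loop
// above accepts a bracketed, possibly non-contiguous VGPR address list, e.g.
//   image_sample v[16:19], [v0, v2, v4], s[4:11], s[16:19] dmask:0xf dim:SQ_RSRC_IMG_3D
// while the default mode falls through to parseRegOrImm for a single operand.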
5253 
5254 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5255  // Clear any forced encodings from the previous instruction.
5256  setForcedEncodingSize(0);
5257  setForcedDPP(false);
5258  setForcedSDWA(false);
5259 
5260  if (Name.endswith("_e64")) {
5261  setForcedEncodingSize(64);
5262  return Name.substr(0, Name.size() - 4);
5263  } else if (Name.endswith("_e32")) {
5264  setForcedEncodingSize(32);
5265  return Name.substr(0, Name.size() - 4);
5266  } else if (Name.endswith("_dpp")) {
5267  setForcedDPP(true);
5268  return Name.substr(0, Name.size() - 4);
5269  } else if (Name.endswith("_sdwa")) {
5270  setForcedSDWA(true);
5271  return Name.substr(0, Name.size() - 5);
5272  }
5273  return Name;
5274 }
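// Example (illustrative): "v_add_f32_e64" forces the 64-bit VOP3 encoding and
// "v_add_f32_e32" the 32-bit one; "v_mov_b32_dpp" and "v_mov_b32_sdwa" force the
// DPP and SDWA variants. The suffix is stripped before the mnemonic is recorded.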
5275 
5276 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5277  StringRef Name,
5278  SMLoc NameLoc, OperandVector &Operands) {
5279  // Add the instruction mnemonic
5280  Name = parseMnemonicSuffix(Name);
5281  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5282 
5283  bool IsMIMG = Name.startswith("image_");
5284 
5285  while (!trySkipToken(AsmToken::EndOfStatement)) {
5286  OperandMode Mode = OperandMode_Default;
5287  if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5288  Mode = OperandMode_NSA;
5289  CPolSeen = 0;
5290  OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5291 
5292  if (Res != MatchOperand_Success) {
5293  checkUnsupportedInstruction(Name, NameLoc);
5294  if (!Parser.hasPendingError()) {
5295  // FIXME: use real operand location rather than the current location.
5296  StringRef Msg =
5297  (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5298  "not a valid operand.";
5299  Error(getLoc(), Msg);
5300  }
5301  while (!trySkipToken(AsmToken::EndOfStatement)) {
5302  lex();
5303  }
5304  return true;
5305  }
5306 
5307  // Eat the comma or space if there is one.
5308  trySkipToken(AsmToken::Comma);
5309  }
5310 
5311  return false;
5312 }
5313 
5314 //===----------------------------------------------------------------------===//
5315 // Utility functions
5316 //===----------------------------------------------------------------------===//
5317 
5318 OperandMatchResultTy
5319 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5320 
5321  if (!trySkipId(Prefix, AsmToken::Colon))
5322  return MatchOperand_NoMatch;
5323 
5324  return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5325 }
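// Example (illustrative): "prefix:value" operands such as "offset:4095" or
// "offset0:8" are matched here; the value goes through parseExpr, so symbolic
// and arithmetic expressions are accepted as well as plain integers.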
5326 
5327 OperandMatchResultTy
5328 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5329  AMDGPUOperand::ImmTy ImmTy,
5330  bool (*ConvertResult)(int64_t&)) {
5331  SMLoc S = getLoc();
5332  int64_t Value = 0;
5333 
5334  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5335  if (Res != MatchOperand_Success)
5336  return Res;
5337 
5338  if (ConvertResult && !ConvertResult(Value)) {
5339  Error(S, "invalid " + StringRef(Prefix) + " value.");
5340  }
5341 
5342  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5343  return MatchOperand_Success;
5344 }
5345 
5346 OperandMatchResultTy
5347 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5348  OperandVector &Operands,
5349  AMDGPUOperand::ImmTy ImmTy,
5350  bool (*ConvertResult)(int64_t&)) {
5351  SMLoc S = getLoc();
5352  if (!trySkipId(Prefix, AsmToken::Colon))
5353  return MatchOperand_NoMatch;
5354 
5355  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5356  return MatchOperand_ParseFail;
5357 
5358  unsigned Val = 0;
5359  const unsigned MaxSize = 4;
5360 
5361  // FIXME: How to verify the number of elements matches the number of src
5362  // operands?
5363  for (int I = 0; ; ++I) {
5364  int64_t Op;
5365  SMLoc Loc = getLoc();
5366  if (!parseExpr(Op))
5367  return MatchOperand_ParseFail;
5368 
5369  if (Op != 0 && Op != 1) {
5370  Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5371  return MatchOperand_ParseFail;
5372  }
5373 
5374  Val |= (Op << I);
5375 
5376  if (trySkipToken(AsmToken::RBrac))
5377  break;
5378 
5379  if (I + 1 == MaxSize) {
5380  Error(getLoc(), "expected a closing square bracket");
5381  return MatchOperand_ParseFail;
5382  }
5383 
5384  if (!skipToken(AsmToken::Comma, "expected a comma"))
5385  return MatchOperand_ParseFail;
5386  }
5387 
5388  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5389  return MatchOperand_Success;
5390 }
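// Example (illustrative): bit-array modifiers of up to four 0/1 entries, such as
// the VOP3P forms "op_sel:[0,1]" or "neg_lo:[1,0,0]", are packed here into a
// single immediate, one bit per element.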
5391 
5392 OperandMatchResultTy
5393 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5394  AMDGPUOperand::ImmTy ImmTy) {
5395  int64_t Bit;
5396  SMLoc S = getLoc();
5397 
5398  if (trySkipId(Name)) {
5399  Bit = 1;
5400  } else if (trySkipId("no", Name)) {
5401  Bit = 0;
5402  } else {
5403  return MatchOperand_NoMatch;
5404  }
5405 
5406  if (Name == "r128" && !hasMIMG_R128()) {
5407  Error(S, "r128 modifier is not supported on this GPU");
5408  return MatchOperand_ParseFail;
5409  }
5410  if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5411  Error(S, "a16 modifier is not supported on this GPU");
5412  return MatchOperand_ParseFail;
5413  }
5414 
5415  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5416  ImmTy = AMDGPUOperand::ImmTyR128A16;
5417 
5418  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5419  return MatchOperand_Success;
5420 }
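// Example (illustrative): named single-bit flags such as "unorm", "tfe", "lwe",
// "r128" or "a16" set the bit, and a "no"-prefixed spelling (e.g. "notfe")
// clears it explicitly; "r128"/"a16" availability is checked against the
// subtarget as above.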
5421 
5422 OperandMatchResultTy
5423 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5424  unsigned CPolOn = 0;
5425  unsigned CPolOff = 0;
5426  SMLoc S = getLoc();
5427 
5428  if (trySkipId("glc"))
5429  CPolOn = AMDGPU::CPol::GLC;
5430  else if (trySkipId("noglc"))
5431  CPolOff = AMDGPU::CPol::GLC;
5432  else if (trySkipId("slc"))
5433  CPolOn = AMDGPU::CPol::SLC;
5434  else if (trySkipId("noslc"))
5435  CPolOff = AMDGPU::CPol::SLC;
5436  else if (trySkipId("dlc"))
5437  CPolOn = AMDGPU::CPol::DLC;
5438  else if (trySkipId("nodlc"))
5439  CPolOff = AMDGPU::CPol::DLC;
5440  else if (trySkipId("scc"))
5441  CPolOn = AMDGPU::CPol::SCC;
5442  else if (trySkipId("noscc"))
5443  CPolOff = AMDGPU::CPol::SCC;
5444  else
5445  return MatchOperand_NoMatch;
5446 
5447  if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5448  Error(S, "dlc modifier is not supported on this GPU");
5449  return MatchOperand_ParseFail;
5450  }
5451 
5452  if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5453  Error(S, "scc modifier is not supported on this GPU");
5454  return MatchOperand_ParseFail;
5455  }
5456 
5457  if (CPolSeen & (CPolOn | CPolOff)) {
5458  Error(S, "duplicate cache policy modifier");
5459  return MatchOperand_ParseFail;
5460  }
5461 
5462  CPolSeen |= (CPolOn | CPolOff);
5463 
5464  for (unsigned I = 1; I != Operands.size(); ++I) {
5465  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5466  if (Op.isCPol()) {
5467  Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5468  return MatchOperand_Success;
5469  }
5470  }
5471 
5472  Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5473  AMDGPUOperand::ImmTyCPol));
5474 
5475  return MatchOperand_Success;
5476 }
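// Example (illustrative): cache-policy tokens are parsed one at a time and OR-ed
// into a single cpol operand, so "buffer_load_dword v0, off, s[0:3], 0 glc slc dlc"
// (GFX10+) sets GLC|SLC|DLC, while repeating a policy, e.g. "glc noglc", is
// rejected as a duplicate cache policy modifier.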
5477 
5478 static void addOptionalImmOperand(
5479  MCInst& Inst, const OperandVector& Operands,
5480  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5481  AMDGPUOperand::ImmTy ImmT,
5482  int64_t Default = 0) {
5483  auto i = OptionalIdx.find(ImmT);
5484  if (i != OptionalIdx.end()) {
5485  unsigned Idx = i->second;
5486  ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5487  } else {
5488  Inst.addOperand(MCOperand::createImm(Default));
5489  }
5490 }
5491 
5492 OperandMatchResultTy
5493 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5494  StringRef &Value,
5495  SMLoc &StringLoc) {
5496  if (!trySkipId(Prefix, AsmToken::Colon))
5497  return MatchOperand_NoMatch;
5498 
5499  StringLoc = getLoc();
5500  return parseId(Value, "expected an identifier") ? MatchOperand_Success
5501  : MatchOperand_ParseFail;
5502 }
5503 
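// Example (illustrative): parseStringWithPrefix above handles "prefix:identifier"
// operands such as the SDWA selectors "dst_sel:DWORD" or "src0_sel:BYTE_0"; it
// only returns the identifier text, and validation is left to the caller.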
5504 //===----------------------------------------------------------------------===//
5505 // MTBUF format
5506 //===----------------------------------------------------------------------===//
5507 
5508 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5509  int64_t MaxVal,
5510  int64_t &Fmt) {
5511  int64_t Val;
5512  SMLoc Loc = getLoc();
5513 
5514  auto Res = parseIntWithPrefix(Pref, Val);
5515  if (Res == MatchOperand_ParseFail)
5516  return false;
5517  if (Res == MatchOperand_NoMatch)
5518  return true;
5519 
5520  if (Val < 0 || Val > MaxVal) {
5521  Error(Loc, Twine("out of range ", StringRef(Pref)));
5522  return false;
5523  }
5524 
5525  Fmt = Val;
5526  return true;
5527 }
5528 
5529 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5530 // values to live in a joint format operand in the MCInst encoding.
5531 OperandMatchResultTy
5532 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5533  using namespace llvm::AMDGPU::MTBUFFormat;
5534 
5535  int64_t Dfmt = DFMT_UNDEF;
5536  int64_t Nfmt = NFMT_UNDEF;
5537 
5538  // dfmt and nfmt can appear in either order, and each is optional.
5539  for (int I = 0; I < 2; ++I) {
5540  if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5541  return MatchOperand_ParseFail;
5542 
5543  if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5544  return MatchOperand_ParseFail;
5545  }
5546  // Skip optional comma between dfmt/nfmt
5547  // but guard against 2 commas following each other.
5548  if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5549  !peekToken().is(AsmToken::Comma)) {
5550  trySkipToken(AsmToken::Comma);
5551  }
5552  }
5553 
5554  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5555  return MatchOperand_NoMatch;
5556 
5557  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5558  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5559 
5560  Format = encodeDfmtNfmt(Dfmt, Nfmt);
5561  return MatchOperand_Success;
5562 }
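// Example (illustrative, assumed pre-GFX10 MTBUF syntax): the two fields may
// appear in either order and either may be omitted, e.g.
//   tbuffer_load_format_x v0, off, s[0:3], dfmt:1, nfmt:7, 0
// with both values packed into one format operand by encodeDfmtNfmt.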
5563 
5564 OperandMatchResultTy
5565 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5566  using namespace llvm::AMDGPU::MTBUFFormat;
5567 
5568  int64_t Fmt = UFMT_UNDEF;
5569 
5570  if (!tryParseFmt("format", UFMT_MAX, Fmt))
5571  return MatchOperand_ParseFail;
5572 
5573  if (Fmt == UFMT_UNDEF)
5574  return MatchOperand_NoMatch;
5575 
5576  Format = Fmt;
5577  return MatchOperand_Success;
5578 }
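// Example (illustrative): on GFX10+ a bare numeric unified format such as
// "format:22" is accepted here, subject to the UFMT_MAX range check.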
5579 
5580 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5581  int64_t &Nfmt,
5582  StringRef FormatStr,
5583  SMLoc Loc) {
5584  using namespace llvm::AMDGPU::MTBUFFormat;
5585  int64_t Format;
5586 
5587  Format = getDfmt(FormatStr);
5588  if (Format != DFMT_UNDEF) {
5589  Dfmt = Format;
5590  return true;
5591  }
5592 
5593  Format = getNfmt(FormatStr, getSTI());
5594  if (Format != NFMT_UNDEF) {
5595  Nfmt = Format;
5596  return true;
5597  }
5598 
5599  Error(Loc, "unsupported format");
5600  return false;
5601 }
5602 
5603 OperandMatchResultTy
5604 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5605  SMLoc FormatLoc,
5606  int64_t &Format) {
5607  using namespace llvm::AMDGPU::MTBUFFormat;
5608 
5609  int64_t Dfmt = DFMT_UNDEF;
5610  int64_t Nfmt = NFMT_UNDEF;
5611  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5612  return MatchOperand_ParseFail;
5613 
5614  if (trySkipToken(AsmToken::Comma)) {
5615  StringRef Str;
5616  SMLoc Loc = getLoc();
5617  if (!parseId(Str, "expected a format string") ||
5618  !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5619  return MatchOperand_ParseFail;
5620  }
5621  if (Dfmt == DFMT_UNDEF) {
5622  Error(Loc, "duplicate numeric format");
5623  return MatchOperand_ParseFail;
5624  } else if (Nfmt == NFMT_UNDEF) {
5625  Error(Loc, "duplicate data format");
5626  return MatchOperand_ParseFail;
5627  }
5628  }
5629 
5630  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5631  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5632 
5633  if (isGFX10Plus()) {
5634  auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5635  if (Ufmt == UFMT_UNDEF) {
5636  Error(FormatLoc, "unsupported format");
5637  return MatchOperand_ParseFail;
5638  }
5639  Format = Ufmt;
5640  } else {
5641  Format = encodeDfmtNfmt(Dfmt, Nfmt);
5642  }
5643 
5644  return MatchOperand_Success;
5645 }
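// Example (illustrative): the split symbolic syntax names the two fields, e.g.
//   format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]
// on GFX10+ the pair is re-encoded as the matching unified format, and a
// combination with no unified equivalent is reported as "unsupported format".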
5646 
5647 OperandMatchResultTy
5648 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5649  SMLoc Loc,
5650  int64_t &Format) {
5651  using namespace llvm::AMDGPU::MTBUFFormat;
5652 
5653  auto Id = getUnifiedFormat(FormatStr);
5654  if (Id == UFMT_UNDEF)
5655  return MatchOperand_NoMatch;
5656 
5657  if (!isGFX10Plus()) {
5658  Error(Loc, "unified format is not supported on this GPU");
5659  return MatchOperand_ParseFail;
5660  }
5661 
5662  Format = Id;
5663  return MatchOperand_Success;
5664 }
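// Example (illustrative): a GFX10+ unified format name such as
//   format:[BUF_FMT_32_FLOAT]
// resolves through getUnifiedFormat; on older targets the same spelling is
// rejected with "unified format is not supported on this GPU".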
5665 
5666 OperandMatchResultTy
5667 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5668  using namespace llvm::AMDGPU::MTBUFFormat;
5669  SMLoc Loc = getLoc();
5670 
5671  if (!parseExpr(Format))
5672  return MatchOperand_ParseFail;
5673  if (!isValidFormatEncoding(Format, getSTI())) {
5674  Error(Loc, "out of range format");
5675  return MatchOperand_ParseFail;
5676  }
5677 
5678  return MatchOperand_Success;
5679 }
5680 
5681 OperandMatchResultTy
5682 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5683  using namespace llvm::AMDGPU::MTBUFFormat;
5684 
5685  if (!trySkipId("format", AsmToken::Colon))
5686  return MatchOperand_NoMatch;
5687 
5688  if (trySkipToken(AsmToken::LBrac)) {
5689  StringRef FormatStr;
5690  SMLoc Loc = getLoc();
5691  if (!parseId(FormatStr, "expected a format string"))
5692  return MatchOperand_ParseFail;
5693 
5694  auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5695  if (Res == MatchOperand_NoMatch)
5696  Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5697  if (Res != MatchOperand_Success)
5698  return Res;
5699 
5700  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5701  return MatchOperand_ParseFail;
5702 
5703  return MatchOperand_Success;
5704  }
5705 
5706  return parseNumericFormat(Format);
5707 }
5708 
5709 OperandMatchResultTy
5710 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5711  using namespace llvm::AMDGPU::MTBUFFormat;
5712 
5713  int64_t Format = getDefaultFormatEncoding(getSTI());
5714  OperandMatchResultTy Res;
5715  SMLoc Loc = getLoc();
5716 
5717  // Parse legacy format syntax.
5718  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5719  if (Res == MatchOperand_ParseFail)
5720  return Res;
5721 
5722  bool FormatFound = (Res == MatchOperand_Success);
5723 
5724  Operands.push_back(
5725  AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5726 
5727  if (FormatFound)
5728  trySkipToken(AsmToken::Comma);
5729 
5730  if (isToken(AsmToken::EndOfStatement)) {
5731  // We are expecting an soffset operand,
5732  // but let matcher handle the error.
5733  return MatchOperand_Success;
5734  }
5735 
5736  // Parse soffset.
5737  Res = parseRegOrImm(Operands);
5738  if (Res != MatchOperand_Success)
5739  return Res;
5740 
5741  trySkipToken(AsmToken::Comma);
5742 
5743  if (!FormatFound) {
5744  Res = parseSymbolicOrNumericFormat(Format);
5745  if (Res == MatchOperand_ParseFail)
5746  return Res;
5747  if (Res == MatchOperand_Success) {
5748  auto Size = Operands.size();
5749  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5750  assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5751  Op.setImm(Format);
5752  }
5753  return MatchOperand_Success;
5754  }
5755 
5756  if (isId("format") && peekToken().is(AsmToken::Colon)) {
5757  Error(getLoc(), "duplicate format");
5758  return MatchOperand_ParseFail;
5759  }
5760  return MatchOperand_Success;
5761 }
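// Example (illustrative): for MTBUF instructions both orderings keep working:
// the legacy form puts the format before the soffset ("..., dfmt:1, nfmt:7, s0")
// while the symbolic/numeric form follows it ("..., s0 format:[BUF_FMT_32_FLOAT]");
// a second "format:" after a legacy format is rejected as a duplicate.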
5762 
5763 //===----------------------------------------------------------------------===//
5764 // ds
5765 //===----------------------------------------------------------------------===//
5766 
5767 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5768  const OperandVector &Operands) {
5769  OptionalImmIndexMap OptionalIdx;
5770 
5771  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5772  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5773 
5774  // Add the register arguments
5775  if (Op.isReg()) {
5776  Op.addRegOperands(Inst, 1);
5777  continue;
5778  }
5779 
5780  // Handle optional arguments
5781  OptionalIdx[Op.getImmTy()] = i;
5782  }
5783 
5784  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5785  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5786  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5787 
5788  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5789 }
5790 
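// Example (illustrative): cvtDSOffset01 above serves the two-offset DS forms, e.g.
//   ds_write2_b32 v1, v2, v3 offset0:4 offset1:8
// where either offset defaults to 0 when omitted, "gds" is an optional trailing
// flag, and an implicit m0 operand is appended last.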
5791 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5792  bool IsGdsHardcoded) {
5793  OptionalImmIndexMap OptionalIdx;
5794 
5795  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5796  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5797 
5798  // Add the register arguments
5799  if (Op.isReg()) {
5800  Op.addRegOperands(Inst, 1);
5801  continue;
5802  }
5803 
5804  if (Op.isToken() && Op.getToken() == "gds") {
5805  IsGdsHardcoded = true;
5806  continue;
5807  }
5808 
5809  // Handle optional arguments
5810  OptionalIdx[Op.getImmTy()] = i;
5811  }
5812 
5813  AMDGPUOperand::ImmTy OffsetType =
5814  (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5815  Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5816  Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5817  AMDGPUOperand::ImmTyOffset;
5818 
5819  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5820 
5821  if (!IsGdsHardcoded) {
5822  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5823  }
5824  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5825 }
5826 
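// Example (illustrative): the single-offset DS forms go through cvtDSImpl, e.g.
//   ds_write_b32 v0, v1 offset:16 gds
// ds_swizzle_b32 is the exception and takes its pattern through the swizzle
// operand type selected above.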
5827 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5828  OptionalImmIndexMap OptionalIdx;
5829 
5830  unsigned OperandIdx[4];
5831  unsigned EnMask = 0;
5832  int SrcIdx = 0;
5833 
5834  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5835  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5836 
5837  // Add the register arguments
5838  if (Op.isReg()) {
5839  assert(SrcIdx < 4);
5840  OperandIdx[SrcIdx] = Inst.size();
5841  Op.addRegOperands(Inst, 1);
5842  ++SrcIdx;
5843  continue;
5844  }
5845 
5846  if (Op.isOff()) {
5847  assert(SrcIdx < 4);
5848  OperandIdx[SrcIdx] = Inst.size();
5849  Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5850  ++SrcIdx;
5851  continue;
5852  }
5853 
5854  if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5855  Op.addImmOperands(Inst, 1);
5856  continue;
5857  }
5858 
5859  if (Op.isToken() && Op.getToken() == "done")
5860  continue;
5861 
5862  // Handle optional arguments
5863  OptionalIdx[Op.getImmTy()] = i;
5864  }
5865 
5866  assert(SrcIdx == 4);
5867 
5868  bool Compr = false;
5869  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5870  Compr = true;