//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

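  // Source operand modifiers: abs/neg for floating-point sources and sext for
  // integer sources. They are packed into the SISrcMods immediate that
  // accompanies a source operand.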
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
  };

  // Immediate operand kind.
  // It helps to identify the location of an offending operand after an error.
  // Note that regular literals and mandatory literals (KImm) must be handled
  // differently. When looking for an offending operand, we should usually
  // ignore mandatory literals because they are part of the instruction and
  // cannot be changed. Report location of mandatory operands only for VOPD,
  // when both OpX and OpY have a KImm and there are no other literals.
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyMandatoryLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

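  // Payload of the operand; the active member is selected by Kind.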
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindMandatoryLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyMandatoryLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool IsImmKindMandatoryLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

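  // The predicates below classify this operand for the generated assembly
  // matcher. The prefix encodes which register file the operand may come from
  // (SGPR, VGPR/SGPR, VGPR-only, or AGPR) and whether an immediate must be an
  // inline constant or may also be a literal; the suffix gives the operand's
  // type and bit width.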
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

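  // Predicates for operands whose validity depends on the subtarget or on a
  // non-trivial encoding; these are declared here and defined out of line.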
  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayAlu() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;
  bool isWaitVDST() const;
  bool isWaitEXP() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

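  // Helpers that append this operand to an MCInst; the *WithInputMods variants
  // emit the packed modifier immediate first, followed by the register or
  // value it applies to.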
  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

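  // Record that register index 'i' of the given register file has been used
  // and refresh the corresponding .kernel.{sgpr,vgpr,agpr}_count symbol.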
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(getSTI());
  }

  bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not
    // const in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);

  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
  OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);

  bool parseDelay(int64_t &Delay);
  OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);

  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

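  // sendmsg and hwreg operands are parsed into OperandInfoTy fields first and
  // validated separately, so diagnostics can point at the offending field.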
  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
  SMLoc getBLGPLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands,
                  bool SearchMandatoryLiterals = false) const;
  SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;
  SMLoc getInstLoc(const OperandVector &Operands) const;

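  // Target-specific semantic checks applied to the matched MCInst before it
  // is emitted.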
  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  bool validateLdsDMA(uint64_t Enc, const MCInst &Inst,
                      const OperandVector &Operands, const SMLoc &IDLoc);
  bool validateExeczVcczOperands(const OperandVector &Operands);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken(bool ShouldSkipSpace = true);
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
                    OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
  void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
    cvtDPP(Inst, Operands, true);
  }
  void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1796  bool IsDPP8 = false);
1797  void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1798  cvtVOP3DPP(Inst, Operands, true);
1799  }
1800 
1801  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1802  AMDGPUOperand::ImmTy Type);
1803  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1804  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1805  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1806  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1807  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1808  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1809  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1810  uint64_t BasicInstType,
1811  bool SkipDstVcc = false,
1812  bool SkipSrcVcc = false);
1813 
1814  AMDGPUOperand::Ptr defaultBLGP() const;
1815  AMDGPUOperand::Ptr defaultCBSZ() const;
1816  AMDGPUOperand::Ptr defaultABID() const;
1817 
1818  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1819  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1820 
1821  AMDGPUOperand::Ptr defaultWaitVDST() const;
1822  AMDGPUOperand::Ptr defaultWaitEXP() const;
1823  OperandMatchResultTy parseVOPD(OperandVector &Operands);
1824 };
1825 
1826 struct OptionalOperand {
1827  const char *Name;
1828  AMDGPUOperand::ImmTy Type;
1829  bool IsBit;
1830  bool (*ConvertResult)(int64_t&);
1831 };
1832 
1833 } // end anonymous namespace
1834 
1835 // May be called with integer type with equivalent bitwidth.
1836 static const fltSemantics *getFltSemantics(unsigned Size) {
1837  switch (Size) {
1838  case 4:
1839  return &APFloat::IEEEsingle();
1840  case 8:
1841  return &APFloat::IEEEdouble();
1842  case 2:
1843  return &APFloat::IEEEhalf();
1844  default:
1845  llvm_unreachable("unsupported fp type");
1846  }
1847 }
1848 
1849 static const fltSemantics *getFltSemantics(MVT VT) {
1850  return getFltSemantics(VT.getSizeInBits() / 8);
1851 }
1852 
1853 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1854  switch (OperandType) {
1855  case AMDGPU::OPERAND_REG_IMM_INT32:
1856  case AMDGPU::OPERAND_REG_IMM_FP32:
1857  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1858  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1859  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1860  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1861  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1862  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1863  case AMDGPU::OPERAND_REG_IMM_V2FP32:
1864  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1865  case AMDGPU::OPERAND_REG_IMM_V2INT32:
1866  case AMDGPU::OPERAND_KIMM32:
1867  return &APFloat::IEEEsingle();
1868  case AMDGPU::OPERAND_REG_IMM_INT64:
1869  case AMDGPU::OPERAND_REG_IMM_FP64:
1870  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1871  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1872  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1873  return &APFloat::IEEEdouble();
1874  case AMDGPU::OPERAND_REG_IMM_INT16:
1875  case AMDGPU::OPERAND_REG_IMM_FP16:
1876  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1877  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1878  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1879  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1880  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1881  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1882  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1883  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1884  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1885  case AMDGPU::OPERAND_REG_IMM_V2INT16:
1886  case AMDGPU::OPERAND_REG_IMM_V2FP16:
1887  case AMDGPU::OPERAND_KIMM16:
1888  return &APFloat::IEEEhalf();
1889  default:
1890  llvm_unreachable("unsupported fp type");
1891  }
1892 }
1893 
1894 //===----------------------------------------------------------------------===//
1895 // Operand
1896 //===----------------------------------------------------------------------===//
1897 
1898 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1899  bool Lost;
1900 
 1901  // Convert the literal to the operand's floating-point type
1902  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1903  APFloat::rmNearestTiesToEven,
1904  &Lost);
 1905  // We allow precision loss but not overflow or underflow
1906  if (Status != APFloat::opOK &&
1907  Lost &&
1908  ((Status & APFloat::opOverflow) != 0 ||
1909  (Status & APFloat::opUnderflow) != 0)) {
1910  return false;
1911  }
1912 
1913  return true;
1914 }
1915 
1916 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1917  return isUIntN(Size, Val) || isIntN(Size, Val);
1918 }
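// For example, isSafeTruncation(-1, 16) is true (-1 fits as a signed 16-bit
// value), while isSafeTruncation(0x1FFFF, 16) is false (the value fits neither
// as a signed nor as an unsigned 16-bit integer).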
1919 
1920 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1921  if (VT.getScalarType() == MVT::i16) {
1922  // FP immediate values are broken.
1923  return isInlinableIntLiteral(Val);
1924  }
1925 
1926  // f16/v2f16 operands work correctly for all values.
1927  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1928 }
1929 
1930 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1931 
1932  // This is a hack to enable named inline values like
1933  // shared_base with both 32-bit and 64-bit operands.
1934  // Note that these values are defined as
1935  // 32-bit operands only.
1936  if (isInlineValue()) {
1937  return true;
1938  }
1939 
1940  if (!isImmTy(ImmTyNone)) {
1941  // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1942  return false;
1943  }
1944  // TODO: We should avoid using host float here. It would be better to
1945  // check the float bit values which is what a few other places do.
1946  // We've had bot failures before due to weird NaN support on mips hosts.
1947 
1948  APInt Literal(64, Imm.Val);
1949 
1950  if (Imm.IsFPImm) { // We got fp literal token
1951  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1952  return AMDGPU::isInlinableLiteral64(Imm.Val,
1953  AsmParser->hasInv2PiInlineImm());
1954  }
1955 
1956  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1957  if (!canLosslesslyConvertToFPType(FPLiteral, type))
1958  return false;
1959 
1960  if (type.getScalarSizeInBits() == 16) {
1961  return isInlineableLiteralOp16(
1962  static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1963  type, AsmParser->hasInv2PiInlineImm());
1964  }
1965 
1966  // Check if single precision literal is inlinable
 1967  return AMDGPU::isInlinableLiteral32(
 1968  static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1969  AsmParser->hasInv2PiInlineImm());
1970  }
1971 
1972  // We got int literal token.
1973  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1974  return AMDGPU::isInlinableLiteral64(Imm.Val,
1975  AsmParser->hasInv2PiInlineImm());
1976  }
1977 
1978  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1979  return false;
1980  }
1981 
1982  if (type.getScalarSizeInBits() == 16) {
1983  return isInlineableLiteralOp16(
1984  static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1985  type, AsmParser->hasInv2PiInlineImm());
1986  }
1987 
 1988  return AMDGPU::isInlinableLiteral32(
 1989  static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1990  AsmParser->hasInv2PiInlineImm());
1991 }
1992 
1993 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1994  // Check that this immediate can be added as literal
1995  if (!isImmTy(ImmTyNone)) {
1996  return false;
1997  }
1998 
1999  if (!Imm.IsFPImm) {
2000  // We got int literal token.
2001 
2002  if (type == MVT::f64 && hasFPModifiers()) {
2003  // Cannot apply fp modifiers to int literals preserving the same semantics
2004  // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2005  // disable these cases.
2006  return false;
2007  }
2008 
2009  unsigned Size = type.getSizeInBits();
2010  if (Size == 64)
2011  Size = 32;
2012 
2013  // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2014  // types.
2015  return isSafeTruncation(Imm.Val, Size);
2016  }
2017 
2018  // We got fp literal token
2019  if (type == MVT::f64) { // Expected 64-bit fp operand
 2020  // We would set the low 32 bits of the literal to zeroes, but such literals are still accepted
2021  return true;
2022  }
2023 
2024  if (type == MVT::i64) { // Expected 64-bit int operand
2025  // We don't allow fp literals in 64-bit integer instructions. It is
2026  // unclear how we should encode them.
2027  return false;
2028  }
2029 
2030  // We allow fp literals with f16x2 operands assuming that the specified
2031  // literal goes into the lower half and the upper half is zero. We also
2032  // require that the literal may be losslessly converted to f16.
2033  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
2034  (type == MVT::v2i16)? MVT::i16 :
2035  (type == MVT::v2f32)? MVT::f32 : type;
2036 
2037  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2038  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2039 }
2040 
2041 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2042  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2043 }
2044 
2045 bool AMDGPUOperand::isVRegWithInputMods() const {
2046  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2047  // GFX90A allows DPP on 64-bit operands.
2048  (isRegClass(AMDGPU::VReg_64RegClassID) &&
2049  AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
2050 }
2051 
2052 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2053  if (AsmParser->isVI())
2054  return isVReg32();
2055  else if (AsmParser->isGFX9Plus())
2056  return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2057  else
2058  return false;
2059 }
2060 
2061 bool AMDGPUOperand::isSDWAFP16Operand() const {
2062  return isSDWAOperand(MVT::f16);
2063 }
2064 
2065 bool AMDGPUOperand::isSDWAFP32Operand() const {
2066  return isSDWAOperand(MVT::f32);
2067 }
2068 
2069 bool AMDGPUOperand::isSDWAInt16Operand() const {
2070  return isSDWAOperand(MVT::i16);
2071 }
2072 
2073 bool AMDGPUOperand::isSDWAInt32Operand() const {
2074  return isSDWAOperand(MVT::i32);
2075 }
2076 
2077 bool AMDGPUOperand::isBoolReg() const {
2078  auto FB = AsmParser->getFeatureBits();
2079  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2080  (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2081 }
2082 
2083 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2084 {
2085  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2086  assert(Size == 2 || Size == 4 || Size == 8);
2087 
2088  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2089 
2090  if (Imm.Mods.Abs) {
2091  Val &= ~FpSignMask;
2092  }
2093  if (Imm.Mods.Neg) {
2094  Val ^= FpSignMask;
2095  }
2096 
2097  return Val;
2098 }
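// For example, with Size == 4 the sign mask is 0x80000000: 'abs' clears the sign
// bit, turning 0xBF800000 (-1.0f) into 0x3F800000 (1.0f), and 'neg' flips it.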
2099 
2100 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2101  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2102  Inst.getNumOperands())) {
2103  addLiteralImmOperand(Inst, Imm.Val,
2104  ApplyModifiers &
2105  isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2106  } else {
2107  assert(!isImmTy(ImmTyNone) || !hasModifiers());
2108  Inst.addOperand(MCOperand::createImm(Imm.Val));
2109  setImmKindNone();
2110  }
2111 }
2112 
2113 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2114  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2115  auto OpNum = Inst.getNumOperands();
2116  // Check that this operand accepts literals
2117  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2118 
2119  if (ApplyModifiers) {
2120  assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2121  const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2122  Val = applyInputFPModifiers(Val, Size);
2123  }
2124 
2125  APInt Literal(64, Val);
2126  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2127 
2128  if (Imm.IsFPImm) { // We got fp literal token
2129  switch (OpTy) {
 2130  case AMDGPU::OPERAND_REG_IMM_INT64:
 2131  case AMDGPU::OPERAND_REG_IMM_FP64:
 2132  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
 2133  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
 2134  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
 2135  if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2136  AsmParser->hasInv2PiInlineImm())) {
2137  Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2138  setImmKindConst();
2139  return;
2140  }
2141 
2142  // Non-inlineable
2143  if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2144  // For fp operands we check if low 32 bits are zeros
2145  if (Literal.getLoBits(32) != 0) {
2146  const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2147  "Can't encode literal as exact 64-bit floating-point operand. "
2148  "Low 32-bits will be set to zero");
2149  }
2150 
2151  Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2152  setImmKindLiteral();
2153  return;
2154  }
2155 
2156  // We don't allow fp literals in 64-bit integer instructions. It is
2157  // unclear how we should encode them. This case should be checked earlier
2158  // in predicate methods (isLiteralImm())
2159  llvm_unreachable("fp literal in 64-bit integer instruction.");
2160 
2186  case AMDGPU::OPERAND_KIMM16: {
2187  bool lost;
2188  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
 2189  // Convert the literal to the operand's floating-point type
2190  FPLiteral.convert(*getOpFltSemantics(OpTy),
 2191  APFloat::rmNearestTiesToEven, &lost);
 2192  // We allow precision loss but not overflow or underflow. This should be
2193  // checked earlier in isLiteralImm()
2194 
2195  uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2196  Inst.addOperand(MCOperand::createImm(ImmVal));
2197  if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2198  setImmKindMandatoryLiteral();
2199  } else {
2200  setImmKindLiteral();
2201  }
2202  return;
2203  }
2204  default:
2205  llvm_unreachable("invalid operand size");
2206  }
2207 
2208  return;
2209  }
2210 
2211  // We got int literal token.
2212  // Only sign extend inline immediates.
2213  switch (OpTy) {
2227  if (isSafeTruncation(Val, 32) &&
2228  AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2229  AsmParser->hasInv2PiInlineImm())) {
2230  Inst.addOperand(MCOperand::createImm(Val));
2231  setImmKindConst();
2232  return;
2233  }
2234 
2235  Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2236  setImmKindLiteral();
2237  return;
2238 
 2239  case AMDGPU::OPERAND_REG_IMM_INT64:
 2240  case AMDGPU::OPERAND_REG_IMM_FP64:
 2241  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
 2242  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
 2243  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
 2244  if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2245  Inst.addOperand(MCOperand::createImm(Val));
2246  setImmKindConst();
2247  return;
2248  }
2249 
 2250  Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
 2251  setImmKindLiteral();
2252  return;
2253 
2261  if (isSafeTruncation(Val, 16) &&
2262  AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2263  AsmParser->hasInv2PiInlineImm())) {
2264  Inst.addOperand(MCOperand::createImm(Val));
2265  setImmKindConst();
2266  return;
2267  }
2268 
2269  Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2270  setImmKindLiteral();
2271  return;
2272 
2277  assert(isSafeTruncation(Val, 16));
2278  assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2279  AsmParser->hasInv2PiInlineImm()));
2280 
2281  Inst.addOperand(MCOperand::createImm(Val));
2282  return;
2283  }
 2284  case AMDGPU::OPERAND_KIMM32:
 2285  Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2286  setImmKindMandatoryLiteral();
2287  return;
 2288  case AMDGPU::OPERAND_KIMM16:
 2289  Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2290  setImmKindMandatoryLiteral();
2291  return;
2292  default:
2293  llvm_unreachable("invalid operand size");
2294  }
2295 }
2296 
2297 template <unsigned Bitwidth>
2298 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2299  APInt Literal(64, Imm.Val);
2300  setImmKindMandatoryLiteral();
2301 
2302  if (!Imm.IsFPImm) {
2303  // We got int literal token.
2304  Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2305  return;
2306  }
2307 
2308  bool Lost;
2309  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2310  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
 2311  APFloat::rmNearestTiesToEven, &Lost);
 2312  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2313 }
2314 
2315 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2316  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2317 }
2318 
2319 static bool isInlineValue(unsigned Reg) {
2320  switch (Reg) {
2321  case AMDGPU::SRC_SHARED_BASE:
2322  case AMDGPU::SRC_SHARED_LIMIT:
2323  case AMDGPU::SRC_PRIVATE_BASE:
2324  case AMDGPU::SRC_PRIVATE_LIMIT:
2325  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2326  return true;
2327  case AMDGPU::SRC_VCCZ:
2328  case AMDGPU::SRC_EXECZ:
2329  case AMDGPU::SRC_SCC:
2330  return true;
2331  case AMDGPU::SGPR_NULL:
2332  return true;
2333  default:
2334  return false;
2335  }
2336 }
2337 
2338 bool AMDGPUOperand::isInlineValue() const {
2339  return isRegKind() && ::isInlineValue(getReg());
2340 }
2341 
2342 //===----------------------------------------------------------------------===//
2343 // AsmParser
2344 //===----------------------------------------------------------------------===//
2345 
2346 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2347  if (Is == IS_VGPR) {
2348  switch (RegWidth) {
2349  default: return -1;
2350  case 32:
2351  return AMDGPU::VGPR_32RegClassID;
2352  case 64:
2353  return AMDGPU::VReg_64RegClassID;
2354  case 96:
2355  return AMDGPU::VReg_96RegClassID;
2356  case 128:
2357  return AMDGPU::VReg_128RegClassID;
2358  case 160:
2359  return AMDGPU::VReg_160RegClassID;
2360  case 192:
2361  return AMDGPU::VReg_192RegClassID;
2362  case 224:
2363  return AMDGPU::VReg_224RegClassID;
2364  case 256:
2365  return AMDGPU::VReg_256RegClassID;
2366  case 512:
2367  return AMDGPU::VReg_512RegClassID;
2368  case 1024:
2369  return AMDGPU::VReg_1024RegClassID;
2370  }
2371  } else if (Is == IS_TTMP) {
2372  switch (RegWidth) {
2373  default: return -1;
2374  case 32:
2375  return AMDGPU::TTMP_32RegClassID;
2376  case 64:
2377  return AMDGPU::TTMP_64RegClassID;
2378  case 128:
2379  return AMDGPU::TTMP_128RegClassID;
2380  case 256:
2381  return AMDGPU::TTMP_256RegClassID;
2382  case 512:
2383  return AMDGPU::TTMP_512RegClassID;
2384  }
2385  } else if (Is == IS_SGPR) {
2386  switch (RegWidth) {
2387  default: return -1;
2388  case 32:
2389  return AMDGPU::SGPR_32RegClassID;
2390  case 64:
2391  return AMDGPU::SGPR_64RegClassID;
2392  case 96:
2393  return AMDGPU::SGPR_96RegClassID;
2394  case 128:
2395  return AMDGPU::SGPR_128RegClassID;
2396  case 160:
2397  return AMDGPU::SGPR_160RegClassID;
2398  case 192:
2399  return AMDGPU::SGPR_192RegClassID;
2400  case 224:
2401  return AMDGPU::SGPR_224RegClassID;
2402  case 256:
2403  return AMDGPU::SGPR_256RegClassID;
2404  case 512:
2405  return AMDGPU::SGPR_512RegClassID;
2406  }
2407  } else if (Is == IS_AGPR) {
2408  switch (RegWidth) {
2409  default: return -1;
2410  case 32:
2411  return AMDGPU::AGPR_32RegClassID;
2412  case 64:
2413  return AMDGPU::AReg_64RegClassID;
2414  case 96:
2415  return AMDGPU::AReg_96RegClassID;
2416  case 128:
2417  return AMDGPU::AReg_128RegClassID;
2418  case 160:
2419  return AMDGPU::AReg_160RegClassID;
2420  case 192:
2421  return AMDGPU::AReg_192RegClassID;
2422  case 224:
2423  return AMDGPU::AReg_224RegClassID;
2424  case 256:
2425  return AMDGPU::AReg_256RegClassID;
2426  case 512:
2427  return AMDGPU::AReg_512RegClassID;
2428  case 1024:
2429  return AMDGPU::AReg_1024RegClassID;
2430  }
2431  }
2432  return -1;
2433 }
2434 
2437  .Case("exec", AMDGPU::EXEC)
2438  .Case("vcc", AMDGPU::VCC)
2439  .Case("flat_scratch", AMDGPU::FLAT_SCR)
2440  .Case("xnack_mask", AMDGPU::XNACK_MASK)
2441  .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2442  .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2443  .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2444  .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2445  .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2446  .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2447  .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2448  .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2449  .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2450  .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2451  .Case("lds_direct", AMDGPU::LDS_DIRECT)
2452  .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2453  .Case("m0", AMDGPU::M0)
2454  .Case("vccz", AMDGPU::SRC_VCCZ)
2455  .Case("src_vccz", AMDGPU::SRC_VCCZ)
2456  .Case("execz", AMDGPU::SRC_EXECZ)
2457  .Case("src_execz", AMDGPU::SRC_EXECZ)
2458  .Case("scc", AMDGPU::SRC_SCC)
2459  .Case("src_scc", AMDGPU::SRC_SCC)
2460  .Case("tba", AMDGPU::TBA)
2461  .Case("tma", AMDGPU::TMA)
2462  .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2463  .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2464  .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2465  .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2466  .Case("vcc_lo", AMDGPU::VCC_LO)
2467  .Case("vcc_hi", AMDGPU::VCC_HI)
2468  .Case("exec_lo", AMDGPU::EXEC_LO)
2469  .Case("exec_hi", AMDGPU::EXEC_HI)
2470  .Case("tma_lo", AMDGPU::TMA_LO)
2471  .Case("tma_hi", AMDGPU::TMA_HI)
2472  .Case("tba_lo", AMDGPU::TBA_LO)
2473  .Case("tba_hi", AMDGPU::TBA_HI)
2474  .Case("pc", AMDGPU::PC_REG)
2475  .Case("null", AMDGPU::SGPR_NULL)
2476  .Default(AMDGPU::NoRegister);
2477 }
2478 
2479 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2480  SMLoc &EndLoc, bool RestoreOnFailure) {
2481  auto R = parseRegister();
2482  if (!R) return true;
2483  assert(R->isReg());
2484  RegNo = R->getReg();
2485  StartLoc = R->getStartLoc();
2486  EndLoc = R->getEndLoc();
2487  return false;
2488 }
2489 
2490 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2491  SMLoc &EndLoc) {
2492  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2493 }
2494 
2495 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2496  SMLoc &StartLoc,
2497  SMLoc &EndLoc) {
2498  bool Result =
2499  ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2500  bool PendingErrors = getParser().hasPendingError();
2501  getParser().clearPendingErrors();
2502  if (PendingErrors)
2503  return MatchOperand_ParseFail;
2504  if (Result)
2505  return MatchOperand_NoMatch;
2506  return MatchOperand_Success;
2507 }
2508 
2509 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2510  RegisterKind RegKind, unsigned Reg1,
2511  SMLoc Loc) {
2512  switch (RegKind) {
2513  case IS_SPECIAL:
2514  if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2515  Reg = AMDGPU::EXEC;
2516  RegWidth = 64;
2517  return true;
2518  }
2519  if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2520  Reg = AMDGPU::FLAT_SCR;
2521  RegWidth = 64;
2522  return true;
2523  }
2524  if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2525  Reg = AMDGPU::XNACK_MASK;
2526  RegWidth = 64;
2527  return true;
2528  }
2529  if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2530  Reg = AMDGPU::VCC;
2531  RegWidth = 64;
2532  return true;
2533  }
2534  if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2535  Reg = AMDGPU::TBA;
2536  RegWidth = 64;
2537  return true;
2538  }
2539  if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2540  Reg = AMDGPU::TMA;
2541  RegWidth = 64;
2542  return true;
2543  }
2544  Error(Loc, "register does not fit in the list");
2545  return false;
2546  case IS_VGPR:
2547  case IS_SGPR:
2548  case IS_AGPR:
2549  case IS_TTMP:
2550  if (Reg1 != Reg + RegWidth / 32) {
2551  Error(Loc, "registers in a list must have consecutive indices");
2552  return false;
2553  }
2554  RegWidth += 32;
2555  return true;
2556  default:
2557  llvm_unreachable("unexpected register kind");
2558  }
2559 }
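// For example, while parsing the list [s0,s1,s2,s3], each register appended here
// must directly follow the ones already collected (Reg1 == Reg + RegWidth / 32),
// and RegWidth grows by 32 on every successful call.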
2560 
2561 struct RegInfo {
 2562  StringLiteral Name;
 2563  RegisterKind Kind;
2564 };
2565 
2566 static constexpr RegInfo RegularRegisters[] = {
2567  {{"v"}, IS_VGPR},
2568  {{"s"}, IS_SGPR},
2569  {{"ttmp"}, IS_TTMP},
2570  {{"acc"}, IS_AGPR},
2571  {{"a"}, IS_AGPR},
2572 };
2573 
2574 static bool isRegularReg(RegisterKind Kind) {
2575  return Kind == IS_VGPR ||
2576  Kind == IS_SGPR ||
2577  Kind == IS_TTMP ||
2578  Kind == IS_AGPR;
2579 }
2580 
2581 static const RegInfo* getRegularRegInfo(StringRef Str) {
2582  for (const RegInfo &Reg : RegularRegisters)
2583  if (Str.startswith(Reg.Name))
2584  return &Reg;
2585  return nullptr;
2586 }
2587 
2588 static bool getRegNum(StringRef Str, unsigned& Num) {
2589  return !Str.getAsInteger(10, Num);
2590 }
2591 
2592 bool
2593 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2594  const AsmToken &NextToken) const {
2595 
2596  // A list of consecutive registers: [s0,s1,s2,s3]
2597  if (Token.is(AsmToken::LBrac))
2598  return true;
2599 
2600  if (!Token.is(AsmToken::Identifier))
2601  return false;
2602 
2603  // A single register like s0 or a range of registers like s[0:1]
2604 
2605  StringRef Str = Token.getString();
2606  const RegInfo *Reg = getRegularRegInfo(Str);
2607  if (Reg) {
2608  StringRef RegName = Reg->Name;
2609  StringRef RegSuffix = Str.substr(RegName.size());
2610  if (!RegSuffix.empty()) {
2611  unsigned Num;
2612  // A single register with an index: rXX
2613  if (getRegNum(RegSuffix, Num))
2614  return true;
2615  } else {
2616  // A range of registers: r[XX:YY].
2617  if (NextToken.is(AsmToken::LBrac))
2618  return true;
2619  }
2620  }
2621 
2622  return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2623 }
2624 
2625 bool
2626 AMDGPUAsmParser::isRegister()
2627 {
2628  return isRegister(getToken(), peekToken());
2629 }
2630 
2631 unsigned
2632 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2633  unsigned RegNum,
2634  unsigned RegWidth,
2635  SMLoc Loc) {
2636 
2637  assert(isRegularReg(RegKind));
2638 
2639  unsigned AlignSize = 1;
2640  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2641  // SGPR and TTMP registers must be aligned.
2642  // Max required alignment is 4 dwords.
2643  AlignSize = std::min(RegWidth / 32, 4u);
2644  }
2645 
2646  if (RegNum % AlignSize != 0) {
2647  Error(Loc, "invalid register alignment");
2648  return AMDGPU::NoRegister;
2649  }
2650 
2651  unsigned RegIdx = RegNum / AlignSize;
2652  int RCID = getRegClass(RegKind, RegWidth);
2653  if (RCID == -1) {
2654  Error(Loc, "invalid or unsupported register size");
2655  return AMDGPU::NoRegister;
2656  }
2657 
2658  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2659  const MCRegisterClass RC = TRI->getRegClass(RCID);
2660  if (RegIdx >= RC.getNumRegs()) {
2661  Error(Loc, "register index is out of range");
2662  return AMDGPU::NoRegister;
2663  }
2664 
2665  return RC.getRegister(RegIdx);
2666 }
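// For example, an SGPR pair such as s[2:3] has RegWidth == 64, so AlignSize is 2
// and the starting index must be even; s[1:2] is rejected with
// "invalid register alignment".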
2667 
2668 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2669  int64_t RegLo, RegHi;
2670  if (!skipToken(AsmToken::LBrac, "missing register index"))
2671  return false;
2672 
2673  SMLoc FirstIdxLoc = getLoc();
2674  SMLoc SecondIdxLoc;
2675 
2676  if (!parseExpr(RegLo))
2677  return false;
2678 
2679  if (trySkipToken(AsmToken::Colon)) {
2680  SecondIdxLoc = getLoc();
2681  if (!parseExpr(RegHi))
2682  return false;
2683  } else {
2684  RegHi = RegLo;
2685  }
2686 
2687  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2688  return false;
2689 
2690  if (!isUInt<32>(RegLo)) {
2691  Error(FirstIdxLoc, "invalid register index");
2692  return false;
2693  }
2694 
2695  if (!isUInt<32>(RegHi)) {
2696  Error(SecondIdxLoc, "invalid register index");
2697  return false;
2698  }
2699 
2700  if (RegLo > RegHi) {
2701  Error(FirstIdxLoc, "first register index should not exceed second index");
2702  return false;
2703  }
2704 
2705  Num = static_cast<unsigned>(RegLo);
2706  RegWidth = 32 * ((RegHi - RegLo) + 1);
2707  return true;
2708 }
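// For example, the range [0:3] yields Num == 0 and RegWidth == 128 (four 32-bit
// registers), while a single bracketed index such as [5] yields RegWidth == 32.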
2709 
2710 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2711  unsigned &RegNum, unsigned &RegWidth,
2712  SmallVectorImpl<AsmToken> &Tokens) {
2713  assert(isToken(AsmToken::Identifier));
2714  unsigned Reg = getSpecialRegForName(getTokenStr());
2715  if (Reg) {
2716  RegNum = 0;
2717  RegWidth = 32;
2718  RegKind = IS_SPECIAL;
2719  Tokens.push_back(getToken());
2720  lex(); // skip register name
2721  }
2722  return Reg;
2723 }
2724 
2725 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2726  unsigned &RegNum, unsigned &RegWidth,
2727  SmallVectorImpl<AsmToken> &Tokens) {
2728  assert(isToken(AsmToken::Identifier));
2729  StringRef RegName = getTokenStr();
2730  auto Loc = getLoc();
2731 
2732  const RegInfo *RI = getRegularRegInfo(RegName);
2733  if (!RI) {
2734  Error(Loc, "invalid register name");
2735  return AMDGPU::NoRegister;
2736  }
2737 
2738  Tokens.push_back(getToken());
2739  lex(); // skip register name
2740 
2741  RegKind = RI->Kind;
2742  StringRef RegSuffix = RegName.substr(RI->Name.size());
2743  if (!RegSuffix.empty()) {
2744  // Single 32-bit register: vXX.
2745  if (!getRegNum(RegSuffix, RegNum)) {
2746  Error(Loc, "invalid register index");
2747  return AMDGPU::NoRegister;
2748  }
2749  RegWidth = 32;
2750  } else {
2751  // Range of registers: v[XX:YY]. ":YY" is optional.
2752  if (!ParseRegRange(RegNum, RegWidth))
2753  return AMDGPU::NoRegister;
2754  }
2755 
2756  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2757 }
2758 
2759 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2760  unsigned &RegWidth,
2761  SmallVectorImpl<AsmToken> &Tokens) {
2762  unsigned Reg = AMDGPU::NoRegister;
2763  auto ListLoc = getLoc();
2764 
2765  if (!skipToken(AsmToken::LBrac,
2766  "expected a register or a list of registers")) {
2767  return AMDGPU::NoRegister;
2768  }
2769 
2770  // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2771 
2772  auto Loc = getLoc();
2773  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2774  return AMDGPU::NoRegister;
2775  if (RegWidth != 32) {
2776  Error(Loc, "expected a single 32-bit register");
2777  return AMDGPU::NoRegister;
2778  }
2779 
2780  for (; trySkipToken(AsmToken::Comma); ) {
2781  RegisterKind NextRegKind;
2782  unsigned NextReg, NextRegNum, NextRegWidth;
2783  Loc = getLoc();
2784 
2785  if (!ParseAMDGPURegister(NextRegKind, NextReg,
2786  NextRegNum, NextRegWidth,
2787  Tokens)) {
2788  return AMDGPU::NoRegister;
2789  }
2790  if (NextRegWidth != 32) {
2791  Error(Loc, "expected a single 32-bit register");
2792  return AMDGPU::NoRegister;
2793  }
2794  if (NextRegKind != RegKind) {
2795  Error(Loc, "registers in a list must be of the same kind");
2796  return AMDGPU::NoRegister;
2797  }
2798  if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2799  return AMDGPU::NoRegister;
2800  }
2801 
2802  if (!skipToken(AsmToken::RBrac,
2803  "expected a comma or a closing square bracket")) {
2804  return AMDGPU::NoRegister;
2805  }
2806 
2807  if (isRegularReg(RegKind))
2808  Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2809 
2810  return Reg;
2811 }
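// For example, the list [s0,s1,s2,s3] collects four consecutive 32-bit SGPRs and
// is then folded by getRegularReg into the same 128-bit register as s[0:3].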
2812 
2813 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2814  unsigned &RegNum, unsigned &RegWidth,
2815  SmallVectorImpl<AsmToken> &Tokens) {
2816  auto Loc = getLoc();
2817  Reg = AMDGPU::NoRegister;
2818 
2819  if (isToken(AsmToken::Identifier)) {
2820  Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2821  if (Reg == AMDGPU::NoRegister)
2822  Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2823  } else {
2824  Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2825  }
2826 
2827  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2828  if (Reg == AMDGPU::NoRegister) {
2829  assert(Parser.hasPendingError());
2830  return false;
2831  }
2832 
2833  if (!subtargetHasRegister(*TRI, Reg)) {
2834  if (Reg == AMDGPU::SGPR_NULL) {
2835  Error(Loc, "'null' operand is not supported on this GPU");
2836  } else {
2837  Error(Loc, "register not available on this GPU");
2838  }
2839  return false;
2840  }
2841 
2842  return true;
2843 }
2844 
2845 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2846  unsigned &RegNum, unsigned &RegWidth,
2847  bool RestoreOnFailure /*=false*/) {
2848  Reg = AMDGPU::NoRegister;
2849 
2850  SmallVector<AsmToken, 1> Tokens;
2851  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2852  if (RestoreOnFailure) {
2853  while (!Tokens.empty()) {
2854  getLexer().UnLex(Tokens.pop_back_val());
2855  }
2856  }
2857  return true;
2858  }
2859  return false;
2860 }
2861 
 2862 Optional<StringRef>
 2863 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2864  switch (RegKind) {
2865  case IS_VGPR:
2866  return StringRef(".amdgcn.next_free_vgpr");
2867  case IS_SGPR:
2868  return StringRef(".amdgcn.next_free_sgpr");
2869  default:
2870  return None;
2871  }
2872 }
2873 
2874 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2875  auto SymbolName = getGprCountSymbolName(RegKind);
2876  assert(SymbolName && "initializing invalid register kind");
2877  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2878  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2879 }
2880 
2881 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2882  unsigned DwordRegIndex,
2883  unsigned RegWidth) {
2884  // Symbols are only defined for GCN targets
2885  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2886  return true;
2887 
2888  auto SymbolName = getGprCountSymbolName(RegKind);
2889  if (!SymbolName)
2890  return true;
2891  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2892 
2893  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2894  int64_t OldCount;
2895 
2896  if (!Sym->isVariable())
2897  return !Error(getLoc(),
2898  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2899  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2900  return !Error(
2901  getLoc(),
2902  ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2903 
2904  if (OldCount <= NewMax)
2905  Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2906 
2907  return true;
2908 }
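// For example, after parsing v7 (DwordRegIndex == 7, RegWidth == 32), NewMax is 7,
// so .amdgcn.next_free_vgpr is raised to 8 unless it already exceeds that value.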
2909 
2910 std::unique_ptr<AMDGPUOperand>
2911 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2912  const auto &Tok = getToken();
2913  SMLoc StartLoc = Tok.getLoc();
2914  SMLoc EndLoc = Tok.getEndLoc();
2915  RegisterKind RegKind;
2916  unsigned Reg, RegNum, RegWidth;
2917 
2918  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2919  return nullptr;
2920  }
2921  if (isHsaAbiVersion3AndAbove(&getSTI())) {
2922  if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2923  return nullptr;
2924  } else
2925  KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2926  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2927 }
2928 
 2929 OperandMatchResultTy
 2930 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2931  // TODO: add syntactic sugar for 1/(2*PI)
2932 
2933  if (isRegister())
2934  return MatchOperand_NoMatch;
2935  assert(!isModifier());
2936 
2937  const auto& Tok = getToken();
2938  const auto& NextTok = peekToken();
2939  bool IsReal = Tok.is(AsmToken::Real);
2940  SMLoc S = getLoc();
2941  bool Negate = false;
2942 
2943  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2944  lex();
2945  IsReal = true;
2946  Negate = true;
2947  }
2948 
2949  if (IsReal) {
2950  // Floating-point expressions are not supported.
2951  // Can only allow floating-point literals with an
2952  // optional sign.
2953 
2954  StringRef Num = getTokenStr();
2955  lex();
2956 
2957  APFloat RealVal(APFloat::IEEEdouble());
2958  auto roundMode = APFloat::rmNearestTiesToEven;
2959  if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2960  return MatchOperand_ParseFail;
2961  }
2962  if (Negate)
2963  RealVal.changeSign();
2964 
2965  Operands.push_back(
2966  AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2967  AMDGPUOperand::ImmTyNone, true));
2968 
2969  return MatchOperand_Success;
2970 
2971  } else {
2972  int64_t IntVal;
2973  const MCExpr *Expr;
2974  SMLoc S = getLoc();
2975 
2976  if (HasSP3AbsModifier) {
2977  // This is a workaround for handling expressions
2978  // as arguments of SP3 'abs' modifier, for example:
2979  // |1.0|
2980  // |-1|
2981  // |1+x|
2982  // This syntax is not compatible with syntax of standard
2983  // MC expressions (due to the trailing '|').
2984  SMLoc EndLoc;
2985  if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2986  return MatchOperand_ParseFail;
2987  } else {
2988  if (Parser.parseExpression(Expr))
2989  return MatchOperand_ParseFail;
2990  }
2991 
2992  if (Expr->evaluateAsAbsolute(IntVal)) {
2993  Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2994  } else {
2995  Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2996  }
2997 
2998  return MatchOperand_Success;
2999  }
3000 
3001  return MatchOperand_NoMatch;
3002 }
3003 
 3004 OperandMatchResultTy
 3005 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3006  if (!isRegister())
3007  return MatchOperand_NoMatch;
3008 
3009  if (auto R = parseRegister()) {
3010  assert(R->isReg());
3011  Operands.push_back(std::move(R));
3012  return MatchOperand_Success;
3013  }
3014  return MatchOperand_ParseFail;
3015 }
3016 
 3017 OperandMatchResultTy
 3018 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
3019  auto res = parseReg(Operands);
3020  if (res != MatchOperand_NoMatch) {
3021  return res;
3022  } else if (isModifier()) {
3023  return MatchOperand_NoMatch;
3024  } else {
3025  return parseImm(Operands, HasSP3AbsMod);
3026  }
3027 }
3028 
3029 bool
3030 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3031  if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3032  const auto &str = Token.getString();
3033  return str == "abs" || str == "neg" || str == "sext";
3034  }
3035  return false;
3036 }
3037 
3038 bool
3039 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3040  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3041 }
3042 
3043 bool
3044 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3045  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3046 }
3047 
3048 bool
3049 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3050  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3051 }
3052 
3053 // Check if this is an operand modifier or an opcode modifier
3054 // which may look like an expression but it is not. We should
3055 // avoid parsing these modifiers as expressions. Currently
3056 // recognized sequences are:
3057 // |...|
3058 // abs(...)
3059 // neg(...)
3060 // sext(...)
3061 // -reg
3062 // -|...|
3063 // -abs(...)
3064 // name:...
3065 // Note that simple opcode modifiers like 'gds' may be parsed as
3066 // expressions; this is a special case. See getExpressionAsToken.
3067 //
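// For example, in "v_add_f32 v0, -|v1|, abs(v2)" both source operands begin with
// one of the sequences above and must not be handed to the expression parser.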
3068 bool
3069 AMDGPUAsmParser::isModifier() {
3070 
3071  AsmToken Tok = getToken();
3072  AsmToken NextToken[2];
3073  peekTokens(NextToken);
3074 
3075  return isOperandModifier(Tok, NextToken[0]) ||
3076  (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3077  isOpcodeModifierWithVal(Tok, NextToken[0]);
3078 }
3079 
3080 // Check if the current token is an SP3 'neg' modifier.
3081 // Currently this modifier is allowed in the following context:
3082 //
3083 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3084 // 2. Before an 'abs' modifier: -abs(...)
3085 // 3. Before an SP3 'abs' modifier: -|...|
3086 //
3087 // In all other cases "-" is handled as a part
3088 // of an expression that follows the sign.
3089 //
3090 // Note: When "-" is followed by an integer literal,
3091 // this is interpreted as integer negation rather
 3092  // than a floating-point NEG modifier applied to the literal.
 3093  // Besides being counter-intuitive, such use of a floating-point
3094 // NEG modifier would have resulted in different meaning
3095 // of integer literals used with VOP1/2/C and VOP3,
3096 // for example:
3097 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3098 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3099 // Negative fp literals with preceding "-" are
3100 // handled likewise for uniformity
3101 //
3102 bool
3103 AMDGPUAsmParser::parseSP3NegModifier() {
3104 
3105  AsmToken NextToken[2];
3106  peekTokens(NextToken);
3107 
3108  if (isToken(AsmToken::Minus) &&
3109  (isRegister(NextToken[0], NextToken[1]) ||
3110  NextToken[0].is(AsmToken::Pipe) ||
3111  isId(NextToken[0], "abs"))) {
3112  lex();
3113  return true;
3114  }
3115 
3116  return false;
3117 }
3118 
 3119 OperandMatchResultTy
 3120 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3121  bool AllowImm) {
3122  bool Neg, SP3Neg;
3123  bool Abs, SP3Abs;
3124  SMLoc Loc;
3125 
3126  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3127  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3128  Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3129  return MatchOperand_ParseFail;
3130  }
3131 
3132  SP3Neg = parseSP3NegModifier();
3133 
3134  Loc = getLoc();
3135  Neg = trySkipId("neg");
3136  if (Neg && SP3Neg) {
3137  Error(Loc, "expected register or immediate");
3138  return MatchOperand_ParseFail;
3139  }
3140  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3141  return MatchOperand_ParseFail;
3142 
3143  Abs = trySkipId("abs");
3144  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3145  return MatchOperand_ParseFail;
3146 
3147  Loc = getLoc();
3148  SP3Abs = trySkipToken(AsmToken::Pipe);
3149  if (Abs && SP3Abs) {
3150  Error(Loc, "expected register or immediate");
3151  return MatchOperand_ParseFail;
3152  }
3153 
 3153 
 3154  OperandMatchResultTy Res;
 3155  if (AllowImm) {
3156  Res = parseRegOrImm(Operands, SP3Abs);
3157  } else {
3158  Res = parseReg(Operands);
3159  }
3160  if (Res != MatchOperand_Success) {
3161  return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3162  }
3163 
3164  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3165  return MatchOperand_ParseFail;
3166  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3167  return MatchOperand_ParseFail;
3168  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3169  return MatchOperand_ParseFail;
3170 
3171  AMDGPUOperand::Modifiers Mods;
3172  Mods.Abs = Abs || SP3Abs;
3173  Mods.Neg = Neg || SP3Neg;
3174 
3175  if (Mods.hasFPModifiers()) {
3176  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3177  if (Op.isExpr()) {
3178  Error(Op.getStartLoc(), "expected an absolute expression");
3179  return MatchOperand_ParseFail;
3180  }
3181  Op.setModifiers(Mods);
3182  }
3183  return MatchOperand_Success;
3184 }
3185 
 3186 OperandMatchResultTy
 3187 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3188  bool AllowImm) {
3189  bool Sext = trySkipId("sext");
3190  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3191  return MatchOperand_ParseFail;
3192 
 3193  OperandMatchResultTy Res;
 3194  if (AllowImm) {
3195  Res = parseRegOrImm(Operands);
3196  } else {
3197  Res = parseReg(Operands);
3198  }
3199  if (Res != MatchOperand_Success) {
3200  return Sext? MatchOperand_ParseFail : Res;
3201  }
3202 
3203  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3204  return MatchOperand_ParseFail;
3205 
3206  AMDGPUOperand::Modifiers Mods;
3207  Mods.Sext = Sext;
3208 
3209  if (Mods.hasIntModifiers()) {
3210  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3211  if (Op.isExpr()) {
3212  Error(Op.getStartLoc(), "expected an absolute expression");
3213  return MatchOperand_ParseFail;
3214  }
3215  Op.setModifiers(Mods);
3216  }
3217 
3218  return MatchOperand_Success;
3219 }
3220 
 3221 OperandMatchResultTy
 3222 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3223  return parseRegOrImmWithFPInputMods(Operands, false);
3224 }
3225 
 3226 OperandMatchResultTy
 3227 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3228  return parseRegOrImmWithIntInputMods(Operands, false);
3229 }
3230 
3231 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3232  auto Loc = getLoc();
3233  if (trySkipId("off")) {
3234  Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3235  AMDGPUOperand::ImmTyOff, false));
3236  return MatchOperand_Success;
3237  }
3238 
3239  if (!isRegister())
3240  return MatchOperand_NoMatch;
3241 
3242  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3243  if (Reg) {
3244  Operands.push_back(std::move(Reg));
3245  return MatchOperand_Success;
3246  }
3247 
3248  return MatchOperand_ParseFail;
3249 
3250 }
3251 
3252 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3253  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3254 
3255  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3256  (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3257  (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3258  (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3259  return Match_InvalidOperand;
3260 
3261  if ((TSFlags & SIInstrFlags::VOP3) &&
3262  (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3263  getForcedEncodingSize() != 64)
3264  return Match_PreferE32;
3265 
3266  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3267  Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3268  // v_mac_f32/16 allow only dst_sel == DWORD;
3269  auto OpNum =
3270  AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3271  const auto &Op = Inst.getOperand(OpNum);
3272  if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3273  return Match_InvalidOperand;
3274  }
3275  }
3276 
3277  return Match_Success;
3278 }
3279 
 3280 static ArrayRef<unsigned> getAllVariants() {
 3281  static const unsigned Variants[] = {
 3282  AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
 3283  AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
 3284  AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
 3285  };
3286 
3287  return makeArrayRef(Variants);
3288 }
3289 
3290 // What asm variants we should check
3291 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3292  if (isForcedDPP() && isForcedVOP3()) {
3293  static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3294  return makeArrayRef(Variants);
3295  }
3296  if (getForcedEncodingSize() == 32) {
3297  static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3298  return makeArrayRef(Variants);
3299  }
3300 
3301  if (isForcedVOP3()) {
3302  static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3303  return makeArrayRef(Variants);
3304  }
3305 
3306  if (isForcedSDWA()) {
3307  static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
 3308  AMDGPUAsmVariants::SDWA9};
 3309  return makeArrayRef(Variants);
3310  }
3311 
3312  if (isForcedDPP()) {
3313  static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3314  return makeArrayRef(Variants);
3315  }
3316 
3317  return getAllVariants();
3318 }
3319 
3320 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3321  if (isForcedDPP() && isForcedVOP3())
3322  return "e64_dpp";
3323 
3324  if (getForcedEncodingSize() == 32)
3325  return "e32";
3326 
3327  if (isForcedVOP3())
3328  return "e64";
3329 
3330  if (isForcedSDWA())
3331  return "sdwa";
3332 
3333  if (isForcedDPP())
3334  return "dpp";
3335 
3336  return "";
3337 }
3338 
3339 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3340  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3341  const unsigned Num = Desc.getNumImplicitUses();
3342  for (unsigned i = 0; i < Num; ++i) {
3343  unsigned Reg = Desc.ImplicitUses[i];
3344  switch (Reg) {
3345  case AMDGPU::FLAT_SCR:
3346  case AMDGPU::VCC:
3347  case AMDGPU::VCC_LO:
3348  case AMDGPU::VCC_HI:
3349  case AMDGPU::M0:
3350  return Reg;
3351  default:
3352  break;
3353  }
3354  }
3355  return AMDGPU::NoRegister;
3356 }
3357 
3358 // NB: This code is correct only when used to check constant
 3359  // bus limitations because GFX7 supports no f16 inline constants.
3360 // Note that there are no cases when a GFX7 opcode violates
3361 // constant bus limitations due to the use of an f16 constant.
3362 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3363  unsigned OpIdx) const {
3364  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3365 
3366  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3367  return false;
3368  }
3369 
3370  const MCOperand &MO = Inst.getOperand(OpIdx);
3371 
3372  int64_t Val = MO.getImm();
3373  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3374 
3375  switch (OpSize) { // expected operand size
3376  case 8:
3377  return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3378  case 4:
3379  return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3380  case 2: {
3381  const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3385  return AMDGPU::isInlinableIntLiteral(Val);
3386 
3391 
3395  return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3396 
3397  return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3398  }
3399  default:
3400  llvm_unreachable("invalid operand size");
3401  }
3402 }
3403 
3404 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3405  if (!isGFX10Plus())
3406  return 1;
3407 
3408  switch (Opcode) {
3409  // 64-bit shift instructions can use only one scalar value input
3410  case AMDGPU::V_LSHLREV_B64_e64:
3411  case AMDGPU::V_LSHLREV_B64_gfx10:
3412  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3413  case AMDGPU::V_LSHRREV_B64_e64:
3414  case AMDGPU::V_LSHRREV_B64_gfx10:
3415  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3416  case AMDGPU::V_ASHRREV_I64_e64:
3417  case AMDGPU::V_ASHRREV_I64_gfx10:
3418  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3419  case AMDGPU::V_LSHL_B64_e64:
3420  case AMDGPU::V_LSHR_B64_e64:
3421  case AMDGPU::V_ASHR_I64_e64:
3422  return 1;
3423  default:
3424  return 2;
3425  }
3426 }
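// For example, on GFX10+ a 64-bit shift such as v_lshlrev_b64 may still read only
// one scalar value, while most other VALU instructions may read two.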
3427 
3428 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
 3429 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
 3430 
3431 // Get regular operand indices in the same order as specified
3432 // in the instruction (but append mandatory literals to the end).
3433 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3434  bool AddMandatoryLiterals = false) {
3435 
3436  int16_t ImmIdx =
3437  AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3438 
3439  if (isVOPD(Opcode)) {
3440  int16_t ImmDeferredIdx =
3441  AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3442  : -1;
3443 
3444  return {getNamedOperandIdx(Opcode, OpName::src0X),
3445  getNamedOperandIdx(Opcode, OpName::vsrc1X),
3446  getNamedOperandIdx(Opcode, OpName::src0Y),
3447  getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3448  ImmDeferredIdx,
3449  ImmIdx};
3450  }
3451 
3452  return {getNamedOperandIdx(Opcode, OpName::src0),
3453  getNamedOperandIdx(Opcode, OpName::src1),
3454  getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3455 }
3456 
3457 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3458  const MCOperand &MO = Inst.getOperand(OpIdx);
3459  if (MO.isImm()) {
3460  return !isInlineConstant(Inst, OpIdx);
3461  } else if (MO.isReg()) {
3462  auto Reg = MO.getReg();
3463  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3464  auto PReg = mc2PseudoReg(Reg);
3465  return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3466  } else {
3467  return true;
3468  }
3469 }
3470 
3471 bool AMDGPUAsmParser::validateConstantBusLimitations(
3472  const MCInst &Inst, const OperandVector &Operands) {
3473  const unsigned Opcode = Inst.getOpcode();
3474  const MCInstrDesc &Desc = MII.get(Opcode);
3475  unsigned LastSGPR = AMDGPU::NoRegister;
3476  unsigned ConstantBusUseCount = 0;
3477  unsigned NumLiterals = 0;
3478  unsigned LiteralSize;
3479 
3480  if (!(Desc.TSFlags &
3483  !isVOPD(Opcode))
3484  return true;
3485 
3486  // Check special imm operands (used by madmk, etc)
3487  if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3488  ++NumLiterals;
3489  LiteralSize = 4;
3490  }
3491 
3492  SmallDenseSet<unsigned> SGPRsUsed;
3493  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3494  if (SGPRUsed != AMDGPU::NoRegister) {
3495  SGPRsUsed.insert(SGPRUsed);
3496  ++ConstantBusUseCount;
3497  }
3498 
3499  OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3500 
3501  for (int OpIdx : OpIndices) {
3502  if (OpIdx == -1)
3503  continue;
3504 
3505  const MCOperand &MO = Inst.getOperand(OpIdx);
3506  if (usesConstantBus(Inst, OpIdx)) {
3507  if (MO.isReg()) {
3508  LastSGPR = mc2PseudoReg(MO.getReg());
3509  // Pairs of registers with a partial intersections like these
3510  // s0, s[0:1]
3511  // flat_scratch_lo, flat_scratch
3512  // flat_scratch_lo, flat_scratch_hi
3513  // are theoretically valid but they are disabled anyway.
3514  // Note that this code mimics SIInstrInfo::verifyInstruction
3515  if (SGPRsUsed.insert(LastSGPR).second) {
3516  ++ConstantBusUseCount;
3517  }
3518  } else { // Expression or a literal
3519 
3520  if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3521  continue; // special operand like VINTERP attr_chan
3522 
3523  // An instruction may use only one literal.
3524  // This has been validated on the previous step.
3525  // See validateVOPLiteral.
3526  // This literal may be used as more than one operand.
3527  // If all these operands are of the same size,
3528  // this literal counts as one scalar value.
3529  // Otherwise it counts as 2 scalar values.
3530  // See "GFX10 Shader Programming", section 3.6.2.3.
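  // For example, if the same 32-bit literal is used for both src0 and src2,
  // it occupies a single constant bus slot; if the operand sizes differ,
  // it counts as two.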
3531 
3532  unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3533  if (Size < 4)
3534  Size = 4;
3535 
3536  if (NumLiterals == 0) {
3537  NumLiterals = 1;
3538  LiteralSize = Size;
3539  } else if (LiteralSize != Size) {
3540  NumLiterals = 2;
3541  }
3542  }
3543  }
3544  }
3545  ConstantBusUseCount += NumLiterals;
3546 
3547  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3548  return true;
3549 
3550  SMLoc LitLoc = getLitLoc(Operands);
3551  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3552  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3553  Error(Loc, "invalid operand (violates constant bus restrictions)");
3554  return false;
3555 }
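// Illustrative example (editor's sketch, assuming a pre-GFX10 target where
// getConstantBusLimit returns 1 for VOP3 encodings):
//   v_add_f32_e64 v0, s0, v1   ; one SGPR read            -> accepted
//   v_add_f32_e64 v0, s0, s1   ; two distinct SGPR reads  -> "invalid operand
//                              ;  (violates constant bus restrictions)"
// A literal reused by several same-size operands counts as a single constant
// bus slot, as described in the comments above.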
3556 
3557 bool
3558 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3559  const OperandVector &Operands) {
3560  const unsigned Opcode = Inst.getOpcode();
3561  const MCInstrDesc &Desc = MII.get(Opcode);
3562 
3563  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3564  if (DstIdx == -1 ||
3565  Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3566  return true;
3567  }
3568 
3569  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3570 
3571  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3572  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3573  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3574 
3575  assert(DstIdx != -1);
3576  const MCOperand &Dst = Inst.getOperand(DstIdx);
3577  assert(Dst.isReg());
3578 
3579  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3580 
3581  for (int SrcIdx : SrcIndices) {
3582  if (SrcIdx == -1) break;
3583  const MCOperand &Src = Inst.getOperand(SrcIdx);
3584  if (Src.isReg()) {
3585  if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3586  const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3587  Error(getRegLoc(SrcReg, Operands),
3588  "destination must be different than all sources");
3589  return false;
3590  }
3591  }
3592  }
3593 
3594  return true;
3595 }
3596 
3597 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3598 
3599  const unsigned Opc = Inst.getOpcode();
3600  const MCInstrDesc &Desc = MII.get(Opc);
3601 
3602  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3603  int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3604  assert(ClampIdx != -1);
3605  return Inst.getOperand(ClampIdx).getImm() == 0;
3606  }
3607 
3608  return true;
3609 }
3610 
3611 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3612  const SMLoc &IDLoc) {
3613 
3614  const unsigned Opc = Inst.getOpcode();
3615  const MCInstrDesc &Desc = MII.get(Opc);
3616 
3617  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3618  return true;
3619 
3620  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3621  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3622  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3623 
3624  assert(VDataIdx != -1);
3625 
3626  if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3627  return true;
3628 
3629  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3630  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3631  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3632  if (DMask == 0)
3633  DMask = 1;
3634 
3635  bool IsPackedD16 = false;
3636  unsigned DataSize =
3637  (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3638  if (hasPackedD16()) {
3639  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3640  IsPackedD16 = D16Idx >= 0;
3641  if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3642  DataSize = (DataSize + 1) / 2;
3643  }
3644 
3645  if ((VDataSize / 4) == DataSize + TFESize)
3646  return true;
3647 
3648  StringRef Modifiers;
3649  if (isGFX90A())
3650  Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3651  else
3652  Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3653 
3654  Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3655  return false;
3656 }
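// Illustrative example (editor's sketch, e.g. on gfx10): dmask:0x7 selects
// three components, so the data operand is expected to span three VGPRs
// (plus one more if tfe is set):
//   image_load v[4:6], v[0:1], s[8:15] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm
// Using v[4:5] instead would trigger the "image data size does not match ..."
// error emitted above.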
3657 
3658 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3659  const unsigned Opc = Inst.getOpcode();
3660  const MCInstrDesc &Desc = MII.get(Opc);
3661 
3662  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3663  return true;
3664 
3665  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3666 
3667  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3668  AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3669  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3670  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3671  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3672  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3673 
3674  assert(VAddr0Idx != -1);
3675  assert(SrsrcIdx != -1);
3676  assert(SrsrcIdx > VAddr0Idx);
3677 
3678  if (DimIdx == -1)
3679  return true; // intersect_ray
3680 
3681  unsigned Dim = Inst.getOperand(DimIdx).getImm();
3682  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3683  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3684  unsigned ActualAddrSize =
3685  IsNSA ? SrsrcIdx - VAddr0Idx
3686  : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3687  bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3688 
3689  unsigned ExpectedAddrSize =
3690  AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3691 
3692  if (!IsNSA) {
3693  if (ExpectedAddrSize > 8)
3694  ExpectedAddrSize = 16;
3695 
3696  // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3697  // This provides backward compatibility for assembly created
3698  // before 160b/192b/224b types were directly supported.
3699  if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3700  return true;
3701  }
3702 
3703  return ActualAddrSize == ExpectedAddrSize;
3704 }
3705 
3706 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3707 
3708  const unsigned Opc = Inst.getOpcode();
3709  const MCInstrDesc &Desc = MII.get(Opc);
3710 
3711  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3712  return true;
3713  if (!Desc.mayLoad() || !Desc.mayStore())
3714  return true; // Not atomic
3715 
3716  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3717  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3718 
3719  // This is an incomplete check because image_atomic_cmpswap
3720  // may only use 0x3 and 0xf while other atomic operations
3721  // may use 0x1 and 0x3. However these limitations are
3722  // verified when we check that dmask matches dst size.
3723  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3724 }
3725 
3726 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3727 
3728  const unsigned Opc = Inst.getOpcode();
3729  const MCInstrDesc &Desc = MII.get(Opc);
3730 
3731  if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3732  return true;
3733 
3734  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3735  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3736 
3737  // GATHER4 instructions use dmask in a different fashion compared to
3738  // other MIMG instructions. The only useful DMASK values are
3739  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3740  // (red,red,red,red) etc.) The ISA document doesn't mention
3741  // this.
3742  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3743 }
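// Illustrative example (editor's sketch): gather4 returns one channel for all
// four texels, so exactly one dmask bit may be set, e.g.
//   image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D
// is accepted (blue channel), while dmask:0x3 or dmask:0xf would be rejected
// with "invalid image_gather dmask: only one bit must be set".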
3744 
3745 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3746  const unsigned Opc = Inst.getOpcode();
3747  const MCInstrDesc &Desc = MII.get(Opc);
3748 
3749  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3750  return true;
3751 
3752  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3753  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3754  AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3755 
3756  if (!BaseOpcode->MSAA)
3757  return true;
3758 
3759  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3760  assert(DimIdx != -1);
3761 
3762  unsigned Dim = Inst.getOperand(DimIdx).getImm();
3763  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3764 
3765  return DimInfo->MSAA;
3766 }
3767 
3768 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3769 {
3770  switch (Opcode) {
3771  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3772  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3773  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3774  return true;
3775  default:
3776  return false;
3777  }
3778 }
3779 
 3780 // movrels* opcodes should only allow VGPRs as src0.
3781 // This is specified in .td description for vop1/vop3,
3782 // but sdwa is handled differently. See isSDWAOperand.
3783 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3784  const OperandVector &Operands) {
3785 
3786  const unsigned Opc = Inst.getOpcode();
3787  const MCInstrDesc &Desc = MII.get(Opc);
3788 
3789  if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3790  return true;
3791 
3792  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3793  assert(Src0Idx != -1);
3794 
3795  SMLoc ErrLoc;
3796  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3797  if (Src0.isReg()) {
3798  auto Reg = mc2PseudoReg(Src0.getReg());
3799  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3800  if (!isSGPR(Reg, TRI))
3801  return true;
3802  ErrLoc = getRegLoc(Reg, Operands);
3803  } else {
3804  ErrLoc = getConstLoc(Operands);
3805  }
3806 
3807  Error(ErrLoc, "source operand must be a VGPR");
3808  return false;
3809 }
3810 
3811 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3812  const OperandVector &Operands) {
3813 
3814  const unsigned Opc = Inst.getOpcode();
3815 
3816  if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3817  return true;
3818 
3819  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3820  assert(Src0Idx != -1);
3821 
3822  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3823  if (!Src0.isReg())
3824  return true;
3825 
3826  auto Reg = mc2PseudoReg(Src0.getReg());
3827  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3828  if (!isGFX90A() && isSGPR(Reg, TRI)) {
3829  Error(getRegLoc(Reg, Operands),
3830  "source operand must be either a VGPR or an inline constant");
3831  return false;
3832  }
3833 
3834  return true;
3835 }
3836 
3837 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
3838  const OperandVector &Operands) {
3839  unsigned Opcode = Inst.getOpcode();
3840  const MCInstrDesc &Desc = MII.get(Opcode);
3841 
3842  if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
3843  !getFeatureBits()[FeatureMFMAInlineLiteralBug])
3844  return true;
3845 
3846  const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
3847  if (Src2Idx == -1)
3848  return true;
3849 
3850  if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
3851  Error(getConstLoc(Operands),
3852  "inline constants are not allowed for this operand");
3853  return false;
3854  }
3855 
3856  return true;
3857 }
3858 
3859 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3860  const OperandVector &Operands) {
3861  const unsigned Opc = Inst.getOpcode();
3862  const MCInstrDesc &Desc = MII.get(Opc);
3863 
3864  if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3865  return true;
3866 
3867  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3868  if (Src2Idx == -1)
3869  return true;
3870 
3871  const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3872  if (!Src2.isReg())
3873  return true;
3874 
3875  MCRegister Src2Reg = Src2.getReg();
3876  MCRegister DstReg = Inst.getOperand(0).getReg();
3877  if (Src2Reg == DstReg)
3878  return true;
3879 
3880  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3881  if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3882  return true;
3883 
3884  if (TRI->regsOverlap(Src2Reg, DstReg)) {
3885  Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3886  "source 2 operand must not partially overlap with dst");
3887  return false;
3888  }
3889 
3890  return true;
3891 }
3892 
3893 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3894  switch (Inst.getOpcode()) {
3895  default:
3896  return true;
3897  case V_DIV_SCALE_F32_gfx6_gfx7:
3898  case V_DIV_SCALE_F32_vi:
3899  case V_DIV_SCALE_F32_gfx10:
3900  case V_DIV_SCALE_F64_gfx6_gfx7:
3901  case V_DIV_SCALE_F64_vi:
3902  case V_DIV_SCALE_F64_gfx10:
3903  break;
3904  }
3905 
3906  // TODO: Check that src0 = src1 or src2.
3907 
3908  for (auto Name : {AMDGPU::OpName::src0_modifiers,
 3909  AMDGPU::OpName::src1_modifiers,
 3910  AMDGPU::OpName::src2_modifiers}) {
 3911  if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
 3912  .getImm() &
3913  SISrcMods::ABS) {
3914  return false;
3915  }
3916  }
3917 
3918  return true;
3919 }
3920 
3921 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3922 
3923  const unsigned Opc = Inst.getOpcode();
3924  const MCInstrDesc &Desc = MII.get(Opc);
3925 
3926  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3927  return true;
3928 
3929  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3930  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3931  if (isCI() || isSI())
3932  return false;
3933  }
3934 
3935  return true;
3936 }
3937 
3938 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3939  const unsigned Opc = Inst.getOpcode();
3940  const MCInstrDesc &Desc = MII.get(Opc);
3941 
3942  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3943  return true;
3944 
3945  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3946  if (DimIdx < 0)
3947  return true;
3948 
3949  int64_t Imm = Inst.getOperand(DimIdx).getImm();
3950  if (Imm < 0 || Imm >= 8)
3951  return false;
3952 
3953  return true;
3954 }
3955 
3956 static bool IsRevOpcode(const unsigned Opcode)
3957 {
3958  switch (Opcode) {
3959  case AMDGPU::V_SUBREV_F32_e32:
3960  case AMDGPU::V_SUBREV_F32_e64:
3961  case AMDGPU::V_SUBREV_F32_e32_gfx10:
3962  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3963  case AMDGPU::V_SUBREV_F32_e32_vi:
3964  case AMDGPU::V_SUBREV_F32_e64_gfx10:
3965  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3966  case AMDGPU::V_SUBREV_F32_e64_vi:
3967 
3968  case AMDGPU::V_SUBREV_CO_U32_e32:
3969  case AMDGPU::V_SUBREV_CO_U32_e64:
3970  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3971  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3972 
3973  case AMDGPU::V_SUBBREV_U32_e32:
3974  case AMDGPU::V_SUBBREV_U32_e64:
3975  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3976  case AMDGPU::V_SUBBREV_U32_e32_vi:
3977  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3978  case AMDGPU::V_SUBBREV_U32_e64_vi:
3979 
3980  case AMDGPU::V_SUBREV_U32_e32:
3981  case AMDGPU::V_SUBREV_U32_e64:
3982  case AMDGPU::V_SUBREV_U32_e32_gfx9:
3983  case AMDGPU::V_SUBREV_U32_e32_vi:
3984  case AMDGPU::V_SUBREV_U32_e64_gfx9:
3985  case AMDGPU::V_SUBREV_U32_e64_vi:
3986 
3987  case AMDGPU::V_SUBREV_F16_e32:
3988  case AMDGPU::V_SUBREV_F16_e64:
3989  case AMDGPU::V_SUBREV_F16_e32_gfx10:
3990  case AMDGPU::V_SUBREV_F16_e32_vi:
3991  case AMDGPU::V_SUBREV_F16_e64_gfx10:
3992  case AMDGPU::V_SUBREV_F16_e64_vi:
3993 
3994  case AMDGPU::V_SUBREV_U16_e32:
3995  case AMDGPU::V_SUBREV_U16_e64:
3996  case AMDGPU::V_SUBREV_U16_e32_vi:
3997  case AMDGPU::V_SUBREV_U16_e64_vi:
3998 
3999  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4000  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4001  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4002 
4003  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4004  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4005 
4006  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4007  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4008 
4009  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4010  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4011 
4012  case AMDGPU::V_LSHRREV_B32_e32:
4013  case AMDGPU::V_LSHRREV_B32_e64:
4014  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4015  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4016  case AMDGPU::V_LSHRREV_B32_e32_vi:
4017  case AMDGPU::V_LSHRREV_B32_e64_vi:
4018  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4019  case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4020 
4021  case AMDGPU::V_ASHRREV_I32_e32:
4022  case AMDGPU::V_ASHRREV_I32_e64:
4023  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4024  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4025  case AMDGPU::V_ASHRREV_I32_e32_vi:
4026  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4027  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4028  case AMDGPU::V_ASHRREV_I32_e64_vi:
4029 
4030  case AMDGPU::V_LSHLREV_B32_e32:
4031  case AMDGPU::V_LSHLREV_B32_e64:
4032  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4033  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4034  case AMDGPU::V_LSHLREV_B32_e32_vi:
4035  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4036  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4037  case AMDGPU::V_LSHLREV_B32_e64_vi:
4038 
4039  case AMDGPU::V_LSHLREV_B16_e32:
4040  case AMDGPU::V_LSHLREV_B16_e64:
4041  case AMDGPU::V_LSHLREV_B16_e32_vi:
4042  case AMDGPU::V_LSHLREV_B16_e64_vi:
4043  case AMDGPU::V_LSHLREV_B16_gfx10:
4044 
4045  case AMDGPU::V_LSHRREV_B16_e32:
4046  case AMDGPU::V_LSHRREV_B16_e64:
4047  case AMDGPU::V_LSHRREV_B16_e32_vi:
4048  case AMDGPU::V_LSHRREV_B16_e64_vi:
4049  case AMDGPU::V_LSHRREV_B16_gfx10:
4050 
4051  case AMDGPU::V_ASHRREV_I16_e32:
4052  case AMDGPU::V_ASHRREV_I16_e64:
4053  case AMDGPU::V_ASHRREV_I16_e32_vi:
4054  case AMDGPU::V_ASHRREV_I16_e64_vi:
4055  case AMDGPU::V_ASHRREV_I16_gfx10:
4056 
4057  case AMDGPU::V_LSHLREV_B64_e64:
4058  case AMDGPU::V_LSHLREV_B64_gfx10:
4059  case AMDGPU::V_LSHLREV_B64_vi:
4060 
4061  case AMDGPU::V_LSHRREV_B64_e64:
4062  case AMDGPU::V_LSHRREV_B64_gfx10:
4063  case AMDGPU::V_LSHRREV_B64_vi:
4064 
4065  case AMDGPU::V_ASHRREV_I64_e64:
4066  case AMDGPU::V_ASHRREV_I64_gfx10:
4067  case AMDGPU::V_ASHRREV_I64_vi:
4068 
4069  case AMDGPU::V_PK_LSHLREV_B16:
4070  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4071  case AMDGPU::V_PK_LSHLREV_B16_vi:
4072 
4073  case AMDGPU::V_PK_LSHRREV_B16:
4074  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4075  case AMDGPU::V_PK_LSHRREV_B16_vi:
4076  case AMDGPU::V_PK_ASHRREV_I16:
4077  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4078  case AMDGPU::V_PK_ASHRREV_I16_vi:
4079  return true;
4080  default:
4081  return false;
4082  }
4083 }
4084 
4085 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4086 
4087  using namespace SIInstrFlags;
4088  const unsigned Opcode = Inst.getOpcode();
4089  const MCInstrDesc &Desc = MII.get(Opcode);
4090 
4091  // lds_direct register is defined so that it can be used
4092  // with 9-bit operands only. Ignore encodings which do not accept these.
4093  const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4094  if ((Desc.TSFlags & Enc) == 0)
4095  return None;
4096 
4097  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4098  auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4099  if (SrcIdx == -1)
4100  break;
4101  const auto &Src = Inst.getOperand(SrcIdx);
4102  if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4103 
4104  if (isGFX90A() || isGFX11Plus())
4105  return StringRef("lds_direct is not supported on this GPU");
4106 
4107  if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4108  return StringRef("lds_direct cannot be used with this instruction");
4109 
4110  if (SrcName != OpName::src0)
4111  return StringRef("lds_direct may be used as src0 only");
4112  }
4113  }
4114 
4115  return None;
4116 }
4117 
4118 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4119  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4120  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4121  if (Op.isFlatOffset())
4122  return Op.getStartLoc();
4123  }
4124  return getLoc();
4125 }
4126 
4127 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4128  const OperandVector &Operands) {
4129  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4130  if ((TSFlags & SIInstrFlags::FLAT) == 0)
4131  return true;
4132 
4133  auto Opcode = Inst.getOpcode();
4134  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4135  assert(OpNum != -1);
4136 
4137  const auto &Op = Inst.getOperand(OpNum);
4138  if (!hasFlatOffsets() && Op.getImm() != 0) {
4139  Error(getFlatOffsetLoc(Operands),
4140  "flat offset modifier is not supported on this GPU");
4141  return false;
4142  }
4143 
4144  // For FLAT segment the offset must be positive;
4145  // MSB is ignored and forced to zero.
 4146  if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
 4147  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
4148  if (!isIntN(OffsetSize, Op.getImm())) {
4149  Error(getFlatOffsetLoc(Operands),
4150  Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4151  return false;
4152  }
4153  } else {
4154  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4155  if (!isUIntN(OffsetSize, Op.getImm())) {
4156  Error(getFlatOffsetLoc(Operands),
4157  Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4158  return false;
4159  }
4160  }
4161 
4162  return true;
4163 }
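// Illustrative example (editor's sketch, assuming gfx10, where global and
// scratch instructions use a 12-bit signed offset field):
//   global_load_dword v0, v[0:1], off offset:2047   ; accepted
//   global_load_dword v0, v[0:1], off offset:4096   ; "expected a 12-bit
//                                                    ;  signed offset"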
4164 
4165 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4166  // Start with second operand because SMEM Offset cannot be dst or src0.
4167  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4168  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4169  if (Op.isSMEMOffset())
4170  return Op.getStartLoc();
4171  }
4172  return getLoc();
4173 }
4174 
4175 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4176  const OperandVector &Operands) {
4177  if (isCI() || isSI())
4178  return true;
4179 
4180  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4181  if ((TSFlags & SIInstrFlags::SMRD) == 0)
4182  return true;
4183 
4184  auto Opcode = Inst.getOpcode();
4185  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4186  if (OpNum == -1)
4187  return true;
4188 
4189  const auto &Op = Inst.getOperand(OpNum);
4190  if (!Op.isImm())
4191  return true;
4192 
4193  uint64_t Offset = Op.getImm();
4194  bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4195  if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4196  AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4197  return true;
4198 
4199  Error(getSMEMOffsetLoc(Operands),
4200  (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4201  "expected a 21-bit signed offset");
4202 
4203  return false;
4204 }
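// Illustrative example (editor's sketch, assuming gfx9, which allows a 21-bit
// signed SMEM offset for non-buffer loads):
//   s_load_dword s5, s[2:3], 0xfffff    ; accepted
//   s_load_dword s5, s[2:3], 0x200000   ; "expected a 21-bit signed offset"
// On VI the out-of-range case reports "expected a 20-bit unsigned offset".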
4205 
4206 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4207  unsigned Opcode = Inst.getOpcode();
4208  const MCInstrDesc &Desc = MII.get(Opcode);
4209  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4210  return true;
4211 
4212  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4213  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4214 
4215  const int OpIndices[] = { Src0Idx, Src1Idx };
4216 
4217  unsigned NumExprs = 0;
4218  unsigned NumLiterals = 0;
 4219  uint32_t LiteralValue;
 4220 
4221  for (int OpIdx : OpIndices) {
4222  if (OpIdx == -1) break;
4223 
4224  const MCOperand &MO = Inst.getOperand(OpIdx);
4225  // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4226  if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4227  if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4228  uint32_t Value = static_cast<uint32_t>(MO.getImm());
4229  if (NumLiterals == 0 || LiteralValue != Value) {
4230  LiteralValue = Value;
4231  ++NumLiterals;
4232  }
4233  } else if (MO.isExpr()) {
4234  ++NumExprs;
4235  }
4236  }
4237  }
4238 
4239  return NumLiterals + NumExprs <= 1;
4240 }
4241 
4242 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4243  const unsigned Opc = Inst.getOpcode();
4244  if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4245  Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4246  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4247  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4248 
4249  if (OpSel & ~3)
4250  return false;
4251  }
4252 
4253  uint64_t TSFlags = MII.get(Opc).TSFlags;
4254 
4255  if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4256  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4257  if (OpSelIdx != -1) {
4258  if (Inst.getOperand(OpSelIdx).getImm() != 0)
4259  return false;
4260  }
4261  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4262  if (OpSelHiIdx != -1) {
4263  if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4264  return false;
4265  }
4266  }
4267 
4268  // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4269  if ((TSFlags & SIInstrFlags::IsDOT) && (TSFlags & SIInstrFlags::VOP3) &&
4270  !(TSFlags & SIInstrFlags::VOP3P)) {
4271  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4272  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4273  if (OpSel & 3)
4274  return false;
4275  }
4276 
4277  return true;
4278 }
4279 
4280 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4281  const OperandVector &Operands) {
4282  const unsigned Opc = Inst.getOpcode();
4283  int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4284  if (DppCtrlIdx < 0)
4285  return true;
4286  unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4287 
 4288  if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
 4289  // DPP64 is supported for row_newbcast only.
4290  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4291  if (Src0Idx >= 0 &&
4292  getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4293  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4294  Error(S, "64 bit dpp only supports row_newbcast");
4295  return false;
4296  }
4297  }
4298 
4299  return true;
4300 }
4301 
4302 // Check if VCC register matches wavefront size
4303 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4304  auto FB = getFeatureBits();
4305  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4306  (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4307 }
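// Illustrative example (editor's sketch): the spelling of the condition
// register must match the selected wavefront size, e.g.
//   v_add_co_u32 v0, vcc_lo, v1, v2   ; wave32
//   v_add_co_u32 v0, vcc, v1, v2      ; wave64
// Using the form for the other wave size fails operand matching.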
4308 
4309 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
4310 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4311  const OperandVector &Operands) {
4312  unsigned Opcode = Inst.getOpcode();
4313  const MCInstrDesc &Desc = MII.get(Opcode);
4314  bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4315  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4316  !HasMandatoryLiteral && !isVOPD(Opcode))
4317  return true;
4318 
4319  OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4320 
4321  unsigned NumExprs = 0;
4322  unsigned NumLiterals = 0;
 4323  uint32_t LiteralValue;
 4324 
4325  for (int OpIdx : OpIndices) {
4326  if (OpIdx == -1)
4327  continue;
4328 
4329  const MCOperand &MO = Inst.getOperand(OpIdx);
4330  if (!MO.isImm() && !MO.isExpr())
4331  continue;
4332  if (!isSISrcOperand(Desc, OpIdx))
4333  continue;
4334 
4335  if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4336  uint32_t Value = static_cast<uint32_t>(MO.getImm());
4337  if (NumLiterals == 0 || LiteralValue != Value) {
4338  LiteralValue = Value;
4339  ++NumLiterals;
4340  }
4341  } else if (MO.isExpr()) {
4342  ++NumExprs;
4343  }
4344  }
4345  NumLiterals += NumExprs;
4346 
4347  if (!NumLiterals)
4348  return true;
4349 
4350  if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4351  Error(getLitLoc(Operands), "literal operands are not supported");
4352  return false;
4353  }
4354 
4355  if (NumLiterals > 1) {
4356  Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4357  return false;
4358  }
4359 
4360  return true;
4361 }
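// Illustrative examples (editor's sketch, assuming gfx9, which lacks
// FeatureVOP3Literal):
//   v_add3_u32 v0, v1, v2, 0x1234     ; rejected: "literal operands are not
//                                     ;  supported"
//   v_madmk_f32 v0, v1, 0x1234, v2    ; accepted: the literal is mandatory
// On gfx10+ a VOP3 literal is allowed, but only one unique literal may appear
// in an instruction.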
4362 
4363 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4364 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4365  const MCRegisterInfo *MRI) {
4366  int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4367  if (OpIdx < 0)
4368  return -1;
4369 
4370  const MCOperand &Op = Inst.getOperand(OpIdx);
4371  if (!Op.isReg())
4372  return -1;
4373 
4374  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4375  auto Reg = Sub ? Sub : Op.getReg();
4376  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4377  return AGPR32.contains(Reg) ? 1 : 0;
4378 }
4379 
4380 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4381  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4382  if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
 4383  SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
 4384  SIInstrFlags::DS)) == 0)
4385  return true;
4386 
4387  uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4388  : AMDGPU::OpName::vdata;
4389 
4390  const MCRegisterInfo *MRI = getMRI();
4391  int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4392  int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4393 
4394  if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4395  int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4396  if (Data2Areg >= 0 && Data2Areg != DataAreg)
4397  return false;
4398  }
4399 
4400  auto FB = getFeatureBits();
4401  if (FB[AMDGPU::FeatureGFX90AInsts]) {
4402  if (DataAreg < 0 || DstAreg < 0)
4403  return true;
4404  return DstAreg == DataAreg;
4405  }
4406 
4407  return DstAreg < 1 && DataAreg < 1;
4408 }
4409 
4410 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4411  auto FB = getFeatureBits();
4412  if (!FB[AMDGPU::FeatureGFX90AInsts])
4413  return true;
4414 
4415  const MCRegisterInfo *MRI = getMRI();
4416  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4417  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4418  for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4419  const MCOperand &Op = Inst.getOperand(I);
4420  if (!Op.isReg())
4421  continue;
4422 
4423  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4424  if (!Sub)
4425  continue;
4426 
4427  if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4428  return false;
4429  if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4430  return false;
4431  }
4432 
4433  return true;
4434 }
4435 
4436 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4437  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4438  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4439  if (Op.isBLGP())
4440  return Op.getStartLoc();
4441  }
4442  return SMLoc();
4443 }
4444 
4445 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4446  const OperandVector &Operands) {
4447  unsigned Opc = Inst.getOpcode();
4448  int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4449  if (BlgpIdx == -1)
4450  return true;
4451  SMLoc BLGPLoc = getBLGPLoc(Operands);
4452  if (!BLGPLoc.isValid())
4453  return true;
4454  bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4455  auto FB = getFeatureBits();
4456  bool UsesNeg = false;
4457  if (FB[AMDGPU::FeatureGFX940Insts]) {
4458  switch (Opc) {
4459  case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4460  case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4461  case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4462  case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4463  UsesNeg = true;
4464  }
4465  }
4466 
4467  if (IsNeg == UsesNeg)
4468  return true;
4469 
4470  Error(BLGPLoc,
4471  UsesNeg ? "invalid modifier: blgp is not supported"
4472  : "invalid modifier: neg is not supported");
4473 
4474  return false;
4475 }
4476 
4477 // gfx90a has an undocumented limitation:
4478 // DS_GWS opcodes must use even aligned registers.
4479 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4480  const OperandVector &Operands) {
4481  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4482  return true;
4483 
4484  int Opc = Inst.getOpcode();
4485  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4486  Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4487  return true;
4488 
4489  const MCRegisterInfo *MRI = getMRI();
4490  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4491  int Data0Pos =
4492  AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4493  assert(Data0Pos != -1);
4494  auto Reg = Inst.getOperand(Data0Pos).getReg();
4495  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4496  if (RegIdx & 1) {
4497  SMLoc RegLoc = getRegLoc(Reg, Operands);
4498  Error(RegLoc, "vgpr must be even aligned");
4499  return false;
4500  }
4501 
4502  return true;
4503 }
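// Illustrative example (editor's sketch, gfx90a):
//   ds_gws_init v1 offset:0 gds   ; rejected: "vgpr must be even aligned"
//   ds_gws_init v2 offset:0 gds   ; accepted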
4504 
4505 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4506  const OperandVector &Operands,
4507  const SMLoc &IDLoc) {
4508  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4509  AMDGPU::OpName::cpol);
4510  if (CPolPos == -1)
4511  return true;
4512 
4513  unsigned CPol = Inst.getOperand(CPolPos).getImm();
4514 
4515  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4516  if (TSFlags & SIInstrFlags::SMRD) {
4517  if (CPol && (isSI() || isCI())) {
4518  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4519  Error(S, "cache policy is not supported for SMRD instructions");
4520  return false;
4521  }
 4522  if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
 4523  Error(IDLoc, "invalid cache policy for SMEM instruction");
4524  return false;
4525  }
4526  }
4527 
4528  if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4529  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4530  StringRef CStr(S.getPointer());
4531  S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4532  Error(S, "scc is not supported on this GPU");
4533  return false;
4534  }
4535 
 4536  if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
 4537  return true;
4538 
4539  if (TSFlags & SIInstrFlags::IsAtomicRet) {
4540  if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4541  Error(IDLoc, isGFX940() ? "instruction must use sc0"
4542  : "instruction must use glc");
4543  return false;
4544  }
4545  } else {
4546  if (CPol & CPol::GLC) {
4547  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4548  StringRef CStr(S.getPointer());
 4549  S = SMLoc::getFromPointer(
 4550  &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4551  Error(S, isGFX940() ? "instruction must not use sc0"
4552  : "instruction must not use glc");
4553  return false;
4554  }
4555  }
4556 
4557  return true;
4558 }
4559 
4560 bool AMDGPUAsmParser::validateLdsDMA(uint64_t Enc, const MCInst &Inst,
4561  const OperandVector &Operands,
4562  const SMLoc &IDLoc) {
4563  assert(Enc == SIInstrFlags::FLAT || Enc == SIInstrFlags::MUBUF);
4564 
4565  // Exclude cases when there are separate DMA opcodes.
4566  // In these cases, incorrect opcode selection is not possible.
4567  if (Enc == SIInstrFlags::FLAT && isGFX940())
4568  return true;
4569  if (Enc == SIInstrFlags::MUBUF && isGFX11Plus())
4570  return true;
4571 
4572  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4573  if ((TSFlags & (SIInstrFlags::VALU | Enc)) !=
4574  (SIInstrFlags::VALU | Enc))
4575  return true;
4576  // This is FLAT/MUBUF LDS DMA.
4577 
4578  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands);
4579  StringRef CStr(S.getPointer());
4580  if (!CStr.startswith("lds")) {
 4581  // This is an incorrectly selected LDS DMA version of a FLAT/MUBUF load
 4582  // opcode. The LDS version should have the 'lds' modifier, but since it
 4583  // follows optional operands, its absence is ignored by the matcher.
4584  Error(IDLoc, "missing dst operand or lds modifier");
4585  return false;
4586  }
4587 
4588  return true;
4589 }
4590 
4591 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4592  if (!isGFX11Plus())
4593  return true;
4594  for (auto &Operand : Operands) {
4595  if (!Operand->isReg())
4596  continue;
4597  unsigned Reg = Operand->getReg();
4598  if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4599  Error(getRegLoc(Reg, Operands),
4600  "execz and vccz are not supported on this GPU");
4601  return false;
4602  }
4603  }
4604  return true;
4605 }
4606 
4607 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4608  const SMLoc &IDLoc,
4609  const OperandVector &Operands) {
4610  if (auto ErrMsg = validateLdsDirect(Inst)) {
4611  Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4612  return false;
4613  }
4614  if (!validateSOPLiteral(Inst)) {
4615  Error(getLitLoc(Operands),
4616  "only one unique literal operand is allowed");
4617  return false;
4618  }
4619  if (!validateVOPLiteral(Inst, Operands)) {
4620  return false;
4621  }
4622  if (!validateConstantBusLimitations(Inst, Operands)) {
4623  return false;
4624  }
4625  if (!validateEarlyClobberLimitations(Inst, Operands)) {
4626  return false;
4627  }
4628  if (!validateIntClampSupported(Inst)) {
4629  Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4630  "integer clamping is not supported on this GPU");
4631  return false;
4632  }
4633  if (!validateOpSel(Inst)) {
4634  Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4635  "invalid op_sel operand");
4636  return false;
4637  }
4638  if (!validateDPP(Inst, Operands)) {
4639  return false;
4640  }
4641  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4642  if (!validateMIMGD16(Inst)) {
4643  Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4644  "d16 modifier is not supported on this GPU");
4645  return false;
4646  }
4647  if (!validateMIMGDim(Inst)) {
4648  Error(IDLoc, "dim modifier is required on this GPU");
4649  return false;
4650  }
4651  if (!validateMIMGMSAA(Inst)) {
4652  Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4653  "invalid dim; must be MSAA type");
4654  return false;
4655  }
4656  if (!validateMIMGDataSize(Inst, IDLoc)) {
4657  return false;
4658  }
4659  if (!validateMIMGAddrSize(Inst)) {
4660  Error(IDLoc,
4661  "image address size does not match dim and a16");
4662  return false;
4663  }
4664  if (!validateMIMGAtomicDMask(Inst)) {
4665  Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4666  "invalid atomic image dmask");
4667  return false;
4668  }
4669  if (!validateMIMGGatherDMask(Inst)) {
4670  Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4671  "invalid image_gather dmask: only one bit must be set");
4672  return false;
4673  }
4674  if (!validateMovrels(Inst, Operands)) {
4675  return false;
4676  }
4677  if (!validateFlatOffset(Inst, Operands)) {
4678  return false;
4679  }
4680  if (!validateSMEMOffset(Inst, Operands)) {
4681  return false;
4682  }
4683  if (!validateMAIAccWrite(Inst, Operands)) {
4684  return false;
4685  }
4686  if (!validateMAISrc2(Inst, Operands)) {
4687  return false;
4688  }
4689  if (!validateMFMA(Inst, Operands)) {
4690  return false;
4691  }
4692  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4693  return false;
4694  }
4695 
4696  if (!validateAGPRLdSt(Inst)) {
4697  Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4698  ? "invalid register class: data and dst should be all VGPR or AGPR"
4699  : "invalid register class: agpr loads and stores not supported on this GPU"
4700  );
4701  return false;
4702  }
4703  if (!validateVGPRAlign(Inst)) {
4704  Error(IDLoc,
4705  "invalid register class: vgpr tuples must be 64 bit aligned");
4706  return false;
4707  }
4708  if (!validateGWS(Inst, Operands)) {
4709  return false;
4710  }
4711 
4712  if (!validateBLGP(Inst, Operands)) {
4713  return false;
4714  }
4715 
4716  if (!validateDivScale(Inst)) {
4717  Error(IDLoc, "ABS not allowed in VOP3B instructions");
4718  return false;
4719  }
4720  if (!validateExeczVcczOperands(Operands)) {
4721  return false;
4722  }
4723 
4724  if (!validateLdsDMA(SIInstrFlags::FLAT, Inst, Operands, IDLoc)) {
4725  return false;
4726  }
4727 
4728  if (!validateLdsDMA(SIInstrFlags::MUBUF, Inst, Operands, IDLoc)) {
4729  return false;
4730  }
4731 
4732  return true;
4733 }
4734 
4735 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4736  const FeatureBitset &FBS,
4737  unsigned VariantID = 0);
4738 
4739 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4740  const FeatureBitset &AvailableFeatures,
4741  unsigned VariantID);
4742 
4743 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4744  const FeatureBitset &FBS) {
4745  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4746 }
4747 
4748 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4749  const FeatureBitset &FBS,
4750  ArrayRef<unsigned> Variants) {
4751  for (auto Variant : Variants) {
4752  if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4753  return true;
4754  }
4755 
4756  return false;
4757 }
4758 
4759 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4760  const SMLoc &IDLoc) {
4761  FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4762 
4763  // Check if requested instruction variant is supported.
4764  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4765  return false;
4766 
4767  // This instruction is not supported.
4768  // Clear any other pending errors because they are no longer relevant.
4769  getParser().clearPendingErrors();
4770 
4771  // Requested instruction variant is not supported.
4772  // Check if any other variants are supported.
4773  StringRef VariantName = getMatchedVariantName();
4774  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4775  return Error(IDLoc,
4776  Twine(VariantName,
4777  " variant of this instruction is not supported"));
4778  }
4779 
4780  // Finally check if this instruction is supported on any other GPU.
4781  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4782  return Error(IDLoc, "instruction not supported on this GPU");
4783  }
4784 
4785  // Instruction not supported on any GPU. Probably a typo.
4786  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4787  return Error(IDLoc, "invalid instruction" + Suggestion);
4788 }
4789 
4790 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
 4791  OperandVector &Operands,
 4792  MCStreamer &Out,
 4793  uint64_t &ErrorInfo,
 4794  bool MatchingInlineAsm) {
4795  MCInst Inst;
4796  unsigned Result = Match_Success;
4797  for (auto Variant : getMatchedVariants()) {
4798  uint64_t EI;
4799  auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4800  Variant);
 4801  // We order match statuses from least to most specific and use the most
 4802  // specific status as the result:
4803  // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4804  if ((R == Match_Success) ||
4805  (R == Match_PreferE32) ||
4806  (R == Match_MissingFeature && Result != Match_PreferE32) ||
4807  (R == Match_InvalidOperand && Result != Match_MissingFeature
4808  && Result != Match_PreferE32) ||
4809  (R == Match_MnemonicFail && Result != Match_InvalidOperand
4810  && Result != Match_MissingFeature
4811  && Result != Match_PreferE32)) {
4812  Result = R;
4813  ErrorInfo = EI;
4814  }
4815  if (R == Match_Success)
4816  break;
4817  }
4818 
4819  if (Result == Match_Success) {
4820  if (!validateInstruction(Inst, IDLoc, Operands)) {
4821  return true;
4822  }
4823  Inst.setLoc(IDLoc);
4824  Out.emitInstruction(Inst, getSTI());
4825  return false;
4826  }
4827 
4828  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4829  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4830  return true;
4831  }
4832 
4833  switch (Result) {
4834  default: break;
4835  case Match_MissingFeature:
4836  // It has been verified that the specified instruction
4837  // mnemonic is valid. A match was found but it requires
4838  // features which are not supported on this GPU.
4839  return Error(IDLoc, "operands are not valid for this GPU or mode");
4840 
4841  case Match_InvalidOperand: {
4842  SMLoc ErrorLoc = IDLoc;
4843  if (ErrorInfo != ~0ULL) {
4844  if (ErrorInfo >= Operands.size()) {
4845  return Error(IDLoc, "too few operands for instruction");
4846  }
4847  ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4848  if (ErrorLoc == SMLoc())
4849  ErrorLoc = IDLoc;
4850  }
4851  return Error(ErrorLoc, "invalid operand for instruction");
4852  }
4853 
4854  case Match_PreferE32:
4855  return Error(IDLoc, "internal error: instruction without _e64 suffix "
4856  "should be encoded as e32");
4857  case Match_MnemonicFail:
4858  llvm_unreachable("Invalid instructions should have been handled already");
4859  }
4860  llvm_unreachable("Implement any new match types added!");
4861 }
4862 
4863 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4864  int64_t Tmp = -1;
4865  if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4866  return true;
4867  }
4868  if (getParser().parseAbsoluteExpression(Tmp)) {
4869  return true;
4870  }
4871  Ret = static_cast<uint32_t>(Tmp);
4872  return false;
4873 }
4874 
4875 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4876  uint32_t &Minor) {
4877  if (ParseAsAbsoluteExpression(Major))
4878  return TokError("invalid major version");
4879 
4880  if (!trySkipToken(AsmToken::Comma))
4881  return TokError("minor version number required, comma expected");
4882 
4883  if (ParseAsAbsoluteExpression(Minor))
4884  return TokError("invalid minor version");
4885 
4886  return false;
4887 }
4888 
4889 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4890  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4891  return TokError("directive only supported for amdgcn architecture");
4892 
4893  std::string TargetIDDirective;
4894  SMLoc TargetStart = getTok().getLoc();
4895  if (getParser().parseEscapedString(TargetIDDirective))
4896  return true;
4897 
4898  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4899  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4900  return getParser().Error(TargetRange.Start,
4901  (Twine(".amdgcn_target directive's target id ") +
4902  Twine(TargetIDDirective) +
4903  Twine(" does not match the specified target id ") +
4904  Twine(getTargetStreamer().getTargetID()->toString())).str());
4905 
4906  return false;
4907 }
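// Illustrative usage of the directive handled above (editor's sketch):
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a"
// The quoted string must match the target id derived from the selected CPU
// and features, otherwise the mismatch error above is reported.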
4908 
4909 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4910  return Error(Range.Start, "value out of range", Range);
4911 }
4912 
4913 bool AMDGPUAsmParser::calculateGPRBlocks(
4914  const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4915  bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4916  SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4917  unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4918  // TODO(scott.linder): These calculations are duplicated from
4919  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4920  IsaVersion Version = getIsaVersion(getSTI().getCPU());
4921 
4922  unsigned NumVGPRs = NextFreeVGPR;
4923  unsigned NumSGPRs = NextFreeSGPR;
4924 
4925  if (Version.Major >= 10)
4926  NumSGPRs = 0;
4927  else {
4928  unsigned MaxAddressableNumSGPRs =
 4929  IsaInfo::getAddressableNumSGPRs(&getSTI());
 4930 
4931  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4932  NumSGPRs > MaxAddressableNumSGPRs)
4933  return OutOfRangeError(SGPRRange);
4934 
4935  NumSGPRs +=
4936  IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4937 
4938  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4939  NumSGPRs > MaxAddressableNumSGPRs)
4940  return OutOfRangeError(SGPRRange);
4941 
4942  if (Features.test(FeatureSGPRInitBug))
 4943  NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
 4944  }
4945 
4946  VGPRBlocks =
4947  IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4948  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4949 
4950  return false;
4951 }
4952 
4953 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4954  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4955  return TokError("directive only supported for amdgcn architecture");
4956 
4957  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4958  return TokError("directive only supported for amdhsa OS");
4959 
4960  StringRef KernelName;
4961  if (getParser().parseIdentifier(KernelName))
4962  return true;
4963 
 4964  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
 4965 
4966  StringSet<> Seen;
4967 
4968  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4969 
4970  SMRange VGPRRange;
4971  uint64_t NextFreeVGPR = 0;
4972  uint64_t AccumOffset = 0;
4973  uint64_t SharedVGPRCount = 0;
4974  SMRange SGPRRange;
4975  uint64_t NextFreeSGPR = 0;
4976 
4977  // Count the number of user SGPRs implied from the enabled feature bits.
4978  unsigned ImpliedUserSGPRCount = 0;
4979 
4980  // Track if the asm explicitly contains the directive for the user SGPR
4981  // count.
4982  Optional<unsigned> ExplicitUserSGPRCount;
4983  bool ReserveVCC = true;
4984  bool ReserveFlatScr = true;
4985  Optional<bool> EnableWavefrontSize32;
4986 
4987  while (true) {
4988  while (trySkipToken(AsmToken::EndOfStatement));
4989 
4990  StringRef ID;
4991  SMRange IDRange = getTok().getLocRange();
4992  if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4993  return true;
4994 
4995  if (ID == ".end_amdhsa_kernel")
4996  break;
4997 
4998  if (!Seen.insert(ID).second)
4999  return TokError(".amdhsa_ directives cannot be repeated");
5000 
5001  SMLoc ValStart = getLoc();
5002  int64_t IVal;
5003  if (getParser().parseAbsoluteExpression(IVal))
5004  return true;
5005  SMLoc ValEnd = getLoc();
5006  SMRange ValRange = SMRange(ValStart, ValEnd);
5007 
5008  if (IVal < 0)
5009  return OutOfRangeError(ValRange);
5010 
5011  uint64_t Val = IVal;
5012 
5013 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5014  if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
5015  return OutOfRangeError(RANGE); \
5016  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
5017 
5018  if (ID == ".amdhsa_group_segment_fixed_size") {
5019  if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
5020  return OutOfRangeError(ValRange);
5021  KD.group_segment_fixed_size = Val;
5022  } else if (ID == ".amdhsa_private_segment_fixed_size") {
5023  if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
5024  return OutOfRangeError(ValRange);
5025  KD.private_segment_fixed_size = Val;
5026  } else if (ID == ".amdhsa_kernarg_size") {
5027  if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
5028  return OutOfRangeError(ValRange);
5029  KD.kernarg_size = Val;
5030  } else if (ID == ".amdhsa_user_sgpr_count") {
5031  ExplicitUserSGPRCount = Val;
5032  } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
 5033  if (hasArchitectedFlatScratch())
 5034  return Error(IDRange.Start,
 5035  "directive is not supported with architected flat scratch",
 5036  IDRange);
 5037  PARSE_BITS_ENTRY(KD.kernel_code_properties,
 5038  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5039  Val, ValRange);
5040  if (Val)
5041  ImpliedUserSGPRCount += 4;
5042  } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
 5043  PARSE_BITS_ENTRY(KD.kernel_code_properties,
 5044  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
5045  ValRange);
5046  if (Val)
5047  ImpliedUserSGPRCount += 2;
5048  } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
 5049  PARSE_BITS_ENTRY(KD.kernel_code_properties,
 5050  KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
5051  ValRange);
5052  if (Val)
5053  ImpliedUserSGPRCount += 2;
5054  } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
 5055  PARSE_BITS_ENTRY(KD.kernel_code_properties,
 5056  KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5057  Val, ValRange);
5058  if (Val)
5059  ImpliedUserSGPRCount += 2;
5060  } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
 5061  PARSE_BITS_ENTRY(KD.kernel_code_properties,
 5062  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
5063  ValRange);
5064  if (Val)
5065  ImpliedUserSGPRCount += 2;
5066  } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
 5067  if (hasArchitectedFlatScratch())
 5068  return Error(IDRange.Start,
 5069  "directive is not supported with architected flat scratch",
 5070  IDRange);
 5071  PARSE_BITS_ENTRY(KD.kernel_code_properties,
 5072  KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
5073  ValRange);
5074  if (Val)
5075  ImpliedUserSGPRCount += 2;
5076  } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
 5077  PARSE_BITS_ENTRY(KD.kernel_code_properties,
 5078  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5079  Val, ValRange);
5080  if (Val)
5081  ImpliedUserSGPRCount += 1;
5082  } else if (ID == ".amdhsa_wavefront_size32") {
5083  if (IVersion.Major < 10)
5084  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5085  EnableWavefrontSize32 = Val;
 5086  PARSE_BITS_ENTRY(KD.kernel_code_properties,
 5087  KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5088  Val, ValRange);
5089  } else if (ID == ".amdhsa_uses_dynamic_stack") {
 5090  PARSE_BITS_ENTRY(KD.kernel_code_properties,
 5091  KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
5092  } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
 5093  if (hasArchitectedFlatScratch())
 5094  return Error(IDRange.Start,
 5095  "directive is not supported with architected flat scratch",
 5096  IDRange);
 5097  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
 5098  COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5099  } else if (ID == ".amdhsa_enable_private_segment") {
 5100  if (!hasArchitectedFlatScratch())
 5101  return Error(
 5102  IDRange.Start,
 5103  "directive is not supported without architected flat scratch",
 5104  IDRange);
 5105  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
 5106  COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5107  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
 5108  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
 5109  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5110  ValRange);
5111  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
 5112  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
 5113  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5114  ValRange);
5115  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
 5116  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
 5117  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5118  ValRange);
5119  } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
 5120  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
 5121  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5122  ValRange);
5123  } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
 5124  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
 5125  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5126  ValRange);
5127  } else if (ID == ".amdhsa_next_free_vgpr") {
5128  VGPRRange = ValRange;
5129  NextFreeVGPR = Val;
5130  } else if (ID == ".amdhsa_next_free_sgpr") {
5131  SGPRRange = ValRange;
5132  NextFreeSGPR = Val;
5133  } else if (ID == ".amdhsa_accum_offset") {
5134  if (!isGFX90A())
5135  return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5136  AccumOffset = Val;
5137  } else if (ID == ".amdhsa_reserve_vcc") {
5138  if (!isUInt<1>(Val))
5139  return OutOfRangeError(ValRange);
5140  ReserveVCC = Val;
5141  } else if (ID == ".amdhsa_reserve_flat_scratch") {
5142  if (IVersion.Major < 7)
5143  return Error(IDRange.Start, "directive requires gfx7+", IDRange);
 5144  if (hasArchitectedFlatScratch())
 5145  return Error(IDRange.Start,
5146  "directive is not supported with architected flat scratch",
5147  IDRange);
5148  if (!isUInt<1>(Val))
5149  return OutOfRangeError(ValRange);
5150  ReserveFlatScr = Val;
5151  } else if (ID == ".amdhsa_reserve_xnack_mask") {
5152  if (IVersion.Major < 8)
5153  return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5154  if (!isUInt<1>(Val))
5155  return OutOfRangeError(ValRange);
5156  if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5157  return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5158  IDRange);
5159  } else if (ID == ".amdhsa_float_round_mode_32") {
 5160  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
 5161  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5162  } else if (ID == ".amdhsa_float_round_mode_16_64") {
 5163  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
 5164  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5165  } else if (ID == ".amdhsa_float_denorm_mode_32") {
 5166  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
 5167  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5168  } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
 5169  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
 5170  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5171  ValRange);
5172  } else if (ID == ".amdhsa_dx10_clamp") {
 5173  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
 5174  COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5175  } else if (ID == ".amdhsa_ieee_mode") {
5176  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5177  Val, ValRange);
5178  } else if (ID == ".amdhsa_fp16_overflow") {
5179  if (IVersion.Major < 9)
5180  return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5181  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5182  ValRange);
5183  } else if (ID == ".amdhsa_tg_split") {
5184  if (!isGFX90A())
5185  return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5186  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5187  ValRange);
5188  } else if (ID == ".amdhsa_workgroup_processor_mode") {
5189  if (IVersion.Major < 10)
5190  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5191  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5192  ValRange);
5193  } else if (ID == ".amdhsa_memory_ordered") {
5194  if (IVersion.Major < 10)
5195  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5196  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5197  ValRange);
5198  } else if (ID == ".amdhsa_forward_progress") {
5199  if (IVersion.Major < 10)
5200  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5201  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5202  ValRange);
5203  } else if (ID == ".amdhsa_shared_vgpr_count") {
5204  if (IVersion.Major < 10)
5205  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5206  SharedVGPRCount = Val;
 5207  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
 5208  COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
5209  ValRange);
5210  } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5211  PARSE_BITS_ENTRY(
5212  KD.compute_pgm_rsrc2,
5213  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5214  ValRange);
5215  } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5216  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5217  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5218  Val, ValRange);
5219  } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5220  PARSE_BITS_ENTRY(
5221  KD.compute_pgm_rsrc2,
5222  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5223  ValRange);
5224  } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5225  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5226  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5227  Val, ValRange);
5228  } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5229  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5230  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5231  Val, ValRange);
5232  } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5233  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5234  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5235  Val, ValRange);
5236  } else if (ID == ".amdhsa_exception_int_div_zero") {
5237  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5238  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5239  Val, ValRange);
5240  } else {
5241  return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5242  }
5243 
5244 #undef PARSE_BITS_ENTRY
5245  }
5246 
5247  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5248  return TokError(".amdhsa_next_free_vgpr directive is required");
5249 
5250  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5251  return TokError(".amdhsa_next_free_sgpr directive is required");
5252 
5253  unsigned VGPRBlocks;
5254  unsigned SGPRBlocks;
5255  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5256  getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5257  EnableWavefrontSize32, NextFreeVGPR,
5258  VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5259  SGPRBlocks))
5260  return true;
5261 
5262  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5263  VGPRBlocks))
5264  return OutOfRangeError(VGPRRange);
5265  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5266  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5267 
5268  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5269  SGPRBlocks))
5270  return OutOfRangeError(SGPRRange);
5271  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5272  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5273  SGPRBlocks);
5274 
5275  if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5276  return TokError("amdgpu_user_sgpr_count smaller than implied by "
5277  "enabled user SGPRs");
5278 
5279  unsigned UserSGPRCount =
5280  ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5281 
5282  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5283  return TokError("too many user SGPRs enabled");
5284  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5285  UserSGPRCount);
5286 
5287  if (isGFX90A()) {
5288  if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5289  return TokError(".amdhsa_accum_offset directive is required");
5290  if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5291  return TokError("accum_offset should be in range [4..256] in "
5292  "increments of 4");
5293  if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5294  return TokError("accum_offset exceeds total VGPR allocation");
5295  AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5296  (AccumOffset / 4 - 1));
5297  }
5298 
5299  if (IVersion.Major == 10) {
5300  // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5301  if (SharedVGPRCount && EnableWavefrontSize32) {
5302  return TokError("shared_vgpr_count directive not valid on "
5303  "wavefront size 32");
5304  }
5305  if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5306  return TokError("shared_vgpr_count*2 + "
5307  "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5308  "exceed 63\n");
5309  }
5310  }
5311 
5312  getTargetStreamer().EmitAmdhsaKernelDescriptor(
5313  getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5314  ReserveFlatScr);
5315  return false;
5316 }
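For reference, a minimal hand-written directive block that would satisfy the checks above might look like the sketch below; the kernel name and register counts are illustrative only, .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are mandatory as enforced above, gfx90a targets additionally require .amdhsa_accum_offset, and the closing .end_amdhsa_kernel is consumed earlier in this routine (not shown here):

    .amdhsa_kernel my_kernel
      .amdhsa_next_free_vgpr 8
      .amdhsa_next_free_sgpr 16
    .end_amdhsa_kernel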
5317 
5318 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5319  uint32_t Major;
5320  uint32_t Minor;
5321 
5322  if (ParseDirectiveMajorMinor(Major, Minor))
5323  return true;
5324 
5325  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5326  return false;
5327 }
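This directive simply forwards a major/minor pair to the target streamer; a sketch with illustrative version numbers:

    .hsa_code_object_version 2,1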
5328 
5329 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5330  uint32_t Major;
5331  uint32_t Minor;
5332  uint32_t Stepping;
5333  StringRef VendorName;
5334  StringRef ArchName;
5335 
5336  // If this directive has no arguments, then use the ISA version for the
5337  // targeted GPU.
5338  if (isToken(AsmToken::EndOfStatement)) {
5339  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5340  getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5341  ISA.Stepping,
5342  "AMD", "AMDGPU");
5343  return false;
5344  }
5345 
5346  if (ParseDirectiveMajorMinor(Major, Minor))
5347  return true;
5348 
5349  if (!trySkipToken(AsmToken::Comma))
5350  return TokError("stepping version number required, comma expected");
5351 
5352  if (ParseAsAbsoluteExpression(Stepping))
5353  return TokError("invalid stepping version");
5354 
5355  if (!trySkipToken(AsmToken::Comma))
5356  return TokError("vendor name required, comma expected");
5357 
5358  if (!parseString(VendorName, "invalid vendor name"))
5359  return true;
5360 
5361  if (!trySkipToken(AsmToken::Comma))
5362  return TokError("arch name required, comma expected");
5363 
5364  if (!parseString(ArchName, "invalid arch name"))
5365  return true;
5366 
5367  getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5368  VendorName, ArchName);
5369  return false;
5370 }
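Both accepted forms, with illustrative version numbers: the bare form falls back to the ISA version of the targeted GPU, while the explicit form supplies major, minor, stepping, vendor name, and arch name in that order:

    .hsa_code_object_isa
    .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"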
5371 
5372 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5373  amd_kernel_code_t &Header) {
5374  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5375  // assembly for backwards compatibility.
5376  if (ID == "max_scratch_backing_memory_byte_size") {
5377  Parser.eatToEndOfStatement();
5378  return false;
5379  }
5380 
5381  SmallString<40> ErrStr;
5382  raw_svector_ostream Err(ErrStr);
5383  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5384  return TokError(Err.str());
5385  }
5386  Lex();
5387 
5388  if (ID == "enable_wavefront_size32") {
5389  if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5390  if (!isGFX10Plus())
5391  return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5392  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5393  return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5394  } else {
5395  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5396  return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5397  }
5398  }
5399 
5400  if (ID == "wavefront_size") {
5401  if (Header.wavefront_size == 5) {
5402  if (!isGFX10Plus())
5403  return TokError("wavefront_size=5 is only allowed on GFX10+");
5404  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5405  return TokError("wavefront_size=5 requires +WavefrontSize32");
5406  } else if (Header.wavefront_size == 6) {
5407  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5408  return TokError("wavefront_size=6 requires +WavefrontSize64");
5409  }
5410  }
5411 
5412  if (ID == "enable_wgp_mode") {
5413  if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5414  !isGFX10Plus())
5415  return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5416  }
5417 
5418  if (ID == "enable_mem_ordered") {
5419  if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5420  !isGFX10Plus())
5421  return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5422  }
5423 
5424  if (ID == "enable_fwd_progress") {
5425  if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5426  !isGFX10Plus())
5427  return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5428  }
5429 
5430  return false;
5431 }
5432 
5433 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5434  amd_kernel_code_t Header;
5435  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5436 
5437  while (true) {
5438  // Lex EndOfStatement. This is in a while loop, because lexing a comment
5439  // will set the current token to EndOfStatement.
5440  while(trySkipToken(AsmToken::EndOfStatement));
5441 
5442  StringRef ID;
5443  if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5444  return true;
5445 
5446  if (ID == ".end_amd_kernel_code_t")
5447  break;
5448 
5449  if (ParseAMDKernelCodeTValue(ID, Header))
5450  return true;
5451  }
5452 
5453  getTargetStreamer().EmitAMDKernelCodeT(Header);
5454 
5455  return false;
5456 }
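A sketch of the enclosing block this loop consumes, using two of the fields validated in ParseAMDKernelCodeTValue above; the values are illustrative (wavefront_size = 6 selects wave64, as the checks above imply):

    .amd_kernel_code_t
        wavefront_size = 6
        enable_wavefront_size32 = 0
    .end_amd_kernel_code_t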
5457 
5458 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5459  StringRef KernelName;
5460  if (!parseId(KernelName, "expected symbol name"))
5461  return true;
5462 
5463  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5464  ELF::STT_AMDGPU_HSA_KERNEL);
5465 
5466  KernelScope.initialize(getContext());
5467  return false;
5468 }
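Usage is a single symbol name; the name below is illustrative:

    .amdgpu_hsa_kernel my_kernel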
5469 
5470 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5471  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5472  return Error(getLoc(),
5473  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5474  "architectures");
5475  }
5476 
5477  auto TargetIDDirective = getLexer().getTok().getStringContents();
5478  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5479  return Error(getParser().getTok().getLoc(), "target id must match options");
5480 
5481  getTargetStreamer().EmitISAVersion();
5482  Lex();
5483 
5484  return false;
5485 }
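The directive takes one string token whose contents must match the subtarget's target id exactly. A hypothetical example for a plain gfx900 target follows; the precise spelling depends on how the target id is rendered for the current subtarget:

    .amd_amdgpu_isa "amdgcn-amd-amdhsa--gfx900"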
5486 
5487 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5488  const char *AssemblerDirectiveBegin;
5489  const char *AssemblerDirectiveEnd;
5490  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5491  isHsaAbiVersion3AndAbove(&getSTI())
5492  ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5493  HSAMD::V3::AssemblerDirectiveEnd)
5494  : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5495  HSAMD::AssemblerDirectiveEnd);
5496 
5497  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5498  return Error(getLoc(),
5499  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5500  "not available on non-amdhsa OSes")).str());
5501  }
5502 
5503  std::string HSAMetadataString;
5504  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5505  HSAMetadataString))
5506  return true;
5507 
5508  if (isHsaAbiVersion3AndAbove(&getSTI())) {
5509  if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5510  return Error(getLoc(), "invalid HSA metadata");
5511  } else {
5512  if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5513  return Error(getLoc(), "invalid HSA metadata");
5514  }
5515 
5516  return false;
5517 }
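A sketch of the metadata block for code object v3 and above, assuming the usual .amdgpu_metadata / .end_amdgpu_metadata spelling of the begin/end directives; the YAML payload itself is illustrative and is parsed by EmitHSAMetadataV3:

    .amdgpu_metadata
    amdhsa.version:
      - 1
      - 0
    .end_amdgpu_metadata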
5518 
5519 /// Common code to parse out a block of text (typically YAML) between start and
5520 /// end directives.
5521 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5522  const char *AssemblerDirectiveEnd,
5523  std::string &CollectString) {
5524 
5525  raw_string_ostream CollectStream(CollectString);
5526 
5527  getLexer().setSkipSpace(false);
5528 
5529  bool FoundEnd = false;
5530  while (!isToken(AsmToken::Eof)) {
5531  while (isToken(AsmToken::Space)) {
5532  CollectStream << getTokenStr();
5533  Lex();
5534  }
5535 
5536  if (trySkipId(AssemblerDirectiveEnd)) {
5537  FoundEnd = true;
5538  break;
5539  }
5540 
5541  CollectStream << Parser.parseStringToEndOfStatement()
5542  << getContext().getAsmInfo()->getSeparatorString();
5543 
5544  Parser.eatToEndOfStatement();
5545  }
5546 
5547  getLexer().setSkipSpace(true);
5548 
5549  if (isToken(AsmToken::Eof) && !FoundEnd) {
5550  return TokError(Twine("expected directive ") +
5551  Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5552  }
5553 
5554  CollectStream.flush();
5555  return false;
5556 }
5557 
5558 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5559 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5560  std::string String;
5561  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5562  AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5563  return true;
5564 
5565  auto PALMetadata = getTargetStreamer().getPALMetadata();
5566  if (!PALMetadata->setFromString(String))
5567  return Error(getLoc(), "invalid PAL metadata");
5568  return false;
5569 }
5570 
5571 /// Parse the assembler directive for old linear-format PAL metadata.
5572 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5573  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5574  return Error(getLoc(),
5575  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5576  "not available on non-amdpal OSes")).str());
5577  }
5578 
5579  auto PALMetadata = getTargetStreamer().getPALMetadata();
5580  PALMetadata->setLegacy();
5581  for (;;) {
5582  uint32_t Key, Value;
5583  if (ParseAsAbsoluteExpression(Key)) {
5584  return TokError(Twine("invalid value in ") +
5585  Twine(PALMD::AssemblerDirective));
5586  }
5587  if (!trySkipToken(AsmToken::Comma)) {
5588  return TokError(Twine("expected an even number of values in ") +
5589  Twine(PALMD::AssemblerDirective));
5590  }
5591  if (ParseAsAbsoluteExpression(Value)) {
5592  return TokError(Twine("invalid value in ") +
5593  Twine(PALMD::AssemblerDirective));
5594  }
5595  PALMetadata->setRegister(Key, Value);
5596  if (!trySkipToken(AsmToken::Comma))
5597  break;
5598  }
5599  return false;
5600 }
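The legacy form parsed above is a flat, comma-separated list of register/value pairs, each registered with setRegister; a sketch with made-up register numbers and values, assuming the directive is spelled .amdgpu_pal_metadata:

    .amdgpu_pal_metadata 0x2c0a,0x0,0x2c0b,0x42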
5601 
5602 /// ParseDirectiveAMDGPULDS
5603 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5604 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5605  if (getParser().checkForValidSection())
5606  return true;
5607 
5608  StringRef Name;
5609  SMLoc NameLoc = getLoc();
5610  if (getParser().parseIdentifier(Name))
5611  return TokError("expected identifier in directive");
5612 
5613  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5614  if (parseToken(AsmToken::Comma, "expected ','"))
5615  return true;
5616 
5617  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5618 
5619  int64_t Size;
5620  SMLoc SizeLoc = getLoc();
5621  if (getParser().parseAbsoluteExpression(Size))
5622  return true;
5623  if (Size < 0)
5624  return Error(SizeLoc, "size must be non-negative");
5625  if (Size > LocalMemorySize)
5626  return Error(SizeLoc, "size is too large");
5627 
5628  int64_t Alignment = 4;
5629  if (trySkipToken(AsmToken::Comma)) {
5630  SMLoc AlignLoc = getLoc();
5631  if (getParser().parseAbsoluteExpression(Alignment))
5632  return true;
5633  if (Alignment < 0 || !isPowerOf2_64(Alignment))
5634  return Error(AlignLoc, "alignment must be a power of two");
5635 
5636  // Alignment larger than the size of LDS is possible in theory, as long
5637  // as the linker manages to place the symbol at address 0, but we do want
5638  // to make sure the alignment fits nicely into a 32-bit integer.
5639  if (Alignment >= 1u << 31)
5640  return Error(AlignLoc, "alignment is too large");
5641  }
5642 
5643  if (parseEOL())
5644  return true;
5645 
5646  Symbol->redefineIfPossible();
5647  if (!Symbol->isUndefined())
5648  return Error(NameLoc, "invalid symbol redefinition");
5649 
5650  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5651  return false;
5652 }
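Following the grammar above, a sketch that reserves a 4 KiB LDS block with 16-byte alignment; the symbol name and sizes are illustrative, and the alignment argument may be omitted to get the default of 4:

    .amdgpu_lds lds_block, 4096, 16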
5653 
5654 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5655  StringRef IDVal = DirectiveID.getString();
5656 
5657  if (isHsaAbiVersion3AndAbove(&getSTI())) {
5658  if (IDVal == ".amdhsa_kernel")
5659  return ParseDirectiveAMDHSAKernel();
5660 
5661  // TODO: Restructure/combine with PAL metadata directive.
5662  if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
5663  return ParseDirectiveHSAMetadata();
5664  } else {
5665  if (IDVal == ".hsa_code_object_version")
5666  return ParseDirectiveHSACodeObjectVersion();
5667 
5668  if (IDVal == ".hsa_code_object_isa")
5669  return ParseDirectiveHSACodeObjectISA();
5670 
5671  if (IDVal == ".amd_kernel_code_t")
5672  return ParseDirectiveAMDKernelCodeT();
5673 
5674  if (IDVal == ".amdgpu_hsa_kernel")
5675  return ParseDirectiveAMDGPUHsaKernel();
5676 
5677  if (IDVal == ".amd_amdgpu_isa")
5678  return ParseDirectiveISAVersion();
5679 
5680  if (IDVal == HSAMD::AssemblerDirectiveBegin)
5681  return ParseDirectiveHSAMetadata();
5682  }
5683 
5684  if (IDVal == ".amdgcn_target")
5685  return ParseDirectiveAMDGCNTarget();
5686 
5687  if (IDVal == ".amdgpu_lds")
5688  return ParseDirectiveAMDGPULDS();
5689 
5690  if (IDVal == PALMD::AssemblerDirectiveBegin)
5691  return ParseDirectivePALMetadataBegin();
5692 
5693  if (IDVal == PALMD::AssemblerDirective)
5694  return ParseDirectivePALMetadata();
5695 
5696  return true;
5697 }
5698 
5699 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5700  unsigned RegNo) {
5701 
5702  if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5703  return isGFX9Plus();
5704 
5705  // GFX10+ has 2 more SGPRs 104 and 105.
5706  if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5707  return hasSGPR104_SGPR105();
5708 
5709  switch (RegNo) {
5710  case AMDGPU::SRC_SHARED_BASE:
5711  case AMDGPU::SRC_SHARED_LIMIT:
5712  case AMDGPU::SRC_PRIVATE_BASE:
5713  case AMDGPU::SRC_PRIVATE_LIMIT:
5714  return isGFX9Plus();
5715  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5716  return isGFX9Plus() && !isGFX11Plus();
5717  case AMDGPU::TBA:
5718  case AMDGPU::TBA_LO:
5719  case AMDGPU::TBA_HI:
5720  case AMDGPU::TMA:
5721  case AMDGPU::TMA_LO:
5722  case AMDGPU::TMA_HI:
5723  return !isGFX9Plus();
5724  case AMDGPU::XNACK_MASK:
5725  case AMDGPU::XNACK_MASK_LO:
5726  case AMDGPU::XNACK_MASK_HI:
5727  return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5728  case AMDGPU::SGPR_NULL:
5729  return isGFX10Plus();
5730  default:
5731  break;
5732  }
5733 
5734  if (isCI())
5735  return true;
5736 
5737  if (isSI() || isGFX10Plus()) {
5738  // No flat_scr on SI.
5739  // On GFX10Plus flat scratch is not a valid register operand and can only be
5740  // accessed with s_setreg/s_getreg.
5741  switch (RegNo) {
5742  case AMDGPU::FLAT_SCR:
5743  case AMDGPU::FLAT_SCR_LO:
5744  case AMDGPU::FLAT_SCR_HI:
5745  return false;
5746  default:
5747  return true;
5748  }
5749  }
5750 
5751  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5752  // SI/CI have.
5753  if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5754  return hasSGPR102_SGPR103();
5755 
5756  return true;
5757 }
5758 
5759 OperandMatchResultTy
5760 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5761  OperandMode Mode) {
5762  OperandMatchResultTy ResTy = parseVOPD(Operands);
5763  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5764  isToken(AsmToken::EndOfStatement))
5765  return ResTy;
5766 
5767  // Try to parse with a custom parser
5768  ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5769 
5770  // If we successfully parsed the operand or if there was an error parsing,
5771  // we are done.
5772  //
5773  // If we are parsing after we reach EndOfStatement then this means we
5774  // are appending default values to the Operands list. This is only done
5775  // by custom parser, so we shouldn't continue on to the generic parsing.
5776  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5777  isToken(AsmToken::EndOfStatement))
5778  return ResTy;
5779 
5780  SMLoc RBraceLoc;
5781  SMLoc LBraceLoc = getLoc();
5782  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5783  unsigned Prefix = Operands.size();
5784 
5785  for (;;) {
5786  auto Loc = getLoc();
5787  ResTy = parseReg(Operands);
5788  if (ResTy == MatchOperand_NoMatch)
5789  Error(Loc, "expected a register");
5790  if (ResTy != MatchOperand_Success)
5791  return MatchOperand_ParseFail;
5792 
5793  RBraceLoc = getLoc();
5794  if (trySkipToken(AsmToken::RBrac))
5795  break;
5796 
5797  if (!skipToken(AsmToken::Comma,
5798  "expected a comma or a closing square bracket")) {
5799  return MatchOperand_ParseFail;
5800  }
5801  }
5802 
5803  if (Operands.size() - Prefix > 1) {
5804  Operands.insert(Operands.begin() + Prefix,
5805  AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5806  Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5807  }
5808 
5809  return MatchOperand_Success;
5810  }
5811 
5812  return parseRegOrImm(Operands);
5813 }
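In NSA mode (GFX10+ MIMG instructions), the loop above parses the bracketed list of address registers and, when more than one register was parsed, re-inserts explicit "[" and "]" tokens around them. A hypothetical instruction shape, not a verified encoding, showing where that bracketed operand sits:

    image_load v[0:3], [v4, v5, v6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D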
5814 
5815 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5816  // Clear any forced encodings from the previous instruction.
5817  setForcedEncodingSize(0);
5818  setForcedDPP(false);
5819  setForcedSDWA(false);
5820 
5821  if (Name.endswith("_e64_dpp")) {
5822  setForcedDPP(true);
5823  setForcedEncodingSize(64);
5824  return Name.substr(0, Name.size() - 8);
5825  } else if (Name.endswith("_e64")) {
5826  setForcedEncodingSize(64);
5827  return Name.substr(0, Name.size() - 4);
5828  } else if (Name.endswith("_e32")) {
5829  setForcedEncodingSize(32);
5830  return Name.substr(0, Name.size() - 4);
5831  } else if (Name.endswith("_dpp")) {
5832  setForcedDPP(true);
5833  return Name.substr(0, Name.size() - 4);
5834  } else if (Name.endswith("_sdwa")) {
5835  setForcedSDWA(true);
5836  return Name.substr(0, Name.size() - 5);
5837  }
5838  return Name;
5839 }
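To illustrate the suffix stripping above (the mnemonics are only examples):

    v_add_f32_e64   ->  v_add_f32   (forced 64-bit encoding)
    v_add_f32_e32   ->  v_add_f32   (forced 32-bit encoding)
    v_mov_b32_sdwa  ->  v_mov_b32   (forced SDWA)
    v_mov_b32_dpp   ->  v_mov_b32   (forced DPP)

Anything without a recognized suffix is returned unchanged.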
5840 
5841 static void applyMnemonicAliases(StringRef &Mnemonic,
5842  const FeatureBitset &Features,
5843  unsigned VariantID);
5844 
5845 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5846  StringRef Name,
5847  SMLoc NameLoc, OperandVector &Operands) {
5848  // Add the instruction mnemonic
5849  Name = parseMnemonicSuffix(Name);
5850 
5851  // If the target architecture uses MnemonicAlias, call it here to parse
5852  // operands correctly.
5853  applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5854 
5855  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5856 
5857  bool IsMIMG = Name.startswith("image_");
5858 
5859  while (!trySkipToken(AsmToken::EndOfStatement)) {
5860  OperandMode Mode = OperandMode_Default;
5861  if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5862  Mode = OperandMode_NSA;
5863  CPolSeen = 0;
5864  OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5865 
5866  if (Res != MatchOperand_Success) {
5867  checkUnsupportedInstruction(Name, NameLoc);
5868  if (!Parser.hasPendingError()) {
5869  // FIXME: use real operand location rather than the current location.
5870  StringRef Msg =
5871  (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5872  "not a valid operand.";
5873  Error(getLoc(), Msg);
5874  }
5875  while (!trySkipToken(AsmToken::EndOfStatement)) {
5876  lex();
5877  }
5878  return true;
5879  }
5880 
5881  // Eat the comma or space if there is one.
5882  trySkipToken(AsmToken::Comma);
5883  }
5884 
5885  return false;
5886 }
5887 
5888 //===----------------------------------------------------------------------===//
5889 // Utility functions
5890 //===----------------------------------------------------------------------===//
5891 
5892 OperandMatchResultTy
5893 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5894 
5895  if (!trySkipId(