AMDGPUAsmParser.cpp
1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
43 #include "llvm/MC/MCSymbol.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
51 #include "llvm/Support/SMLoc.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79  enum KindTy {
80  Token,
81  Immediate,
82  Register,
83  Expression
84  } Kind;
85 
86  SMLoc StartLoc, EndLoc;
87  const AMDGPUAsmParser *AsmParser;
88 
89 public:
90  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91  : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93  using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95  struct Modifiers {
96  bool Abs = false;
97  bool Neg = false;
98  bool Sext = false;
99 
100  bool hasFPModifiers() const { return Abs || Neg; }
101  bool hasIntModifiers() const { return Sext; }
102  bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104  int64_t getFPModifiersOperand() const {
105  int64_t Operand = 0;
106  Operand |= Abs ? SISrcMods::ABS : 0u;
107  Operand |= Neg ? SISrcMods::NEG : 0u;
108  return Operand;
109  }
110 
111  int64_t getIntModifiersOperand() const {
112  int64_t Operand = 0;
113  Operand |= Sext ? SISrcMods::SEXT : 0u;
114  return Operand;
115  }
116 
117  int64_t getModifiersOperand() const {
118  assert(!(hasFPModifiers() && hasIntModifiers())
119  && "fp and int modifiers should not be used simultaneously");
120  if (hasFPModifiers()) {
121  return getFPModifiersOperand();
122  } else if (hasIntModifiers()) {
123  return getIntModifiersOperand();
124  } else {
125  return 0;
126  }
127  }
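 // For example, a source operand written as "-|v0|" parses with both Neg and
 // Abs set, so getModifiersOperand() returns SISrcMods::NEG | SISrcMods::ABS,
 // while a sign-extended source such as "sext(v0)" sets only Sext.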
128 
129  friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130  };
131 
132  enum ImmTy {
133  ImmTyNone,
134  ImmTyGDS,
135  ImmTyLDS,
136  ImmTyOffen,
137  ImmTyIdxen,
138  ImmTyAddr64,
139  ImmTyOffset,
140  ImmTyInstOffset,
141  ImmTyOffset0,
142  ImmTyOffset1,
143  ImmTyDLC,
144  ImmTyGLC,
145  ImmTySLC,
146  ImmTyTFE,
147  ImmTyD16,
148  ImmTyClampSI,
149  ImmTyOModSI,
150  ImmTyDPP8,
151  ImmTyDppCtrl,
152  ImmTyDppRowMask,
153  ImmTyDppBankMask,
154  ImmTyDppBoundCtrl,
155  ImmTyDppFi,
156  ImmTySdwaDstSel,
157  ImmTySdwaSrc0Sel,
158  ImmTySdwaSrc1Sel,
159  ImmTySdwaDstUnused,
160  ImmTyDMask,
161  ImmTyDim,
162  ImmTyUNorm,
163  ImmTyDA,
164  ImmTyR128A16,
165  ImmTyLWE,
166  ImmTyExpTgt,
167  ImmTyExpCompr,
168  ImmTyExpVM,
169  ImmTyFORMAT,
170  ImmTyHwreg,
171  ImmTyOff,
172  ImmTySendMsg,
173  ImmTyInterpSlot,
174  ImmTyInterpAttr,
175  ImmTyAttrChan,
176  ImmTyOpSel,
177  ImmTyOpSelHi,
178  ImmTyNegLo,
179  ImmTyNegHi,
180  ImmTySwizzle,
181  ImmTyGprIdxMode,
182  ImmTyHigh,
183  ImmTyBLGP,
184  ImmTyCBSZ,
185  ImmTyABID,
186  ImmTyEndpgm,
187  };
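 // ImmTy distinguishes named/optional operands (e.g. "offset:16", "glc",
 // "clamp") from plain numeric immediates, which use ImmTyNone.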
188 
189 private:
190  struct TokOp {
191  const char *Data;
192  unsigned Length;
193  };
194 
195  struct ImmOp {
196  int64_t Val;
197  ImmTy Type;
198  bool IsFPImm;
199  Modifiers Mods;
200  };
201 
202  struct RegOp {
203  unsigned RegNo;
204  Modifiers Mods;
205  };
206 
207  union {
208  TokOp Tok;
209  ImmOp Imm;
210  RegOp Reg;
211  const MCExpr *Expr;
212  };
213 
214 public:
215  bool isToken() const override {
216  if (Kind == Token)
217  return true;
218 
219  // When parsing operands, we can't always tell if something was meant to be
220  // a token, like 'gds', or an expression that references a global variable.
221  // In this case, we assume the string is an expression, and if we need to
222  // interpret it as a token, then we treat the symbol name as the token.
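 // For example, in "ds_write_b32 v0, v1 gds" the trailing "gds" is an operand
 // modifier token, but "gds" could equally be the name of a symbol used in an
 // expression operand.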
223  return isSymbolRefExpr();
224  }
225 
226  bool isSymbolRefExpr() const {
227  return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
228  }
229 
230  bool isImm() const override {
231  return Kind == Immediate;
232  }
233 
234  bool isInlinableImm(MVT type) const;
235  bool isLiteralImm(MVT type) const;
236 
237  bool isRegKind() const {
238  return Kind == Register;
239  }
240 
241  bool isReg() const override {
242  return isRegKind() && !hasModifiers();
243  }
244 
245  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
246  return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
247  }
248 
249  bool isRegOrImmWithInt16InputMods() const {
250  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
251  }
252 
253  bool isRegOrImmWithInt32InputMods() const {
254  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
255  }
256 
257  bool isRegOrImmWithInt64InputMods() const {
258  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
259  }
260 
261  bool isRegOrImmWithFP16InputMods() const {
262  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
263  }
264 
265  bool isRegOrImmWithFP32InputMods() const {
266  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
267  }
268 
269  bool isRegOrImmWithFP64InputMods() const {
270  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
271  }
272 
273  bool isVReg() const {
274  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
275  isRegClass(AMDGPU::VReg_64RegClassID) ||
276  isRegClass(AMDGPU::VReg_96RegClassID) ||
277  isRegClass(AMDGPU::VReg_128RegClassID) ||
278  isRegClass(AMDGPU::VReg_160RegClassID) ||
279  isRegClass(AMDGPU::VReg_256RegClassID) ||
280  isRegClass(AMDGPU::VReg_512RegClassID) ||
281  isRegClass(AMDGPU::VReg_1024RegClassID);
282  }
283 
284  bool isVReg32() const {
285  return isRegClass(AMDGPU::VGPR_32RegClassID);
286  }
287 
288  bool isVReg32OrOff() const {
289  return isOff() || isVReg32();
290  }
291 
292  bool isSDWAOperand(MVT type) const;
293  bool isSDWAFP16Operand() const;
294  bool isSDWAFP32Operand() const;
295  bool isSDWAInt16Operand() const;
296  bool isSDWAInt32Operand() const;
297 
298  bool isImmTy(ImmTy ImmT) const {
299  return isImm() && Imm.Type == ImmT;
300  }
301 
302  bool isImmModifier() const {
303  return isImm() && Imm.Type != ImmTyNone;
304  }
305 
306  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
307  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
308  bool isDMask() const { return isImmTy(ImmTyDMask); }
309  bool isDim() const { return isImmTy(ImmTyDim); }
310  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
311  bool isDA() const { return isImmTy(ImmTyDA); }
312  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
313  bool isLWE() const { return isImmTy(ImmTyLWE); }
314  bool isOff() const { return isImmTy(ImmTyOff); }
315  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
316  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
317  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
318  bool isOffen() const { return isImmTy(ImmTyOffen); }
319  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
320  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
321  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
322  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
323  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
324 
325  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
326  bool isGDS() const { return isImmTy(ImmTyGDS); }
327  bool isLDS() const { return isImmTy(ImmTyLDS); }
328  bool isDLC() const { return isImmTy(ImmTyDLC); }
329  bool isGLC() const { return isImmTy(ImmTyGLC); }
330  bool isSLC() const { return isImmTy(ImmTySLC); }
331  bool isTFE() const { return isImmTy(ImmTyTFE); }
332  bool isD16() const { return isImmTy(ImmTyD16); }
333  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
334  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
335  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
336  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
337  bool isFI() const { return isImmTy(ImmTyDppFi); }
338  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
339  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
340  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
341  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
342  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
343  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
344  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
345  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
346  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
347  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
348  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
349  bool isHigh() const { return isImmTy(ImmTyHigh); }
350 
351  bool isMod() const {
352  return isClampSI() || isOModSI();
353  }
354 
355  bool isRegOrImm() const {
356  return isReg() || isImm();
357  }
358 
359  bool isRegClass(unsigned RCID) const;
360 
361  bool isInlineValue() const;
362 
363  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
364  return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
365  }
366 
367  bool isSCSrcB16() const {
368  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
369  }
370 
371  bool isSCSrcV2B16() const {
372  return isSCSrcB16();
373  }
374 
375  bool isSCSrcB32() const {
376  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
377  }
378 
379  bool isSCSrcB64() const {
380  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
381  }
382 
383  bool isBoolReg() const;
384 
385  bool isSCSrcF16() const {
386  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
387  }
388 
389  bool isSCSrcV2F16() const {
390  return isSCSrcF16();
391  }
392 
393  bool isSCSrcF32() const {
394  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
395  }
396 
397  bool isSCSrcF64() const {
398  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
399  }
400 
401  bool isSSrcB32() const {
402  return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
403  }
404 
405  bool isSSrcB16() const {
406  return isSCSrcB16() || isLiteralImm(MVT::i16);
407  }
408 
409  bool isSSrcV2B16() const {
410  llvm_unreachable("cannot happen");
411  return isSSrcB16();
412  }
413 
414  bool isSSrcB64() const {
415  // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
416  // See isVSrc64().
417  return isSCSrcB64() || isLiteralImm(MVT::i64);
418  }
419 
420  bool isSSrcF32() const {
421  return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
422  }
423 
424  bool isSSrcF64() const {
425  return isSCSrcB64() || isLiteralImm(MVT::f64);
426  }
427 
428  bool isSSrcF16() const {
429  return isSCSrcB16() || isLiteralImm(MVT::f16);
430  }
431 
432  bool isSSrcV2F16() const {
433  llvm_unreachable("cannot happen");
434  return isSSrcF16();
435  }
436 
437  bool isSSrcOrLdsB32() const {
438  return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
439  isLiteralImm(MVT::i32) || isExpr();
440  }
441 
442  bool isVCSrcB32() const {
443  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
444  }
445 
446  bool isVCSrcB64() const {
447  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
448  }
449 
450  bool isVCSrcB16() const {
451  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
452  }
453 
454  bool isVCSrcV2B16() const {
455  return isVCSrcB16();
456  }
457 
458  bool isVCSrcF32() const {
459  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
460  }
461 
462  bool isVCSrcF64() const {
463  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
464  }
465 
466  bool isVCSrcF16() const {
467  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
468  }
469 
470  bool isVCSrcV2F16() const {
471  return isVCSrcF16();
472  }
473 
474  bool isVSrcB32() const {
475  return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
476  }
477 
478  bool isVSrcB64() const {
479  return isVCSrcF64() || isLiteralImm(MVT::i64);
480  }
481 
482  bool isVSrcB16() const {
483  return isVCSrcF16() || isLiteralImm(MVT::i16);
484  }
485 
486  bool isVSrcV2B16() const {
487  return isVSrcB16() || isLiteralImm(MVT::v2i16);
488  }
489 
490  bool isVSrcF32() const {
491  return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
492  }
493 
494  bool isVSrcF64() const {
495  return isVCSrcF64() || isLiteralImm(MVT::f64);
496  }
497 
498  bool isVSrcF16() const {
499  return isVCSrcF16() || isLiteralImm(MVT::f16);
500  }
501 
502  bool isVSrcV2F16() const {
503  return isVSrcF16() || isLiteralImm(MVT::v2f16);
504  }
505 
506  bool isVISrcB32() const {
507  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
508  }
509 
510  bool isVISrcB16() const {
511  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
512  }
513 
514  bool isVISrcV2B16() const {
515  return isVISrcB16();
516  }
517 
518  bool isVISrcF32() const {
519  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
520  }
521 
522  bool isVISrcF16() const {
523  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
524  }
525 
526  bool isVISrcV2F16() const {
527  return isVISrcF16() || isVISrcB32();
528  }
529 
530  bool isAISrcB32() const {
531  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
532  }
533 
534  bool isAISrcB16() const {
535  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
536  }
537 
538  bool isAISrcV2B16() const {
539  return isAISrcB16();
540  }
541 
542  bool isAISrcF32() const {
543  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
544  }
545 
546  bool isAISrcF16() const {
547  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
548  }
549 
550  bool isAISrcV2F16() const {
551  return isAISrcF16() || isAISrcB32();
552  }
553 
554  bool isAISrc_128B32() const {
555  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
556  }
557 
558  bool isAISrc_128B16() const {
559  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
560  }
561 
562  bool isAISrc_128V2B16() const {
563  return isAISrc_128B16();
564  }
565 
566  bool isAISrc_128F32() const {
567  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
568  }
569 
570  bool isAISrc_128F16() const {
571  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
572  }
573 
574  bool isAISrc_128V2F16() const {
575  return isAISrc_128F16() || isAISrc_128B32();
576  }
577 
578  bool isAISrc_512B32() const {
579  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
580  }
581 
582  bool isAISrc_512B16() const {
583  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
584  }
585 
586  bool isAISrc_512V2B16() const {
587  return isAISrc_512B16();
588  }
589 
590  bool isAISrc_512F32() const {
591  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
592  }
593 
594  bool isAISrc_512F16() const {
595  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
596  }
597 
598  bool isAISrc_512V2F16() const {
599  return isAISrc_512F16() || isAISrc_512B32();
600  }
601 
602  bool isAISrc_1024B32() const {
603  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
604  }
605 
606  bool isAISrc_1024B16() const {
607  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
608  }
609 
610  bool isAISrc_1024V2B16() const {
611  return isAISrc_1024B16();
612  }
613 
614  bool isAISrc_1024F32() const {
615  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
616  }
617 
618  bool isAISrc_1024F16() const {
619  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
620  }
621 
622  bool isAISrc_1024V2F16() const {
623  return isAISrc_1024F16() || isAISrc_1024B32();
624  }
625 
626  bool isKImmFP32() const {
627  return isLiteralImm(MVT::f32);
628  }
629 
630  bool isKImmFP16() const {
631  return isLiteralImm(MVT::f16);
632  }
633 
634  bool isMem() const override {
635  return false;
636  }
637 
638  bool isExpr() const {
639  return Kind == Expression;
640  }
641 
642  bool isSoppBrTarget() const {
643  return isExpr() || isImm();
644  }
645 
646  bool isSWaitCnt() const;
647  bool isHwreg() const;
648  bool isSendMsg() const;
649  bool isSwizzle() const;
650  bool isSMRDOffset8() const;
651  bool isSMRDOffset20() const;
652  bool isSMRDLiteralOffset() const;
653  bool isDPP8() const;
654  bool isDPPCtrl() const;
655  bool isBLGP() const;
656  bool isCBSZ() const;
657  bool isABID() const;
658  bool isGPRIdxMode() const;
659  bool isS16Imm() const;
660  bool isU16Imm() const;
661  bool isEndpgm() const;
662 
663  StringRef getExpressionAsToken() const {
664  assert(isExpr());
665  const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
666  return S->getSymbol().getName();
667  }
668 
669  StringRef getToken() const {
670  assert(isToken());
671 
672  if (Kind == Expression)
673  return getExpressionAsToken();
674 
675  return StringRef(Tok.Data, Tok.Length);
676  }
677 
678  int64_t getImm() const {
679  assert(isImm());
680  return Imm.Val;
681  }
682 
683  ImmTy getImmTy() const {
684  assert(isImm());
685  return Imm.Type;
686  }
687 
688  unsigned getReg() const override {
689  assert(isRegKind());
690  return Reg.RegNo;
691  }
692 
693  SMLoc getStartLoc() const override {
694  return StartLoc;
695  }
696 
697  SMLoc getEndLoc() const override {
698  return EndLoc;
699  }
700 
701  SMRange getLocRange() const {
702  return SMRange(StartLoc, EndLoc);
703  }
704 
705  Modifiers getModifiers() const {
706  assert(isRegKind() || isImmTy(ImmTyNone));
707  return isRegKind() ? Reg.Mods : Imm.Mods;
708  }
709 
710  void setModifiers(Modifiers Mods) {
711  assert(isRegKind() || isImmTy(ImmTyNone));
712  if (isRegKind())
713  Reg.Mods = Mods;
714  else
715  Imm.Mods = Mods;
716  }
717 
718  bool hasModifiers() const {
719  return getModifiers().hasModifiers();
720  }
721 
722  bool hasFPModifiers() const {
723  return getModifiers().hasFPModifiers();
724  }
725 
726  bool hasIntModifiers() const {
727  return getModifiers().hasIntModifiers();
728  }
729 
730  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
731 
732  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
733 
734  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
735 
736  template <unsigned Bitwidth>
737  void addKImmFPOperands(MCInst &Inst, unsigned N) const;
738 
739  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
740  addKImmFPOperands<16>(Inst, N);
741  }
742 
743  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
744  addKImmFPOperands<32>(Inst, N);
745  }
746 
747  void addRegOperands(MCInst &Inst, unsigned N) const;
748 
749  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
750  addRegOperands(Inst, N);
751  }
752 
753  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
754  if (isRegKind())
755  addRegOperands(Inst, N);
756  else if (isExpr())
757  Inst.addOperand(MCOperand::createExpr(Expr));
758  else
759  addImmOperands(Inst, N);
760  }
761 
762  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
763  Modifiers Mods = getModifiers();
764  Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
765  if (isRegKind()) {
766  addRegOperands(Inst, N);
767  } else {
768  addImmOperands(Inst, N, false);
769  }
770  }
771 
772  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
773  assert(!hasIntModifiers());
774  addRegOrImmWithInputModsOperands(Inst, N);
775  }
776 
777  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
778  assert(!hasFPModifiers());
779  addRegOrImmWithInputModsOperands(Inst, N);
780  }
781 
782  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
783  Modifiers Mods = getModifiers();
784  Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
785  assert(isRegKind());
786  addRegOperands(Inst, N);
787  }
788 
789  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
790  assert(!hasIntModifiers());
791  addRegWithInputModsOperands(Inst, N);
792  }
793 
794  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
795  assert(!hasFPModifiers());
796  addRegWithInputModsOperands(Inst, N);
797  }
798 
799  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
800  if (isImm())
801  addImmOperands(Inst, N);
802  else {
803  assert(isExpr());
804  Inst.addOperand(MCOperand::createExpr(Expr));
805  }
806  }
807 
808  static void printImmTy(raw_ostream& OS, ImmTy Type) {
809  switch (Type) {
810  case ImmTyNone: OS << "None"; break;
811  case ImmTyGDS: OS << "GDS"; break;
812  case ImmTyLDS: OS << "LDS"; break;
813  case ImmTyOffen: OS << "Offen"; break;
814  case ImmTyIdxen: OS << "Idxen"; break;
815  case ImmTyAddr64: OS << "Addr64"; break;
816  case ImmTyOffset: OS << "Offset"; break;
817  case ImmTyInstOffset: OS << "InstOffset"; break;
818  case ImmTyOffset0: OS << "Offset0"; break;
819  case ImmTyOffset1: OS << "Offset1"; break;
820  case ImmTyDLC: OS << "DLC"; break;
821  case ImmTyGLC: OS << "GLC"; break;
822  case ImmTySLC: OS << "SLC"; break;
823  case ImmTyTFE: OS << "TFE"; break;
824  case ImmTyD16: OS << "D16"; break;
825  case ImmTyFORMAT: OS << "FORMAT"; break;
826  case ImmTyClampSI: OS << "ClampSI"; break;
827  case ImmTyOModSI: OS << "OModSI"; break;
828  case ImmTyDPP8: OS << "DPP8"; break;
829  case ImmTyDppCtrl: OS << "DppCtrl"; break;
830  case ImmTyDppRowMask: OS << "DppRowMask"; break;
831  case ImmTyDppBankMask: OS << "DppBankMask"; break;
832  case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
833  case ImmTyDppFi: OS << "FI"; break;
834  case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
835  case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
836  case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
837  case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
838  case ImmTyDMask: OS << "DMask"; break;
839  case ImmTyDim: OS << "Dim"; break;
840  case ImmTyUNorm: OS << "UNorm"; break;
841  case ImmTyDA: OS << "DA"; break;
842  case ImmTyR128A16: OS << "R128A16"; break;
843  case ImmTyLWE: OS << "LWE"; break;
844  case ImmTyOff: OS << "Off"; break;
845  case ImmTyExpTgt: OS << "ExpTgt"; break;
846  case ImmTyExpCompr: OS << "ExpCompr"; break;
847  case ImmTyExpVM: OS << "ExpVM"; break;
848  case ImmTyHwreg: OS << "Hwreg"; break;
849  case ImmTySendMsg: OS << "SendMsg"; break;
850  case ImmTyInterpSlot: OS << "InterpSlot"; break;
851  case ImmTyInterpAttr: OS << "InterpAttr"; break;
852  case ImmTyAttrChan: OS << "AttrChan"; break;
853  case ImmTyOpSel: OS << "OpSel"; break;
854  case ImmTyOpSelHi: OS << "OpSelHi"; break;
855  case ImmTyNegLo: OS << "NegLo"; break;
856  case ImmTyNegHi: OS << "NegHi"; break;
857  case ImmTySwizzle: OS << "Swizzle"; break;
858  case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
859  case ImmTyHigh: OS << "High"; break;
860  case ImmTyBLGP: OS << "BLGP"; break;
861  case ImmTyCBSZ: OS << "CBSZ"; break;
862  case ImmTyABID: OS << "ABID"; break;
863  case ImmTyEndpgm: OS << "Endpgm"; break;
864  }
865  }
866 
867  void print(raw_ostream &OS) const override {
868  switch (Kind) {
869  case Register:
870  OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
871  break;
872  case Immediate:
873  OS << '<' << getImm();
874  if (getImmTy() != ImmTyNone) {
875  OS << " type: "; printImmTy(OS, getImmTy());
876  }
877  OS << " mods: " << Imm.Mods << '>';
878  break;
879  case Token:
880  OS << '\'' << getToken() << '\'';
881  break;
882  case Expression:
883  OS << "<expr " << *Expr << '>';
884  break;
885  }
886  }
887 
888  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
889  int64_t Val, SMLoc Loc,
890  ImmTy Type = ImmTyNone,
891  bool IsFPImm = false) {
892  auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
893  Op->Imm.Val = Val;
894  Op->Imm.IsFPImm = IsFPImm;
895  Op->Imm.Type = Type;
896  Op->Imm.Mods = Modifiers();
897  Op->StartLoc = Loc;
898  Op->EndLoc = Loc;
899  return Op;
900  }
901 
902  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
903  StringRef Str, SMLoc Loc,
904  bool HasExplicitEncodingSize = true) {
905  auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
906  Res->Tok.Data = Str.data();
907  Res->Tok.Length = Str.size();
908  Res->StartLoc = Loc;
909  Res->EndLoc = Loc;
910  return Res;
911  }
912 
913  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
914  unsigned RegNo, SMLoc S,
915  SMLoc E) {
916  auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
917  Op->Reg.RegNo = RegNo;
918  Op->Reg.Mods = Modifiers();
919  Op->StartLoc = S;
920  Op->EndLoc = E;
921  return Op;
922  }
923 
924  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
925  const class MCExpr *Expr, SMLoc S) {
926  auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
927  Op->Expr = Expr;
928  Op->StartLoc = S;
929  Op->EndLoc = S;
930  return Op;
931  }
932 };
933 
934 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
935  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
936  return OS;
937 }
938 
939 //===----------------------------------------------------------------------===//
940 // AsmParser
941 //===----------------------------------------------------------------------===//
942 
943 // Holds info related to the current kernel, e.g. count of SGPRs used.
944 // A kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
945 // next .amdgpu_hsa_kernel directive or at EOF.
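 // For example, if a kernel's body contains "s_mov_b32 s5, 0", usesRegister()
 // records SGPR index 5, so .kernel.sgpr_count becomes at least 6.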
946 class KernelScopeInfo {
947  int SgprIndexUnusedMin = -1;
948  int VgprIndexUnusedMin = -1;
949  MCContext *Ctx = nullptr;
950 
951  void usesSgprAt(int i) {
952  if (i >= SgprIndexUnusedMin) {
953  SgprIndexUnusedMin = ++i;
954  if (Ctx) {
955  MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
956  Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
957  }
958  }
959  }
960 
961  void usesVgprAt(int i) {
962  if (i >= VgprIndexUnusedMin) {
963  VgprIndexUnusedMin = ++i;
964  if (Ctx) {
965  MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
966  Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
967  }
968  }
969  }
970 
971 public:
972  KernelScopeInfo() = default;
973 
974  void initialize(MCContext &Context) {
975  Ctx = &Context;
976  usesSgprAt(SgprIndexUnusedMin = -1);
977  usesVgprAt(VgprIndexUnusedMin = -1);
978  }
979 
980  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
981  switch (RegKind) {
982  case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
983  case IS_AGPR: // fall through
984  case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
985  default: break;
986  }
987  }
988 };
989 
990 class AMDGPUAsmParser : public MCTargetAsmParser {
991  MCAsmParser &Parser;
992 
993  // Number of extra operands parsed after the first optional operand.
994  // This may be necessary to skip hardcoded mandatory operands.
995  static const unsigned MAX_OPR_LOOKAHEAD = 8;
996 
997  unsigned ForcedEncodingSize = 0;
998  bool ForcedDPP = false;
999  bool ForcedSDWA = false;
1000  KernelScopeInfo KernelScope;
1001 
1002  /// @name Auto-generated Match Functions
1003  /// {
1004 
1005 #define GET_ASSEMBLER_HEADER
1006 #include "AMDGPUGenAsmMatcher.inc"
1007 
1008  /// }
1009 
1010 private:
1011  bool ParseAsAbsoluteExpression(uint32_t &Ret);
1012  bool OutOfRangeError(SMRange Range);
1013  /// Calculate VGPR/SGPR blocks required for given target, reserved
1014  /// registers, and user-specified NextFreeXGPR values.
1015  ///
1016  /// \param Features [in] Target features, used for bug corrections.
1017  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1018  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1019  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1020  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1021  /// descriptor field, if valid.
1022  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1023  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1024  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1025  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1026  /// \param VGPRBlocks [out] Result VGPR block count.
1027  /// \param SGPRBlocks [out] Result SGPR block count.
1028  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1029  bool FlatScrUsed, bool XNACKUsed,
1030  Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1031  SMRange VGPRRange, unsigned NextFreeSGPR,
1032  SMRange SGPRRange, unsigned &VGPRBlocks,
1033  unsigned &SGPRBlocks);
1034  bool ParseDirectiveAMDGCNTarget();
1035  bool ParseDirectiveAMDHSAKernel();
1036  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1037  bool ParseDirectiveHSACodeObjectVersion();
1038  bool ParseDirectiveHSACodeObjectISA();
1039  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1040  bool ParseDirectiveAMDKernelCodeT();
1041  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1042  bool ParseDirectiveAMDGPUHsaKernel();
1043 
1044  bool ParseDirectiveISAVersion();
1045  bool ParseDirectiveHSAMetadata();
1046  bool ParseDirectivePALMetadataBegin();
1047  bool ParseDirectivePALMetadata();
1048  bool ParseDirectiveAMDGPULDS();
1049 
1050  /// Common code to parse out a block of text (typically YAML) between start and
1051  /// end directives.
1052  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1053  const char *AssemblerDirectiveEnd,
1054  std::string &CollectString);
1055 
1056  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1057  RegisterKind RegKind, unsigned Reg1,
1058  unsigned RegNum);
1059  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
1060  unsigned& RegNum, unsigned& RegWidth,
1061  unsigned *DwordRegIndex);
1062  bool isRegister();
1063  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1064  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1065  void initializeGprCountSymbol(RegisterKind RegKind);
1066  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1067  unsigned RegWidth);
1068  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1069  bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1070  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1071  bool IsGdsHardcoded);
1072 
1073 public:
1074  enum AMDGPUMatchResultTy {
1075  Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1076  };
1077  enum OperandMode {
1078  OperandMode_Default,
1079  OperandMode_NSA,
1080  };
1081 
1082  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1083 
1084  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1085  const MCInstrInfo &MII,
1086  const MCTargetOptions &Options)
1087  : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1088  MCAsmParserExtension::Initialize(Parser);
1089 
1090  if (getFeatureBits().none()) {
1091  // Set default features.
1092  copySTI().ToggleFeature("southern-islands");
1093  }
1094 
1095  setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1096 
1097  {
1098  // TODO: make those pre-defined variables read-only.
1099  // Currently there is no suitable machinery in core llvm-mc for this.
1100  // MCSymbol::isRedefinable is intended for another purpose, and
1101  // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1102  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1103  MCContext &Ctx = getContext();
1104  if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1105  MCSymbol *Sym =
1106  Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1107  Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1108  Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1109  Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1110  Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1111  Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1112  } else {
1113  MCSymbol *Sym =
1114  Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1115  Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1116  Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1117  Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1118  Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1119  Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1120  }
1121  if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1122  initializeGprCountSymbol(IS_VGPR);
1123  initializeGprCountSymbol(IS_SGPR);
1124  } else
1125  KernelScope.initialize(getContext());
1126  }
1127  }
1128 
1129  bool hasXNACK() const {
1130  return AMDGPU::hasXNACK(getSTI());
1131  }
1132 
1133  bool hasMIMG_R128() const {
1134  return AMDGPU::hasMIMG_R128(getSTI());
1135  }
1136 
1137  bool hasPackedD16() const {
1138  return AMDGPU::hasPackedD16(getSTI());
1139  }
1140 
1141  bool isSI() const {
1142  return AMDGPU::isSI(getSTI());
1143  }
1144 
1145  bool isCI() const {
1146  return AMDGPU::isCI(getSTI());
1147  }
1148 
1149  bool isVI() const {
1150  return AMDGPU::isVI(getSTI());
1151  }
1152 
1153  bool isGFX9() const {
1154  return AMDGPU::isGFX9(getSTI());
1155  }
1156 
1157  bool isGFX10() const {
1158  return AMDGPU::isGFX10(getSTI());
1159  }
1160 
1161  bool hasInv2PiInlineImm() const {
1162  return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1163  }
1164 
1165  bool hasFlatOffsets() const {
1166  return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1167  }
1168 
1169  bool hasSGPR102_SGPR103() const {
1170  return !isVI() && !isGFX9();
1171  }
1172 
1173  bool hasSGPR104_SGPR105() const {
1174  return isGFX10();
1175  }
1176 
1177  bool hasIntClamp() const {
1178  return getFeatureBits()[AMDGPU::FeatureIntClamp];
1179  }
1180 
1181  AMDGPUTargetStreamer &getTargetStreamer() {
1182  MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1183  return static_cast<AMDGPUTargetStreamer &>(TS);
1184  }
1185 
1186  const MCRegisterInfo *getMRI() const {
1187  // We need this const_cast because for some reason getContext() is not const
1188  // in MCAsmParser.
1189  return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1190  }
1191 
1192  const MCInstrInfo *getMII() const {
1193  return &MII;
1194  }
1195 
1196  const FeatureBitset &getFeatureBits() const {
1197  return getSTI().getFeatureBits();
1198  }
1199 
1200  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1201  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1202  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1203 
1204  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1205  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1206  bool isForcedDPP() const { return ForcedDPP; }
1207  bool isForcedSDWA() const { return ForcedSDWA; }
1208  ArrayRef<unsigned> getMatchedVariants() const;
1209 
1210  std::unique_ptr<AMDGPUOperand> parseRegister();
1211  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1212  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1213  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1214  unsigned Kind) override;
1215  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1216  OperandVector &Operands, MCStreamer &Out,
1217  uint64_t &ErrorInfo,
1218  bool MatchingInlineAsm) override;
1219  bool ParseDirective(AsmToken DirectiveID) override;
1220  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1221  OperandMode Mode = OperandMode_Default);
1222  StringRef parseMnemonicSuffix(StringRef Name);
1223  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1224  SMLoc NameLoc, OperandVector &Operands) override;
1225  //bool ProcessInstruction(MCInst &Inst);
1226 
1227  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1228 
1229  OperandMatchResultTy
1230  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1231  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1232  bool (*ConvertResult)(int64_t &) = nullptr);
1233 
1234  OperandMatchResultTy
1235  parseOperandArrayWithPrefix(const char *Prefix,
1236  OperandVector &Operands,
1237  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1238  bool (*ConvertResult)(int64_t&) = nullptr);
1239 
1240  OperandMatchResultTy
1241  parseNamedBit(const char *Name, OperandVector &Operands,
1242  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1243  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1244  StringRef &Value);
1245 
1246  bool isModifier();
1247  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1248  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1249  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1250  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1251  bool parseSP3NegModifier();
1252  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1253  OperandMatchResultTy parseReg(OperandVector &Operands);
1254  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1255  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1256  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1257  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1258  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1259  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1260  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1261 
1262  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1263  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1264  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1265  void cvtExp(MCInst &Inst, const OperandVector &Operands);
1266 
1267  bool parseCnt(int64_t &IntVal);
1268  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1269  OperandMatchResultTy parseHwreg(OperandVector &Operands);
1270 
1271 private:
1272  struct OperandInfoTy {
1273  int64_t Id;
1274  bool IsSymbolic = false;
1275  bool IsDefined = false;
1276 
1277  OperandInfoTy(int64_t Id_) : Id(Id_) {}
1278  };
1279 
1280  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1281  bool validateSendMsg(const OperandInfoTy &Msg,
1282  const OperandInfoTy &Op,
1283  const OperandInfoTy &Stream,
1284  const SMLoc Loc);
1285 
1286  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1287  bool validateHwreg(const OperandInfoTy &HwReg,
1288  const int64_t Offset,
1289  const int64_t Width,
1290  const SMLoc Loc);
1291 
1292  void errorExpTgt();
1293  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1294  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1295 
1296  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1297  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1298  bool validateSOPLiteral(const MCInst &Inst) const;
1299  bool validateConstantBusLimitations(const MCInst &Inst);
1300  bool validateEarlyClobberLimitations(const MCInst &Inst);
1301  bool validateIntClampSupported(const MCInst &Inst);
1302  bool validateMIMGAtomicDMask(const MCInst &Inst);
1303  bool validateMIMGGatherDMask(const MCInst &Inst);
1304  bool validateMIMGDataSize(const MCInst &Inst);
1305  bool validateMIMGAddrSize(const MCInst &Inst);
1306  bool validateMIMGD16(const MCInst &Inst);
1307  bool validateMIMGDim(const MCInst &Inst);
1308  bool validateLdsDirect(const MCInst &Inst);
1309  bool validateOpSel(const MCInst &Inst);
1310  bool validateVccOperand(unsigned Reg) const;
1311  bool validateVOP3Literal(const MCInst &Inst) const;
1312  unsigned getConstantBusLimit(unsigned Opcode) const;
1313  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1314  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1315  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1316 
1317  bool isId(const StringRef Id) const;
1318  bool isId(const AsmToken &Token, const StringRef Id) const;
1319  bool isToken(const AsmToken::TokenKind Kind) const;
1320  bool trySkipId(const StringRef Id);
1321  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1322  bool trySkipToken(const AsmToken::TokenKind Kind);
1323  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1324  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1325  void peekTokens(MutableArrayRef<AsmToken> Tokens);
1326  AsmToken::TokenKind getTokenKind() const;
1327  bool parseExpr(int64_t &Imm);
1328  bool parseExpr(OperandVector &Operands);
1329  StringRef getTokenStr() const;
1330  AsmToken peekToken();
1331  AsmToken getToken() const;
1332  SMLoc getLoc() const;
1333  void lex();
1334 
1335 public:
1336  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1337  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1338 
1339  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1340  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1341  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1342  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1343  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1344  OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1345 
1346  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1347  const unsigned MinVal,
1348  const unsigned MaxVal,
1349  const StringRef ErrMsg);
1350  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1351  bool parseSwizzleOffset(int64_t &Imm);
1352  bool parseSwizzleMacro(int64_t &Imm);
1353  bool parseSwizzleQuadPerm(int64_t &Imm);
1354  bool parseSwizzleBitmaskPerm(int64_t &Imm);
1355  bool parseSwizzleBroadcast(int64_t &Imm);
1356  bool parseSwizzleSwap(int64_t &Imm);
1357  bool parseSwizzleReverse(int64_t &Imm);
1358 
1359  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1360  int64_t parseGPRIdxMacro();
1361 
1362  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1363  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1364  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1365  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1366  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1367 
1368  AMDGPUOperand::Ptr defaultDLC() const;
1369  AMDGPUOperand::Ptr defaultGLC() const;
1370  AMDGPUOperand::Ptr defaultSLC() const;
1371 
1372  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1373  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1374  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1375  AMDGPUOperand::Ptr defaultFlatOffset() const;
1376 
1377  OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1378 
1379  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1380  OptionalImmIndexMap &OptionalIdx);
1381  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1382  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1383  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1384 
1385  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1386 
1387  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1388  bool IsAtomic = false);
1389  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1390 
1391  OperandMatchResultTy parseDim(OperandVector &Operands);
1392  OperandMatchResultTy parseDPP8(OperandVector &Operands);
1393  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1394  AMDGPUOperand::Ptr defaultRowMask() const;
1395  AMDGPUOperand::Ptr defaultBankMask() const;
1396  AMDGPUOperand::Ptr defaultBoundCtrl() const;
1397  AMDGPUOperand::Ptr defaultFI() const;
1398  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1399  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1400 
1401  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1402  AMDGPUOperand::ImmTy Type);
1403  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1404  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1405  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1406  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1407  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1408  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1409  uint64_t BasicInstType, bool skipVcc = false);
1410 
1411  AMDGPUOperand::Ptr defaultBLGP() const;
1412  AMDGPUOperand::Ptr defaultCBSZ() const;
1413  AMDGPUOperand::Ptr defaultABID() const;
1414 
1415  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1416  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1417 };
1418 
1419 struct OptionalOperand {
1420  const char *Name;
1421  AMDGPUOperand::ImmTy Type;
1422  bool IsBit;
1423  bool (*ConvertResult)(int64_t&);
1424 };
1425 
1426 } // end anonymous namespace
1427 
1428 // May be called with integer type with equivalent bitwidth.
1429 static const fltSemantics *getFltSemantics(unsigned Size) {
1430  switch (Size) {
1431  case 4:
1432  return &APFloat::IEEEsingle();
1433  case 8:
1434  return &APFloat::IEEEdouble();
1435  case 2:
1436  return &APFloat::IEEEhalf();
1437  default:
1438  llvm_unreachable("unsupported fp type");
1439  }
1440 }
1441 
1442 static const fltSemantics *getFltSemantics(MVT VT) {
1443  return getFltSemantics(VT.getSizeInBits() / 8);
1444 }
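 // Note that integer and floating-point types of the same width map to the
 // same semantics here, e.g. both MVT::i16 and MVT::f16 yield IEEEhalf().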
1445 
1446 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1447  switch (OperandType) {
1448  case AMDGPU::OPERAND_REG_IMM_INT32:
1449  case AMDGPU::OPERAND_REG_IMM_FP32:
1450  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1451  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1452  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1453  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1454  return &APFloat::IEEEsingle();
1455  case AMDGPU::OPERAND_REG_IMM_INT64:
1456  case AMDGPU::OPERAND_REG_IMM_FP64:
1457  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1458  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1459  return &APFloat::IEEEdouble();
1460  case AMDGPU::OPERAND_REG_IMM_INT16:
1461  case AMDGPU::OPERAND_REG_IMM_FP16:
1462  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1463  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1464  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1465  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1466  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1467  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1468  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1469  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1470  case AMDGPU::OPERAND_REG_IMM_V2INT16:
1471  case AMDGPU::OPERAND_REG_IMM_V2FP16:
1472  return &APFloat::IEEEhalf();
1473  default:
1474  llvm_unreachable("unsupported fp type");
1475  }
1476 }
1477 
1478 //===----------------------------------------------------------------------===//
1479 // Operand
1480 //===----------------------------------------------------------------------===//
1481 
1482 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1483  bool Lost;
1484 
1485  // Convert the literal to the operand type's floating-point semantics.
1486  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1487  APFloat::rmNearestTiesToEven,
1488  &Lost);
1489  // We allow precision loss but not overflow or underflow.
1490  if (Status != APFloat::opOK &&
1491  Lost &&
1492  ((Status & APFloat::opOverflow) != 0 ||
1493  (Status & APFloat::opUnderflow) != 0)) {
1494  return false;
1495  }
1496 
1497  return true;
1498 }
1499 
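 // Returns true if Val survives truncation to Size bits, i.e. it is
 // representable in Size bits as either an unsigned or a signed value.
 // For example, -1 is safe for Size == 16, while 0x1ffff is not.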
1500 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1501  return isUIntN(Size, Val) || isIntN(Size, Val);
1502 }
1503 
1504 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1505 
1506  // This is a hack to enable named inline values like
1507  // shared_base with both 32-bit and 64-bit operands.
1508  // Note that these values are defined as
1509  // 32-bit operands only.
1510  if (isInlineValue()) {
1511  return true;
1512  }
1513 
1514  if (!isImmTy(ImmTyNone)) {
1515  // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1516  return false;
1517  }
1518  // TODO: We should avoid using host float here. It would be better to
1519  // check the float bit values which is what a few other places do.
1520  // We've had bot failures before due to weird NaN support on mips hosts.
1521 
1522  APInt Literal(64, Imm.Val);
1523 
1524  if (Imm.IsFPImm) { // We got fp literal token
1525  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1526  return AMDGPU::isInlinableLiteral64(Imm.Val,
1527  AsmParser->hasInv2PiInlineImm());
1528  }
1529 
1530  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1531  if (!canLosslesslyConvertToFPType(FPLiteral, type))
1532  return false;
1533 
1534  if (type.getScalarSizeInBits() == 16) {
1535  return AMDGPU::isInlinableLiteral16(
1536  static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1537  AsmParser->hasInv2PiInlineImm());
1538  }
1539 
1540  // Check if single precision literal is inlinable
1541  return AMDGPU::isInlinableLiteral32(
1542  static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1543  AsmParser->hasInv2PiInlineImm());
1544  }
1545 
1546  // We got int literal token.
1547  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1548  return AMDGPU::isInlinableLiteral64(Imm.Val,
1549  AsmParser->hasInv2PiInlineImm());
1550  }
1551 
1552  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1553  return false;
1554  }
1555 
1556  if (type.getScalarSizeInBits() == 16) {
1557  return AMDGPU::isInlinableLiteral16(
1558  static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1559  AsmParser->hasInv2PiInlineImm());
1560  }
1561 
1562  return AMDGPU::isInlinableLiteral32(
1563  static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1564  AsmParser->hasInv2PiInlineImm());
1565 }
1566 
1567 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1568  // Check that this immediate can be added as literal
1569  if (!isImmTy(ImmTyNone)) {
1570  return false;
1571  }
1572 
1573  if (!Imm.IsFPImm) {
1574  // We got int literal token.
1575 
1576  if (type == MVT::f64 && hasFPModifiers()) {
1577  // Cannot apply fp modifiers to int literals preserving the same semantics
1578  // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1579  // disable these cases.
1580  return false;
1581  }
1582 
1583  unsigned Size = type.getSizeInBits();
1584  if (Size == 64)
1585  Size = 32;
1586 
1587  // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1588  // types.
1589  return isSafeTruncation(Imm.Val, Size);
1590  }
1591 
1592  // We got fp literal token
1593  if (type == MVT::f64) { // Expected 64-bit fp operand
1594  // The low 32 bits of the literal would be set to zeroes, but we accept such literals.
1595  return true;
1596  }
1597 
1598  if (type == MVT::i64) { // Expected 64-bit int operand
1599  // We don't allow fp literals in 64-bit integer instructions. It is
1600  // unclear how we should encode them.
1601  return false;
1602  }
1603 
1604  // We allow fp literals with f16x2 operands assuming that the specified
1605  // literal goes into the lower half and the upper half is zero. We also
1606  // require that the literal can be losslessly converted to f16.
1607  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1608  (type == MVT::v2i16)? MVT::i16 : type;
1609 
1610  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1611  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1612 }
1613 
1614 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1615  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1616 }
1617 
1618 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1619  if (AsmParser->isVI())
1620  return isVReg32();
1621  else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1622  return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1623  else
1624  return false;
1625 }
1626 
1627 bool AMDGPUOperand::isSDWAFP16Operand() const {
1628  return isSDWAOperand(MVT::f16);
1629 }
1630 
1631 bool AMDGPUOperand::isSDWAFP32Operand() const {
1632  return isSDWAOperand(MVT::f32);
1633 }
1634 
1635 bool AMDGPUOperand::isSDWAInt16Operand() const {
1636  return isSDWAOperand(MVT::i16);
1637 }
1638 
1639 bool AMDGPUOperand::isSDWAInt32Operand() const {
1640  return isSDWAOperand(MVT::i32);
1641 }
1642 
1643 bool AMDGPUOperand::isBoolReg() const {
1644  return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1645  (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1646 }
1647 
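 // Applies the parsed abs/neg modifiers directly to the raw floating-point bit
 // pattern: abs clears the sign bit first, then neg flips it, so "-|x|" always
 // produces a negative value.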
1648 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1649 {
1650  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1651  assert(Size == 2 || Size == 4 || Size == 8);
1652 
1653  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1654 
1655  if (Imm.Mods.Abs) {
1656  Val &= ~FpSignMask;
1657  }
1658  if (Imm.Mods.Neg) {
1659  Val ^= FpSignMask;
1660  }
1661 
1662  return Val;
1663 }
1664 
1665 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1666  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1667  Inst.getNumOperands())) {
1668  addLiteralImmOperand(Inst, Imm.Val,
1669  ApplyModifiers &
1670  isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1671  } else {
1672  assert(!isImmTy(ImmTyNone) || !hasModifiers());
1673  Inst.addOperand(MCOperand::createImm(Imm.Val));
1674  }
1675 }
1676 
1677 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1678  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1679  auto OpNum = Inst.getNumOperands();
1680  // Check that this operand accepts literals
1681  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1682 
1683  if (ApplyModifiers) {
1684  assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1685  const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1686  Val = applyInputFPModifiers(Val, Size);
1687  }
1688 
1689  APInt Literal(64, Val);
1690  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1691 
1692  if (Imm.IsFPImm) { // We got fp literal token
1693  switch (OpTy) {
1694  case AMDGPU::OPERAND_REG_IMM_INT64:
1695  case AMDGPU::OPERAND_REG_IMM_FP64:
1696  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1697  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1698  if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1699  AsmParser->hasInv2PiInlineImm())) {
1700  Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1701  return;
1702  }
1703 
1704  // Non-inlineable
1705  if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1706  // For fp operands we check if low 32 bits are zeros
1707  if (Literal.getLoBits(32) != 0) {
1708  const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1709  "Can't encode literal as exact 64-bit floating-point operand. "
1710  "Low 32-bits will be set to zero");
1711  }
1712 
1713  Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1714  return;
1715  }
1716 
1717  // We don't allow fp literals in 64-bit integer instructions. It is
1718  // unclear how we should encode them. This case should be checked earlier
1719  // in predicate methods (isLiteralImm())
1720  llvm_unreachable("fp literal in 64-bit integer instruction.");
1721 
1740  bool lost;
1741  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1742  // Convert the literal to the operand's floating-point semantics
1743  FPLiteral.convert(*getOpFltSemantics(OpTy),
1744  APFloat::rmNearestTiesToEven, &lost);
1745  // We allow precision loss but not overflow or underflow. This should be
1746  // checked earlier in isLiteralImm()
1747 
1748  uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1749  Inst.addOperand(MCOperand::createImm(ImmVal));
1750  return;
1751  }
1752  default:
1753  llvm_unreachable("invalid operand size");
1754  }
1755 
1756  return;
1757  }
1758 
1759  // We got int literal token.
1760  // Only sign extend inline immediates.
1761  switch (OpTy) {
1770  if (isSafeTruncation(Val, 32) &&
1771  AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1772  AsmParser->hasInv2PiInlineImm())) {
1773  Inst.addOperand(MCOperand::createImm(Val));
1774  return;
1775  }
1776 
1777  Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1778  return;
1779 
1784  if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1785  Inst.addOperand(MCOperand::createImm(Val));
1786  return;
1787  }
1788 
1789  Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1790  return;
1791 
1798  if (isSafeTruncation(Val, 16) &&
1799  AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1800  AsmParser->hasInv2PiInlineImm())) {
1801  Inst.addOperand(MCOperand::createImm(Val));
1802  return;
1803  }
1804 
1805  Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1806  return;
1807 
1812  assert(isSafeTruncation(Val, 16));
1813  assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1814  AsmParser->hasInv2PiInlineImm()));
1815 
1816  Inst.addOperand(MCOperand::createImm(Val));
1817  return;
1818  }
1819  default:
1820  llvm_unreachable("invalid operand size");
1821  }
1822 }
1823 
1824 template <unsigned Bitwidth>
1825 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1826  APInt Literal(64, Imm.Val);
1827 
1828  if (!Imm.IsFPImm) {
1829  // We got int literal token.
1830  Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1831  return;
1832  }
1833 
1834  bool Lost;
1835  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1836  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1837  APFloat::rmNearestTiesToEven, &Lost);
1838  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1839 }
1840 
1841 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1842  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1843 }
1844 
1845 static bool isInlineValue(unsigned Reg) {
1846  switch (Reg) {
1847  case AMDGPU::SRC_SHARED_BASE:
1848  case AMDGPU::SRC_SHARED_LIMIT:
1849  case AMDGPU::SRC_PRIVATE_BASE:
1850  case AMDGPU::SRC_PRIVATE_LIMIT:
1851  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1852  return true;
1853  case AMDGPU::SRC_VCCZ:
1854  case AMDGPU::SRC_EXECZ:
1855  case AMDGPU::SRC_SCC:
1856  return true;
1857  case AMDGPU::SGPR_NULL:
1858  return true;
1859  default:
1860  return false;
1861  }
1862 }
1863 
1864 bool AMDGPUOperand::isInlineValue() const {
1865  return isRegKind() && ::isInlineValue(getReg());
1866 }
1867 
1868 //===----------------------------------------------------------------------===//
1869 // AsmParser
1870 //===----------------------------------------------------------------------===//
1871 
1872 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1873  if (Is == IS_VGPR) {
1874  switch (RegWidth) {
1875  default: return -1;
1876  case 1: return AMDGPU::VGPR_32RegClassID;
1877  case 2: return AMDGPU::VReg_64RegClassID;
1878  case 3: return AMDGPU::VReg_96RegClassID;
1879  case 4: return AMDGPU::VReg_128RegClassID;
1880  case 5: return AMDGPU::VReg_160RegClassID;
1881  case 8: return AMDGPU::VReg_256RegClassID;
1882  case 16: return AMDGPU::VReg_512RegClassID;
1883  case 32: return AMDGPU::VReg_1024RegClassID;
1884  }
1885  } else if (Is == IS_TTMP) {
1886  switch (RegWidth) {
1887  default: return -1;
1888  case 1: return AMDGPU::TTMP_32RegClassID;
1889  case 2: return AMDGPU::TTMP_64RegClassID;
1890  case 4: return AMDGPU::TTMP_128RegClassID;
1891  case 8: return AMDGPU::TTMP_256RegClassID;
1892  case 16: return AMDGPU::TTMP_512RegClassID;
1893  }
1894  } else if (Is == IS_SGPR) {
1895  switch (RegWidth) {
1896  default: return -1;
1897  case 1: return AMDGPU::SGPR_32RegClassID;
1898  case 2: return AMDGPU::SGPR_64RegClassID;
1899  case 4: return AMDGPU::SGPR_128RegClassID;
1900  case 8: return AMDGPU::SGPR_256RegClassID;
1901  case 16: return AMDGPU::SGPR_512RegClassID;
1902  }
1903  } else if (Is == IS_AGPR) {
1904  switch (RegWidth) {
1905  default: return -1;
1906  case 1: return AMDGPU::AGPR_32RegClassID;
1907  case 2: return AMDGPU::AReg_64RegClassID;
1908  case 4: return AMDGPU::AReg_128RegClassID;
1909  case 16: return AMDGPU::AReg_512RegClassID;
1910  case 32: return AMDGPU::AReg_1024RegClassID;
1911  }
1912  }
1913  return -1;
1914 }
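// Illustrative examples (editorial, not from the original source): RegWidth
// counts 32-bit registers, so a range like v[0:3] (width 4) selects
// VReg_128RegClassID and s[0:1] selects SGPR_64RegClassID, while an
// unsupported width such as 6 falls through to the default case and returns -1.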
1915 
1916 static unsigned getSpecialRegForName(StringRef RegName) {
1917  return StringSwitch<unsigned>(RegName)
1918  .Case("exec", AMDGPU::EXEC)
1919  .Case("vcc", AMDGPU::VCC)
1920  .Case("flat_scratch", AMDGPU::FLAT_SCR)
1921  .Case("xnack_mask", AMDGPU::XNACK_MASK)
1922  .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1923  .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1924  .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1925  .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1926  .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1927  .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1928  .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1929  .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1930  .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1931  .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1932  .Case("lds_direct", AMDGPU::LDS_DIRECT)
1933  .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1934  .Case("m0", AMDGPU::M0)
1935  .Case("vccz", AMDGPU::SRC_VCCZ)
1936  .Case("src_vccz", AMDGPU::SRC_VCCZ)
1937  .Case("execz", AMDGPU::SRC_EXECZ)
1938  .Case("src_execz", AMDGPU::SRC_EXECZ)
1939  .Case("scc", AMDGPU::SRC_SCC)
1940  .Case("src_scc", AMDGPU::SRC_SCC)
1941  .Case("tba", AMDGPU::TBA)
1942  .Case("tma", AMDGPU::TMA)
1943  .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1944  .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1945  .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1946  .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1947  .Case("vcc_lo", AMDGPU::VCC_LO)
1948  .Case("vcc_hi", AMDGPU::VCC_HI)
1949  .Case("exec_lo", AMDGPU::EXEC_LO)
1950  .Case("exec_hi", AMDGPU::EXEC_HI)
1951  .Case("tma_lo", AMDGPU::TMA_LO)
1952  .Case("tma_hi", AMDGPU::TMA_HI)
1953  .Case("tba_lo", AMDGPU::TBA_LO)
1954  .Case("tba_hi", AMDGPU::TBA_HI)
1955  .Case("null", AMDGPU::SGPR_NULL)
1956  .Default(0);
1957 }
1958 
1959 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1960  SMLoc &EndLoc) {
1961  auto R = parseRegister();
1962  if (!R) return true;
1963  assert(R->isReg());
1964  RegNo = R->getReg();
1965  StartLoc = R->getStartLoc();
1966  EndLoc = R->getEndLoc();
1967  return false;
1968 }
1969 
1970 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1971  RegisterKind RegKind, unsigned Reg1,
1972  unsigned RegNum) {
1973  switch (RegKind) {
1974  case IS_SPECIAL:
1975  if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1976  Reg = AMDGPU::EXEC;
1977  RegWidth = 2;
1978  return true;
1979  }
1980  if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1981  Reg = AMDGPU::FLAT_SCR;
1982  RegWidth = 2;
1983  return true;
1984  }
1985  if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1986  Reg = AMDGPU::XNACK_MASK;
1987  RegWidth = 2;
1988  return true;
1989  }
1990  if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1991  Reg = AMDGPU::VCC;
1992  RegWidth = 2;
1993  return true;
1994  }
1995  if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1996  Reg = AMDGPU::TBA;
1997  RegWidth = 2;
1998  return true;
1999  }
2000  if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2001  Reg = AMDGPU::TMA;
2002  RegWidth = 2;
2003  return true;
2004  }
2005  return false;
2006  case IS_VGPR:
2007  case IS_SGPR:
2008  case IS_AGPR:
2009  case IS_TTMP:
2010  if (Reg1 != Reg + RegWidth) {
2011  return false;
2012  }
2013  RegWidth++;
2014  return true;
2015  default:
2016  llvm_unreachable("unexpected register kind");
2017  }
2018 }
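// Illustrative example (editorial): when parsing a register list such as
// [s0,s1,s2,s3], each newly parsed register must satisfy Reg1 == Reg + RegWidth,
// so the width grows from 1 to 4; a non-consecutive list like [s0,s2] fails
// here. Special registers combine only as the documented lo/hi pairs
// (e.g. exec_lo, exec_hi -> exec).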
2019 
2020 static const StringRef Registers[] = {
2021  { "v" },
2022  { "s" },
2023  { "ttmp" },
2024  { "acc" },
2025  { "a" },
2026 };
2027 
2028 bool
2029 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2030  const AsmToken &NextToken) const {
2031 
2032  // A list of consecutive registers: [s0,s1,s2,s3]
2033  if (Token.is(AsmToken::LBrac))
2034  return true;
2035 
2036  if (!Token.is(AsmToken::Identifier))
2037  return false;
2038 
2039  // A single register like s0 or a range of registers like s[0:1]
2040 
2041  StringRef RegName = Token.getString();
2042 
2043  for (StringRef Reg : Registers) {
2044  if (RegName.startswith(Reg)) {
2045  if (Reg.size() < RegName.size()) {
2046  unsigned RegNum;
2047  // A single register with an index: rXX
2048  if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
2049  return true;
2050  } else {
2051  // A range of registers: r[XX:YY].
2052  if (NextToken.is(AsmToken::LBrac))
2053  return true;
2054  }
2055  }
2056  }
2057 
2058  return getSpecialRegForName(RegName);
2059 }
2060 
2061 bool
2062 AMDGPUAsmParser::isRegister()
2063 {
2064  return isRegister(getToken(), peekToken());
2065 }
2066 
2067 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2068  unsigned &RegNum, unsigned &RegWidth,
2069  unsigned *DwordRegIndex) {
2070  if (DwordRegIndex) { *DwordRegIndex = 0; }
2071  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2072  if (getLexer().is(AsmToken::Identifier)) {
2073  StringRef RegName = Parser.getTok().getString();
2074  if ((Reg = getSpecialRegForName(RegName))) {
2075  Parser.Lex();
2076  RegKind = IS_SPECIAL;
2077  } else {
2078  unsigned RegNumIndex = 0;
2079  if (RegName[0] == 'v') {
2080  RegNumIndex = 1;
2081  RegKind = IS_VGPR;
2082  } else if (RegName[0] == 's') {
2083  RegNumIndex = 1;
2084  RegKind = IS_SGPR;
2085  } else if (RegName[0] == 'a') {
2086  RegNumIndex = RegName.startswith("acc") ? 3 : 1;
2087  RegKind = IS_AGPR;
2088  } else if (RegName.startswith("ttmp")) {
2089  RegNumIndex = strlen("ttmp");
2090  RegKind = IS_TTMP;
2091  } else {
2092  return false;
2093  }
2094  if (RegName.size() > RegNumIndex) {
2095  // Single 32-bit register: vXX.
2096  if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
2097  return false;
2098  Parser.Lex();
2099  RegWidth = 1;
2100  } else {
2101  // Range of registers: v[XX:YY]. ":YY" is optional.
2102  Parser.Lex();
2103  int64_t RegLo, RegHi;
2104  if (getLexer().isNot(AsmToken::LBrac))
2105  return false;
2106  Parser.Lex();
2107 
2108  if (getParser().parseAbsoluteExpression(RegLo))
2109  return false;
2110 
2111  const bool isRBrace = getLexer().is(AsmToken::RBrac);
2112  if (!isRBrace && getLexer().isNot(AsmToken::Colon))
2113  return false;
2114  Parser.Lex();
2115 
2116  if (isRBrace) {
2117  RegHi = RegLo;
2118  } else {
2119  if (getParser().parseAbsoluteExpression(RegHi))
2120  return false;
2121 
2122  if (getLexer().isNot(AsmToken::RBrac))
2123  return false;
2124  Parser.Lex();
2125  }
2126  RegNum = (unsigned) RegLo;
2127  RegWidth = (RegHi - RegLo) + 1;
2128  }
2129  }
2130  } else if (getLexer().is(AsmToken::LBrac)) {
2131  // List of consecutive registers: [s0,s1,s2,s3]
2132  Parser.Lex();
2133  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
2134  return false;
2135  if (RegWidth != 1)
2136  return false;
2137  RegisterKind RegKind1;
2138  unsigned Reg1, RegNum1, RegWidth1;
2139  do {
2140  if (getLexer().is(AsmToken::Comma)) {
2141  Parser.Lex();
2142  } else if (getLexer().is(AsmToken::RBrac)) {
2143  Parser.Lex();
2144  break;
2145  } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
2146  if (RegWidth1 != 1) {
2147  return false;
2148  }
2149  if (RegKind1 != RegKind) {
2150  return false;
2151  }
2152  if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
2153  return false;
2154  }
2155  } else {
2156  return false;
2157  }
2158  } while (true);
2159  } else {
2160  return false;
2161  }
2162  switch (RegKind) {
2163  case IS_SPECIAL:
2164  RegNum = 0;
2165  RegWidth = 1;
2166  break;
2167  case IS_VGPR:
2168  case IS_SGPR:
2169  case IS_AGPR:
2170  case IS_TTMP:
2171  {
2172  unsigned Size = 1;
2173  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2174  // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
2175  Size = std::min(RegWidth, 4u);
2176  }
2177  if (RegNum % Size != 0)
2178  return false;
2179  if (DwordRegIndex) { *DwordRegIndex = RegNum; }
2180  RegNum = RegNum / Size;
2181  int RCID = getRegClass(RegKind, RegWidth);
2182  if (RCID == -1)
2183  return false;
2184  const MCRegisterClass RC = TRI->getRegClass(RCID);
2185  if (RegNum >= RC.getNumRegs())
2186  return false;
2187  Reg = RC.getRegister(RegNum);
2188  break;
2189  }
2190 
2191  default:
2192  llvm_unreachable("unexpected register kind");
2193  }
2194 
2195  if (!subtargetHasRegister(*TRI, Reg))
2196  return false;
2197  return true;
2198 }
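// Illustrative example (editorial): because SGPR and TTMP ranges must be
// aligned to min(RegWidth, 4) dwords, s[2:3] is accepted while s[1:2] is
// rejected by the "RegNum % Size != 0" check above; VGPR ranges have no such
// alignment requirement.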
2199 
2200 Optional<StringRef>
2201 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2202  switch (RegKind) {
2203  case IS_VGPR:
2204  return StringRef(".amdgcn.next_free_vgpr");
2205  case IS_SGPR:
2206  return StringRef(".amdgcn.next_free_sgpr");
2207  default:
2208  return None;
2209  }
2210 }
2211 
2212 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2213  auto SymbolName = getGprCountSymbolName(RegKind);
2214  assert(SymbolName && "initializing invalid register kind");
2215  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2216  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2217 }
2218 
2219 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2220  unsigned DwordRegIndex,
2221  unsigned RegWidth) {
2222  // Symbols are only defined for GCN targets
2223  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2224  return true;
2225 
2226  auto SymbolName = getGprCountSymbolName(RegKind);
2227  if (!SymbolName)
2228  return true;
2229  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2230 
2231  int64_t NewMax = DwordRegIndex + RegWidth - 1;
2232  int64_t OldCount;
2233 
2234  if (!Sym->isVariable())
2235  return !Error(getParser().getTok().getLoc(),
2236  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2237  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2238  return !Error(
2239  getParser().getTok().getLoc(),
2240  ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2241 
2242  if (OldCount <= NewMax)
2243  Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2244 
2245  return true;
2246 }
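// Illustrative example (editorial): after parsing "v7" (DwordRegIndex = 7,
// RegWidth = 1), NewMax is 7, so .amdgcn.next_free_vgpr is raised to 8 if its
// current value is smaller; the symbol always tracks one past the highest
// register index seen so far.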
2247 
2248 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2249  const auto &Tok = Parser.getTok();
2250  SMLoc StartLoc = Tok.getLoc();
2251  SMLoc EndLoc = Tok.getEndLoc();
2252  RegisterKind RegKind;
2253  unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2254 
2255  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
2256  //FIXME: improve error messages (bug 41303).
2257  Error(StartLoc, "not a valid operand.");
2258  return nullptr;
2259  }
2260  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2261  if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2262  return nullptr;
2263  } else
2264  KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2265  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2266 }
2267 
2268 OperandMatchResultTy
2269 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2270  // TODO: add syntactic sugar for 1/(2*PI)
2271 
2272  assert(!isRegister());
2273  assert(!isModifier());
2274 
2275  const auto& Tok = getToken();
2276  const auto& NextTok = peekToken();
2277  bool IsReal = Tok.is(AsmToken::Real);
2278  SMLoc S = getLoc();
2279  bool Negate = false;
2280 
2281  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2282  lex();
2283  IsReal = true;
2284  Negate = true;
2285  }
2286 
2287  if (IsReal) {
2288  // Floating-point expressions are not supported.
2289  // Can only allow floating-point literals with an
2290  // optional sign.
2291 
2292  StringRef Num = getTokenStr();
2293  lex();
2294 
2295  APFloat RealVal(APFloat::IEEEdouble());
2296  auto roundMode = APFloat::rmNearestTiesToEven;
2297  if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2298  return MatchOperand_ParseFail;
2299  }
2300  if (Negate)
2301  RealVal.changeSign();
2302 
2303  Operands.push_back(
2304  AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2305  AMDGPUOperand::ImmTyNone, true));
2306 
2307  return MatchOperand_Success;
2308 
2309  } else {
2310  int64_t IntVal;
2311  const MCExpr *Expr;
2312  SMLoc S = getLoc();
2313 
2314  if (HasSP3AbsModifier) {
2315  // This is a workaround for handling expressions
2316  // as arguments of SP3 'abs' modifier, for example:
2317  // |1.0|
2318  // |-1|
2319  // |1+x|
2320  // This syntax is not compatible with the syntax of standard
2321  // MC expressions (due to the trailing '|').
2322  SMLoc EndLoc;
2323  if (getParser().parsePrimaryExpr(Expr, EndLoc))
2324  return MatchOperand_ParseFail;
2325  } else {
2326  if (Parser.parseExpression(Expr))
2327  return MatchOperand_ParseFail;
2328  }
2329 
2330  if (Expr->evaluateAsAbsolute(IntVal)) {
2331  Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2332  } else {
2333  Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2334  }
2335 
2336  return MatchOperand_Success;
2337  }
2338 
2339  return MatchOperand_NoMatch;
2340 }
2341 
2342 OperandMatchResultTy
2343 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2344  if (!isRegister())
2345  return MatchOperand_NoMatch;
2346 
2347  if (auto R = parseRegister()) {
2348  assert(R->isReg());
2349  Operands.push_back(std::move(R));
2350  return MatchOperand_Success;
2351  }
2352  return MatchOperand_ParseFail;
2353 }
2354 
2355 OperandMatchResultTy
2356 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2357  auto res = parseReg(Operands);
2358  if (res != MatchOperand_NoMatch) {
2359  return res;
2360  } else if (isModifier()) {
2361  return MatchOperand_NoMatch;
2362  } else {
2363  return parseImm(Operands, HasSP3AbsMod);
2364  }
2365 }
2366 
2367 bool
2368 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2369  if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2370  const auto &str = Token.getString();
2371  return str == "abs" || str == "neg" || str == "sext";
2372  }
2373  return false;
2374 }
2375 
2376 bool
2377 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2378  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2379 }
2380 
2381 bool
2382 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2383  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2384 }
2385 
2386 bool
2387 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2388  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2389 }
2390 
2391 // Check if this is an operand modifier or an opcode modifier
2392 // which may look like an expression but is not. We should
2393 // avoid parsing these modifiers as expressions. Currently
2394 // recognized sequences are:
2395 // |...|
2396 // abs(...)
2397 // neg(...)
2398 // sext(...)
2399 // -reg
2400 // -|...|
2401 // -abs(...)
2402 // name:...
2403 // Note that simple opcode modifiers like 'gds' may be parsed as
2404 // expressions; this is a special case. See getExpressionAsToken.
2405 //
2406 bool
2407 AMDGPUAsmParser::isModifier() {
2408 
2409  AsmToken Tok = getToken();
2410  AsmToken NextToken[2];
2411  peekTokens(NextToken);
2412 
2413  return isOperandModifier(Tok, NextToken[0]) ||
2414  (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2415  isOpcodeModifierWithVal(Tok, NextToken[0]);
2416 }
2417 
2418 // Check if the current token is an SP3 'neg' modifier.
2419 // Currently this modifier is allowed in the following context:
2420 //
2421 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2422 // 2. Before an 'abs' modifier: -abs(...)
2423 // 3. Before an SP3 'abs' modifier: -|...|
2424 //
2425 // In all other cases "-" is handled as a part
2426 // of an expression that follows the sign.
2427 //
2428 // Note: When "-" is followed by an integer literal,
2429 // this is interpreted as integer negation rather
2430 // than a floating-point NEG modifier applied to N.
2431 // Besides being counter-intuitive, such use of a floating-point
2432 // NEG modifier would have resulted in a different meaning
2433 // of integer literals used with VOP1/2/C and VOP3,
2434 // for example:
2435 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2436 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2437 // Negative fp literals with preceding "-" are
2438 // handled likewise for uniformity.
2439 //
2440 bool
2441 AMDGPUAsmParser::parseSP3NegModifier() {
2442 
2443  AsmToken NextToken[2];
2444  peekTokens(NextToken);
2445 
2446  if (isToken(AsmToken::Minus) &&
2447  (isRegister(NextToken[0], NextToken[1]) ||
2448  NextToken[0].is(AsmToken::Pipe) ||
2449  isId(NextToken[0], "abs"))) {
2450  lex();
2451  return true;
2452  }
2453 
2454  return false;
2455 }
2456 
2457 OperandMatchResultTy
2458 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2459  bool AllowImm) {
2460  bool Neg, SP3Neg;
2461  bool Abs, SP3Abs;
2462  SMLoc Loc;
2463 
2464  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2465  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2466  Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2467  return MatchOperand_ParseFail;
2468  }
2469 
2470  SP3Neg = parseSP3NegModifier();
2471 
2472  Loc = getLoc();
2473  Neg = trySkipId("neg");
2474  if (Neg && SP3Neg) {
2475  Error(Loc, "expected register or immediate");
2476  return MatchOperand_ParseFail;
2477  }
2478  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2479  return MatchOperand_ParseFail;
2480 
2481  Abs = trySkipId("abs");
2482  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2483  return MatchOperand_ParseFail;
2484 
2485  Loc = getLoc();
2486  SP3Abs = trySkipToken(AsmToken::Pipe);
2487  if (Abs && SP3Abs) {
2488  Error(Loc, "expected register or immediate");
2489  return MatchOperand_ParseFail;
2490  }
2491 
2492  OperandMatchResultTy Res;
2493  if (AllowImm) {
2494  Res = parseRegOrImm(Operands, SP3Abs);
2495  } else {
2496  Res = parseReg(Operands);
2497  }
2498  if (Res != MatchOperand_Success) {
2499  return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2500  }
2501 
2502  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2503  return MatchOperand_ParseFail;
2504  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2505  return MatchOperand_ParseFail;
2506  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2507  return MatchOperand_ParseFail;
2508 
2509  AMDGPUOperand::Modifiers Mods;
2510  Mods.Abs = Abs || SP3Abs;
2511  Mods.Neg = Neg || SP3Neg;
2512 
2513  if (Mods.hasFPModifiers()) {
2514  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2515  if (Op.isExpr()) {
2516  Error(Op.getStartLoc(), "expected an absolute expression");
2517  return MatchOperand_ParseFail;
2518  }
2519  Op.setModifiers(Mods);
2520  }
2521  return MatchOperand_Success;
2522 }
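// Illustrative inputs (editorial): this path accepts operands such as "-v0",
// "neg(v1)", "abs(2.0)" and "|v2|", and the '-' and '|' forms may be combined
// as "-|v0|"; doubled signs like "--1" are rejected above with a request to
// use neg(...) instead.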
2523 
2524 OperandMatchResultTy
2525 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2526  bool AllowImm) {
2527  bool Sext = trySkipId("sext");
2528  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2529  return MatchOperand_ParseFail;
2530 
2531  OperandMatchResultTy Res;
2532  if (AllowImm) {
2533  Res = parseRegOrImm(Operands);
2534  } else {
2535  Res = parseReg(Operands);
2536  }
2537  if (Res != MatchOperand_Success) {
2538  return Sext? MatchOperand_ParseFail : Res;
2539  }
2540 
2541  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2542  return MatchOperand_ParseFail;
2543 
2544  AMDGPUOperand::Modifiers Mods;
2545  Mods.Sext = Sext;
2546 
2547  if (Mods.hasIntModifiers()) {
2548  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2549  if (Op.isExpr()) {
2550  Error(Op.getStartLoc(), "expected an absolute expression");
2551  return MatchOperand_ParseFail;
2552  }
2553  Op.setModifiers(Mods);
2554  }
2555 
2556  return MatchOperand_Success;
2557 }
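// Illustrative input (editorial): "sext(v0)" parses the register and attaches
// the integer sign-extension modifier; an expression inside sext(...) is
// rejected unless it evaluates to an absolute value.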
2558 
2559 OperandMatchResultTy
2560 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2561  return parseRegOrImmWithFPInputMods(Operands, false);
2562 }
2563 
2564 OperandMatchResultTy
2565 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2566  return parseRegOrImmWithIntInputMods(Operands, false);
2567 }
2568 
2569 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2570  auto Loc = getLoc();
2571  if (trySkipId("off")) {
2572  Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2573  AMDGPUOperand::ImmTyOff, false));
2574  return MatchOperand_Success;
2575  }
2576 
2577  if (!isRegister())
2578  return MatchOperand_NoMatch;
2579 
2580  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2581  if (Reg) {
2582  Operands.push_back(std::move(Reg));
2583  return MatchOperand_Success;
2584  }
2585 
2586  return MatchOperand_ParseFail;
2587 
2588 }
2589 
2590 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2591  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2592 
2593  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2594  (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2595  (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2596  (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2597  return Match_InvalidOperand;
2598 
2599  if ((TSFlags & SIInstrFlags::VOP3) &&
2600  (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2601  getForcedEncodingSize() != 64)
2602  return Match_PreferE32;
2603 
2604  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2605  Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2606  // v_mac_f32/16 allow only dst_sel == DWORD;
2607  auto OpNum =
2608  AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2609  const auto &Op = Inst.getOperand(OpNum);
2610  if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2611  return Match_InvalidOperand;
2612  }
2613  }
2614 
2615  return Match_Success;
2616 }
2617 
2618 // What asm variants we should check
2619 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2620  if (getForcedEncodingSize() == 32) {
2621  static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2622  return makeArrayRef(Variants);
2623  }
2624 
2625  if (isForcedVOP3()) {
2626  static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2627  return makeArrayRef(Variants);
2628  }
2629 
2630  if (isForcedSDWA()) {
2631  static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2632  AMDGPUAsmVariants::SDWA9};
2633  return makeArrayRef(Variants);
2634  }
2635 
2636  if (isForcedDPP()) {
2637  static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2638  return makeArrayRef(Variants);
2639  }
2640 
2641  static const unsigned Variants[] = {
2642  AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2643  AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2644  };
2645 
2646  return makeArrayRef(Variants);
2647 }
2648 
2649 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2650  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2651  const unsigned Num = Desc.getNumImplicitUses();
2652  for (unsigned i = 0; i < Num; ++i) {
2653  unsigned Reg = Desc.ImplicitUses[i];
2654  switch (Reg) {
2655  case AMDGPU::FLAT_SCR:
2656  case AMDGPU::VCC:
2657  case AMDGPU::VCC_LO:
2658  case AMDGPU::VCC_HI:
2659  case AMDGPU::M0:
2660  return Reg;
2661  default:
2662  break;
2663  }
2664  }
2665  return AMDGPU::NoRegister;
2666 }
2667 
2668 // NB: This code is correct only when used to check constant
2669 // bus limitations because GFX7 supports no f16 inline constants.
2670 // Note that there are no cases when a GFX7 opcode violates
2671 // constant bus limitations due to the use of an f16 constant.
2672 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2673  unsigned OpIdx) const {
2674  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2675 
2676  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2677  return false;
2678  }
2679 
2680  const MCOperand &MO = Inst.getOperand(OpIdx);
2681 
2682  int64_t Val = MO.getImm();
2683  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2684 
2685  switch (OpSize) { // expected operand size
2686  case 8:
2687  return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2688  case 4:
2689  return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2690  case 2: {
2691  const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2692  if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2693  OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2694  OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2695  OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2696  OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2697  OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2698  return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2699  } else {
2700  return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2701  }
2702  }
2703  default:
2704  llvm_unreachable("invalid operand size");
2705  }
2706 }
2707 
2708 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2709  if (!isGFX10())
2710  return 1;
2711 
2712  switch (Opcode) {
2713  // 64-bit shift instructions can use only one scalar value input
2714  case AMDGPU::V_LSHLREV_B64:
2715  case AMDGPU::V_LSHLREV_B64_gfx10:
2716  case AMDGPU::V_LSHL_B64:
2717  case AMDGPU::V_LSHRREV_B64:
2718  case AMDGPU::V_LSHRREV_B64_gfx10:
2719  case AMDGPU::V_LSHR_B64:
2720  case AMDGPU::V_ASHRREV_I64:
2721  case AMDGPU::V_ASHRREV_I64_gfx10:
2722  case AMDGPU::V_ASHR_I64:
2723  return 1;
2724  default:
2725  return 2;
2726  }
2727 }
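// Illustrative note (editorial): on GFX10 most VALU instructions may read two
// scalar (SGPR or literal) sources per the limit above, but 64-bit shifts such
// as v_lshlrev_b64 are still limited to one.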
2728 
2729 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2730  const MCOperand &MO = Inst.getOperand(OpIdx);
2731  if (MO.isImm()) {
2732  return !isInlineConstant(Inst, OpIdx);
2733  } else if (MO.isReg()) {
2734  auto Reg = MO.getReg();
2735  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2736  return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2737  } else {
2738  return true;
2739  }
2740 }
2741 
2742 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2743  const unsigned Opcode = Inst.getOpcode();
2744  const MCInstrDesc &Desc = MII.get(Opcode);
2745  unsigned ConstantBusUseCount = 0;
2746  unsigned NumLiterals = 0;
2747  unsigned LiteralSize;
2748 
2749  if (Desc.TSFlags &
2750  (SIInstrFlags::VOPC |
2751  SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2752  SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2753  SIInstrFlags::SDWA)) {
2754  // Check special imm operands (used by madmk, etc)
2755  if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2756  ++ConstantBusUseCount;
2757  }
2758 
2759  SmallDenseSet<unsigned> SGPRsUsed;
2760  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2761  if (SGPRUsed != AMDGPU::NoRegister) {
2762  SGPRsUsed.insert(SGPRUsed);
2763  ++ConstantBusUseCount;
2764  }
2765 
2766  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2767  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2768  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2769 
2770  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2771 
2772  for (int OpIdx : OpIndices) {
2773  if (OpIdx == -1) break;
2774 
2775  const MCOperand &MO = Inst.getOperand(OpIdx);
2776  if (usesConstantBus(Inst, OpIdx)) {
2777  if (MO.isReg()) {
2778  const unsigned Reg = mc2PseudoReg(MO.getReg());
2779  // Pairs of registers with a partial intersection like these
2780  // s0, s[0:1]
2781  // flat_scratch_lo, flat_scratch
2782  // flat_scratch_lo, flat_scratch_hi
2783  // are theoretically valid but they are disabled anyway.
2784  // Note that this code mimics SIInstrInfo::verifyInstruction
2785  if (!SGPRsUsed.count(Reg)) {
2786  SGPRsUsed.insert(Reg);
2787  ++ConstantBusUseCount;
2788  }
2789  } else { // Expression or a literal
2790 
2791  if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2792  continue; // special operand like VINTERP attr_chan
2793 
2794  // An instruction may use only one literal.
2795  // This has been validated in a previous step.
2796  // See validateVOP3Literal.
2797  // This literal may be used as more than one operand.
2798  // If all these operands are of the same size,
2799  // this literal counts as one scalar value.
2800  // Otherwise it counts as 2 scalar values.
2801  // See "GFX10 Shader Programming", section 3.6.2.3.
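 // Illustrative example (editorial): if the same 32-bit literal is used for
 // both src0 and src1, it counts as a single scalar value here; if one use is
 // a 32-bit operand and another is a 64-bit operand, NumLiterals becomes 2
 // and may exceed the constant bus limit.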
2802 
2803  unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2804  if (Size < 4) Size = 4;
2805 
2806  if (NumLiterals == 0) {
2807  NumLiterals = 1;
2808  LiteralSize = Size;
2809  } else if (LiteralSize != Size) {
2810  NumLiterals = 2;
2811  }
2812  }
2813  }
2814  }
2815  }
2816  ConstantBusUseCount += NumLiterals;
2817 
2818  return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2819 }
2820 
2821 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2822  const unsigned Opcode = Inst.getOpcode();
2823  const MCInstrDesc &Desc = MII.get(Opcode);
2824 
2825  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2826  if (DstIdx == -1 ||
2827  Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2828  return true;
2829  }
2830 
2831  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2832 
2833  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2834  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2835  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2836 
2837  assert(DstIdx != -1);
2838  const MCOperand &Dst = Inst.getOperand(DstIdx);
2839  assert(Dst.isReg());
2840  const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2841 
2842  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2843 
2844  for (int SrcIdx : SrcIndices) {
2845  if (SrcIdx == -1) break;
2846  const MCOperand &Src = Inst.getOperand(SrcIdx);
2847  if (Src.isReg()) {
2848  const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2849  if (isRegIntersect(DstReg, SrcReg, TRI)) {
2850  return false;
2851  }
2852  }
2853  }
2854 
2855  return true;
2856 }
2857 
2858 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2859 
2860  const unsigned Opc = Inst.getOpcode();
2861  const MCInstrDesc &Desc = MII.get(Opc);
2862 
2863  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2864  int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2865  assert(ClampIdx != -1);
2866  return Inst.getOperand(ClampIdx).getImm() == 0;
2867  }
2868 
2869  return true;
2870 }
2871 
2872 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2873 
2874  const unsigned Opc = Inst.getOpcode();
2875  const MCInstrDesc &Desc = MII.get(Opc);
2876 
2877  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2878  return true;
2879 
2880  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2881  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2882  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2883 
2884  assert(VDataIdx != -1);
2885  assert(DMaskIdx != -1);
2886  assert(TFEIdx != -1);
2887 
2888  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2889  unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2890  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2891  if (DMask == 0)
2892  DMask = 1;
2893 
2894  unsigned DataSize =
2895  (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2896  if (hasPackedD16()) {
2897  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2898  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2899  DataSize = (DataSize + 1) / 2;
2900  }
2901 
2902  return (VDataSize / 4) == DataSize + TFESize;
2903 }
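// Illustrative example (editorial): with dmask = 0b0111 the instruction
// returns 3 data dwords, plus 1 if tfe is set, so vdata must be a 4-dword
// register range such as v[0:3]; with packed d16 enabled the 3 components are
// rounded up to 2 dwords instead.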
2904 
2905 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2906  const unsigned Opc = Inst.getOpcode();
2907  const MCInstrDesc &Desc = MII.get(Opc);
2908 
2909  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2910  return true;
2911 
2912  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2913  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2914  AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2915  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2916  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2917  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2918 
2919  assert(VAddr0Idx != -1);
2920  assert(SrsrcIdx != -1);
2921  assert(DimIdx != -1);
2922  assert(SrsrcIdx > VAddr0Idx);
2923 
2924  unsigned Dim = Inst.getOperand(DimIdx).getImm();
2925  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2926  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2927  unsigned VAddrSize =
2928  IsNSA ? SrsrcIdx - VAddr0Idx
2929  : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2930 
2931  unsigned AddrSize = BaseOpcode->NumExtraArgs +
2932  (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2933  (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2934  (BaseOpcode->LodOrClampOrMip ? 1 : 0);
2935  if (!IsNSA) {
2936  if (AddrSize > 8)
2937  AddrSize = 16;
2938  else if (AddrSize > 4)
2939  AddrSize = 8;
2940  }
2941 
2942  return VAddrSize == AddrSize;
2943 }
2944 
2945 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2946 
2947  const unsigned Opc = Inst.getOpcode();
2948  const MCInstrDesc &Desc = MII.get(Opc);
2949 
2950  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2951  return true;
2952  if (!Desc.mayLoad() || !Desc.mayStore())
2953  return true; // Not atomic
2954 
2955  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2956  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2957 
2958  // This is an incomplete check because image_atomic_cmpswap
2959  // may only use 0x3 and 0xf while other atomic operations
2960  // may use 0x1 and 0x3. However, these limitations are
2961  // verified when we check that dmask matches dst size.
2962  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2963 }
2964 
2965 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2966 
2967  const unsigned Opc = Inst.getOpcode();
2968  const MCInstrDesc &Desc = MII.get(Opc);
2969 
2970  if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2971  return true;
2972 
2973  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2974  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2975 
2976  // GATHER4 instructions use dmask in a different fashion compared to
2977  // other MIMG instructions. The only useful DMASK values are
2978  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2979  // (red,red,red,red) etc.) The ISA document doesn't mention
2980  // this.
2981  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2982 }
2983 
2984 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2985 
2986  const unsigned Opc = Inst.getOpcode();
2987  const MCInstrDesc &Desc = MII.get(Opc);
2988 
2989  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2990  return true;
2991 
2992  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2993  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2994  if (isCI() || isSI())
2995  return false;
2996  }
2997 
2998  return true;
2999 }
3000 
3001 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3002  const unsigned Opc = Inst.getOpcode();
3003  const MCInstrDesc &Desc = MII.get(Opc);
3004 
3005  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3006  return true;
3007 
3008  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3009  if (DimIdx < 0)
3010  return true;
3011 
3012  long Imm = Inst.getOperand(DimIdx).getImm();
3013  if (Imm < 0 || Imm >= 8)
3014  return false;
3015 
3016  return true;
3017 }
3018 
3019 static bool IsRevOpcode(const unsigned Opcode)
3020 {
3021  switch (Opcode) {
3022  case AMDGPU::V_SUBREV_F32_e32:
3023  case AMDGPU::V_SUBREV_F32_e64:
3024  case AMDGPU::V_SUBREV_F32_e32_gfx10:
3025  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3026  case AMDGPU::V_SUBREV_F32_e32_vi:
3027  case AMDGPU::V_SUBREV_F32_e64_gfx10:
3028  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3029  case AMDGPU::V_SUBREV_F32_e64_vi:
3030 
3031  case AMDGPU::V_SUBREV_I32_e32:
3032  case AMDGPU::V_SUBREV_I32_e64:
3033  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3034  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3035 
3036  case AMDGPU::V_SUBBREV_U32_e32:
3037  case AMDGPU::V_SUBBREV_U32_e64:
3038  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3039  case AMDGPU::V_SUBBREV_U32_e32_vi:
3040  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3041  case AMDGPU::V_SUBBREV_U32_e64_vi:
3042 
3043  case AMDGPU::V_SUBREV_U32_e32:
3044  case AMDGPU::V_SUBREV_U32_e64:
3045  case AMDGPU::V_SUBREV_U32_e32_gfx9:
3046  case AMDGPU::V_SUBREV_U32_e32_vi:
3047  case AMDGPU::V_SUBREV_U32_e64_gfx9:
3048  case AMDGPU::V_SUBREV_U32_e64_vi:
3049 
3050  case AMDGPU::V_SUBREV_F16_e32:
3051  case AMDGPU::V_SUBREV_F16_e64:
3052  case AMDGPU::V_SUBREV_F16_e32_gfx10:
3053  case AMDGPU::V_SUBREV_F16_e32_vi:
3054  case AMDGPU::V_SUBREV_F16_e64_gfx10:
3055  case AMDGPU::V_SUBREV_F16_e64_vi:
3056 
3057  case AMDGPU::V_SUBREV_U16_e32:
3058  case AMDGPU::V_SUBREV_U16_e64:
3059  case AMDGPU::V_SUBREV_U16_e32_vi:
3060  case AMDGPU::V_SUBREV_U16_e64_vi:
3061 
3062  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3063  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3064  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3065 
3066  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3067  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3068 
3069  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3070  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3071 
3072  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3073  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3074 
3075  case AMDGPU::V_LSHRREV_B32_e32:
3076  case AMDGPU::V_LSHRREV_B32_e64:
3077  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3078  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3079  case AMDGPU::V_LSHRREV_B32_e32_vi:
3080  case AMDGPU::V_LSHRREV_B32_e64_vi:
3081  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3082  case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3083 
3084  case AMDGPU::V_ASHRREV_I32_e32:
3085  case AMDGPU::V_ASHRREV_I32_e64:
3086  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3087  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3088  case AMDGPU::V_ASHRREV_I32_e32_vi:
3089  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3090  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3091  case AMDGPU::V_ASHRREV_I32_e64_vi:
3092 
3093  case AMDGPU::V_LSHLREV_B32_e32:
3094  case AMDGPU::V_LSHLREV_B32_e64:
3095  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3096  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3097  case AMDGPU::V_LSHLREV_B32_e32_vi:
3098  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3099  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3100  case AMDGPU::V_LSHLREV_B32_e64_vi:
3101 
3102  case AMDGPU::V_LSHLREV_B16_e32:
3103  case AMDGPU::V_LSHLREV_B16_e64:
3104  case AMDGPU::V_LSHLREV_B16_e32_vi:
3105  case AMDGPU::V_LSHLREV_B16_e64_vi:
3106  case AMDGPU::V_LSHLREV_B16_gfx10:
3107 
3108  case AMDGPU::V_LSHRREV_B16_e32:
3109  case AMDGPU::V_LSHRREV_B16_e64:
3110  case AMDGPU::V_LSHRREV_B16_e32_vi:
3111  case AMDGPU::V_LSHRREV_B16_e64_vi:
3112  case AMDGPU::V_LSHRREV_B16_gfx10:
3113 
3114  case AMDGPU::V_ASHRREV_I16_e32:
3115  case AMDGPU::V_ASHRREV_I16_e64:
3116  case AMDGPU::V_ASHRREV_I16_e32_vi:
3117  case AMDGPU::V_ASHRREV_I16_e64_vi:
3118  case AMDGPU::V_ASHRREV_I16_gfx10:
3119 
3120  case AMDGPU::V_LSHLREV_B64:
3121  case AMDGPU::V_LSHLREV_B64_gfx10:
3122  case AMDGPU::V_LSHLREV_B64_vi:
3123 
3124  case AMDGPU::V_LSHRREV_B64:
3125  case AMDGPU::V_LSHRREV_B64_gfx10:
3126  case AMDGPU::V_LSHRREV_B64_vi:
3127 
3128  case AMDGPU::V_ASHRREV_I64:
3129  case AMDGPU::V_ASHRREV_I64_gfx10:
3130  case AMDGPU::V_ASHRREV_I64_vi:
3131 
3132  case AMDGPU::V_PK_LSHLREV_B16:
3133  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3134  case AMDGPU::V_PK_LSHLREV_B16_vi:
3135 
3136  case AMDGPU::V_PK_LSHRREV_B16:
3137  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3138  case AMDGPU::V_PK_LSHRREV_B16_vi:
3139  case AMDGPU::V_PK_ASHRREV_I16:
3140  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3141  case AMDGPU::V_PK_ASHRREV_I16_vi:
3142  return true;
3143  default:
3144  return false;
3145  }
3146 }
3147 
3148 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3149 
3150  using namespace SIInstrFlags;
3151  const unsigned Opcode = Inst.getOpcode();
3152  const MCInstrDesc &Desc = MII.get(Opcode);
3153 
3154  // lds_direct register is defined so that it can be used
3155  // with 9-bit operands only. Ignore encodings which do not accept these.
3156  if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3157  return true;
3158 
3159  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3160  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3161  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3162 
3163  const int SrcIndices[] = { Src1Idx, Src2Idx };
3164 
3165  // lds_direct cannot be specified as either src1 or src2.
3166  for (int SrcIdx : SrcIndices) {
3167  if (SrcIdx == -1) break;
3168  const MCOperand &Src = Inst.getOperand(SrcIdx);
3169  if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3170  return false;
3171  }
3172  }
3173 
3174  if (Src0Idx == -1)
3175  return true;
3176 
3177  const MCOperand &Src = Inst.getOperand(Src0Idx);
3178  if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3179  return true;
3180 
3181  // lds_direct is specified as src0. Check additional limitations.
3182  return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3183 }
3184 
3185 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3186  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3187  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3188  if (Op.isFlatOffset())
3189  return Op.getStartLoc();
3190  }
3191  return getLoc();
3192 }
3193 
3194 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3195  const OperandVector &Operands) {
3196  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3197  if ((TSFlags & SIInstrFlags::FLAT) == 0)
3198  return true;
3199 
3200  auto Opcode = Inst.getOpcode();
3201  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3202  assert(OpNum != -1);
3203 
3204  const auto &Op = Inst.getOperand(OpNum);
3205  if (!hasFlatOffsets() && Op.getImm() != 0) {
3206  Error(getFlatOffsetLoc(Operands),
3207  "flat offset modifier is not supported on this GPU");
3208  return false;
3209  }
3210 
3211  // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3212  // For FLAT segment the offset must be positive;
3213  // MSB is ignored and forced to zero.
3214  unsigned OffsetSize = isGFX9() ? 13 : 12;
3215  if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3216  if (!isIntN(OffsetSize, Op.getImm())) {
3217  Error(getFlatOffsetLoc(Operands),
3218  isGFX9() ? "expected a 13-bit signed offset" :
3219  "expected a 12-bit signed offset");
3220  return false;
3221  }
3222  } else {
3223  if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3224  Error(getFlatOffsetLoc(Operands),
3225  isGFX9() ? "expected a 12-bit unsigned offset" :
3226  "expected an 11-bit unsigned offset");
3227  return false;
3228  }
3229  }
3230 
3231  return true;
3232 }
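// Illustrative example (editorial): for instructions with the IsNonFlatSeg
// flag the offset must fit in a signed 13-bit field on GFX9 (-4096..4095) or
// a signed 12-bit field on GFX10 (-2048..2047); for FLAT-segment addresses
// the offset must be an unsigned 12-bit (GFX9) or 11-bit (GFX10) value, per
// the checks above.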
3233 
3234 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3235  unsigned Opcode = Inst.getOpcode();
3236  const MCInstrDesc &Desc = MII.get(Opcode);
3237  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3238  return true;
3239 
3240  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3241  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3242 
3243  const int OpIndices[] = { Src0Idx, Src1Idx };
3244 
3245  unsigned NumLiterals = 0;
3246  uint32_t LiteralValue;
3247 
3248  for (int OpIdx : OpIndices) {
3249  if (OpIdx == -1) break;
3250 
3251  const MCOperand &MO = Inst.getOperand(OpIdx);
3252  if (MO.isImm() &&
3253  // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3254  AMDGPU::isSISrcOperand(Desc, OpIdx) &&
3255  !isInlineConstant(Inst, OpIdx)) {
3256  uint32_t Value = static_cast<uint32_t>(MO.getImm());
3257  if (NumLiterals == 0 || LiteralValue != Value) {
3258  LiteralValue = Value;
3259  ++NumLiterals;
3260  }
3261  }
3262  }
3263 
3264  return NumLiterals <= 1;
3265 }
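// Illustrative example (editorial): a SOP2 instruction may encode at most one
// unique 32-bit literal, so "s_add_u32 s0, 0x12345, 0x12345" passes (the same
// value is counted once) while two different non-inline literals are rejected.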
3266 
3267 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3268  const unsigned Opc = Inst.getOpcode();
3269  if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3270  Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3271  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3272  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3273 
3274  if (OpSel & ~3)
3275  return false;
3276  }
3277  return true;
3278 }
3279 
3280 // Check if VCC register matches wavefront size
3281 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3282  auto FB = getFeatureBits();
3283  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3284  (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3285 }
3286 
3287 // VOP3 literal is only allowed in GFX10+ and only one can be used
3288 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3289  unsigned Opcode = Inst.getOpcode();
3290  const MCInstrDesc &Desc = MII.get(Opcode);
3291  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3292  return true;
3293 
3294  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3295  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3296  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3297 
3298  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3299 
3300  unsigned NumLiterals = 0;
3301  uint32_t LiteralValue;
3302 
3303  for (int OpIdx : OpIndices) {
3304  if (OpIdx == -1) break;
3305 
3306  const MCOperand &MO = Inst.getOperand(OpIdx);
3307  if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
3308  continue;
3309 
3310  if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3311  getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3312  return false;
3313 
3314  if (!isInlineConstant(Inst, OpIdx)) {
3315  uint32_t Value = static_cast<uint32_t>(MO.getImm());
3316  if (NumLiterals == 0 || LiteralValue != Value) {
3317  LiteralValue = Value;
3318  ++NumLiterals;
3319  }
3320  }
3321  }
3322 
3323  return !NumLiterals ||
3324  (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3325 }
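// Illustrative note (editorial): a single unique VOP3 literal is accepted only
// when FeatureVOP3Literal is present (GFX10); on older targets any non-inline
// VOP3 literal fails this check, and an immediate src2 operand of an MFMA
// instruction is rejected on targets with FeatureMFMAInlineLiteralBug.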
3326 
3327 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3328  const SMLoc &IDLoc,
3329  const OperandVector &Operands) {
3330  if (!validateLdsDirect(Inst)) {
3331  Error(IDLoc,
3332  "invalid use of lds_direct");
3333  return false;
3334  }
3335  if (!validateSOPLiteral(Inst)) {
3336  Error(IDLoc,
3337  "only one literal operand is allowed");
3338  return false;
3339  }
3340  if (!validateVOP3Literal(Inst)) {
3341  Error(IDLoc,
3342  "invalid literal operand");
3343  return false;
3344  }
3345  if (!validateConstantBusLimitations(Inst)) {
3346  Error(IDLoc,
3347  "invalid operand (violates constant bus restrictions)");
3348  return false;
3349  }
3350  if (!validateEarlyClobberLimitations(Inst)) {
3351  Error(IDLoc,
3352  "destination must be different than all sources");
3353  return false;
3354  }
3355  if (!validateIntClampSupported(Inst)) {
3356  Error(IDLoc,
3357  "integer clamping is not supported on this GPU");
3358  return false;
3359  }
3360  if (!validateOpSel(Inst)) {
3361  Error(IDLoc,
3362  "invalid op_sel operand");
3363  return false;
3364  }
3365  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3366  if (!validateMIMGD16(Inst)) {
3367  Error(IDLoc,
3368  "d16 modifier is not supported on this GPU");
3369  return false;
3370  }
3371  if (!validateMIMGDim(Inst)) {
3372  Error(IDLoc, "dim modifier is required on this GPU");
3373  return false;
3374  }
3375  if (!validateMIMGDataSize(Inst)) {
3376  Error(IDLoc,
3377  "image data size does not match dmask and tfe");
3378  return false;
3379  }
3380  if (!validateMIMGAddrSize(Inst)) {
3381  Error(IDLoc,
3382  "image address size does not match dim and a16");
3383  return false;
3384  }
3385  if (!validateMIMGAtomicDMask(Inst)) {
3386  Error(IDLoc,
3387  "invalid atomic image dmask");
3388  return false;
3389  }
3390  if (!validateMIMGGatherDMask(Inst)) {
3391  Error(IDLoc,
3392  "invalid image_gather dmask: only one bit must be set");
3393  return false;
3394  }
3395  if (!validateFlatOffset(Inst, Operands)) {
3396  return false;
3397  }
3398 
3399  return true;
3400 }
3401 
3402 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3403  const FeatureBitset &FBS,
3404  unsigned VariantID = 0);
3405 
3406 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3407  OperandVector &Operands,
3408  MCStreamer &Out,
3409  uint64_t &ErrorInfo,
3410  bool MatchingInlineAsm) {
3411  MCInst Inst;
3412  unsigned Result = Match_Success;
3413  for (auto Variant : getMatchedVariants()) {
3414  uint64_t EI;
3415  auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3416  Variant);
3417  // We order match statuses from least to most specific. We use the most
3418  // specific status as the result:
3419  // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3420  if ((R == Match_Success) ||
3421  (R == Match_PreferE32) ||
3422  (R == Match_MissingFeature && Result != Match_PreferE32) ||
3423  (R == Match_InvalidOperand && Result != Match_MissingFeature
3424  && Result != Match_PreferE32) ||
3425  (R == Match_MnemonicFail && Result != Match_InvalidOperand
3426  && Result != Match_MissingFeature
3427  && Result != Match_PreferE32)) {
3428  Result = R;
3429  ErrorInfo = EI;
3430  }
3431  if (R == Match_Success)
3432  break;
3433  }
3434 
3435  switch (Result) {
3436  default: break;
3437  case Match_Success:
3438  if (!validateInstruction(Inst, IDLoc, Operands)) {
3439  return true;
3440  }
3441  Inst.setLoc(IDLoc);
3442  Out.EmitInstruction(Inst, getSTI());
3443  return false;
3444 
3445  case Match_MissingFeature:
3446  return Error(IDLoc, "instruction not supported on this GPU");
3447 
3448  case Match_MnemonicFail: {
3449  FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3450  std::string Suggestion = AMDGPUMnemonicSpellCheck(
3451  ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3452  return Error(IDLoc, "invalid instruction" + Suggestion,
3453  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3454  }
3455 
3456  case Match_InvalidOperand: {
3457  SMLoc ErrorLoc = IDLoc;
3458  if (ErrorInfo != ~0ULL) {
3459  if (ErrorInfo >= Operands.size()) {
3460  return Error(IDLoc, "too few operands for instruction");
3461  }
3462  ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3463  if (ErrorLoc == SMLoc())
3464  ErrorLoc = IDLoc;
3465  }
3466  return Error(ErrorLoc, "invalid operand for instruction");
3467  }
3468 
3469  case Match_PreferE32:
3470  return Error(IDLoc, "internal error: instruction without _e64 suffix "
3471  "should be encoded as e32");
3472  }
3473  llvm_unreachable("Implement any new match types added!");
3474 }
3475 
3476 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3477  int64_t Tmp = -1;
3478  if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3479  return true;
3480  }
3481  if (getParser().parseAbsoluteExpression(Tmp)) {
3482  return true;
3483  }
3484  Ret = static_cast<uint32_t>(Tmp);
3485  return false;
3486 }
3487 
3488 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3489  uint32_t &Minor) {
3490  if (ParseAsAbsoluteExpression(Major))
3491  return TokError("invalid major version");
3492 
3493  if (getLexer().isNot(AsmToken::Comma))
3494  return TokError("minor version number required, comma expected");
3495  Lex();
3496 
3497  if (ParseAsAbsoluteExpression(Minor))
3498  return TokError("invalid minor version");
3499 
3500  return false;
3501 }
3502 
3503 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3504  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3505  return TokError("directive only supported for amdgcn architecture");
3506 
3507  std::string Target;
3508 
3509  SMLoc TargetStart = getTok().getLoc();
3510  if (getParser().parseEscapedString(Target))
3511  return true;
3512  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3513 
3514  std::string ExpectedTarget;
3515  raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3516  IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3517 
3518  if (Target != ExpectedTargetOS.str())
3519  return getParser().Error(TargetRange.Start, "target must match options",
3520  TargetRange);
3521 
3522  getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3523  return false;
3524 }
3525 
3526 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3527  return getParser().Error(Range.Start, "value out of range", Range);
3528 }
3529 
3530 bool AMDGPUAsmParser::calculateGPRBlocks(
3531  const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3532  bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3533  SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3534  unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3535  // TODO(scott.linder): These calculations are duplicated from
3536  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3537  IsaVersion Version = getIsaVersion(getSTI().getCPU());
3538 
3539  unsigned NumVGPRs = NextFreeVGPR;
3540  unsigned NumSGPRs = NextFreeSGPR;
3541 
3542  if (Version.Major >= 10)
3543  NumSGPRs = 0;
3544  else {
3545  unsigned MaxAddressableNumSGPRs =
3546  IsaInfo::getAddressableNumSGPRs(&getSTI());
3547 
3548  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3549  NumSGPRs > MaxAddressableNumSGPRs)
3550  return OutOfRangeError(SGPRRange);
3551 
3552  NumSGPRs +=
3553  IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3554 
3555  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3556  NumSGPRs > MaxAddressableNumSGPRs)
3557  return OutOfRangeError(SGPRRange);
3558 
3559  if (Features.test(FeatureSGPRInitBug))
3560  NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3561  }
3562 
3563  VGPRBlocks =
3564  IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3565  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3566 
3567  return false;
3568 }
3569 
3570 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3571  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3572  return TokError("directive only supported for amdgcn architecture");
3573 
3574  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3575  return TokError("directive only supported for amdhsa OS");
3576 
3577  StringRef KernelName;
3578  if (getParser().parseIdentifier(KernelName))
3579  return true;
3580 
3581  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3582 
3583  StringSet<> Seen;
3584 
3585  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3586 
3587  SMRange VGPRRange;
3588  uint64_t NextFreeVGPR = 0;
3589  SMRange SGPRRange;
3590  uint64_t NextFreeSGPR = 0;
3591  unsigned UserSGPRCount = 0;
3592  bool ReserveVCC = true;
3593  bool ReserveFlatScr = true;
3594  bool ReserveXNACK = hasXNACK();
3595  Optional<bool> EnableWavefrontSize32;
3596 
3597  while (true) {
3598  while (getLexer().is(AsmToken::EndOfStatement))
3599  Lex();
3600 
3601  if (getLexer().isNot(AsmToken::Identifier))
3602  return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3603 
3604  StringRef ID = getTok().getIdentifier();
3605  SMRange IDRange = getTok().getLocRange();
3606  Lex();
3607 
3608  if (ID == ".end_amdhsa_kernel")
3609  break;
3610 
3611  if (Seen.find(ID) != Seen.end())
3612  return TokError(".amdhsa_ directives cannot be repeated");
3613  Seen.insert(ID);
3614 
3615  SMLoc ValStart = getTok().getLoc();
3616  int64_t IVal;
3617  if (getParser().parseAbsoluteExpression(IVal))
3618  return true;
3619  SMLoc ValEnd = getTok().getLoc();
3620  SMRange ValRange = SMRange(ValStart, ValEnd);
3621 
3622  if (IVal < 0)
3623  return OutOfRangeError(ValRange);
3624 
3625  uint64_t Val = IVal;
3626 
3627 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
3628  if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
3629  return OutOfRangeError(RANGE); \
3630  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3631 
3632  if (ID == ".amdhsa_group_segment_fixed_size") {
3633  if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3634  return OutOfRangeError(ValRange);
3635  KD.group_segment_fixed_size = Val;
3636  } else if (ID == ".amdhsa_private_segment_fixed_size") {
3637  if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3638  return OutOfRangeError(ValRange);
3639  KD.private_segment_fixed_size = Val;
3640  } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3641  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3642  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3643  Val, ValRange);
3644  if (Val)
3645  UserSGPRCount += 4;
3646  } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3647  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3648  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3649  ValRange);
3650  if (Val)
3651  UserSGPRCount += 2;
3652  } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3653  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3654  KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3655  ValRange);
3656  if (Val)
3657  UserSGPRCount += 2;
3658  } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3659  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3660  KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3661  Val, ValRange);
3662  if (Val)
3663  UserSGPRCount += 2;
3664  } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3665  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3666  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3667  ValRange);
3668  if (Val)
3669  UserSGPRCount += 2;
3670  } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3671  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3672  KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3673  ValRange);
3674  if (Val)
3675  UserSGPRCount += 2;
3676  } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3677  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3678  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3679  Val, ValRange);
3680  if (Val)
3681  UserSGPRCount += 1;
3682  } else if (ID == ".amdhsa_wavefront_size32") {
3683  if (IVersion.Major < 10)
3684  return getParser().Error(IDRange.Start, "directive requires gfx10+",
3685  IDRange);
3686  EnableWavefrontSize32 = Val;
3687  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3688  KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3689  Val, ValRange);
3690  } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3691  PARSE_BITS_ENTRY(
3692  KD.compute_pgm_rsrc2,
3693  COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3694  ValRange);
3695  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3696  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3697  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3698  ValRange);
3699  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3700  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3701  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3702  ValRange);
3703  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3704  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3705  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3706  ValRange);
3707  } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3708  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3709  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3710  ValRange);
3711  } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3712  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3713  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3714  ValRange);
3715  } else if (ID == ".amdhsa_next_free_vgpr") {
3716  VGPRRange = ValRange;
3717  NextFreeVGPR = Val;
3718  } else if (ID == ".amdhsa_next_free_sgpr") {
3719  SGPRRange = ValRange;
3720  NextFreeSGPR = Val;
3721  } else if (ID == ".amdhsa_reserve_vcc") {
3722  if (!isUInt<1>(Val))
3723  return OutOfRangeError(ValRange);
3724  ReserveVCC = Val;
3725  } else if (ID == ".amdhsa_reserve_flat_scratch") {
3726  if (IVersion.Major < 7)
3727  return getParser().Error(IDRange.Start, "directive requires gfx7+",
3728  IDRange);
3729  if (!isUInt<1>(Val))
3730  return OutOfRangeError(ValRange);
3731  ReserveFlatScr = Val;
3732  } else if (ID == ".amdhsa_reserve_xnack_mask") {
3733  if (IVersion.Major < 8)
3734  return getParser().Error(IDRange.Start, "directive requires gfx8+",
3735  IDRange);
3736  if (!isUInt<1>(Val))
3737  return OutOfRangeError(ValRange);
3738  ReserveXNACK = Val;
3739  } else if (ID == ".amdhsa_float_round_mode_32") {
3740  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3741  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3742  } else if (ID == ".amdhsa_float_round_mode_16_64") {
3743  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3744  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3745  } else if (ID == ".amdhsa_float_denorm_mode_32") {
3746  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3747  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3748  } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3749  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3750  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3751  ValRange);
3752  } else if (ID == ".amdhsa_dx10_clamp") {
3753  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3754  COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3755  } else if (ID == ".amdhsa_ieee_mode") {
3756  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3757  Val, ValRange);
3758  } else if (ID == ".amdhsa_fp16_overflow") {
3759  if (IVersion.Major < 9)
3760  return getParser().Error(IDRange.Start, "directive requires gfx9+",
3761  IDRange);
3762  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3763  ValRange);
3764  } else if (ID == ".amdhsa_workgroup_processor_mode") {
3765  if (IVersion.Major < 10)
3766  return getParser().Error(IDRange.Start, "directive requires gfx10+",
3767  IDRange);
3768  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3769  ValRange);
3770  } else if (ID == ".amdhsa_memory_ordered") {
3771  if (IVersion.Major < 10)
3772  return getParser().Error(IDRange.Start, "directive requires gfx10+",
3773  IDRange);
3774  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3775  ValRange);
3776  } else if (ID == ".amdhsa_forward_progress") {
3777  if (IVersion.Major < 10)
3778  return getParser().Error(IDRange.Start, "directive requires gfx10+",
3779  IDRange);
3780  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3781  ValRange);
3782  } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3783  PARSE_BITS_ENTRY(
3784  KD.compute_pgm_rsrc2,
3785  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3786  ValRange);
3787  } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3788  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3789  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3790  Val, ValRange);
3791  } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3792  PARSE_BITS_ENTRY(
3793  KD.compute_pgm_rsrc2,
3794  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3795  ValRange);
3796  } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3797  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3798  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3799  Val, ValRange);
3800  } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3801  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3802  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3803  Val, ValRange);
3804  } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3805  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3806  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3807  Val, ValRange);
3808  } else if (ID == ".amdhsa_exception_int_div_zero") {
3809  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3810  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3811  Val, ValRange);
3812  } else {
3813  return getParser().Error(IDRange.Start,
3814  "unknown .amdhsa_kernel directive", IDRange);
3815  }
3816 
3817 #undef PARSE_BITS_ENTRY
3818  }
3819 
3820  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3821  return TokError(".amdhsa_next_free_vgpr directive is required");
3822 
3823  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3824  return TokError(".amdhsa_next_free_sgpr directive is required");
3825 
3826  unsigned VGPRBlocks;
3827  unsigned SGPRBlocks;
3828  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3829  ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3830  VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3831  SGPRBlocks))
3832  return true;
3833 
3834  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3835  VGPRBlocks))
3836  return OutOfRangeError(VGPRRange);
3837  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3838  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3839 
3840  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3841  SGPRBlocks))
3842  return OutOfRangeError(SGPRRange);
3843  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3844  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3845  SGPRBlocks);
3846 
3847  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3848  return TokError("too many user SGPRs enabled");
3849  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3850  UserSGPRCount);
3851 
3852  getTargetStreamer().EmitAmdhsaKernelDescriptor(
3853  getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3854  ReserveFlatScr, ReserveXNACK);
3855  return false;
3856 }
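// For illustration, a minimal block this directive accepts (kernel name and
// register counts are hypothetical; only the two .amdhsa_next_free_* directives
// are mandatory, everything else keeps its default):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel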
3857 
3858 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3859  uint32_t Major;
3860  uint32_t Minor;
3861 
3862  if (ParseDirectiveMajorMinor(Major, Minor))
3863  return true;
3864 
3865  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3866  return false;
3867 }
3868 
3869 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3870  uint32_t Major;
3871  uint32_t Minor;
3872  uint32_t Stepping;
3873  StringRef VendorName;
3874  StringRef ArchName;
3875 
3876  // If this directive has no arguments, then use the ISA version for the
3877  // targeted GPU.
3878  if (getLexer().is(AsmToken::EndOfStatement)) {
3879  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3880  getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3881  ISA.Stepping,
3882  "AMD", "AMDGPU");
3883  return false;
3884  }
3885 
3886  if (ParseDirectiveMajorMinor(Major, Minor))
3887  return true;
3888 
3889  if (getLexer().isNot(AsmToken::Comma))
3890  return TokError("stepping version number required, comma expected");
3891  Lex();
3892 
3893  if (ParseAsAbsoluteExpression(Stepping))
3894  return TokError("invalid stepping version");
3895 
3896  if (getLexer().isNot(AsmToken::Comma))
3897  return TokError("vendor name required, comma expected");
3898  Lex();
3899 
3900  if (getLexer().isNot(AsmToken::String))
3901  return TokError("invalid vendor name");
3902 
3903  VendorName = getLexer().getTok().getStringContents();
3904  Lex();
3905 
3906  if (getLexer().isNot(AsmToken::Comma))
3907  return TokError("arch name required, comma expected");
3908  Lex();
3909 
3910  if (getLexer().isNot(AsmToken::String))
3911  return TokError("invalid arch name");
3912 
3913  ArchName = getLexer().getTok().getStringContents();
3914  Lex();
3915 
3916  getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3917  VendorName, ArchName);
3918  return false;
3919 }
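// For illustration (version numbers hypothetical), the explicit form parsed
// above would be: .hsa_code_object_isa 2,1,0,"AMD","AMDGPU"; with no operands
// the directive falls back to the ISA version of the current target instead.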
3920 
3921 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3922  amd_kernel_code_t &Header) {
3923  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3924  // assembly for backwards compatibility.
3925  if (ID == "max_scratch_backing_memory_byte_size") {
3926  Parser.eatToEndOfStatement();
3927  return false;
3928  }
3929 
3930  SmallString<40> ErrStr;
3931  raw_svector_ostream Err(ErrStr);
3932  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3933  return TokError(Err.str());
3934  }
3935  Lex();
3936 
3937  if (ID == "enable_wavefront_size32") {
3938  if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
3939  if (!isGFX10())
3940  return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
3941  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3942  return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
3943  } else {
3944  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3945  return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
3946  }
3947  }
3948 
3949  if (ID == "wavefront_size") {
3950  if (Header.wavefront_size == 5) {
3951  if (!isGFX10())
3952  return TokError("wavefront_size=5 is only allowed on GFX10+");
3953  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3954  return TokError("wavefront_size=5 requires +WavefrontSize32");
3955  } else if (Header.wavefront_size == 6) {
3956  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3957  return TokError("wavefront_size=6 requires +WavefrontSize64");
3958  }
3959  }
3960 
3961  if (ID == "enable_wgp_mode") {
3962  if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
3963  return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
3964  }
3965 
3966  if (ID == "enable_mem_ordered") {
3967  if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
3968  return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
3969  }
3970 
3971  if (ID == "enable_fwd_progress") {
3972  if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
3973  return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
3974  }
3975 
3976  return false;
3977 }
3978 
3979 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3980  amd_kernel_code_t Header;
3981  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3982 
3983  while (true) {
3984  // Lex EndOfStatement. This is in a while loop, because lexing a comment
3985  // will set the current token to EndOfStatement.
3986  while(getLexer().is(AsmToken::EndOfStatement))
3987  Lex();
3988 
3989  if (getLexer().isNot(AsmToken::Identifier))
3990  return TokError("expected value identifier or .end_amd_kernel_code_t");
3991 
3992  StringRef ID = getLexer().getTok().getIdentifier();
3993  Lex();
3994 
3995  if (ID == ".end_amd_kernel_code_t")
3996  break;
3997 
3998  if (ParseAMDKernelCodeTValue(ID, Header))
3999  return true;
4000  }
4001 
4002  getTargetStreamer().EmitAMDKernelCodeT(Header);
4003 
4004  return false;
4005 }
4006 
4007 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4008  if (getLexer().isNot(AsmToken::Identifier))
4009  return TokError("expected symbol name");
4010 
4011  StringRef KernelName = Parser.getTok().getString();
4012 
4013  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4014  ELF::STT_AMDGPU_HSA_KERNEL);
4015  Lex();
4016  if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4017  KernelScope.initialize(getContext());
4018  return false;
4019 }
4020 
4021 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4022  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4023  return Error(getParser().getTok().getLoc(),
4024  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4025  "architectures");
4026  }
4027 
4028  auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4029 
4030  std::string ISAVersionStringFromSTI;
4031  raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4032  IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4033 
4034  if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4035  return Error(getParser().getTok().getLoc(),
4036  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4037  "arguments specified through the command line");
4038  }
4039 
4040  getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4041  Lex();
4042 
4043  return false;
4044 }
4045 
4046 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4047  const char *AssemblerDirectiveBegin;
4048  const char *AssemblerDirectiveEnd;
4049  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4050  IsaInfo::hasCodeObjectV3(&getSTI())
4051  ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4052  HSAMD::V3::AssemblerDirectiveEnd)
4053  : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4054  HSAMD::AssemblerDirectiveEnd);
4055 
4056  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4057  return Error(getParser().getTok().getLoc(),
4058  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4059  "not available on non-amdhsa OSes")).str());
4060  }
4061 
4062  std::string HSAMetadataString;
4063  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4064  HSAMetadataString))
4065  return true;
4066 
4067  if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4068  if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4069  return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4070  } else {
4071  if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4072  return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4073  }
4074 
4075  return false;
4076 }
4077 
4078 /// Common code to parse out a block of text (typically YAML) between start and
4079 /// end directives.
4080 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4081  const char *AssemblerDirectiveEnd,
4082  std::string &CollectString) {
4083 
4084  raw_string_ostream CollectStream(CollectString);
4085 
4086  getLexer().setSkipSpace(false);
4087 
4088  bool FoundEnd = false;
4089  while (!getLexer().is(AsmToken::Eof)) {
4090  while (getLexer().is(AsmToken::Space)) {
4091  CollectStream << getLexer().getTok().getString();
4092  Lex();
4093  }
4094 
4095  if (getLexer().is(AsmToken::Identifier)) {
4096  StringRef ID = getLexer().getTok().getIdentifier();
4097  if (ID == AssemblerDirectiveEnd) {
4098  Lex();
4099  FoundEnd = true;
4100  break;
4101  }
4102  }
4103 
4104  CollectStream << Parser.parseStringToEndOfStatement()
4105  << getContext().getAsmInfo()->getSeparatorString();
4106 
4107  Parser.eatToEndOfStatement();
4108  }
4109 
4110  getLexer().setSkipSpace(true);
4111 
4112  if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4113  return TokError(Twine("expected directive ") +
4114  Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4115  }
4116 
4117  CollectStream.flush();
4118  return false;
4119 }
4120 
4121 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4122 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4123  std::string String;
4124  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4125  AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4126  return true;
4127 
4128  auto PALMetadata = getTargetStreamer().getPALMetadata();
4129  if (!PALMetadata->setFromString(String))
4130  return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4131  return false;
4132 }
4133 
4134 /// Parse the assembler directive for old linear-format PAL metadata.
4135 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4136  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4137  return Error(getParser().getTok().getLoc(),
4138  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4139  "not available on non-amdpal OSes")).str());
4140  }
4141 
4142  auto PALMetadata = getTargetStreamer().getPALMetadata();
4143  PALMetadata->setLegacy();
4144  for (;;) {
4145  uint32_t Key, Value;
4146  if (ParseAsAbsoluteExpression(Key)) {
4147  return TokError(Twine("invalid value in ") +
4148  Twine(PALMD::AssemblerDirective));
4149  }
4150  if (getLexer().isNot(AsmToken::Comma)) {
4151  return TokError(Twine("expected an even number of values in ") +
4152  Twine(PALMD::AssemblerDirective));
4153  }
4154  Lex();
4155  if (ParseAsAbsoluteExpression(Value)) {
4156  return TokError(Twine("invalid value in ") +
4157  Twine(PALMD::AssemblerDirective));
4158  }
4159  PALMetadata->setRegister(Key, Value);
4160  if (getLexer().isNot(AsmToken::Comma))
4161  break;
4162  Lex();
4163  }
4164  return false;
4165 }
4166 
4167 /// ParseDirectiveAMDGPULDS
4168 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
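4169 /// e.g. (illustrative; the symbol name is hypothetical):
4170 ///   .amdgpu_lds my_lds_block, 4096, 16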
4169 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4170  if (getParser().checkForValidSection())
4171  return true;
4172 
4173  StringRef Name;
4174  SMLoc NameLoc = getLexer().getLoc();
4175  if (getParser().parseIdentifier(Name))
4176  return TokError("expected identifier in directive");
4177 
4178  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4179  if (parseToken(AsmToken::Comma, "expected ','"))
4180  return true;
4181 
4182  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4183 
4184  int64_t Size;
4185  SMLoc SizeLoc = getLexer().getLoc();
4186  if (getParser().parseAbsoluteExpression(Size))
4187  return true;
4188  if (Size < 0)
4189  return Error(SizeLoc, "size must be non-negative");
4190  if (Size > LocalMemorySize)
4191  return Error(SizeLoc, "size is too large");
4192 
4193  int64_t Align = 4;
4194  if (getLexer().is(AsmToken::Comma)) {
4195  Lex();
4196  SMLoc AlignLoc = getLexer().getLoc();
4197  if (getParser().parseAbsoluteExpression(Align))
4198  return true;
4199  if (Align < 0 || !isPowerOf2_64(Align))
4200  return Error(AlignLoc, "alignment must be a power of two");
4201 
4202  // Alignment larger than the size of LDS is possible in theory, as long
4203  // as the linker manages to place the symbol at address 0, but we do want
4204  // to make sure the alignment fits nicely into a 32-bit integer.
4205  if (Align >= 1u << 31)
4206  return Error(AlignLoc, "alignment is too large");
4207  }
4208 
4209  if (parseToken(AsmToken::EndOfStatement,
4210  "unexpected token in '.amdgpu_lds' directive"))
4211  return true;
4212 
4213  Symbol->redefineIfPossible();
4214  if (!Symbol->isUndefined())
4215  return Error(NameLoc, "invalid symbol redefinition");
4216 
4217  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4218  return false;
4219 }
4220 
4221 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4222  StringRef IDVal = DirectiveID.getString();
4223 
4224  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4225  if (IDVal == ".amdgcn_target")
4226  return ParseDirectiveAMDGCNTarget();
4227 
4228  if (IDVal == ".amdhsa_kernel")
4229  return ParseDirectiveAMDHSAKernel();
4230 
4231  // TODO: Restructure/combine with PAL metadata directive.
4232  if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
4233  return ParseDirectiveHSAMetadata();
4234  } else {
4235  if (IDVal == ".hsa_code_object_version")
4236  return ParseDirectiveHSACodeObjectVersion();
4237 
4238  if (IDVal == ".hsa_code_object_isa")
4239  return ParseDirectiveHSACodeObjectISA();
4240 
4241  if (IDVal == ".amd_kernel_code_t")
4242  return ParseDirectiveAMDKernelCodeT();
4243 
4244  if (IDVal == ".amdgpu_hsa_kernel")
4245  return ParseDirectiveAMDGPUHsaKernel();
4246 
4247  if (IDVal == ".amd_amdgpu_isa")
4248  return ParseDirectiveISAVersion();
4249 
4250  if (IDVal == HSAMD::AssemblerDirectiveBegin)
4251  return ParseDirectiveHSAMetadata();
4252  }
4253 
4254  if (IDVal == ".amdgpu_lds")
4255  return ParseDirectiveAMDGPULDS();
4256 
4257  if (IDVal == PALMD::AssemblerDirectiveBegin)
4258  return ParseDirectivePALMetadataBegin();
4259 
4260  if (IDVal == PALMD::AssemblerDirective)
4261  return ParseDirectivePALMetadata();
4262 
4263  return true;
4264 }
4265 
4266 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4267  unsigned RegNo) const {
4268 
4269  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4270  R.isValid(); ++R) {
4271  if (*R == RegNo)
4272  return isGFX9() || isGFX10();
4273  }
4274 
4275  // GFX10 has 2 more SGPRs 104 and 105.
4276  for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4277  R.isValid(); ++R) {
4278  if (*R == RegNo)
4279  return hasSGPR104_SGPR105();
4280  }
4281 
4282  switch (RegNo) {
4283  case AMDGPU::SRC_SHARED_BASE:
4284  case AMDGPU::SRC_SHARED_LIMIT:
4285  case AMDGPU::SRC_PRIVATE_BASE:
4286  case AMDGPU::SRC_PRIVATE_LIMIT:
4287  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4288  return !isCI() && !isSI() && !isVI();
4289  case AMDGPU::TBA:
4290  case AMDGPU::TBA_LO:
4291  case AMDGPU::TBA_HI:
4292  case AMDGPU::TMA:
4293  case AMDGPU::TMA_LO:
4294  case AMDGPU::TMA_HI:
4295  return !isGFX9() && !isGFX10();
4296  case AMDGPU::XNACK_MASK:
4297  case AMDGPU::XNACK_MASK_LO:
4298  case AMDGPU::XNACK_MASK_HI:
4299  return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4300  case AMDGPU::SGPR_NULL:
4301  return isGFX10();
4302  default:
4303  break;
4304  }
4305 
4306  if (isCI())
4307  return true;
4308 
4309  if (isSI() || isGFX10()) {
4310  // No flat_scr on SI.
4311  // On GFX10 flat scratch is not a valid register operand and can only be
4312  // accessed with s_setreg/s_getreg.
4313  switch (RegNo) {
4314  case AMDGPU::FLAT_SCR:
4315  case AMDGPU::FLAT_SCR_LO:
4316  case AMDGPU::FLAT_SCR_HI:
4317  return false;
4318  default:
4319  return true;
4320  }
4321  }
4322 
4323  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4324  // SI/CI have.
4325  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4326  R.isValid(); ++R) {
4327  if (*R == RegNo)
4328  return hasSGPR102_SGPR103();
4329  }
4330 
4331  return true;
4332 }
4333 
4334 OperandMatchResultTy
4335 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4336  OperandMode Mode) {
4337  // Try to parse with a custom parser
4338  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4339 
4340  // If we successfully parsed the operand or if there was an error parsing,
4341  // we are done.
4342  //
4343  // If we are parsing after we reach EndOfStatement then this means we
4344  // are appending default values to the Operands list. This is only done
4345  // by custom parser, so we shouldn't continue on to the generic parsing.
4346  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4347  getLexer().is(AsmToken::EndOfStatement))
4348  return ResTy;
4349 
4350  if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4351  unsigned Prefix = Operands.size();
4352  SMLoc LBraceLoc = getTok().getLoc();
4353  Parser.Lex(); // eat the '['
4354 
4355  for (;;) {
4356  ResTy = parseReg(Operands);
4357  if (ResTy != MatchOperand_Success)
4358  return ResTy;
4359 
4360  if (getLexer().is(AsmToken::RBrac))
4361  break;
4362 
4363  if (getLexer().isNot(AsmToken::Comma))
4364  return MatchOperand_ParseFail;
4365  Parser.Lex();
4366  }
4367 
4368  if (Operands.size() - Prefix > 1) {
4369  Operands.insert(Operands.begin() + Prefix,
4370  AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4371  Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4372  getTok().getLoc()));
4373  }
4374 
4375  Parser.Lex(); // eat the ']'
4376  return MatchOperand_Success;
4377  }
4378 
4379  return parseRegOrImm(Operands);
4380 }
4381 
4382 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4383  // Clear any forced encodings from the previous instruction.
4384  setForcedEncodingSize(0);
4385  setForcedDPP(false);
4386  setForcedSDWA(false);
4387 
4388  if (Name.endswith("_e64")) {
4389  setForcedEncodingSize(64);
4390  return Name.substr(0, Name.size() - 4);
4391  } else if (Name.endswith("_e32")) {
4392  setForcedEncodingSize(32);
4393  return Name.substr(0, Name.size() - 4);
4394  } else if (Name.endswith("_dpp")) {
4395  setForcedDPP(true);
4396  return Name.substr(0, Name.size() - 4);
4397  } else if (Name.endswith("_sdwa")) {
4398  setForcedSDWA(true);
4399  return Name.substr(0, Name.size() - 5);
4400  }
4401  return Name;
4402 }
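// For illustration: "v_add_f32_e64" forces the 64-bit (VOP3) encoding and the
// mnemonic matched afterwards is "v_add_f32"; "_e32", "_dpp" and "_sdwa"
// likewise only force an encoding variant. (The instruction name here is
// illustrative.)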
4403 
4404 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4405  StringRef Name,
4406  SMLoc NameLoc, OperandVector &Operands) {
4407  // Add the instruction mnemonic
4408  Name = parseMnemonicSuffix(Name);
4409  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4410 
4411  bool IsMIMG = Name.startswith("image_");
4412 
4413  while (!getLexer().is(AsmToken::EndOfStatement)) {
4414  OperandMode Mode = OperandMode_Default;
4415  if (IsMIMG && isGFX10() && Operands.size() == 2)
4416  Mode = OperandMode_NSA;
4417  OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4418 
4419  // Eat the comma or space if there is one.
4420  if (getLexer().is(AsmToken::Comma))
4421  Parser.Lex();
4422 
4423  switch (Res) {
4424  case MatchOperand_Success: break;
4425  case MatchOperand_ParseFail:
4426  // FIXME: use real operand location rather than the current location.
4427  Error(getLexer().getLoc(), "failed parsing operand.");
4428  while (!getLexer().is(AsmToken::EndOfStatement)) {
4429  Parser.Lex();
4430  }
4431  return true;
4432  case MatchOperand_NoMatch:
4433  // FIXME: use real operand location rather than the current location.
4434  Error(getLexer().getLoc(), "not a valid operand.");
4435  while (!getLexer().is(AsmToken::EndOfStatement)) {
4436  Parser.Lex();
4437  }
4438  return true;
4439  }
4440  }
4441 
4442  return false;
4443 }
4444 
4445 //===----------------------------------------------------------------------===//
4446 // Utility functions
4447 //===----------------------------------------------------------------------===//
4448 
4449 OperandMatchResultTy
4450 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4451 
4452  if (!trySkipId(Prefix, AsmToken::Colon))
4453  return MatchOperand_NoMatch;
4454 
4455  return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4456 }
4457 
4458 OperandMatchResultTy
4459 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4460  AMDGPUOperand::ImmTy ImmTy,
4461  bool (*ConvertResult)(int64_t&)) {
4462  SMLoc S = getLoc();
4463  int64_t Value = 0;
4464 
4465  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4466  if (Res != MatchOperand_Success)
4467  return Res;
4468 
4469  if (ConvertResult && !ConvertResult(Value)) {
4470  Error(S, "invalid " + StringRef(Prefix) + " value.");
4471  }
4472 
4473  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4474  return MatchOperand_Success;
4475 }
4476 
4477 OperandMatchResultTy
4478 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4479  OperandVector &Operands,
4480  AMDGPUOperand::ImmTy ImmTy,
4481  bool (*ConvertResult)(int64_t&)) {
4482  SMLoc S = getLoc();
4483  if (!trySkipId(Prefix, AsmToken::Colon))
4484  return MatchOperand_NoMatch;
4485 
4486  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4487  return MatchOperand_ParseFail;
4488 
4489  unsigned Val = 0;
4490  const unsigned MaxSize = 4;
4491 
4492  // FIXME: How to verify the number of elements matches the number of src
4493  // operands?
4494  for (int I = 0; ; ++I) {
4495  int64_t Op;
4496  SMLoc Loc = getLoc();
4497  if (!parseExpr(Op))
4498  return MatchOperand_ParseFail;
4499 
4500  if (Op != 0 && Op != 1) {
4501  Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4502  return MatchOperand_ParseFail;
4503  }
4504 
4505  Val |= (Op << I);
4506 
4507  if (trySkipToken(AsmToken::RBrac))
4508  break;
4509 
4510  if (I + 1 == MaxSize) {
4511  Error(getLoc(), "expected a closing square bracket");
4512  return MatchOperand_ParseFail;
4513  }
4514 
4515  if (!skipToken(AsmToken::Comma, "expected a comma"))
4516  return MatchOperand_ParseFail;
4517  }
4518 
4519  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4520  return MatchOperand_Success;
4521 }
4522 
4523 OperandMatchResultTy
4524 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4525  AMDGPUOperand::ImmTy ImmTy) {
4526  int64_t Bit = 0;
4527  SMLoc S = Parser.getTok().getLoc();
4528 
4529  // We are at the end of the statement, and this is a default argument, so
4530  // use a default value.
4531  if (getLexer().isNot(AsmToken::EndOfStatement)) {
4532  switch(getLexer().getKind()) {
4533  case AsmToken::Identifier: {
4534  StringRef Tok = Parser.getTok().getString();
4535  if (Tok == Name) {
4536  if (Tok == "r128" && isGFX9())
4537  Error(S, "r128 modifier is not supported on this GPU");
4538  if (Tok == "a16" && !isGFX9() && !isGFX10())
4539  Error(S, "a16 modifier is not supported on this GPU");
4540  Bit = 1;
4541  Parser.Lex();
4542  } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4543  Bit = 0;
4544  Parser.Lex();
4545  } else {
4546  return MatchOperand_NoMatch;
4547  }
4548  break;
4549  }
4550  default:
4551  return MatchOperand_NoMatch;
4552  }
4553  }
4554 
4555  if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4556  return MatchOperand_ParseFail;
4557 
4558  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4559  return MatchOperand_Success;
4560 }
4561 
4562 static void addOptionalImmOperand(
4563  MCInst& Inst, const OperandVector& Operands,
4564  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4565  AMDGPUOperand::ImmTy ImmT,
4566  int64_t Default = 0) {
4567  auto i = OptionalIdx.find(ImmT);
4568  if (i != OptionalIdx.end()) {
4569  unsigned Idx = i->second;
4570  ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4571  } else {
4572  Inst.addOperand(MCOperand::createImm(Default));
4573  }
4574 }
4575 
4576 OperandMatchResultTy
4577 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4578  if (getLexer().isNot(AsmToken::Identifier)) {
4579  return MatchOperand_NoMatch;
4580  }
4581  StringRef Tok = Parser.getTok().getString();
4582  if (Tok != Prefix) {
4583  return MatchOperand_NoMatch;
4584  }
4585 
4586  Parser.Lex();
4587  if (getLexer().isNot(AsmToken::Colon)) {
4588  return MatchOperand_ParseFail;
4589  }
4590 
4591  Parser.Lex();
4592  if (getLexer().isNot(AsmToken::Identifier)) {
4593  return MatchOperand_ParseFail;
4594  }
4595 
4596  Value = Parser.getTok().getString();
4597  return MatchOperand_Success;
4598 }
4599 
4600 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4601 // values to live in a joint format operand in the MCInst encoding.
4602 OperandMatchResultTy
4603 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4604  SMLoc S = Parser.getTok().getLoc();
4605  int64_t Dfmt = 0, Nfmt = 0;
4606  // dfmt and nfmt can appear in either order, and each is optional.
4607  bool GotDfmt = false, GotNfmt = false;
4608  while (!GotDfmt || !GotNfmt) {
4609  if (!GotDfmt) {
4610  auto Res = parseIntWithPrefix("dfmt", Dfmt);
4611  if (Res != MatchOperand_NoMatch) {
4612  if (Res != MatchOperand_Success)
4613  return Res;
4614  if (Dfmt >= 16) {
4615  Error(Parser.getTok().getLoc(), "out of range dfmt");
4616  return MatchOperand_ParseFail;
4617  }
4618  GotDfmt = true;
4619  Parser.Lex();
4620  continue;
4621  }
4622  }
4623  if (!GotNfmt) {
4624  auto Res = parseIntWithPrefix("nfmt", Nfmt);
4625  if (Res != MatchOperand_NoMatch) {
4626  if (Res != MatchOperand_Success)
4627  return Res;
4628  if (Nfmt >= 8) {
4629  Error(Parser.getTok().getLoc(), "out of range nfmt");
4630  return MatchOperand_ParseFail;
4631  }
4632  GotNfmt = true;
4633  Parser.Lex();
4634  continue;
4635  }
4636  }
4637  break;
4638  }
4639  if (!GotDfmt && !GotNfmt)
4640  return MatchOperand_NoMatch;
4641  auto Format = Dfmt | Nfmt << 4;
4642  Operands.push_back(
4643  AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4644  return MatchOperand_Success;
4645 }
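// For illustration: "dfmt:15, nfmt:2" (in either order, each optional) folds
// into the single format operand as 15 | (2 << 4), matching the packing above.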
4646 
4647 //===----------------------------------------------------------------------===//
4648 // ds
4649 //===----------------------------------------------------------------------===//
4650 
4651 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4652  const OperandVector &Operands) {
4653  OptionalImmIndexMap OptionalIdx;
4654 
4655  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4656  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4657 
4658  // Add the register arguments
4659  if (Op.isReg()) {
4660  Op.addRegOperands(Inst, 1);
4661  continue;
4662  }
4663 
4664  // Handle optional arguments
4665  OptionalIdx[Op.getImmTy()] = i;
4666  }
4667 
4668  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4669  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4670  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4671 
4672  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4673 }
4674 
4675 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4676  bool IsGdsHardcoded) {
4677  OptionalImmIndexMap OptionalIdx;
4678 
4679  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4680  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4681 
4682  // Add the register arguments
4683  if (Op.isReg()) {
4684  Op.addRegOperands(Inst, 1);
4685  continue;
4686  }
4687 
4688  if (Op.isToken() && Op.getToken() == "gds") {
4689  IsGdsHardcoded = true;
4690  continue;
4691  }
4692 
4693  // Handle optional arguments
4694  OptionalIdx[Op.getImmTy()] = i;
4695  }
4696 
4697  AMDGPUOperand::ImmTy OffsetType =
4698  (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4699  Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4700  Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4701  AMDGPUOperand::ImmTyOffset;
4702 
4703  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4704 
4705  if (!IsGdsHardcoded) {
4706  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4707  }
4708  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4709 }
4710 
4711 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4712  OptionalImmIndexMap OptionalIdx;
4713 
4714  unsigned OperandIdx[4];
4715  unsigned EnMask = 0;
4716  int SrcIdx = 0;
4717 
4718  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4719  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4720 
4721  // Add the register arguments
4722  if (Op.isReg()) {
4723  assert(SrcIdx < 4);
4724  OperandIdx[SrcIdx] = Inst.size();
4725  Op.addRegOperands(Inst, 1);
4726  ++SrcIdx;
4727  continue;
4728  }
4729 
4730  if (Op.isOff()) {
4731  assert(SrcIdx < 4);
4732  OperandIdx[SrcIdx] = Inst.size();
4733  Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4734  ++SrcIdx;
4735  continue;
4736  }
4737 
4738  if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4739  Op.addImmOperands(Inst, 1);
4740  continue;
4741  }
4742 
4743  if (Op.isToken() && Op.getToken() == "done")
4744  continue;
4745 
4746  // Handle optional arguments
4747  OptionalIdx[Op.getImmTy()] = i;
4748  }
4749 
4750  assert(SrcIdx == 4);
4751 
4752  bool Compr = false;
4753  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4754  Compr = true;
4755  Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4756  Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4757  Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4758  }
4759 
4760  for (auto i = 0; i < SrcIdx; ++i) {
4761  if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4762  EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4763  }
4764  }
4765 
4766  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4767  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4768 
4769  Inst.addOperand(MCOperand::createImm(EnMask));
4770 }
4771 
4772 //===----------------------------------------------------------------------===//
4773 // s_waitcnt
4774 //===----------------------------------------------------------------------===//
4775 
4776 static bool
4778  const AMDGPU::IsaVersion ISA,
4779  int64_t &IntVal,
4780  int64_t CntVal,
4781  bool Saturate,
4782  unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4783  unsigned (*decode)(const IsaVersion &Version, unsigned))
4784 {
4785  bool Failed = false;
4786 
4787  IntVal = encode(ISA, IntVal, CntVal);
4788  if (CntVal != decode(ISA, IntVal)) {
4789  if (Saturate) {
4790  IntVal = encode(ISA, IntVal, -1);
4791  } else {
4792  Failed = true;
4793  }
4794  }
4795  return Failed;
4796 }
4797 
4798 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4799 
4800  SMLoc CntLoc = getLoc();
4801  StringRef CntName = getTokenStr();
4802 
4803  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4804  !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4805  return false;
4806 
4807  int64_t CntVal;
4808  SMLoc ValLoc = getLoc();
4809  if (!parseExpr(CntVal))
4810  return false;
4811 
4812  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4813 
4814  bool Failed = true;
4815  bool Sat = CntName.endswith("_sat");
4816 
4817  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4818  Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4819  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4820  Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4821  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4822  Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4823  } else {
4824  Error(CntLoc, "invalid counter name " + CntName);
4825  return false;
4826  }
4827 
4828  if (Failed) {
4829  Error(ValLoc, "too large value for " + CntName);
4830  return false;
4831  }
4832 
4833  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4834  return false;
4835 
4836  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4837  if (isToken(AsmToken::EndOfStatement)) {
4838  Error(getLoc(), "expected a counter name");
4839  return false;
4840  }
4841  }
4842 
4843  return true;
4844 }
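// For illustration: "s_waitcnt vmcnt(0) & lgkmcnt(1)" is handled by calling
// parseCnt once per counter, each call folding its field into the shared
// waitcnt mask; the "_sat" spellings saturate an out-of-range count via
// encodeCnt instead of reporting an error.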
4845 
4846 OperandMatchResultTy
4847 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4848  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4849  int64_t Waitcnt = getWaitcntBitMask(ISA);
4850  SMLoc S = getLoc();
4851 
4852  // If parse failed, do not return error code
4853  // to avoid excessive error messages.
4854  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4855  while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4856  } else {
4857  parseExpr(Waitcnt);
4858  }
4859 
4860  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4861  return MatchOperand_Success;
4862 }
4863 
4864 bool
4865 AMDGPUOperand::isSWaitCnt() const {
4866  return isImm();
4867 }
4868 
4869 //===----------------------------------------------------------------------===//
4870 // hwreg
4871 //===----------------------------------------------------------------------===//
4872 
4873 bool
4874 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4875  int64_t &Offset,
4876  int64_t &Width) {
4877  using namespace llvm::AMDGPU::Hwreg;
4878 
4879  // The register may be specified by name or using a numeric code
4880  if (isToken(AsmToken::Identifier) &&
4881  (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4882  HwReg.IsSymbolic = true;
4883  lex(); // skip message name
4884  } else if (!parseExpr(HwReg.Id)) {
4885  return false;
4886  }
4887 
4888  if (trySkipToken(AsmToken::RParen))
4889  return true;
4890 
4891  // parse optional params
4892  return
4893  skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
4894  parseExpr(Offset) &&
4895  skipToken(AsmToken::Comma, "expected a comma") &&
4896  parseExpr(Width) &&
4897  skipToken(AsmToken::RParen, "expected a closing parenthesis");
4898 }
4899 
4900 bool
4901 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
4902  const int64_t Offset,
4903  const int64_t Width,
4904  const SMLoc Loc) {
4905 
4906  using namespace llvm::AMDGPU::Hwreg;
4907 
4908  if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
4909  Error(Loc, "specified hardware register is not supported on this GPU");
4910  return false;
4911  } else if (!isValidHwreg(HwReg.Id)) {
4912  Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
4913  return false;
4914  } else if (!isValidHwregOffset(Offset)) {
4915  Error(Loc, "invalid bit offset: only 5-bit values are legal");
4916  return false;
4917  } else if (!isValidHwregWidth(Width)) {
4918  Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
4919  return false;
4920  }
4921  return true;
4922 }
4923 
4924 OperandMatchResultTy
4925 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4926  using namespace llvm::AMDGPU::Hwreg;
4927 
4928  int64_t ImmVal = 0;
4929  SMLoc Loc = getLoc();
4930 
4931  // If parse failed, do not return error code
4932  // to avoid excessive error messages.
4933  if (trySkipId("hwreg", AsmToken::LParen)) {
4934  OperandInfoTy HwReg(ID_UNKNOWN_);
4935  int64_t Offset = OFFSET_DEFAULT_;
4936  int64_t Width = WIDTH_DEFAULT_;
4937  if (parseHwregBody(HwReg, Offset, Width) &&
4938  validateHwreg(HwReg, Offset, Width, Loc)) {
4939  ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
4940  }
4941  } else if (parseExpr(ImmVal)) {
4942  if (ImmVal < 0 || !isUInt<16>(ImmVal))
4943  Error(Loc, "invalid immediate: only 16-bit values are legal");
4944  }
4945 
4946  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
4947  return MatchOperand_Success;
4948 }
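// For illustration (operand values hypothetical): "hwreg(3, 0, 32)" names the
// register by numeric id with an explicit offset and width, while "hwreg(3)"
// relies on the defaults; a symbolic register name resolved by getHwregId is
// accepted in place of the numeric id.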
4949 
4950 bool AMDGPUOperand::isHwreg() const {
4951  return isImmTy(ImmTyHwreg);
4952 }
4953 
4954 //===----------------------------------------------------------------------===//
4955 // sendmsg
4956 //===----------------------------------------------------------------------===//
4957 
4958 bool
4959 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
4960  OperandInfoTy &Op,
4961  OperandInfoTy &Stream) {
4962  using namespace llvm::AMDGPU::SendMsg;
4963 
4964  if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
4965  Msg.IsSymbolic = true;
4966  lex(); // skip message name
4967  } else if (!parseExpr(Msg.Id)) {
4968  return false;
4969  }
4970 
4971  if (trySkipToken(AsmToken::Comma)) {
4972  Op.IsDefined = true;
4973  if (isToken(AsmToken::Identifier) &&
4974  (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
4975  lex(); // skip operation name
4976  } else if (!parseExpr(Op.Id)) {
4977  return false;
4978  }
4979 
4980  if (trySkipToken(AsmToken::Comma)) {
4981  Stream.IsDefined = true;
4982  if (!parseExpr(Stream.Id))
4983  return false;
4984  }
4985  }
4986 
4987  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
4988 }
4989 
4990 bool
4991 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
4992  const OperandInfoTy &Op,
4993  const OperandInfoTy &Stream,
4994  const SMLoc S) {
4995  using namespace llvm::AMDGPU::SendMsg;
4996 
4997  // Validation strictness depends on whether the message is specified
4998  // in a symbolic or in a numeric form. In the latter case
4999  // only the encoding possibility is checked.
5000  bool Strict = Msg.IsSymbolic;
5001 
5002  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5003  Error(S, "invalid message id");
5004  return false;
5005  } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5006  Error(S, Op.IsDefined ?
5007  "message does not support operations" :
5008  "missing message operation");
5009  return false;
5010  } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5011  Error(S, "invalid operation id");
5012  return false;
5013  } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5014  Error(S, "message operation does not support streams");
5015  return false;
5016  } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5017  Error(S, "invalid message stream id");
5018  return false;
5019  }
5020  return true;
5021 }
5022 
5023 OperandMatchResultTy
5024 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5025  using namespace llvm::AMDGPU::SendMsg;
5026 
5027  int64_t ImmVal = 0;
5028  SMLoc Loc = getLoc();
5029 
5030  // If parse failed, do not return error code
5031  // to avoid excessive error messages.
5032  if (trySkipId("sendmsg", AsmToken::LParen)) {
5033  OperandInfoTy Msg(ID_UNKNOWN_);
5034  OperandInfoTy Op(OP_NONE_);
5035  OperandInfoTy Stream(STREAM_ID_NONE_);
5036  if (parseSendMsgBody(Msg, Op, Stream) &&
5037  validateSendMsg(Msg, Op, Stream, Loc)) {
5038  ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5039  }
5040  } else if (parseExpr(ImmVal)) {
5041  if (ImmVal < 0 || !isUInt<16>(ImmVal))
5042  Error(Loc, "invalid immediate: only 16-bit values are legal");
5043  }
5044 
5045  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5046  return MatchOperand_Success;
5047 }
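// For illustration (operand values hypothetical): "sendmsg(2, 0, 0)" supplies
// message, operation and stream numerically; with a symbolic message name the
// operation/stream fields are validated strictly by validateSendMsg, and a
// bare 16-bit immediate is accepted as well.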
5048 
5049 bool AMDGPUOperand::isSendMsg() const {
5050  return isImmTy(ImmTySendMsg);
5051 }
5052 
5053 //===----------------------------------------------------------------------===//
5054 // v_interp
5055 //===----------------------------------------------------------------------===//
5056 
5057 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5058  if (getLexer().getKind() != AsmToken::Identifier)
5059  return MatchOperand_NoMatch;
5060 
5061  StringRef Str = Parser.getTok().getString();
5062  int Slot = StringSwitch<int>(Str)
5063  .Case("p10", 0)
5064  .Case("p20", 1)
5065  .Case("p0", 2)
5066  .Default(-1);
5067 
5068  SMLoc S = Parser.getTok().getLoc();
5069  if (Slot == -1)
5070  return MatchOperand_ParseFail;
5071 
5072  Parser.Lex();
5073  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5074  AMDGPUOperand::ImmTyInterpSlot));
5075  return MatchOperand_Success;
5076 }
5077 
5078 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5079  if (getLexer().getKind() != AsmToken::Identifier)
5080  return MatchOperand_NoMatch;
5081 
5082  StringRef Str = Parser.getTok().getString();
5083  if (!Str.startswith("attr"))
5084  return MatchOperand_NoMatch;
5085 
5086  StringRef Chan = Str.take_back(2);
5087  int AttrChan = StringSwitch<int>(Chan)
5088  .Case(".x", 0)
5089  .Case(".y", 1)
5090  .Case(".z", 2)
5091  .Case(".w", 3)
5092  .Default(-1);
5093  if (AttrChan == -1)
5094  return MatchOperand_ParseFail;
5095 
5096  Str = Str.drop_back(2).drop_front(4);
5097 
5098  uint8_t Attr;
5099  if (Str.getAsInteger(10, Attr))
5100  return MatchOperand_ParseFail;
5101 
5102  SMLoc S = Parser.getTok().getLoc();
5103  Parser.Lex();
5104  if (Attr > 63) {
5105  Error(S, "out of bounds attr");
5106  return MatchOperand_Success;
5107  }
5108 
5109  SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5110 
5111  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5112  AMDGPUOperand::ImmTyInterpAttr));
5113  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5114  AMDGPUOperand::ImmTyAttrChan));
5115  return MatchOperand_Success;
5116 }
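// For illustration: "attr31.x" yields Attr == 31 and AttrChan == 0; the
// channel comes from the last two characters and the attribute number from the
// digits between "attr" and the channel suffix.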
5117 
5118 //===----------------------------------------------------------------------===//
5119 // exp
5120 //===----------------------------------------------------------------------===//
5121 
5122 void AMDGPUAsmParser::errorExpTgt() {
5123  Error(Parser.getTok().getLoc(), "invalid exp target");
5124 }
5125 
5126 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5127  uint8_t &Val) {
5128  if (Str == "null") {
5129  Val = 9;
5130  return MatchOperand_Success;
5131  }
5132 
5133  if (Str.startswith("mrt")) {
5134  Str = Str.drop_front(3);
5135  if (Str == "z") { // == mrtz
5136  Val = 8;
5137  return MatchOperand_Success;
5138  }
5139 
5140  if (Str.getAsInteger(10, Val))
5141  return MatchOperand_ParseFail;
5142 
5143  if (Val > 7)
5144  errorExpTgt();
5145 
5146  return MatchOperand_Success;
5147  }
5148 
5149  if (Str.startswith("pos")) {
5150  Str = Str.drop_front(3);
5151  if (Str.getAsInteger(10, Val))
5152  return MatchOperand_ParseFail;
5153 
5154  if (Val > 4 || (Val == 4 && !isGFX10()))
5155  errorExpTgt();
5156 
5157  Val += 12;
5158  return MatchOperand_Success;
5159  }
5160 
5161  if (isGFX10() && Str == "prim") {
5162  Val = 20;
5163  return MatchOperand_Success;
5164  }
5165 
5166  if (Str.startswith("param")) {
5167  Str = Str.drop_front(5);
5168  if (Str.getAsInteger(10, Val))
5169  return MatchOperand_ParseFail;
5170 
5171  if (Val >= 32)
5172  errorExpTgt();
5173 
5174  Val += 32;
5175  return MatchOperand_Success;
5176  }
5177 
5178  if (Str.startswith("invalid_target_")) {
5179  Str = Str.drop_front(15);
5180  if (Str.getAsInteger(10, Val))
5181  return MatchOperand_ParseFail;
5182 
5183  errorExpTgt();
5184  return MatchOperand_Success;
5185  }
5186 
5187  return MatchOperand_NoMatch;
5188 }
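// For illustration of the mapping above: "mrt0".."mrt7" -> 0..7, "mrtz" -> 8,
// "null" -> 9, "pos0" -> 12, "param0" -> 32, and "prim" -> 20 (GFX10 only).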
5189 
5190 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5191  uint8_t Val;
5192  StringRef Str = Parser.getTok().getString();
5193 
5194  auto Res = parseExpTgtImpl(Str, Val);
5195  if (Res != MatchOperand_Success)
5196  return Res;
5197 
5198  SMLoc S = Parser.getTok().getLoc();
5199  Parser.Lex();
5200 
5201  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5202  AMDGPUOperand::ImmTyExpTgt));
5203  return MatchOperand_Success;
5204 }
5205 
5206 //===----------------------------------------------------------------------===//
5207 // parser helpers
5208 //===----------------------------------------------------------------------===//
5209 
5210 bool
5211 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5212  return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5213 }
5214 
5215 bool
5216 AMDGPUAsmParser::isId(const StringRef Id) const {
5217  return isId(getToken(), Id);
5218 }
5219 
5220 bool
5221 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5222  return getTokenKind() == Kind;
5223 }
5224 
5225 bool
5226 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5227  if (isId(Id)) {
5228  lex();
5229  return true;
5230  }
5231  return false;
5232 }
5233 
5234 bool
5235 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5236  if (isId(Id) && peekToken().is(Kind)) {
5237  lex();
5238  lex();
5239  return true;
5240  }
5241  return false;
5242 }
5243 
5244 bool
5245 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5246  if (isToken(Kind)) {
5247  lex();
5248  return true;
5249  }
5250  return false;
5251 }
5252 
5253 bool
5254 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5255  const StringRef ErrMsg) {
5256  if (!trySkipToken(Kind)) {
5257  Error(getLoc(), ErrMsg);
5258  return false;
5259  }
5260  return true;
5261 }
5262 
5263 bool
5264 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5265  return !getParser().parseAbsoluteExpression(Imm);
5266 }
5267 
5268 bool
5269 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5270  SMLoc S = getLoc();
5271 
5272  const MCExpr *Expr;
5273  if (Parser.parseExpression(Expr))
5274  return false;
5275 
5276  int64_t IntVal;
5277  if (Expr->evaluateAsAbsolute(IntVal)) {
5278  Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5279  } else {
5280  Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5281  }
5282  return true;
5283 }
5284 
5285 bool
5286 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5287  if (isToken(AsmToken::String)) {
5288  Val = getToken().getStringContents();
5289  lex();
5290  return true;
5291  } else {
5292  Error(getLoc(), ErrMsg);
5293  return false;
5294  }
5295 }
5296 
5297 AsmToken
5298 AMDGPUAsmParser::getToken() const {
5299  return Parser.getTok();
5300 }
5301 
5302 AsmToken
5303 AMDGPUAsmParser::peekToken() {
5304  return getLexer().peekTok();
5305 }
5306 
5307 void
5308 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5309  auto TokCount = getLexer().peekTokens(Tokens);
5310 
5311  for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5312  Tokens[Idx] = AsmToken(AsmToken::Error, "");
5313 }
5314 
5315 AsmToken::TokenKind
5316 AMDGPUAsmParser::getTokenKind() const {
5317  return getLexer().getKind();
5318 }
5319 
5320 SMLoc
5321 AMDGPUAsmParser::getLoc() const {
5322  return getToken().getLoc();
5323 }
5324 
5325 StringRef
5326 AMDGPUAsmParser::getTokenStr() const {
5327  return getToken().getString();
5328 }
5329 
5330 void
5331 AMDGPUAsmParser::lex() {
5332  Parser.Lex();
5333 }
5334 
5335 //===----------------------------------------------------------------------===//
5336 // swizzle
5337 //===----------------------------------------------------------------------===//
5338 
5339 LLVM_READNONE
5340 static unsigned
5341 encodeBitmaskPerm(const unsigned AndMask,
5342  const unsigned OrMask,
5343  const unsigned XorMask) {
5344  using namespace llvm::AMDGPU::Swizzle;
5345 
5346  return BITMASK_PERM_ENC |
5347  (AndMask << BITMASK_AND_SHIFT) |
5348  (OrMask << BITMASK_OR_SHIFT) |
5349  (XorMask << BITMASK_XOR_SHIFT);
5350 }
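// A worked example of this encoding, assuming a 5-bit BITMASK_MAX of 0x1f
// and the usual ds_swizzle bitmask semantics where each thread reads lane
// ((id & and_mask) | or_mask) ^ xor_mask within its group of 32:
//   BROADCAST group_size=8, lane=1 -> and_mask=0x18, or_mask=1, xor_mask=0
//   REVERSE   group_size=8         -> and_mask=0x1f, or_mask=0, xor_mask=7
//   SWAP      group_size=4         -> and_mask=0x1f, or_mask=0, xor_mask=4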
5351 
5352 bool
5353 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5354  const unsigned MinVal,
5355  const unsigned MaxVal,
5356  const StringRef ErrMsg) {
5357  for (unsigned i = 0; i < OpNum; ++i) {
5358  if (!skipToken(AsmToken::Comma, "expected a comma")){
5359  return false;
5360  }
5361  SMLoc ExprLoc = Parser.getTok().getLoc();
5362  if (!parseExpr(Op[i])) {
5363  return false;
5364  }
5365  if (Op[i] < MinVal || Op[i] > MaxVal) {
5366  Error(ExprLoc, ErrMsg);
5367  return false;
5368  }
5369  }
5370 
5371  return true;
5372 }
5373 
5374 bool
5375 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5376  using namespace llvm::AMDGPU::Swizzle;
5377 
5378  int64_t Lane[LANE_NUM];
5379  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5380  "expected a 2-bit lane id")) {
5381  Imm = QUAD_PERM_ENC;
5382  for (unsigned I = 0; I < LANE_NUM; ++I) {
5383  Imm |= Lane[I] << (LANE_SHIFT * I);
5384  }
5385  return true;
5386  }
5387  return false;
5388 }
5389 
5390 bool
5391 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5392  using namespace llvm::AMDGPU::Swizzle;
5393 
5394  SMLoc S = Parser.getTok().getLoc();
5395  int64_t GroupSize;
5396  int64_t LaneIdx;
5397 
5398  if (!parseSwizzleOperands(1, &GroupSize,
5399  2, 32,
5400  "group size must be in the interval [2,32]")) {
5401  return false;
5402  }
5403  if (!isPowerOf2_64(GroupSize)) {
5404  Error(S, "group size must be a power of two");
5405  return false;
5406  }
5407  if (parseSwizzleOperands(1, &LaneIdx,
5408  0, GroupSize - 1,
5409  "lane id must be in the interval [0,group size - 1]")) {
5410  Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5411  return true;
5412  }
5413  return false;
5414 }
5415 
5416 bool
5417 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5418  using namespace llvm::AMDGPU::Swizzle;
5419 
5420  SMLoc S = Parser.getTok().getLoc();
5421  int64_t GroupSize;
5422 
5423  if (!parseSwizzleOperands(1, &GroupSize,
5424  2, 32, "group size must be in the interval [2,32]")) {
5425  return false;
5426  }
5427  if (!isPowerOf2_64(GroupSize)) {
5428  Error(S, "group size must be a power of two");
5429  return false;
5430  }
5431 
5432  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5433  return true;
5434 }
5435 
5436 bool
5437 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5438  using namespace llvm::AMDGPU::Swizzle;
5439 
5440  SMLoc S = Parser.getTok().getLoc();
5441  int64_t GroupSize;
5442 
5443  if (!parseSwizzleOperands(1, &GroupSize,
5444  1, 16, "group size must be in the interval [1,16]")) {
5445  return false;
5446  }
5447  if (!isPowerOf2_64(GroupSize)) {
5448  Error(S, "group size must be a power of two");
5449  return false;
5450  }
5451 
5452  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5453  return true;
5454 }
5455 
5456 bool
5457 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5458  using namespace llvm::AMDGPU::Swizzle;
5459 
5460  if (!skipToken(AsmToken::Comma, "expected a comma")) {
5461  return false;
5462  }
5463 
5464  StringRef Ctl;
5465  SMLoc StrLoc = Parser.getTok().getLoc();
5466  if (!parseString(Ctl)) {
5467  return false;
5468  }
5469  if (Ctl.size() != BITMASK_WIDTH) {
5470  Error(StrLoc, "expected a 5-character mask");
5471  return false;
5472  }
5473 
5474  unsigned AndMask = 0;
5475  unsigned OrMask = 0;
5476  unsigned XorMask = 0;
5477 
5478  for (size_t i = 0; i < Ctl.size(); ++i) {
5479  unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5480  switch(Ctl[i]) {
5481  default:
5482  Error(StrLoc, "invalid mask");
5483  return false;
5484  case '0':
5485  break;
5486  case '1':
5487  OrMask |= Mask;
5488  break;
5489  case 'p':
5490  AndMask |= Mask;
5491  break;
5492  case 'i':
5493  AndMask |= Mask;
5494  XorMask |= Mask;
5495  break;
5496  }
5497  }
5498 
5499  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5500  return true;
5501 }
5502 
5503 bool
5504 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5505 
5506  SMLoc OffsetLoc = Parser.getTok().getLoc();
5507 
5508  if (!parseExpr(Imm)) {
5509  return false;
5510  }
5511  if (!isUInt<16>(Imm)) {
5512  Error(OffsetLoc, "expected a 16-bit offset");
5513  return false;
5514  }
5515  return true;
5516 }
5517 
5518 bool
5519 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5520  using namespace llvm::AMDGPU::Swizzle;
5521 
5522  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5523 
5524  SMLoc ModeLoc = Parser.getTok().getLoc();
5525  bool Ok = false;
5526 
5527  if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5528  Ok = parseSwizzleQuadPerm(Imm);
5529  } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5530  Ok = parseSwizzleBitmaskPerm(Imm);
5531  } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5532  Ok = parseSwizzleBroadcast(Imm);
5533  } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5534  Ok = parseSwizzleSwap(Imm);
5535  } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5536  Ok = parseSwizzleReverse(Imm);
5537  } else {
5538  Error(ModeLoc, "expected a swizzle mode");
5539  }
5540 
5541  return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5542  }
5543 
5544  return false;
5545 }
5546 
5547 OperandMatchResultTy
5548 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5549  SMLoc S = Parser.getTok().getLoc();
5550  int64_t Imm = 0;
5551 
5552  if (trySkipId("offset")) {
5553 
5554  bool Ok = false;
5555  if (skipToken(AsmToken::Colon, "expected a colon")) {
5556  if (trySkipId("swizzle")) {
5557  Ok = parseSwizzleMacro(Imm);
5558  } else {
5559  Ok = parseSwizzleOffset(Imm);
5560  }
5561  }
5562 
5563  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5564 
5565  return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5566  } else {
5567  // Swizzle "offset" operand is optional.
5568  // If it is omitted, try parsing other optional operands.
5569  return parseOptionalOpr(Operands);
5570  }
5571 }
5572 
5573 bool
5574 AMDGPUOperand::isSwizzle() const {
5575  return isImmTy(ImmTySwizzle);
5576 }
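// A sketch of the swizzle operand forms accepted by parseSwizzleOp above
// (register operands are illustrative; the macro names come from
// Swizzle::IdSymbolic):
//   ds_swizzle_b32 v8, v2 offset:0xffff
//   ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "01pi0")
//   ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST, 8, 0)
//   ds_swizzle_b32 v8, v2 offset:swizzle(SWAP, 4)
//   ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE, 8)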
5577 
5578 //===----------------------------------------------------------------------===//
5579 // VGPR Index Mode
5580 //===----------------------------------------------------------------------===//
5581 
5582 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5583 
5584  using namespace llvm::AMDGPU::VGPRIndexMode;
5585 
5586  if (trySkipToken(AsmToken::RParen)) {
5587  return OFF;
5588  }
5589 
5590  int64_t Imm = 0;
5591 
5592  while (true) {
5593  unsigned Mode = 0;
5594  SMLoc S = Parser.getTok().getLoc();
5595 
5596  for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5597  if (trySkipId(IdSymbolic[ModeId])) {
5598  Mode = 1 << ModeId;
5599  break;
5600  }
5601  }
5602 
5603  if (Mode == 0) {
5604  Error(S, (Imm == 0)?
5605  "expected a VGPR index mode or a closing parenthesis" :
5606  "expected a VGPR index mode");
5607  break;
5608  }
5609 
5610  if (Imm & Mode) {
5611  Error(S, "duplicate VGPR index mode");
5612  break;
5613  }
5614  Imm |= Mode;
5615 
5616  if (trySkipToken(AsmToken::RParen))
5617  break;
5618  if (!skipToken(AsmToken::Comma,
5619  "expected a comma or a closing parenthesis"))
5620  break;
5621  }
5622 
5623  return Imm;
5624 }
5625 
5626 OperandMatchResultTy
5627 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5628 
5629  int64_t Imm = 0;
5630  SMLoc S = Parser.getTok().getLoc();
5631 
5632  if (getLexer().getKind() == AsmToken::Identifier &&
5633  Parser.getTok().getString() == "gpr_idx" &&
5634  getLexer().peekTok().is(AsmToken::LParen)) {
5635 
5636  Parser.Lex();
5637  Parser.Lex();
5638 
5639  // If the parse failed, trigger an error but do not return an error code
5640  // to avoid excessive error messages.
5641  Imm = parseGPRIdxMacro();
5642 
5643  } else {
5644  if (getParser().parseAbsoluteExpression(Imm))
5645  return MatchOperand_NoMatch;
5646  if (Imm < 0 || !isUInt<4>(Imm)) {
5647  Error(S, "invalid immediate: only 4-bit values are legal");
5648  }
5649  }
5650 
5651  Operands.push_back(
5652  AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5653  return MatchOperand_Success;
5654 }
5655 
5656 bool AMDGPUOperand::isGPRIdxMode() const {
5657  return isImmTy(ImmTyGprIdxMode);
5658 }
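// A sketch of the two accepted forms of this operand (the instruction and
// register operands are illustrative; the mode names come from
// VGPRIndexMode::IdSymbolic):
//   s_set_gpr_idx_on s0, 1                   // raw 4-bit immediate
//   s_set_gpr_idx_on s0, gpr_idx(SRC0,DST)   // symbolic gpr_idx macro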
5659 
5660 //===----------------------------------------------------------------------===//
5661 // sopp branch targets
5662 //===----------------------------------------------------------------------===//
5663 
5664 OperandMatchResultTy
5665 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5666 
5667  // Make sure we are not parsing something
5668  // that looks like a label or an expression but is not.
5669  // This will improve error messages.
5670  if (isRegister() || isModifier())
5671  return MatchOperand_NoMatch;
5672 
5673  if (parseExpr(Operands)) {
5674 
5675  AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5676  assert(Opr.isImm() || Opr.isExpr());
5677  SMLoc Loc = Opr.getStartLoc();
5678 
5679  // Currently we do not support arbitrary expressions as branch targets.
5680  // Only labels and absolute expressions are accepted.
5681  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5682  Error(Loc, "expected an absolute expression or a label");
5683  } else if (Opr.isImm() && !Opr.isS16Imm()) {
5684  Error(Loc, "expected a 16-bit signed jump offset");
5685  }
5686  }
5687 
5688  return MatchOperand_Success; // avoid excessive error messages
5689 }
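// Illustrative examples of the branch targets accepted above (the label and
// offset are made up):
//   s_branch loop_body   // label: kept as a symbol reference expression
//   s_branch 8           // absolute expression: must fit in a signed 16-bit offset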
5690 
5691 //===----------------------------------------------------------------------===//
5692 // Boolean holding registers
5693 //===----------------------------------------------------------------------===//
5694 
5695 OperandMatchResultTy
5696 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5697  return parseReg(Operands);
5698 }
5699 
5700 //===----------------------------------------------------------------------===//
5701 // mubuf
5702 //===----------------------------------------------------------------------===//
5703 
5704 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5705  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5706 }
5707 
5708 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5709  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5710 }
5711 
5712 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5713  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5714 }
5715 
5716 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5717  const OperandVector &Operands,
5718  bool IsAtomic,
5719  bool IsAtomicReturn,
5720  bool IsLds) {
5721  bool IsLdsOpcode = IsLds;
5722  bool HasLdsModifier = false;
5723  OptionalImmIndexMap OptionalIdx;
5724  assert(IsAtomicReturn ? IsAtomic : true);
5725  unsigned FirstOperandIdx = 1;
5726 
5727  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5728  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5729 
5730  // Add the register arguments
5731  if (Op.isReg()) {
5732  Op.addRegOperands(Inst, 1);
5733  // Insert a tied src for atomic return dst.
5734  // This cannot be postponed as subsequent calls to
5735  // addImmOperands rely on correct number of MC operands.
5736  if (IsAtomicReturn && i == FirstOperandIdx)
5737  Op.addRegOperands(Inst, 1);
5738  continue;
5739  }
5740 
5741  // Handle the case where soffset is an immediate
5742  if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5743  Op.addImmOperands(Inst, 1);
5744  continue;
5745  }
5746 
5747  HasLdsModifier |= Op.isLDS();
5748 
5749  // Handle tokens like 'offen' which are sometimes hard-coded into the
5750  // asm string. There are no MCInst operands for these.
5751  if (Op.isToken()) {
5752  continue;
5753  }
5754  assert(Op.isImm());
5755 
5756  // Handle optional arguments
5757  OptionalIdx[Op.getImmTy()] = i;
5758  }
5759 
5760  // This is a workaround for an llvm quirk which may result in an
5761  // incorrect instruction selection. Lds and non-lds versions of
5762  // MUBUF instructions are identical except that lds versions
5763  // have a mandatory 'lds' modifier. However, this modifier follows
5764  // the optional modifiers, and the llvm asm matcher regards the 'lds'
5765  // modifier as an optional one. As a result, an lds version
5766  // of an opcode may be selected even if it has no 'lds' modifier.
5767  if (IsLdsOpcode && !HasLdsModifier) {
5768  int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5769  if (NoLdsOpcode != -1) { // Got lds version - correct it.
5770  Inst.setOpcode(NoLdsOpcode);
5771  IsLdsOpcode = false;
5772  }
5773  }
5774 
5775  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5776  if (!IsAtomic) { // glc is hard-coded.
5777  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5778  }
5779  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5780 
5781  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5782  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5783  }
5784 
5785  if (isGFX10())
5786  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5787 }
5788 
5789 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5790  OptionalImmIndexMap OptionalIdx;
5791 
5792  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5793  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5794 
5795  // Add the register arguments
5796  if (Op.isReg()) {
5797  Op.addRegOperands(Inst, 1);
5798  continue;
5799  }
5800 
5801  // Handle the case where soffset is an immediate
5802  if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5803  Op.addImmOperands(Inst, 1);
5804  continue;
5805  }
5806 
5807  // Handle tokens like 'offen' which are sometimes hard-coded into the
5808  // asm string. There are no MCInst operands for these.
5809  if (Op.isToken()) {
5810  continue;
5811  }
5812  assert(Op.isImm());
5813 
5814  // Handle optional arguments
5815  OptionalIdx[Op.getImmTy()] = i;
5816  }
5817 
5818  addOptionalImmOperand(Inst, Operands, OptionalIdx,
5819  AMDGPUOperand::ImmTyOffset);
5820  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5821  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5822  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5823  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5824 
5825  if (isGFX10())
5826  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5827 }
5828 
5829 //===----------------------------------------------------------------------===//
5830 // mimg
5831 //===----------------------------------------------------------------------===//
5832 
5833 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5834  bool IsAtomic) {
5835  unsigned I = 1;
5836  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5837  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5838  ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5839  }
5840 
5841  if (IsAtomic) {
5842  // Add src, same as dst
5843  assert(Desc.getNumDefs() == 1);
5844  ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5845  }
5846 
5847  OptionalImmIndexMap OptionalIdx;
5848 
5849  for (unsigned E = Operands.size(); I != E; ++I) {
5850  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5851 
5852  // Add the register arguments
5853  if (Op.isReg()) {
5854  Op.addRegOperands(Inst, 1);
5855  } else if (Op.isImmModifier()) {
5856  OptionalIdx[Op.getImmTy()] = I;
5857  } else if (!Op.isToken()) {
5858  llvm_unreachable("unexpected operand type");
5859  }
5860  }
5861 
5862  bool IsGFX10 = isGFX10();
5863 
5864  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5865  if (IsGFX10)
5866  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5867  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5868  if (IsGFX10)
5869  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5870  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5871  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5872  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5873  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5874  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5875  if (!IsGFX10)
5876  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5877  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5878 }
5879 
5880 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5881  cvtMIMG(Inst, Operands, true);
5882 }
5883 
5884 //===----------------------------------------------------------------------===//
5885 // smrd
5886 //===----------------------------------------------------------------------===//
5887 
5888 bool AMDGPUOperand::isSMRDOffset8() const {
5889  return isImm() && isUInt<8>(getImm());
5890 }
5891 
5892 bool AMDGPUOperand::isSMRDOffset20() const {
5893  return isImm() && isUInt<20>(getImm());
5894 }
5895 
5896 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5897  // 32-bit literals are only supported on CI and we only want to use them
5898  // when the offset is wider than 8 bits.
5899  return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5900 }
5901 
5902 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5903  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5904 }
5905 
5906 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5907  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5908 }
5909 
5910 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5911  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5912 }
5913 
5914 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
5915  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5916 }
5917 
5918 //===----------------------------------------------------------------------===//
5919 // vop3
5920 //===----------------------------------------------------------------------===//
5921 
5922 static bool ConvertOmodMul(int64_t &Mul) {
5923  if (Mul != 1 && Mul != 2 && Mul != 4)
5924  return false;
5925 
5926  Mul >>= 1;
5927  return true;
5928 }
5929 
5930 static bool ConvertOmodDiv(int64_t &Div) {
5931  if (Div == 1) {
5932  Div = 0;
5933  return true;
5934  }
5935 
5936  if (Div == 2) {
5937  Div = 3;
5938  return true;
5939  }
5940 
5941  return false;
5942 }
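// Derived from the two converters above, the omod syntax maps onto the
// output-modifier field as follows (the instruction is illustrative):
//   v_add_f32_e64 v0, v1, v2 mul:2   // omod = 1
//   v_add_f32_e64 v0, v1, v2 mul:4   // omod = 2
//   v_add_f32_e64 v0, v1, v2 div:2   // omod = 3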
5943 
5944 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5945  if (BoundCtrl == 0) {
5946  BoundCtrl = 1;
5947  return true;
5948  }
5949 
5950  if (BoundCtrl == -1) {
5951  BoundCtrl = 0;
5952  return true;
5953  }
5954 
5955  return false;
5956 }
5957 
5958 // Note: the order in this table matches the order of operands in AsmString.
5959 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5960  {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr},
5961  {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr},
5962  {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr},
5963  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5964  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5965  {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr},
5966  {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
5967  {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
5968  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5969  {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr},
5970  {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5971  {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
5972  {"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
5973  {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
5974  {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
5975  {"high", AMDGPUOperand::ImmTyHigh, true, nullptr},
5976  {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr},
5977  {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5978  {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
5979  {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
5980  {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
5981  {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr},
5982  {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
5983  {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
5984  {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
5985  {"dim", AMDGPUOperand::ImmTyDim, false, nullptr},
5986  {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5987  {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5988  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5989  {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
5990  {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5991  {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5992  {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5993  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5994  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5995  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5996  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5997  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5998  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5999  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6000  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6001  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6002  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6003 };
6004 
6005 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6006  unsigned size = Operands.size();
6007  assert(size > 0);
6008 
6009  OperandMatchResultTy res = parseOptionalOpr(Operands);
6010 
6011  // This is a hack to enable hardcoded mandatory operands which follow
6012  // optional operands.
6013  //
6014  // The current design assumes that all operands after the first optional
6015  // operand are also optional. However, the implementation of some instructions
6016  // violates this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
6017  //
6018  // To alleviate this problem, we have to (implicitly) parse extra operands
6019  // to make sure the autogenerated parser of custom operands never hits
6020  // hardcoded mandatory operands.
6021 
6022  if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
6023 
6024  // We have parsed the first optional operand.
6025  // Parse as many operands as necessary to skip all mandatory operands.
6026 
6027  for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6028  if (res != MatchOperand_Success ||
6029  getLexer().is(AsmToken::EndOfStatement)) break;
6030  if (getLexer().is(AsmToken::Comma)) Parser.Lex();
6031  res = parseOptionalOpr(Operands);
6032  }
6033  }
6034 
6035  return res;
6036 }
6037 
6038 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6039  OperandMatchResultTy res;
6040  for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6041  // try to parse any optional operand here
6042  if (Op.IsBit) {
6043  res = parseNamedBit(Op.Name, Operands, Op.Type);
6044  } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6045  res = parseOModOperand(Operands);
6046  } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6047  Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6048  Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6049  res = parseSDWASel(Operands, Op.Name, Op.Type);
6050  } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6051  res = parseSDWADstUnused(Operands);
6052  } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6053  Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6054  Op.Type == AMDGPUOperand::ImmTyNegLo ||
6055  Op.Type == AMDGPUOperand::ImmTyNegHi) {
6056  res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6057  Op.ConvertResult);
6058  } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6059  res = parseDim(Operands);
6060  } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6061  res = parseDfmtNfmt(Operands);
6062  } else {
6063  res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6064  }
6065  if (res != MatchOperand_NoMatch) {
6066  return res;
6067  }
6068  }
6069  return MatchOperand_NoMatch;
6070 }
6071 
6072 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6073  StringRef Name = Parser.getTok().getString();
6074  if (Name == "mul") {
6075  return parseIntWithPrefix("mul", Operands,
6076  AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6077  }
6078 
6079  if (Name == "div") {
6080  return parseIntWithPrefix("div", Operands,
6081  AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6082  }
6083 
6084  return MatchOperand_NoMatch;
6085 }
6086 
6087 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6088  cvtVOP3P(Inst, Operands);
6089 
6090  int Opc = Inst.getOpcode();
6091 
6092  int SrcNum;
6093  const int Ops[] = { AMDGPU::OpName::src0,
6094  AMDGPU::OpName::src1,
6095  AMDGPU::OpName::src2 };
6096  for (SrcNum = 0;
6097  SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6098  ++SrcNum);
6099  assert(SrcNum > 0);
6100 
6101  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6102  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6103 
6104  if ((OpSel & (1 << SrcNum)) != 0) {
6105  int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6106  uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6107  Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6108  }
6109 }
6110 
6111 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6112  // 1. This operand is an input-modifiers operand
6113  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6114  // 2. This is not the last operand
6115  && Desc.NumOperands > (OpNum + 1)
6116  // 3. The next operand is a register class
6117  && Desc.OpInfo[OpNum + 1].RegClass != -1
6118  // 4. The next register is not tied to any other operand
6119  && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6120 }
6121 
6122 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6123 {
6124  OptionalImmIndexMap OptionalIdx;
6125  unsigned Opc = Inst.getOpcode();
6126 
6127  unsigned I = 1;
6128  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6129  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6130  ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6131  }
6132 
6133  for (unsigned E = Operands.size(); I != E; ++I) {
6134  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6135  if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6136  Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6137  } else if (Op.isInterpSlot() ||
6138  Op.isInterpAttr() ||
6139  Op.isAttrChan()) {
6140  Inst.addOperand(MCOperand::createImm(Op.getImm()));
6141  } else if (Op.isImmModifier()) {
6142  OptionalIdx[Op.getImmTy()] = I;
6143  } else {
6144  llvm_unreachable("unhandled operand type");
6145  }
6146  }
6147 
6148  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6149  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6150  }
6151 
6152  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6153  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6154  }
6155 
6156  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6157  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6158  }
6159 }
6160 
6161 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6162  OptionalImmIndexMap &OptionalIdx) {
6163  unsigned Opc = Inst.getOpcode();
6164 
6165  unsigned I = 1;
6166  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6167  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6168  ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6169  }
6170 
6171  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6172  // This instruction has src modifiers
6173  for (unsigned E = Operands.size(); I != E; ++I) {
6174  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6175  if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6176  Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6177  } else if (Op.isImmModifier()) {
6178  OptionalIdx[Op.getImmTy()] = I;
6179  } else if (Op.isRegOrImm()) {
6180  Op.addRegOrImmOperands(Inst, 1);
6181  } else {
6182  llvm_unreachable("unhandled operand type");
6183  }
6184  }
6185  } else {
6186  // No src modifiers
6187  for (unsigned E = Operands.size(); I != E; ++I) {
6188  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6189  if (Op.isMod()) {
6190  OptionalIdx[Op.getImmTy()] = I;
6191  } else {
6192  Op.addRegOrImmOperands(Inst, 1);
6193  }
6194  }
6195  }
6196 
6197  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6198  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6199  }
6200 
6201  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6202  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6203  }
6204 
6205  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6206  // these have a src2 register operand that is tied to the dst operand.
6207  // We don't allow modifiers for this operand in the assembler, so
6208  // src2_modifiers should be 0.
6209  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6210  Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6211  Opc == AMDGPU::V_MAC_F32_e64_vi ||
6212  Opc == AMDGPU::V_MAC_F16_e64_vi ||
6213  Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6214  Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6215  Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6216  auto it = Inst.begin();
6217  std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6218  it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6219  ++it;
6220  Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6221  }
6222 }
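// For illustration (operands are made up): for a source form such as
//   v_mac_f32 v0, v1, v2
// only two explicit sources are written; the special case above appends a
// zero src2_modifiers operand and reuses the dst register (v0) as the tied
// src2 operand.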
6223 
6224 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6225  OptionalImmIndexMap OptionalIdx;
6226  cvtVOP3(Inst, Operands, OptionalIdx);
6227 }
6228 
6229 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6230  const OperandVector &Operands) {
6231  OptionalImmIndexMap OptIdx;
6232  const int Opc = Inst.getOpcode();
6233  const MCInstrDesc &Desc = MII.get(Opc);
6234 
6235  const bool